[Zope] Re: Non-caching version of POPMail ?
Chris McDonough
chrism@digicool.com
Tue, 3 Apr 2001 19:32:52 -0400
Whoops, wrong URL...
Here it actually is: ;-)
[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\
015()]*(?:\\[^\x80-\xff][^\\\x80-\
xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:(?:[^(\040)<>@,;:
".\\\[\]\000-\037\x80-\xff]+(?![^(
\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\
xff][^\\\x80-\xff\n\015"]*)*")[\04
0\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015(
)]*(?:\\[^\x80-\xff][^\\\x80-\xff\
n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\
x80-\xff\n\015()]*(?:(?:\\[^\x80-\
xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[
^\\\x80-\xff\n\015()]*)*\)[\040\t]
*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\00
0-\037\x80-\xff])|"[^\\\x80-\xff\n
\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xf
f\n\015()]*(?:(?:\\[^\x80-\xff]|\(
[^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80
-\xff\n\015()]*)*\)[\040\t]*)*)*@[
\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\0
15()]*(?:\\[^\x80-\xff][^\\\x80-\x
ff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\
\[\]\000-\037\x80-\xff]+(?![^(\040
)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\
xff])*\])[\040\t]*(?:\([^\\\x80-\x
ff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][
^\\\x80-\xff\n\015()]*)*\))[^\\\x8
0-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:
(?:\\[^\x80-\xff]|\([^\\\x80-\xff\
n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]
*)*\)[\040\t]*)*(?:[^(\040)<>@,;:"
.\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(
?:[^\\\x80-\xff\n\015\[\]]|\\[^\x8
0-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\
\x80-\xff\n\015()]*(?:\\[^\x80-\xf
f][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)*|(?:[^
(\040)<>@,;:".\\\[\]\000-\037\x80-
\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(
?:\\[^\x80-\xff][^\\\x80-\xff\n\01
5"]*)*")[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]*(?:(?:\([^\\\x80-\xff
\n\015()]*(?:(?:\\[^\x80-\xff]|\([
^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-
\xff\n\015()]*)*\)|"[^\\\x80-\xff\
n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[^()<>@,;:".\\\[\]\x80-\x
ff\000-\010\012-\037]*)*<[\040\t]*
(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?
:\\[^\x80-\xff][^\\\x80-\xff\n\015
()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:@[\040\t]*(?:\([^\\\x80-\x
ff\n\015()]*(?:(?:\\[^\x80-\xff]|\
([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x8
0-\xff\n\015()]*)*\)[\040\t]*)*(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037
\x80-\xff])|\[(?:[^\\\x80-\xff\n\0
15\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x
80-\xff]|\([^\\\x80-\xff\n\015()]*
(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\04
0\t]*)*(?:\.[\040\t]*(?:\([^\\\x80
-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xf
f][^\\\x80-\xff\n\015()]*)*\))[^\\
\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xf
f]+(?![^(\040)<>@,;:".\\\[\]\000-\
037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:
\([^\\\x80-\xff\n\015()]*(?:(?:\\[
^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]
*)*\))[^\\\x80-\xff\n\015()]*)*\)[
\040\t]*)*)*(?:,[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\(
[^\\\x80-\xff\n\015()]*(?:\\[^\x80
-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*@[\0
40\t]*(?:\([^\\\x80-\xff\n\015()]*
(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff
\n\015()]*)*\))[^\\\x80-\xff\n\015
()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<
>@,;:".\\\[\]\000-\037\x80-\xff])|
\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff
\n\015()]*(?:(?:\\[^\x80-\xff]|\([
^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-
\xff\n\015()]*)*\)[\040\t]*)*(?:\.
[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\
015()]*(?:\\[^\x80-\xff][^\\\x80-\
xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\
\\[\]\000-\037\x80-\xff]+(?![^(\04
0)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-
\xff])*\])[\040\t]*(?:\([^\\\x80-\
xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff]
[^\\\x80-\xff\n\015()]*)*\))[^\\\x
80-\xff\n\015()]*)*\)[\040\t]*)*)*)*:[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?
:(?:\\[^\x80-\xff]|\([^\\\x80-\xff
\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()
]*)*\)[\040\t]*)*)?(?:[^(\040)<>@,
;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|
"[^\\\x80-\xff\n\015"]*(?:\\[^\x80
-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\
\[^\x80-\xff]|\([^\\\x80-\xff\n\01
5()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\
)[\040\t]*)*(?:\.[\040\t]*(?:\([^\
\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x8
0-\xff][^\\\x80-\xff\n\015()]*)*\)
)[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x8
0-\xff]+(?![^(\040)<>@,;:".\\\[\]\
000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\
015"]*)*")[\040\t]*(?:\([^\\\x80-\
xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff]
[^\\\x80-\xff\n\015()]*)*\))[^\\\x
80-\xff\n\015()]*)*\)[\040\t]*)*)*@[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(
?:\\[^\x80-\xff]|\([^\\\x80-\xff\n
\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*
)*\)[\040\t]*)*(?:[^(\040)<>@,;:".
\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?
:[^\\\x80-\xff\n\015\[\]]|\\[^\x80
-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\
x80-\xff\n\015()]*(?:\\[^\x80-\xff
][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\04
0\t]*(?:\([^\\\x80-\xff\n\015()]*(
?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\
n\015()]*)*\))[^\\\x80-\xff\n\015(
)]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>
@,;:".\\\[\]\000-\037\x80-\xff])|\
[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\
n\015()]*(?:(?:\\[^\x80-\xff]|\([^
\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\
xff\n\015()]*)*\)[\040\t]*)*)*>)
----- Original Message -----
From: "Chris McDonough" <chrism@digicool.com>
To: "David Shaw" <david.shaw@zapmedia.com>; "Loren Stafford"
<lstafford@morphics.com>
Cc: <zope@zope.org>
Sent: Tuesday, April 03, 2001 7:25 PM
Subject: Re: [Zope] Re: Non-caching version of POPMail ?
> Here's a fun regex for matching email addresses from that book:
>
> #
> # Program to build a regex to match an internet email address,
> # from Chapter 7 of _Mastering Regular Expressions_ (Friedl / O'Reilly)
> # (http://www.ora.com/catalog/regexp/)
> #
> # Optimized version.
> #
> # Copyright 1997 O'Reilly & Associates, Inc.
> #
>
>
>
> # Some things for avoiding backslashitis later on.
> $esc = '\\\\'; $Period = '\.';
> $space = '\040'; $tab = '\t';
> $OpenBR = '\['; $CloseBR = '\]';
> $OpenParen = '\('; $CloseParen = '\)';
> $NonASCII = '\x80-\xff'; $ctrl = '\000-\037';
> $CRlist = '\n\015'; # note: this should really be only \015.
>
> # Items 19, 20, 21
> $qtext = qq/[^$esc$NonASCII$CRlist\"]/; # for within "..."
> $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/; # for within [...]
> $quoted_pair = qq< $esc [^$NonASCII] >; # an escaped character
>
>
> ##############################################################################
> # Items 22 and 23, comment.
> # Impossible to do properly with a regex, I make do by allowing at most one
> # level of nesting.
> $ctext = qq< [^$esc$NonASCII$CRlist()] >;
>
> # $Cnested matches one non-nested comment.
> # It is unrolled, with normal of $ctext, special of $quoted_pair.
> $Cnested = qq<
> $OpenParen # (
> $ctext* # normal*
> (?: $quoted_pair $ctext* )* # (special normal*)*
> $CloseParen # )
> >;
>
> # $comment allows one level of nested parentheses
> # It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
> $comment = qq<
> $OpenParen # (
> $ctext* # normal*
> (?: # (
> (?: $quoted_pair | $Cnested ) # special
> $ctext* # normal*
> )* # )*
> $CloseParen # )
> >;
>
>
> ##############################################################################
>
> # $X is optional whitespace/comments.
> $X = qq<
> [$space$tab]* # Nab whitespace.
> (?: $comment [$space$tab]* )* # If comment found, allow more spaces.
> >;
>
>
>
> # Item 10: atom
> $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
> $atom = qq<
> $atom_char+ # some number of atom characters...
> (?!$atom_char) # ..not followed by something that could be part of an atom
> >;
>
> # Item 11: doublequoted string, unrolled.
> $quoted_str = qq<
> \" # "
> $qtext * # normal
> (?: $quoted_pair $qtext * )* # ( special normal* )*
> \" # "
> >;
>
> # Item 7: word is an atom or quoted string
> $word = qq<
> (?:
> $atom # Atom
> | # or
> $quoted_str # Quoted string
> )
> >;
>
> # Item 12: domain-ref is just an atom
> $domain_ref = $atom;
>
> # Item 13: domain-literal is like a quoted string, but [...] instead of "..."
> $domain_lit = qq<
> $OpenBR # [
> (?: $dtext | $quoted_pair )* # stuff
> $CloseBR # ]
> >;
>
> # Item 9: sub-domain is a domain-ref or domain-literal
> $sub_domain = qq<
> (?:
> $domain_ref
> |
> $domain_lit
> )
> $X # optional trailing comments
> >;
>
> # Item 6: domain is a list of subdomains separated by dots.
> $domain = qq<
> $sub_domain
> (?:
> $Period $X $sub_domain
> )*
> >;
>
> # Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
> $route = qq<
> \@ $X $domain
> (?: , $X \@ $X $domain )* # additional domains
> :
> $X # optional trailing comments
> >;
>
> # Item 6: local-part is a bunch of $word separated by periods
> $local_part = qq<
> $word $X
> (?:
> $Period $X $word $X # additional words
> )*
> >;
>
> # Item 2: addr-spec is local@domain
> $addr_spec = qq<
> $local_part \@ $X $domain
> >;
>
> # Item 4: route-addr is <route? addr-spec>
> $route_addr = qq[
> < $X # <
> (?: $route )? # optional route
> $addr_spec # address spec
> > # >
> ];
>
>
> # Item 3: phrase........
> $phrase_ctrl = '\000-\010\012-\037'; # like ctrl, but without tab
>
> # Like atom-char, but without listing space, and uses phrase_ctrl.
> # Since the class is negated, this matches the same as atom-char plus space and tab
> $phrase_char =
> qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
>
> # We've worked it so that $word, $comment, and $quoted_str do not consume
> # trailing $X because we take care of it manually.
> $phrase = qq<
> $word # leading word
> $phrase_char * # "normal" atoms and/or spaces
> (?:
> (?: $comment | $quoted_str ) # "special" comment or quoted string
> $phrase_char * # more "normal"
> )*
> >;
>
> ## Item #1: mailbox is an addr_spec or a phrase/route_addr
> $mailbox = qq<
> $X # optional leading comment
> (?:
> $addr_spec # address
> | # or
> $phrase $route_addr # name and address
> )
> >;
>
>
>
>
> ###########################################################################
> # Here's a little snippet to test it.
> # Addresses given on the commandline are described.
> #
>
> my $error = 0;
> my $valid;
> foreach $address (@ARGV) {
> $valid = $address =~ m/^$mailbox$/xo;
> printf "`$address' is syntactically %s.\n", $valid ? "valid" :
> "invalid";
> $error = 1 if not $valid;
> }
> exit $error;
>
>
>
> ----- Original Message -----
> From: "David Shaw" <david.shaw@zapmedia.com>
> To: "Loren Stafford" <lstafford@morphics.com>
> Cc: <zope@zope.org>
> Sent: Tuesday, April 03, 2001 6:37 PM
> Subject: [Zope] Re: Non-caching version of POPMail ?
>
>
> > I actually solved this on my working version in a different way. I simply
> > call UIDL on a refresh and if any of the UIDs I have are not in the UID list
> > from the server, I delete the message from the MessageDict. It accomplishes
> > the same thing without the large performance hit of not caching.
> >
> > I've started working on this product again. I'd be happy to send you my
> > current working revision if you want to take a look. It's not anything
> > significant enough yet to warrant a new release, but I am making it better.
> > My next adventure is to do better message parsing to make URLs and email
> > addresses clickable. I just got the O'Reilly Regular Expression book and
> > plan on delving into it when I get a chance.
> >
> >
> > Loren Stafford said:
> >
> > > David,
> > >
> > > I'm planning to modify POPMail (or more likely, make a derived product
> > > POPMailNc) so that there is no persistent message cache. I just wanted to
> > > pass the idea by you, so you could tell me if I'm doing something stupid or
> > > if you've already solved my problem for me in a newer version of POPMail.
> > >
> > > I'm using POPMail to couple my Zope server to another product
> > > (PerfectTracker). Zope initiates Tracker incidents via sendmail and receives
> > > responses from the Tracker at a dedicated POP3 mailbox, which it polls
> > > (using Xron) every 5-10 minutes. I've discovered that if I ever delete
> > > messages from the mailbox manually (i.e. using a POP client other than
> > > POPMail) POPMail's persistent message cache gets hopelessly out of sync with
> > > the mailbox, and it begins to deliver messages from its cache when there are
> > > new messages with the same UID in the mailbox. That's bad.
> > >
> > > While I don't need to delete messages behind POPMail's back, I can't really
> > > prevent someone from doing so, due to the nature of our mail system. So I
> > > propose to make POPMail's cache non-persistent (I guess it really wouldn't
> > > be a cache then, would it?). I don't expect a performance problem, because,
> > > if I delete old messages regularly I will never have more than a few hundred
> > > messages in the mailbox. (Messages correspond to new employees.)
> > >
> > > I think I can make uidDict and MessageDict nonpersistent simply by changing
> > > their names to _v_uidDict and _v_MessageDict. Is that correct?
> > >
> > > -- Thanks for your input
> > > -- Loren
> > >
> >
> > --
> > David Shaw -- Senior Software Developer -- ZapMedia -- 678.420.2715
> >
> >
> >
> > _______________________________________________
> > Zope maillist - Zope@zope.org
> > http://lists.zope.org/mailman/listinfo/zope
> > ** No cross posts or HTML encoding! **
> > (Related lists -
> > http://lists.zope.org/mailman/listinfo/zope-announce
> > http://lists.zope.org/mailman/listinfo/zope-dev )
> >
>
>
> _______________________________________________
> Zope maillist - Zope@zope.org
> http://lists.zope.org/mailman/listinfo/zope
> ** No cross posts or HTML encoding! **
> (Related lists -
> http://lists.zope.org/mailman/listinfo/zope-announce
> http://lists.zope.org/mailman/listinfo/zope-dev )
>