[Zope-CVS] CVS: Products/ZCTextIndex - QueryParser.py:1.1.2.17
Guido van Rossum
guido@python.org
Mon, 13 May 2002 16:26:19 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv11410
Modified Files:
Tag: TextIndexDS9-branch
QueryParser.py
Log Message:
Tighten the rules for double-quoted phrases: a quoted phrase token may
now carry only an optional leading hyphen, so foo"bar" is now two
tokens, ``foo'' and ``"bar"''.
=== Products/ZCTextIndex/QueryParser.py 1.1.2.16 => 1.1.2.17 ===
}
-# Magical regex to tokenize. A beauty, ain't it. :-)
+# Regular expression to tokenize.
_tokenizer_regex = re.compile(r"""
# a paren
[()]
- # or a fleeblegorg (something with double quotes, or lots of double quotes)
- # XXX do we really want stuff like AB"C"D"EFG"H""I to be "a token"?
-| [^()\s"]* (?: "[^"]*"
- [^()\s"]*
- )+
+ # or a string in double quotes possibly preceded by a hyphen
+| -? " [^"]* "
# or a non-empty string without whitespace, parens or double quotes
| [^()\s"]+
""", re.VERBOSE)
@@ -84,7 +81,7 @@
def parseQuery(self, query):
# Lexical analysis.
- tokens = re.findall(_tokenizer_regex, query)
+ tokens = _tokenizer_regex.findall(query)
self.__tokens = tokens
# classify tokens
self.__tokentypes = [_keywords.get(token.upper(), _ATOM)