[Zope-CVS] CVS: Products/ZCTextIndex - QueryParser.py:1.1.2.6
Tim Peters
tim.one@comcast.net
Thu, 2 May 2002 18:43:47 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv8517
Modified Files:
Tag: TextIndexDS9-branch
QueryParser.py
Log Message:
Made keyword recognition case-insensitive ("AND", "and", "aNd", ... are all
treated as the same keyword).
=== Products/ZCTextIndex/QueryParser.py 1.1.2.5 => 1.1.2.6 ===
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
-#
+#
##############################################################################
"""Query Parser.
@@ -24,12 +24,29 @@
An ATOM is a string not containing whitespace or parentheses, and not
equal to one of the key words 'AND', 'OR', 'NOT'. The key words are
-only recognized in all upper case.
-
+recognized in any mixture of case.
"""
import re
+# Create unique symbols for token types.
+_AND = intern("AND")
+_OR = intern("OR")
+_NOT = intern("NOT")
+_LPAREN = intern("(")
+_RPAREN = intern(")")
+_ATOM = intern("ATOM")
+_EOF = intern(" EOF ")
+
+# Map keyword string to token type.
+_keywords = {
+ _AND: _AND,
+ _OR: _OR,
+ _NOT: _NOT,
+ _LPAREN: _LPAREN,
+ _RPAREN: _RPAREN,
+}
+
class ParseError(Exception):
pass
@@ -39,39 +56,47 @@
pass # This parser has no persistent state
def parseQuery(self, query):
- # Lexical analysis
+ # Lexical analysis.
tokens = re.findall(r"[()]|[^\s()]+", query)
self.__tokens = tokens
- self.__tokens.append(None) # EOF token
+ # classify tokens
+ self.__tokentypes = [_EOF] * len(tokens)
+ for i in range(len(tokens)):
+ token = tokens[i].upper()
+ self.__tokentypes[i] = _keywords.get(token, _ATOM)
+ # add _EOF
+ self.__tokens.append(_EOF)
+ self.__tokentypes.append(_EOF)
self.__index = 0
- # Syntactical analysis
+
+ # Syntactical analysis.
tree = self._parseOrExpr()
- self._require(None)
+ self._require(_EOF)
return tree
# Recursive descent parser
- def _require(self, token):
- if not self._check(token):
+ def _require(self, tokentype):
+ if not self._check(tokentype):
t = self.__tokens[self.__index]
- raise ParseError, "Token %r required, %r found" % (token, t)
+ raise ParseError, "Token %r required, %r found" % (tokentype, t)
- def _check(self, token):
- if self.__tokens[self.__index] == token:
+ def _check(self, tokentype):
+ if self.__tokentypes[self.__index] is tokentype:
self.__index += 1
return 1
else:
return 0
- def _get(self):
+ def _get(self, tokentype):
t = self.__tokens[self.__index]
- self.__index += 1
+ self._require(tokentype)
return t
def _parseOrExpr(self):
L = []
L.append(self._parseAndExpr())
- while self._check("OR"):
+ while self._check(_OR):
L.append(self._parseAndExpr())
if len(L) == 1:
return L[0]
@@ -81,7 +106,7 @@
def _parseAndExpr(self):
L = []
L.append(self._parseTerm())
- while self._check("AND"):
+ while self._check(_AND):
L.append(self._parseNotExpr())
if len(L) == 1:
return L[0]
@@ -89,19 +114,17 @@
return AndNode(L)
def _parseNotExpr(self):
- if self._check("NOT"):
+ if self._check(_NOT):
return NotNode(self._parseTerm())
else:
return self._parseTerm()
def _parseTerm(self):
- if self._check("("):
+ if self._check(_LPAREN):
tree = self._parseOrExpr()
- self._require(")")
+ self._require(_RPAREN)
else:
- t = self._get()
- if t in [")", "AND", "OR", "NOT", None]:
- raise ParseError("Token %r not expected" % t)
+ t = self._get(_ATOM)
tree = AtomNode(t)
return tree
@@ -124,16 +147,16 @@
class NotNode(ParseTreeNode):
- _nodeType = "NOT"
+ _nodeType = _NOT
class AndNode(ParseTreeNode):
- _nodeType = "AND"
+ _nodeType = _AND
class OrNode(ParseTreeNode):
- _nodeType = "OR"
+ _nodeType = _OR
class AtomNode(ParseTreeNode):
- _nodeType = "ATOM"
+ _nodeType = _ATOM