[Zope-CVS] CVS: Products/ZCTextIndex - ParseTree.py:1.1.2.1 QueryParser.py:1.1.2.11

Guido van Rossum guido@python.org
Mon, 6 May 2002 12:36:28 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv20133

Modified Files:
      Tag: TextIndexDS9-branch
	QueryParser.py 
Added Files:
      Tag: TextIndexDS9-branch
	ParseTree.py 
Log Message:
Move the parser output (the exception it raises and the parse tree
nodes) to a separate module, so other pluggable parsers can more
easily share these.  (Maybe these belong in IQueryParser???)

Changed the parser so that it parses "foo bar" the same as "foo OR
bar".  (Though maybe this should be "foo AND bar"?)



=== Added File Products/ZCTextIndex/ParseTree.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################

"""Generic parser support: exception and parse tree nodes."""

class ParseError(Exception):
    """Exception raised by a query parser when the query cannot be parsed."""

class ParseTreeNode:
    """Base class for parse tree nodes.

    A node stores a single value and exposes a type tag through
    nodeType().  The base implementation of terms() treats the value
    as an iterable of objects that themselves provide terms();
    subclasses whose value has a different shape override terms().
    """

    # Type tag reported by nodeType(); subclasses set their own string.
    _nodeType = None

    def __init__(self, value):
        """Remember the value wrapped by this node."""
        self._value = value

    def nodeType(self):
        """Return this node's type tag (None for the base class)."""
        return self._nodeType

    def getValue(self):
        """Return the value passed to the constructor."""
        return self._value

    def __repr__(self):
        """Return 'ClassName(<repr of value>)'."""
        class_name = self.__class__.__name__
        return "%s(%r)" % (class_name, self.getValue())

    def terms(self):
        """Return one flat list with the terms of every child, in order."""
        return [term
                for child in self.getValue()
                for term in child.terms()]

class NotNode(ParseTreeNode):
    """Parse tree node tagged "NOT"."""

    _nodeType = "NOT"

    def terms(self):
        """Return an empty list: a NOT node reports no terms."""
        return list()

class AndNode(ParseTreeNode):
    """Parse tree node tagged "AND"; terms() is inherited from the base."""

    _nodeType = "AND"

class OrNode(ParseTreeNode):
    """Parse tree node tagged "OR"; terms() is inherited from the base."""

    _nodeType = "OR"

class AtomNode(ParseTreeNode):
    """Parse tree node tagged "ATOM"; its value is itself the term."""

    _nodeType = "ATOM"

    def terms(self):
        """Return a one-element list holding this node's value."""
        value = self.getValue()
        return [value]


=== Products/ZCTextIndex/QueryParser.py 1.1.2.10 => 1.1.2.11 ===
 """
 
-import operator
 import re
 
+import ParseTree # relative import
+
 # Create unique symbols for token types.
 _AND    = intern("AND")
 _OR     = intern("OR")
@@ -50,9 +51,6 @@
     _RPAREN:    _RPAREN,
 }
 
-class ParseError(Exception):
-    pass
-
 class QueryParser:
 
     def __init__(self):
@@ -60,7 +58,7 @@
 
     def parseQuery(self, query):
         # Lexical analysis.
-        tokens = re.findall(r"[()]|[^\s()]+", query)
+        tokens = re.findall(r"[()]|\w+", query)
         self.__tokens = tokens
         # classify tokens
         self.__tokentypes = [_EOF] * len(tokens)
@@ -82,7 +80,8 @@
     def _require(self, tokentype):
         if not self._check(tokentype):
             t = self.__tokens[self.__index]
-            raise ParseError, "Token %r required, %r found" % (tokentype, t)
+            msg = "Token %r required, %r found" % (tokentype, t)
+            raise ParseTree.ParseError, msg
 
     def _check(self, tokentype):
         if self.__tokentypes[self.__index] is tokentype:
@@ -107,7 +106,7 @@
         if len(L) == 1:
             return L[0]
         else:
-            return OrNode(L)
+            return ParseTree.OrNode(L)
 
     def _parseAndExpr(self):
         L = []
@@ -117,11 +116,11 @@
         if len(L) == 1:
             return L[0]
         else:
-            return AndNode(L)
+            return ParseTree.AndNode(L)
 
     def _parseNotExpr(self):
         if self._check(_NOT):
-            return NotNode(self._parseTerm())
+            return ParseTree.NotNode(self._parseTerm())
         else:
             return self._parseTerm()
 
@@ -133,48 +132,8 @@
             atoms = [self._get(_ATOM)]
             while self._peek(_ATOM):
                 atoms.append(self._get(_ATOM))
-            t = " ".join(atoms)
-            tree = AtomNode(t)
+            if len(atoms) == 1:
+                tree = ParseTree.AtomNode(atoms[0])
+            else:
+                tree = ParseTree.OrNode([ParseTree.AtomNode(t) for t in atoms])
         return tree
-
-
-class ParseTreeNode:
-
-    _nodeType = None
-
-    def __init__(self, value):
-        self._value = value
-
-    def nodeType(self):
-        return self._nodeType
-
-    def getValue(self):
-        return self._value
-
-    def __repr__(self):
-        return "%s(%r)" % (self.__class__.__name__, self.getValue())
-
-    def terms(self):
-        return reduce(operator.add, [v.terms() for v in self.getValue()])
-
-class NotNode(ParseTreeNode):
-
-    _nodeType = "NOT"
-
-    def terms(self):
-        return []
-
-class AndNode(ParseTreeNode):
-
-    _nodeType = "AND"
-
-class OrNode(ParseTreeNode):
-
-    _nodeType = "OR"
-
-class AtomNode(ParseTreeNode):
-
-    _nodeType = "ATOM"
-
-    def terms(self):
-        return [self.getValue()]