[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.10
Andreas Jung
andreas@digicool.com
Mon, 14 Jan 2002 15:46:55 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv31579
Modified Files:
Tag: ajung-textindexng-branch
TextIndexNG.py
Log Message:
- removed the old parser for TextIndex queries
- replaced it with a parser generated by kwParsing
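For readers following the branch: the new code hands the query string to the kwParsing-generated parser (queryparser.TextIndexGgen), fetches the parse result from a shared Collector instance and evaluates it as a Python expression over index lookups (see the new query() method and the "XXX: Hack" comment in the diff below). What follows is only a minimal, self-contained sketch of that pattern; TinyCollector, TinyIndex, translate() and the operator table are hypothetical stand-ins and are not part of TextIndexGgen or kwParsing.

# Minimal sketch of the new query flow.  Everything here is a stand-in:
# the real branch calls the generated parser (self._parser.DoParse1) and
# reads the Collector instance C imported from queryparser.TextIndexGgen.

class TinyCollector:
    """Holds the expression produced while 'parsing' a query."""
    def __init__(self):
        self._result = None
    def setResult(self, expr):
        self._result = expr
    def getResult(self):
        return self._result

C = TinyCollector()

_OPERATORS = {'and': '&', 'or': '|'}

def translate(query):
    # Stand-in for the generated grammar: each search term becomes an
    # index lookup, boolean operators become Python set operators.
    parts = []
    for token in query.split():
        parts.append(_OPERATORS.get(token.lower(), 'self[%r]' % token))
    C.setResult(' '.join(parts))

class TinyIndex:
    def __init__(self, data):
        self._data = data                  # word -> set of document ids
    def __getitem__(self, word):
        return self._data.get(word, set())
    def query(self, q):
        translate(q)                       # real code: self._parser.DoParse1(q)
        parsed_query = C.getResult()       # real code: parsed_query = C.getResult()
        return eval(parsed_query)          # real code: res = eval(parsed_query)

index = TinyIndex({'zope': set([1, 2, 3]), 'cvs': set([2, 3, 4])})
print(index.query('zope and cvs'))         # -> documents 2 and 3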
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.9 => 1.2.2.10 ===
from BTrees.IIBTree import weightedIntersection
-from Products.PluginIndexes.TextIndex.Lexicon import Lexicon
-from Products.PluginIndexes.TextIndex.GlobbingLexicon import GlobbingLexicon
+from LexiconNG import LexiconNG
+from GlobbingLexiconNG import GlobbingLexiconNG
from Products.PluginIndexes.TextIndex import Splitter
from ProximityLexicon import ProximityLexicon
-
from types import IntType, StringType, UnicodeType, InstanceType
from TextOperators import *
from TextIndexCommon import *
+from queryparser.TextIndexGgen import LoadTextIndexG
+
+from queryparser.TextIndexGgen import Collector,C
import Stemmer
import Proximity
@@ -85,10 +87,10 @@
self.splitterMaxLen= getattr(extra,'splitterMaxLen', 64)
# index numbers
- self.splitterIndexNumbers = getattr(extra,'splitterIndexNumbers')
+ self.splitterIndexNumbers = getattr(extra,'splitterIndexNumbers',0)
# allow single characters
- self.splitterSingleChars = getattr(extra,'splitterSingleChars')
+ self.splitterSingleChars = getattr(extra,'splitterSingleChars',0)
# name of stemmer or None
self.useStemmer = getattr(extra,'useStemmer', None) or None
@@ -120,11 +122,13 @@
# Thesaurus: either filename or StopWord object
self.thesaurus = getattr(extra,'thesaurus', None) or None
-
if not self.nearStorage in ('internal','documentLookup'):
raise ValueError,'nearStorage must be either "internal"'\
' or "documentLookup"'
+ # get instance for query parser
+ self._parser = LoadTextIndexG()
+
self.clear()
@@ -197,7 +201,7 @@
if self.useGlobbing:
- self._LEXICON = GlobbingLexicon()
+ self._LEXICON = GlobbingLexiconNG()
debug('created new globbing lexicon')
if self._v_stemmerfunc:
@@ -205,7 +209,7 @@
self._v_stemmerfunc = None
else:
- self._LEXICON = Lexicon()
+ self._LEXICON = LexiconNG()
debug('created new lexicon')
@@ -493,18 +497,29 @@
"for a TextIndex" % qop)
r = None
- for key in record.keys:
- key = key.strip()
- if not key:
- continue
+ q = record.keys[0]
- b = self.query(key, query_operator).keys()
- w, r = weightedIntersection(r, b)
+ res = self.query( q )
- if r is not None:
- return r, (self.id,)
-
- return (IIBucket(), (self.id,))
+ print res
+ return res
+
+
+ def query(self, q):
+ """ to be finished """
+
+ print "query",q
+
+ self._parser.DoParse1( q )
+ # XXX: Hack !!!
+ parsed_query = C.getResult()
+ print "parsed query", parsed_query
+
+ res = eval( parsed_query )
+
+ print "result",res
+
+ return res
def positionsFromDocumentLookup(self,docId, words):
@@ -569,121 +584,6 @@
debug(k,v)
return res
-
-
- def query(self, s, default_operator=Or):
- """ Evaluate a query string.
-
- Convert the query string into a data structure of nested lists
- and strings, based on the grouping of whitespace-separated
- strings by parentheses and quotes. The 'Near' operator is
- inserted between the strings of a quoted group.
-
- The Lexicon is given the opportunity to transform the
- data structure. Stemming, wildcards, and translation are
- possible Lexicon services.
-
- Finally, the query list is normalized so that it and every
- sub-list consist of non-operator strings or lists separated
- by operators. This list is evaluated.
- """
-
- # First replace any occurrences of " and not " with " andnot "
- s = re.sub('(?i)\s+and\s*not\s+', ' andnot ', s)
-
- # Parse parentheses and quotes
- q = parse(s)
-
- # Allow the Lexicon to process the query
- q = self.getLexicon().query_hook(q)
-
- # Insert the default operator between any two search terms not
- # already joined by an operator.
- q = parse2(q, default_operator)
- debug('before eval',q)
-
- # evaluate the final 'expression'
- return self.evaluate(q)
-
-
- def get_operands(self, q, i):
-
- """Evaluate and return the left and right operands for an operator"""
-
- try:
- left = q[i - 1]
- right = q[i + 1]
- except IndexError:
- raise QueryError, "Malformed query"
-
- if isinstance(left, IntType):
- left = self[left]
- elif isinstance(left, StringType) or isinstance(left,UnicodeType):
- left = self[left]
- elif isinstance(left, ListType):
- left = self.evaluate(left)
-
- if isinstance(right, IntType):
- right = self[right]
- elif isinstance(right, StringType) or isinstance(right,UnicodeType):
- right = self[right]
- elif isinstance(right, ListType):
- right = self.evaluate(right)
-
- return (left, right)
-
-
-
- def evaluate(self, query):
- """Evaluate a parsed query"""
-
- # Strip off meaningless layers
- while isinstance(query, ListType) and len(query) == 1:
- query = query[0]
-
- # If it's not a list, assume a string or number
- if not isinstance(query, ListType):
- return self[query]
-
- # Now we need to loop through the query and reduce
- # operators. They are currently evaluated in the following
- # order: AndNot -> And -> Or -> Near
- i = 0
- while (i < len(query)):
- if query[i] is AndNot:
- left, right = self.get_operands(query, i)
- val = left.and_not(right)
- query[(i - 1) : (i + 2)] = [ val ]
- else: i = i + 1
-
- i = 0
- while (i < len(query)):
- if query[i] is And:
- left, right = self.get_operands(query, i)
- val = left & right
- query[(i - 1) : (i + 2)] = [ val ]
- else: i = i + 1
-
- i = 0
- while (i < len(query)):
- if query[i] is Or:
- left, right = self.get_operands(query, i)
- val = left | right
- query[(i - 1) : (i + 2)] = [ val ]
- else: i = i + 1
-
- i = 0
- while (i < len(query)):
- if query[i] is Near:
- left, right = self.get_operands(query, i)
- val = left.near(right)
- query[(i - 1) : (i + 2)] = [ val ]
- else: i = i + 1
-
- if (len(query) != 1):
- raise QueryError, "Malformed query"
-
- return query[0]
def numObjects(self):