[Zope-CVS] CVS: Products/ZCTextIndex - IQueryParser.py:1.1.2.4 ParseTree.py:1.1.2.2 ZCTextIndex.py:1.1.2.13 IQueryEngine.py:NONE QueryEngine.py:NONE
Guido van Rossum
guido@python.org
Mon, 6 May 2002 13:05:35 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv4358/lib/python/Products/ZCTextIndex
Modified Files:
Tag: TextIndexDS9-branch
IQueryParser.py ParseTree.py ZCTextIndex.py
Removed Files:
Tag: TextIndexDS9-branch
IQueryEngine.py QueryEngine.py
Log Message:
More refactoring: move the query engine functionality into the parse
tree node implementation. (The query engine test remains, to test
this functionality in the parse tree.)
=== Products/ZCTextIndex/IQueryParser.py 1.1.2.3 => 1.1.2.4 ===
Return a parse tree (which implements IQueryParseTree).
+
+ May raise ParseTree.ParseError.
"""
class IQueryParseTree(Interface.Base):
@@ -41,5 +43,19 @@
'AND' a list of parse trees
'OR' a list of parse trees
'NOT' a parse tree
- 'ATOM' a string
+ 'ATOM' a string (representing a single search term)
+ """
+
+ def terms():
+ """Return a list of all terms in this node, excluding NOT subtrees."""
+
+ def executeQuery(index):
+ """Execute the query represented by this node against the index.
+
+ The index argument must implement the IIndex interface.
+
+ Return an IIBucket or IIBTree mapping document ids to scores
+ (higher scores mean better results).
+
+ May raise ParseTree.QueryError.
"""
=== Products/ZCTextIndex/ParseTree.py 1.1.2.1 => 1.1.2.2 ===
"""Generic parser support: exception and parse tree nodes."""
+from BTrees.IIBTree import difference, weightedIntersection, weightedUnion
+from Products.ZCTextIndex.NBest import NBest
+
+class QueryError(Exception):
+ pass
+
class ParseError(Exception):
pass
@@ -39,6 +45,9 @@
t.extend(v.terms())
return t
+ def executeQuery(self, index):
+ raise NotImplementedError
+
class NotNode(ParseTreeNode):
_nodeType = "NOT"
@@ -46,17 +55,60 @@
def terms(self):
return []
+ def executeQuery(self, index):
+ raise QueryError, "NOT operator must occur right after AND"
+
class AndNode(ParseTreeNode):
_nodeType = "AND"
+ def executeQuery(self, index):
+ L = []
+ Nots = []
+ for subnode in self.getValue():
+ if subnode.nodeType() == "NOT":
+ Nots.append(subnode.getValue().executeQuery(index))
+ else:
+ L.append(subnode.executeQuery(index))
+ assert L
+ L.sort(lambda x, y: cmp(len(x), len(y)))
+ set = L[0]
+ for x in L[1:]:
+ dummy, set = weightedIntersection(set, x)
+ if Nots:
+ Nots.sort(lambda x, y: cmp(len(x), len(y)))
+ notset = Nots[0]
+ for x in Nots[1:]:
+ dummy, notset = weightedUnion(notset, x)
+ set = difference(set, notset)
+ return set
+
class OrNode(ParseTreeNode):
_nodeType = "OR"
+ def executeQuery(self, index):
+ # Balance unions as closely as possible, smallest to largest.
+ allofem = self.getValue()
+ merge = NBest(len(allofem))
+ for subnode in allofem:
+ result = subnode.executeQuery(index)
+ merge.add(result, len(result))
+ while len(merge) > 1:
+ # Merge the two smallest so far, and add back to the queue.
+ x, dummy = merge.pop_smallest()
+ y, dummy = merge.pop_smallest()
+ dummy, z = weightedUnion(x, y)
+ merge.add(z, len(z))
+ result, dummy = merge.pop_smallest()
+ return result
+
class AtomNode(ParseTreeNode):
_nodeType = "ATOM"
def terms(self):
return [self.getValue()]
+
+ def executeQuery(self, index):
+ return index.search(self.getValue())
=== Products/ZCTextIndex/ZCTextIndex.py 1.1.2.12 => 1.1.2.13 ===
self._fieldname = doc_attr
self.lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
- self.engine = QueryEngine()
self.index = Index(self.lexicon)
self.parser = QueryParser()
@@ -53,7 +52,7 @@
def query(self, query, nbest=10):
# returns a mapping from docids to scores
tree = self.parser.parseQuery(query)
- results = self.engine.executeQuery(self.index, tree)
+ results = tree.executeQuery(self.index)
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest()
=== Removed File Products/ZCTextIndex/IQueryEngine.py ===
=== Removed File Products/ZCTextIndex/QueryEngine.py ===