[Zope-CVS] CVS: Products/ZCTextIndex - OkapiIndex.py:1.4
Tim Peters
tim.one@comcast.net
Tue, 14 May 2002 20:11:32 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv21396
Modified Files:
OkapiIndex.py
Log Message:
Use the new SetOps for mass union/intersection.
=== Products/ZCTextIndex/OkapiIndex.py 1.3 => 1.4 ===
from BTrees.IOBTree import IOBTree
-from BTrees.IIBTree import IIBTree, IIBucket, IISet
-from BTrees.IIBTree import weightedIntersection, weightedUnion
+from BTrees.IIBTree import IIBTree, IIBucket
from Products.ZCTextIndex.IIndex import IIndex
-from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.NBest import NBest
+from Products.ZCTextIndex import WidCode
+from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
+ mass_weightedUnion
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
@@ -98,15 +99,15 @@
def search(self, term):
wids = self._lexicon.termToWordIds(term)
- return self._union(self._search_wids(wids))
+ return mass_weightedUnion(self._search_wids(wids))
def search_glob(self, pattern):
wids = self._lexicon.globToWordIds(pattern)
- return self._union(self._search_wids(wids))
+ return mass_weightedUnion(self._search_wids(wids))
def search_phrase(self, phrase):
wids = self._lexicon.termToWordIds(phrase)
- hits = self._intersection(self._search_wids(wids))
+ hits = mass_weightedIntersection(self._search_wids(wids))
if not hits:
return hits
code = WidCode.encode(wids)
@@ -155,34 +156,6 @@
# skating near the edge, it's not a speed cure, since the computation
# of tf would still be done at Python speed, and it's a lot more
# work than just multiplying by idf.
-
- def _intersection(self, L):
- if not L:
- return IIBTree()
- # Intersect with smallest first.
- L = L[:] # don't mutate the caller's L
- L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
- d2w, weight = L[0]
- dummy, result = weightedUnion(IIBTree(), d2w, 1, weight)
- for d2w, weight in L[1:]:
- dummy, result = weightedIntersection(result, d2w, 1, weight)
- return result
-
- def _union(self, L):
- if not L:
- return IIBTree()
- # Balance unions as closely as possible, smallest to largest.
- merge = NBest(len(L))
- for x, weight in L:
- merge.add((x, weight), len(x))
- while len(merge) > 1:
- # Merge the two smallest so far, and add back to the queue.
- (x, wx), dummy = merge.pop_smallest()
- (y, wy), dummy = merge.pop_smallest()
- dummy, z = weightedUnion(x, y, wx, wy)
- merge.add((z, 1), len(z))
- (result, weight), score = merge.pop_smallest()
- return result
def query_weight(self, terms):
# XXX I have no idea what to put here