[Zope-Checkins] CVS: Products/ZCTextIndex - BaseIndex.py:1.26
Casey Duncan
casey@zope.com
Wed, 12 Jun 2002 17:45:53 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv24512
Modified Files:
BaseIndex.py
Log Message:
Reimplemented Index.length to use a BTrees.Length.Length object. The previous dynamic computation was way too slow for big indexes.
Updated tests to include length value checks.
=== Products/ZCTextIndex/BaseIndex.py 1.25 => 1.26 ===
from BTrees.IIBTree import IIBTree, IIBucket, IITreeSet
from BTrees.IIBTree import intersection, difference
+import BTrees.Length
from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode
@@ -52,6 +53,8 @@
class BaseIndex(Persistent):
__implements__ = IIndex
+
+ word_count = 0
def __init__(self, lexicon):
self._lexicon = lexicon
@@ -80,13 +83,18 @@
# docid -> WidCode'd list of wids
# Used for un-indexing, and for phrase search.
self._docwords = IOBTree()
-
+
+ # Use a BTree length for efficient length computation w/o conflicts
+ self.length = BTrees.Length.Length()
+
def length(self):
"""Return the number of words in the index."""
+ # This is overridden per instance
return len(self._wordinfo)
def get_words(self, docid):
"""Return a list of the wordids for a given docid."""
+ # Note this is overridden in the instance
return WidCode.decode(self._docwords[docid])
# A subclass may wish to extend or override this.
@@ -239,6 +247,7 @@
doc2score = self._wordinfo.get(wid)
if doc2score is None:
doc2score = {}
+ self.length.change(1)
else:
# _add_wordinfo() is called for each update. If the map
# size exceeds the DICT_CUTOFF, convert to an IIBTree.
@@ -262,15 +271,19 @@
def _mass_add_wordinfo(self, wid2weight, docid):
dicttype = type({})
get_doc2score = self._wordinfo.get
+ new_word_count = 0
for wid, weight in wid2weight.items():
doc2score = get_doc2score(wid)
if doc2score is None:
doc2score = {}
+ new_word_count += 1
elif (isinstance(doc2score, dicttype) and
len(doc2score) == self.DICT_CUTOFF):
doc2score = IIBTree(doc2score)
doc2score[docid] = weight
self._wordinfo[wid] = doc2score # not redundant: Persistency!
+ self.length.change(new_word_count)
+
def _del_wordinfo(self, wid, docid):
doc2score = self._wordinfo[wid]
@@ -278,6 +291,7 @@
numdocs = len(doc2score)
if numdocs == 0:
del self._wordinfo[wid]
+ self.length.change(-1)
return
if numdocs == self.DICT_CUTOFF:
new = {}