[Zope-CVS] CVS: Products/ZCTextIndex - BaseIndex.py:1.19
Tim Peters
tim.one@comcast.net
Thu, 23 May 2002 12:21:15 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv2564
Modified Files:
BaseIndex.py
Log Message:
Changed index_doc to call a new _mass_add_wordinfo method, instead of
calling _add_wordinfo in a loop. This is a simple way to save oodles
of function calls. In a brief but non-trivial test, this boosted
overall indexing rate by 12% (so huge bang for the buck).
=== Products/ZCTextIndex/BaseIndex.py 1.18 => 1.19 ===
wids = self._lexicon.sourceToWordIds(text)
wid2weight, docweight = self._get_frequencies(wids)
- for wid, weight in wid2weight.items():
- self._add_wordinfo(wid, weight, docid)
+ self._mass_add_wordinfo(wid2weight, docid)
self._docweight[docid] = docweight
self._docwords[docid] = WidCode.encode(wids)
return len(wids)
@@ -239,6 +238,26 @@
doc2score = IIBTree(doc2score)
doc2score[docid] = f
self._wordinfo[wid] = doc2score # not redundant: Persistency!
+
+ # self._mass_add_wordinfo(wid2weight, docid)
+ #
+ # is the same as
+ #
+ # for wid, weight in wid2weight.items():
+ # self._add_wordinfo(wid, weight, docid)
+ #
+ # except that _mass_add_wordinfo doesn't require so many function calls.
+ def _mass_add_wordinfo(self, wid2weight, docid):
+ get_doc2score = self._wordinfo.get
+ for wid, weight in wid2weight.items():
+ doc2score = get_doc2score(wid)
+ if doc2score is None:
+ doc2score = {}
+ else:
+ if len(doc2score) == self.DICT_CUTOFF:
+ doc2score = IIBTree(doc2score)
+ doc2score[docid] = weight
+ self._wordinfo[wid] = doc2score # not redundant: Persistency!
def _del_wordinfo(self, wid, docid):
doc2score = self._wordinfo[wid]