[Zope-CVS] CVS: Products/ZCTextIndex - BaseIndex.py:1.11
Jeremy Hylton
jeremy@zope.com
Fri, 17 May 2002 11:24:56 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv23017
Modified Files:
BaseIndex.py
Log Message:
Two changes and a question posing as a comment.
In unindex_doc() call _del_wordinfo() for each unique wid in the doc,
not for each wid. Before we had WidCode and phrase searching,
_docwords stored a list of the unique wids. The unindex code wasn't
updated when _docwords started storing all the wids, even duplicates.
Replace the try/except around __getitem__ in _add_wordinfo() with a
.get() call.
Add XXX comment about the purpose of the try/except(s) in
_del_wordinfo(). I suspect they only existed because _del_wordinfo()
was called repeatedly when a wid existed more than once.
=== Products/ZCTextIndex/BaseIndex.py 1.10 => 1.11 ===
return int(f * scale + 0.5)
+def unique(l):
+ """Return a list of the unique elements in l."""
+ d = {}
+ for elt in l:
+ d[elt] = 1
+ return d.keys()
+
class BaseIndex(Persistent):
__implements__ = IIndex
@@ -108,7 +115,7 @@
# A subclass may wish to extend or override this.
def unindex_doc(self, docid):
- for wid in self.get_words(docid):
+ for wid in unique(self.get_words(docid)):
self._del_wordinfo(wid, docid)
del self._docwords[docid]
del self._docweight[docid]
@@ -184,9 +191,8 @@
# space when it is live in memory. An IIBTree stores two C
# arrays of ints, one for the keys and one for the values. It
# holds up to 120 key-value pairs in a single bucket.
- try:
- map = self._wordinfo[wid]
- except KeyError:
+ map = self._wordinfo.get(wid)
+ if map is None:
map = {}
else:
# _add_wordinfo() is called for each update. If the map
@@ -197,10 +203,19 @@
self._wordinfo[wid] = map # Not redundant, because of Persistency!
def _del_wordinfo(self, wid, docid):
+ # XXX Not clear if the try/excepts here are guarding against
+ # corrupt data structures or if it is possible for the index
+ # to get in a state where it thinks an entry exists for the
+ # wid, docid pair and it doesn't.
try:
map = self._wordinfo[wid]
+ except KeyError:
+## print "No info for wid", wid
+ return
+ try:
del map[docid]
except KeyError:
+## print "doc %s does not use %s" % (docid, wid)
return
if len(map) == 0:
del self._wordinfo[wid]