[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex - Lexicon.py:1.18.26.2
Casey Duncan
casey@zope.com
Wed, 4 Jun 2003 23:52:24 -0400
Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv5611
Modified Files:
Tag: casey-zctextindex-fewer-conflicts-branch
Lexicon.py
Log Message:
Finish refactor of Lexicon to remove conflict-causing counters
Move new wid generation back where it belongs
Add a sanity check to the wid generation
Remove counter references in mhindex test module
=== Zope/lib/python/Products/ZCTextIndex/Lexicon.py 1.18.26.1 => 1.18.26.2 ===
--- Zope/lib/python/Products/ZCTextIndex/Lexicon.py:1.18.26.1 Thu May 29 23:53:11 2003
+++ Zope/lib/python/Products/ZCTextIndex/Lexicon.py Wed Jun 4 23:51:53 2003
@@ -39,13 +39,8 @@
# filtered out). Returning a special wid value for OOV words is a
# way to let clients know when an OOV word appears.
self.length = Length()
- self._nextwid = 1
self._pipeline = pipeline
- # Keep some statistics about indexing
- self._nbytes = 0 # Number of bytes indexed (at start of pipeline)
- self._nwords = 0 # Number of words indexed (after pipeline)
-
def length(self):
"""Return the number of unique terms in the lexicon."""
# Overridden in instances
@@ -62,11 +57,8 @@
def sourceToWordIds(self, text):
last = _text2list(text)
- #for t in last:
- # self._nbytes += len(t)
for element in self._pipeline:
last = element.process(last)
- #self._nwords += len(last)
return map(self._getWordIdCreate, last)
def termToWordIds(self, text):
@@ -135,16 +127,16 @@
def _getWordIdCreate(self, word):
wid = self._wids.get(word)
if wid is None:
- self.length.change(1)
- wid = self.length()
+ wid = self._new_wid()
self._wids[word] = wid
self._words[wid] = word
return wid
def _new_wid(self):
- wid = self._nextwid
- self._nextwid += 1
- return wid
+ self.length.change(1)
+ while self._words.has_key(self.length()): # just to be safe
+ self.length.change(1)
+ return self.length()
def _text2list(text):
# Helper: splitter input may be a string or a list of strings