[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.3
Andreas Jung
andreas@zope.com
Sat, 5 Jan 2002 16:35:55 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv4934
Modified Files:
Tag: ajung-textindexng-branch
TextIndexNG.py
Log Message:
+ added backward index
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.2 => 1.2.2.3 ===
self.useNearSearch = getattr(extra,'useNearSearch', 1)
self.nearDistance = getattr(extra,'nearDistance', 5)
+ self.useProximity = getattr(extra,'useProximity', None)
if self.lexicon== 'None': self.lexicon = None
if self.useStemmer == 'None': self.useStemmer = None
@@ -85,6 +86,7 @@
def clear(self):
self._IDX = IOBTree()
+ self._invIDX = IOBTree()
# get splitter function
self._splitterfunc = self._stemmerfunc = None
@@ -99,8 +101,6 @@
if self.lexicon:
# try to get lexicon through acquisition
- import pdb
- pdb.set_trace()
self._LEXICON = getattr(self,self.lexicon)
else:
@@ -133,6 +133,20 @@
return Stemmer.availableStemmers()
+ def insertBackwardEntries(self,widLst, documentId):
+ """ insert a list of wordIds for the given documentId
+ into the backward index
+ """
+
+ idx = self._invIDX
+
+ if idx.has_key(documentId)==0:
+ idx[documentId] = IISet(widLst)
+ else:
+ idx[documentId].update(widLst)
+
+
+
def insertForwardEntry(self,wordId,pos,documentId):
# self._IDX is a mapping:
@@ -149,6 +163,13 @@
tree[documentId].insert(pos)
+ # and insert reverse entry
+ invidx = self._invIDX
+ if invidx.has_key(documentId)==0:
+ invidx[documentId] = IISet()
+
+ invidx[document].insert(wordId)
+
def _printIndex(self):
@@ -183,7 +204,14 @@
# Split the text into a list of words
+
words = self._splitterfunc(source,encoding=encoding)
+
+ # we collect all wordIds for performance reasons in a list
+ # and update the backward index once instead of inserting
+ # every single wordId
+
+ widLst = []
for i in range(len(words)):
word = words[i]
@@ -194,10 +222,13 @@
# get (new) wordId for word
wid = self._v_getWordId(word)
+ widLst.append(wid)
# and insert the wordId, its position and the documentId
# in the index
self.insertForwardEntry(wid,i,documentId)
+
+ self.insertBackwardEntries(self,widLst,documentId)
def __getitem__(self, word):