[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.3

Sat, 5 Jan 2002 16:35:55 -0500

Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv4934

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
+ added backward index


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.2 => 1.2.2.3 ===
         self.useNearSearch = getattr(extra,'useNearSearch', 1)
         self.nearDistance  = getattr(extra,'nearDistance',  5)
+        self.useProximity  = getattr(extra,'useProximity',  None)
 
         if self.lexicon== 'None':     self.lexicon    = None
         if self.useStemmer == 'None': self.useStemmer = None
@@ -85,6 +86,7 @@
     def clear(self):
 
         self._IDX     = IOBTree()
+        self._invIDX  = IOBTree()
 
         # get splitter function
         self._splitterfunc = self._stemmerfunc = None
@@ -99,8 +101,6 @@
         if self.lexicon:
 
             # try to get lexicon through acquisition
-            import pdb
-            pdb.set_trace()
             self._LEXICON = getattr(self,self.lexicon)
 
         else:
@@ -133,6 +133,20 @@
         return Stemmer.availableStemmers()    
 
 
+    def insertBackwardEntries(self,widLst, documentId):
+        """ Insert a list of wordIds for the given documentId into the
+            backward index (self._invIDX), creating the entry on first use.
+        """
+
+        idx = self._invIDX
+        # no entry for this document yet: start one; otherwise update the existing one
+        if idx.has_key(documentId)==0:
+            idx[documentId] = IISet(widLst)
+        else:
+            idx[documentId].update(widLst)
+
+
+
     def insertForwardEntry(self,wordId,pos,documentId):
 
         # self._IDX is a mapping:
@@ -149,6 +163,13 @@
 
         tree[documentId].insert(pos)
 
+        # and insert reverse entry: record wordId under documentId in self._invIDX
+        invidx = self._invIDX
+        if invidx.has_key(documentId)==0:
+            invidx[documentId] = IISet()
+
+        invidx[documentId].insert(wordId)
+
 
 
     def _printIndex(self):
@@ -183,7 +204,14 @@
 
 
         # Split the text into a list of words
+
         words = self._splitterfunc(source,encoding=encoding)
+
+        # we collect all wordIds for performance reasons in a list
+        # and update the backward index once instead of inserting
+        # every single wordId
+
+        widLst = []
     
         for i in range(len(words)):
             word = words[i]
@@ -194,10 +222,13 @@
 
             # get (new) wordId for word
             wid = self._v_getWordId(word)
+            widLst.append(wid)
 
             # and insert the wordId, its position and the documentId 
             # in the index
             self.insertForwardEntry(wid,i,documentId)
+        # update the backward index once with all collected wordIds
+        self.insertBackwardEntries(widLst,documentId)
 
 
     def __getitem__(self, word):