[Zope3-checkins] CVS: Zope3/src/zope/index/keyword - index.py:1.3

Andreas Jung andreas@andreas-jung.com
Sat, 2 Aug 2003 02:51:19 -0400


Update of /cvs-repository/Zope3/src/zope/index/keyword
In directory cvs.zope.org:/tmp/cvs-serv27608/keyword

Modified Files:
	index.py 
Log Message:
Improved reindexing efficiency by adding/removing only the keywords
that changed.


=== Zope3/src/zope/index/keyword/index.py 1.2 => 1.3 ===
--- Zope3/src/zope/index/keyword/index.py:1.2	Tue Jul 15 13:52:01 2003
+++ Zope3/src/zope/index/keyword/index.py	Sat Aug  2 02:50:44 2003
@@ -17,7 +17,7 @@
 from persistence import Persistent
 
 from zodb.btrees.IOBTree import IOBTree
-from zodb.btrees.OOBTree import OOBTree, OOSet
+from zodb.btrees.OOBTree import OOBTree, OOSet, difference, intersection
 from zodb.btrees.IIBTree import IISet, union, intersection
 from zodb.btrees.Length import Length
 
@@ -40,8 +40,7 @@
 
         # The reverse index maps a docid to its keywords
         # TODO: Using a vocabulary might be the better choice to store
-        # keywords since it would allow use to use integers instead of
-        # strings
+        # keywords since it would allow use to use integers instead of strings
         self._rev_index = IOBTree()
         self._num_docs = Length(0)
 
@@ -58,19 +57,32 @@
 
     def index_doc(self, docid, seq):
         
-        if not seq: return
-        seq = [w.lower() for w in seq]
-            
-        if self.has_doc(docid):       # unindex doc if present
-            self.unindex_doc(docid)
-
         if not isinstance(seq, (TupleType, ListType)):
             raise TypeError, 'seq argument must be a list/tuple of strings'
+    
+        if not seq: return
+        seq = [w.lower() for w in seq]
 
-        self._insert_forward(docid, seq)
-        self._insert_reverse(docid, seq)
-        self._num_docs.change(1)
+        old_kw = self._rev_index.get(docid, None)
+        new_kw = OOSet(seq)
 
+        if old_kw is None:
+            self._insert_forward(docid, new_kw)
+            self._insert_reverse(docid, new_kw)
+            self._num_docs.change(1)
+        else:
+
+            # determine added and removed keywords
+            kw_added = difference(new_kw, old_kw)
+            kw_removed = difference(old_kw, new_kw)
+
+            # removed keywords are removed from the forward index
+            for word in kw_removed:
+                self._fwd_index[word].remove(docid)
+            
+            # now update reverse and forward indexes
+            self._insert_forward(docid, kw_added)
+            self._insert_reverse(docid, new_kw)
         
     def unindex_doc(self, docid):
 
@@ -102,7 +114,7 @@
         """ add words to forward index """
 
         if words:  
-            self._rev_index[docid] = OOSet(words)
+            self._rev_index[docid] = words
 
     def search(self, query, operator='and'):