[Zope3-checkins] SVN: Zope3/trunk/src/zope/index/text/ make index_doc more intelligent in reindexing, so that only an object is reindexed if the words change

Bernd Dorn bernd.dorn at lovelysystems.com
Wed Apr 4 12:50:23 EDT 2007


Log message for revision 74005:
  Make index_doc more intelligent in reindexing, so that an object is only reindexed if its words change.

Changed:
  U   Zope3/trunk/src/zope/index/text/baseindex.py
  U   Zope3/trunk/src/zope/index/text/okapiindex.py
  U   Zope3/trunk/src/zope/index/text/textindex.txt

-=-
Modified: Zope3/trunk/src/zope/index/text/baseindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/baseindex.py	2007-04-04 16:23:54 UTC (rev 74004)
+++ Zope3/trunk/src/zope/index/text/baseindex.py	2007-04-04 16:50:22 UTC (rev 74005)
@@ -104,11 +104,16 @@
     # faster than simply unindexing the old version in its entirety and then
     # adding the new version in its entirety.
     def _reindex_doc(self, docid, text):
+
         # Touch as few docid->w(docid, score) maps in ._wordinfo as possible.
         old_wids = self.get_words(docid)
+        new_wids = self._lexicon.sourceToWordIds(text)
+
+        if old_wids == new_wids:
+            # we return -1 if not changed
+            return -1
+        
         old_wid2w, old_docw = self._get_frequencies(old_wids)
-
-        new_wids = self._lexicon.sourceToWordIds(text)
         new_wid2w, new_docw = self._get_frequencies(new_wids)
 
         old_widset = IFTreeSet(old_wid2w.keys())

Modified: Zope3/trunk/src/zope/index/text/okapiindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/okapiindex.py	2007-04-04 16:23:54 UTC (rev 74004)
+++ Zope3/trunk/src/zope/index/text/okapiindex.py	2007-04-04 16:50:22 UTC (rev 74005)
@@ -223,12 +223,16 @@
 
     def index_doc(self, docid, text):
         count = BaseIndex.index_doc(self, docid, text)
+        if count == -1:
+            return count
         self._totaldoclen += count
         return count
 
     def _reindex_doc(self, docid, text):
-        self._totaldoclen -= self._docweight[docid]
-        return BaseIndex._reindex_doc(self, docid, text)
+        count = BaseIndex._reindex_doc(self, docid, text)
+        if count > -1:
+            self._totaldoclen -= self._docweight[docid]            
+        return count
 
     def unindex_doc(self, docid):
         self._totaldoclen -= self._docweight.get(docid, 0)

Modified: Zope3/trunk/src/zope/index/text/textindex.txt
===================================================================
--- Zope3/trunk/src/zope/index/text/textindex.txt	2007-04-04 16:23:54 UTC (rev 74004)
+++ Zope3/trunk/src/zope/index/text/textindex.txt	2007-04-04 16:50:22 UTC (rev 74005)
@@ -107,3 +107,46 @@
     8
     >>> index.wordCount()
     114
+
+Tracking Changes
+================
+
+In order to have as few writes as possible, the index doesn't change
+its state if we index a docid with the same values twice. To
+test this behaviour we have to create a simple data manager.
+
+    >>> class DM:
+    ...     def __init__(self):
+    ...         self.called = 0
+    ...     def register(self, ob):
+    ...         self.called += 1
+    ...     def setstate(self, ob):
+    ...         ob.__setstate__({'x': 42})
+
+Indexing a document for the first time changes the state of the
+underlying index. Initially, _p_changed is False.
+
+    >>> index._p_jar = index.index._p_jar = DM()
+    >>> index.index._p_changed
+    False
+    >>> index.index_doc(100, u"a new funky value")
+    >>> index.index._p_changed
+    True
+
+Now, for testing purposes, we reset the changed flag to False.
+
+    >>> index.index._p_changed = False
+
+If we index it a second time, the underlying index should not be
+changed.
+
+    >>> index.index_doc(100, u"a new funky value")
+    >>> index._p_changed is index.index._p_changed is False
+    True
+    >>> index.index._p_changed = False
+
+But if we index the same docid with different text, the state changes too.
+
+    >>> index.index_doc(100, u"an even newer funky value")
+    >>> index.index._p_changed
+    True



More information about the Zope3-Checkins mailing list