[Zope3-checkins] SVN: Zope3/trunk/src/zope/index/text/ make
index_doc more intelligent in reindexing,
so that an object is reindexed only if the words change
Bernd Dorn
bernd.dorn at lovelysystems.com
Wed Apr 4 12:50:23 EDT 2007
Log message for revision 74005:
make index_doc more intelligent in reindexing, so that an object is reindexed only if the words change
Changed:
U Zope3/trunk/src/zope/index/text/baseindex.py
U Zope3/trunk/src/zope/index/text/okapiindex.py
U Zope3/trunk/src/zope/index/text/textindex.txt
-=-
Modified: Zope3/trunk/src/zope/index/text/baseindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/baseindex.py 2007-04-04 16:23:54 UTC (rev 74004)
+++ Zope3/trunk/src/zope/index/text/baseindex.py 2007-04-04 16:50:22 UTC (rev 74005)
@@ -104,11 +104,16 @@
# faster than simply unindexing the old version in its entirety and then
# adding the new version in its entirety.
def _reindex_doc(self, docid, text):
+
# Touch as few docid->w(docid, score) maps in ._wordinfo as possible.
old_wids = self.get_words(docid)
+ new_wids = self._lexicon.sourceToWordIds(text)
+
+ if old_wids == new_wids:
+ # we return -1 if not changed
+ return -1
+
old_wid2w, old_docw = self._get_frequencies(old_wids)
-
- new_wids = self._lexicon.sourceToWordIds(text)
new_wid2w, new_docw = self._get_frequencies(new_wids)
old_widset = IFTreeSet(old_wid2w.keys())
Modified: Zope3/trunk/src/zope/index/text/okapiindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/okapiindex.py 2007-04-04 16:23:54 UTC (rev 74004)
+++ Zope3/trunk/src/zope/index/text/okapiindex.py 2007-04-04 16:50:22 UTC (rev 74005)
@@ -223,12 +223,16 @@
def index_doc(self, docid, text):
count = BaseIndex.index_doc(self, docid, text)
+ if count == -1:
+ return count
self._totaldoclen += count
return count
def _reindex_doc(self, docid, text):
- self._totaldoclen -= self._docweight[docid]
- return BaseIndex._reindex_doc(self, docid, text)
+ count = BaseIndex._reindex_doc(self, docid, text)
+ if count > -1:
+ self._totaldoclen -= self._docweight[docid]
+ return count
def unindex_doc(self, docid):
self._totaldoclen -= self._docweight.get(docid, 0)
Modified: Zope3/trunk/src/zope/index/text/textindex.txt
===================================================================
--- Zope3/trunk/src/zope/index/text/textindex.txt 2007-04-04 16:23:54 UTC (rev 74004)
+++ Zope3/trunk/src/zope/index/text/textindex.txt 2007-04-04 16:50:22 UTC (rev 74005)
@@ -107,3 +107,46 @@
8
>>> index.wordCount()
114
+
+Tracking Changes
+================
+
+In order to have as few writes as possible, the index doesn't change
+its state if we index a docid with the same values twice. To
+test this behaviour we have to create a simple data manager.
+
+ >>> class DM:
+ ... def __init__(self):
+ ... self.called = 0
+ ... def register(self, ob):
+ ... self.called += 1
+ ... def setstate(self, ob):
+ ... ob.__setstate__({'x': 42})
+
+If we index a document for the first time, it changes the state of the
+underlying index. At the start, _p_changed is False.
+
+ >>> index._p_jar = index.index._p_jar = DM()
+ >>> index.index._p_changed
+ False
+ >>> index.index_doc(100, u"a new funky value")
+ >>> index.index._p_changed
+ True
+
+Now for testing we set the changed flag to false again.
+
+ >>> index.index._p_changed = False
+
+If we index it a second time, the underlying index should not be
+changed.
+
+ >>> index.index_doc(100, u"a new funky value")
+ >>> index._p_changed is index.index._p_changed is False
+ True
+ >>> index.index._p_changed = False
+
+But if we change the indexed text, the state changes too.
+
+ >>> index.index_doc(100, u"an even newer funky value")
+ >>> index.index._p_changed
+ True
More information about the Zope3-Checkins
mailing list