[Zope-CVS] CVS: Products/ZCTextIndex/tests - testZCTextIndex.py:1.13
Jeremy Hylton
jeremy@zope.com
Thu, 16 May 2002 19:21:25 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv1858/tests
Modified Files:
testZCTextIndex.py
Log Message:
Add an incorrect test of reindexing.
If we update a document and reindex it, ZCTextIndex is currently
broken. The test passes by virtue of calling unindex_object() after
each update, then calling index_object() again. We need to fix our
code, and then remove the calls to unindex_object() from the test.
XXX This code causes OkapiIndex to fail because it doesn't expect to
have no wordinfo for a wid. I tried to fix this in CosineIndex, but I
want Tim to think more about it and try to fix OkapiIndex.
=== Products/ZCTextIndex/tests/testZCTextIndex.py 1.12 => 1.13 ===
from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.QueryParser import QueryParser
+from Products.ZCTextIndex.StopDict import get_stopdict
+import re
import unittest
class Indexable:
@@ -33,6 +35,43 @@
# Subclasses should derive from one of testIndex.{CosineIndexTest,
# OkapiIndexTest} too.
+# a series of text chunks to use for the re-index tests
+text = [
+ """Here's a knocking indeed! If a
+ man were porter of hell-gate, he should have
+ old turning the key.""",
+
+ """Knock,
+ knock, knock! Who's there, i' the name of
+ Beelzebub? Here's a farmer, that hanged
+ himself on the expectation of plenty: come in
+ time; have napkins enow about you; here
+ you'll sweat for't.""",
+
+ """Knock,
+ knock! Who's there, in the other devil's
+ name? Faith, here's an equivocator, that could
+ swear in both the scales against either scale;
+ who committed treason enough for God's sake,
+ yet could not equivocate to heaven: O, come
+ in, equivocator.""",
+
+ """Knock,
+ knock, knock! Who's there? Faith, here's an
+ English tailor come hither, for stealing out of
+ a French hose: come in, tailor; here you may
+ roast your goose.""",
+
+ """Knock,
+ knock; never at quiet! What are you? But
+ this place is too cold for hell. I'll devil-porter
+ it no further: I had thought to have let in
+ some of all professions that go the primrose
+ way to the everlasting bonfire."""
+]
+
+
+
class ZCIndexTestsBase:
def setUp(self):
@@ -57,6 +96,42 @@
self.assertEqual(wids, [])
self.assertEqual(len(self.index._get_undoinfo(1)), 1)
+ def testDocUpdate(self):
+ docid = 1
+ stop = get_stopdict()
+ unique = {} # compute a set of unique words for each version
+ d = {} # find some common words
+ common = []
+ N = len(text)
+ for version, i in zip(text, range(N)):
+ # use a simple splitter rather than an official one
+ words = [w for w in re.split("\W+", version.lower())
+ if len(w) > 1 and not stop.has_key(w)]
+ # count occurrences of each word
+ for w in words:
+ l = d[w] = d.get(w, [])
+ l.append(i)
+ for k, v in d.items():
+ if len(v) == 1:
+ v = v[0]
+ l = unique[v] = unique.get(v, [])
+ l.append(k)
+ elif len(v) == N:
+ common.append(k)
+
+ for version, i in zip(text, range(N)):
+ doc = Indexable(version)
+ self.zc_index.index_object(docid, doc)
+ for w in common:
+ nbest, total = self.zc_index.query(w)
+ self.assertEqual(total, 1, "did not find %s" % w)
+ for k, v in unique.items():
+ if k == i:
+ continue
+ for w in v:
+ nbest, total = self.zc_index.query(w)
+ self.assertEqual(total, 0, "did not expect to find %s" % w)
+ self.zc_index.unindex_object(docid)
class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):