[Zope-CVS] CVS: Products/ZCTextIndex/tests - testZCTextIndex.py:1.17
Tim Peters
tim.one@comcast.net
Fri, 17 May 2002 03:29:20 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv29108/tests
Modified Files:
testZCTextIndex.py
Log Message:
testDocUpdate(): Thanks to stop-word removal, there weren't actually
*any* words in common across the versions. Helped Will along by adding
a pragmatic comment to his "knocking indeed" rant. Reworked to use
the inscrutable magic of dict.setdefault.
=== Products/ZCTextIndex/tests/testZCTextIndex.py 1.16 => 1.17 ===
raise AssertionError, "%s != %s" % (scaled1, scaled2)
-# a series of text chunks to use for the re-index tests
+# A series of text chunks to use for the re-index tests (testDocUpdate).
text = [
"""Here's a knocking indeed! If a
man were porter of hell-gate, he should have
- old turning the key.""",
+ old turning the key. knock (that made sure
+ sure there's at least one word in common)."""
"""Knock,
knock, knock! Who's there, i' the name of
@@ -96,26 +97,27 @@
def testDocUpdate(self):
docid = 1
- stop = get_stopdict()
- unique = {} # compute a set of unique words for each version
- d = {} # find some common words
- common = []
N = len(text)
+ stop = get_stopdict()
+
+ d = {} # word -> list of version numbers containing that word
for version, i in zip(text, range(N)):
# use a simple splitter rather than an official one
words = [w for w in re.split("\W+", version.lower())
if len(w) > 1 and not stop.has_key(w)]
- # count occurences of each word
+ word_seen = {}
for w in words:
- l = d[w] = d.get(w, [])
- l.append(i)
- for k, v in d.items():
- if len(v) == 1:
- v = v[0]
- l = unique[v] = unique.get(v, [])
- l.append(k)
- elif len(v) == N:
- common.append(k)
+ if not word_seen.has_key(w):
+ d.setdefault(w, []).append(i)
+ word_seen[w] = 1
+
+ unique = {} # version number -> list of words unique to that version
+ common = [] # list of words common to all versions
+ for w, versionlist in d.items():
+ if len(versionlist) == 1:
+ unique.setdefault(versionlist[0], []).append(w)
+ elif len(versionlist) == N:
+ common.append(w)
for version, i in zip(text, range(N)):
doc = Indexable(version)