[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex/tests - mhindex.py:1.16.8.1 testIndex.py:1.11.10.1 testLexicon.py:1.3.10.3 testStopper.py:1.2.10.1 testZCTextIndex.py:1.36.6.1
Casey Duncan
casey@zope.com
Thu, 5 Jun 2003 16:37:06 -0400
Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv28323/lib/python/Products/ZCTextIndex/tests
Modified Files:
Tag: Zope-2_6-branch
mhindex.py testIndex.py testLexicon.py testStopper.py
testZCTextIndex.py
Log Message:
Backport casey-zctextindex-fewer-conflicts-branch:
- Indexes and Lexicon are now much less likely to generate write conflicts.
Previously, *any* concurrent index/unindex operation would conflict.
- Performance and scalability fix for queries
=== Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py 1.16 => 1.16.8.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py:1.16 Wed Jul 10 14:02:09 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/mhindex.py Thu Jun 5 16:37:05 2003
@@ -441,8 +441,6 @@
self.updatefolder(f, f.listmessages())
print "Total", len(self.docpaths)
self.commit()
- print "Indexed", self.index.lexicon._nbytes, "bytes and",
- print self.index.lexicon._nwords, "words;",
print len(self.index.lexicon._words), "unique words."
def updatefolder(self, f, msgs):
@@ -468,7 +466,7 @@
self.index.index_text(docid, text)
self.maycommit()
# Remove messages from the folder that no longer exist
- for path in self.path2docid.keys(f.name):
+ for path in list(self.path2docid.keys(f.name)):
if not path.startswith(f.name + "/"):
break
if self.getmtime(path) == 0:
@@ -544,7 +542,7 @@
st = os.stat(path)
except os.error, msg:
return 0
- return st[ST_MTIME]
+ return int(st[ST_MTIME])
def maycommit(self):
self.trans_count += 1
=== Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py 1.11 => 1.11.10.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py:1.11 Wed Jun 12 17:45:53 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testIndex.py Thu Jun 5 16:37:05 2003
@@ -12,8 +12,10 @@
#
##############################################################################
+import os
from unittest import TestCase, TestSuite, main, makeSuite
+from BTrees.Length import Length
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
@@ -34,6 +36,8 @@
self.assert_(self.index.has_doc(DOCID))
self.assert_(self.index._docweight[DOCID])
self.assertEqual(len(self.index._docweight), 1)
+ self.assertEqual(
+ len(self.index._docweight), self.index.document_count())
self.assertEqual(len(self.index._wordinfo), 5)
self.assertEqual(len(self.index._docwords), 1)
self.assertEqual(len(self.index.get_words(DOCID)), 5)
@@ -48,6 +52,8 @@
self.test_index_document(DOCID)
self.index.unindex_doc(DOCID)
self.assertEqual(len(self.index._docweight), 0)
+ self.assertEqual(
+ len(self.index._docweight), self.index.document_count())
self.assertEqual(len(self.index._wordinfo), 0)
self.assertEqual(len(self.index._docwords), 0)
self.assertEqual(len(self.index._wordinfo),
@@ -60,6 +66,8 @@
self.index.index_doc(DOCID, doc)
self.assert_(self.index._docweight[DOCID])
self.assertEqual(len(self.index._docweight), 2)
+ self.assertEqual(
+ len(self.index._docweight), self.index.document_count())
self.assertEqual(len(self.index._wordinfo), 8)
self.assertEqual(len(self.index._docwords), 2)
self.assertEqual(len(self.index.get_words(DOCID)), 4)
@@ -82,6 +90,8 @@
self.index.unindex_doc(1)
DOCID = 2
self.assertEqual(len(self.index._docweight), 1)
+ self.assertEqual(
+ len(self.index._docweight), self.index.document_count())
self.assert_(self.index._docweight[DOCID])
self.assertEqual(len(self.index._wordinfo), 4)
self.assertEqual(len(self.index._docwords), 1)
@@ -101,6 +111,8 @@
self.assertEqual(len(self.index.get_words(DOCID)), 7)
self.assertEqual(len(self.index._wordinfo),
self.index.length())
+ self.assertEqual(
+ len(self.index._docweight), self.index.document_count())
wids = self.lexicon.termToWordIds("repeat")
self.assertEqual(len(wids), 1)
repititive_wid = wids[0]
@@ -145,9 +157,130 @@
class OkapiIndexTest(IndexTest):
IndexFactory = OkapiIndex
+class TestIndexConflict(TestCase):
+
+ storage = None
+
+ def tearDown(self):
+ if self.storage is not None:
+ self.storage.close()
+
+ def openDB(self):
+ from ZODB.FileStorage import FileStorage
+ from ZODB.DB import DB
+ n = 'fs_tmp__%s' % os.getpid()
+ self.storage = FileStorage(n)
+ self.db = DB(self.storage)
+
+ def test_index_doc_conflict(self):
+ self.index = OkapiIndex(Lexicon())
+ self.openDB()
+ r1 = self.db.open().root()
+ r1['i'] = self.index
+ get_transaction().commit()
+
+ r2 = self.db.open().root()
+ copy = r2['i']
+ # Make sure the data is loaded
+ list(copy._docweight.items())
+ list(copy._docwords.items())
+ list(copy._wordinfo.items())
+ list(copy._lexicon._wids.items())
+ list(copy._lexicon._words.items())
+
+ self.assertEqual(self.index._p_serial, copy._p_serial)
+
+ self.index.index_doc(0, 'The time has come')
+ get_transaction().commit()
+
+ copy.index_doc(1, 'That time has gone')
+ get_transaction().commit()
+
+ def test_reindex_doc_conflict(self):
+ self.index = OkapiIndex(Lexicon())
+ self.index.index_doc(0, 'Sometimes change is good')
+ self.index.index_doc(1, 'Then again, who asked')
+ self.openDB()
+ r1 = self.db.open().root()
+ r1['i'] = self.index
+ get_transaction().commit()
+
+ r2 = self.db.open().root()
+ copy = r2['i']
+ # Make sure the data is loaded
+ list(copy._docweight.items())
+ list(copy._docwords.items())
+ list(copy._wordinfo.items())
+ list(copy._lexicon._wids.items())
+ list(copy._lexicon._words.items())
+
+ self.assertEqual(self.index._p_serial, copy._p_serial)
+
+ self.index.index_doc(0, 'Sometimes change isn\'t bad')
+ get_transaction().commit()
+
+ copy.index_doc(1, 'Then again, who asked you?')
+ get_transaction().commit()
+
+class TestUpgrade(TestCase):
+
+ def test_query_before_totaldoclen_upgrade(self):
+ self.index1 = OkapiIndex(Lexicon(Splitter()))
+ self.index1.index_doc(0, 'The quiet of night')
+ # Revert index1 back to a long to simulate an older index instance
+ self.index1._totaldoclen = long(self.index1._totaldoclen())
+ self.assertEqual(len(self.index1.search('night')), 1)
+
+ def test_upgrade_totaldoclen(self):
+ self.index1 = OkapiIndex(Lexicon())
+ self.index2 = OkapiIndex(Lexicon())
+ self.index1.index_doc(0, 'The quiet of night')
+ self.index2.index_doc(0, 'The quiet of night')
+ # Revert index1 back to a long to simulate an older index instance
+ self.index1._totaldoclen = long(self.index1._totaldoclen())
+ self.index1.index_doc(1, 'gazes upon my shadow')
+ self.index2.index_doc(1, 'gazes upon my shadow')
+ self.assertEqual(
+ self.index1._totaldoclen(), self.index2._totaldoclen())
+ self.index1._totaldoclen = long(self.index1._totaldoclen())
+ self.index1.unindex_doc(0)
+ self.index2.unindex_doc(0)
+ self.assertEqual(
+ self.index1._totaldoclen(), self.index2._totaldoclen())
+
+ def test_query_before_document_count_upgrade(self):
+ self.index1 = OkapiIndex(Lexicon(Splitter()))
+ self.index1.index_doc(0, 'The quiet of night')
+ # Remove document_count from index1 to simulate an older index instance
+ del self.index1.document_count
+ self.assertEqual(len(self.index1.search('night')), 1)
+
+ def test_upgrade_document_count(self):
+ self.index1 = OkapiIndex(Lexicon())
+ self.index2 = OkapiIndex(Lexicon())
+ self.index1.index_doc(0, 'The quiet of night')
+ self.index2.index_doc(0, 'The quiet of night')
+ # Revert index1 back to simulate an older index instance
+ del self.index1.document_count
+ self.index1.index_doc(1, 'gazes upon my shadow')
+ self.index2.index_doc(1, 'gazes upon my shadow')
+ self.assert_(self.index1.document_count.__class__ is Length)
+ self.assertEqual(
+ self.index1.document_count(), self.index2.document_count())
+ del self.index1.document_count
+ self.index1.unindex_doc(0)
+ self.index2.unindex_doc(0)
+ self.assert_(self.index1.document_count.__class__ is Length)
+ self.assertEqual(
+ self.index1.document_count(), self.index2.document_count())
+
+
+
def test_suite():
return TestSuite((makeSuite(CosineIndexTest),
makeSuite(OkapiIndexTest),
+ makeSuite(TestIndexConflict),
+ makeSuite(TestUpgrade),
))
if __name__=='__main__':
=== Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py 1.3.10.2 => 1.3.10.3 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py:1.3.10.2 Thu Dec 19 10:37:36 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testLexicon.py Thu Jun 5 16:37:05 2003
@@ -12,9 +12,11 @@
#
##############################################################################
-import sys
+import os, sys
from unittest import TestCase, TestSuite, main, makeSuite
+import ZODB
+
from Products.ZCTextIndex.Lexicon import Lexicon
from Products.ZCTextIndex.Lexicon import Splitter, CaseNormalizer
@@ -134,9 +136,59 @@
words = HTMLWordSplitter().process(words)
self.assertEqual(words, expected)
locale.setlocale(locale.LC_ALL, loc) # restore saved locale
+
+ def testUpgradeLength(self):
+ from BTrees.Length import Length
+ lexicon = Lexicon(Splitter())
+ del lexicon.length # Older instances don't override length
+ lexicon.sourceToWordIds('how now brown cow')
+ self.assert_(lexicon.length.__class__ is Length)
+
+class TestLexiconConflict(TestCase):
+
+ storage = None
+
+ def tearDown(self):
+ if self.storage is not None:
+ self.storage.close()
+
+ def openDB(self):
+ from ZODB.FileStorage import FileStorage
+ from ZODB.DB import DB
+ n = 'fs_tmp__%s' % os.getpid()
+ self.storage = FileStorage(n)
+ self.db = DB(self.storage)
+
+ def testAddWordConflict(self):
+ self.l = Lexicon(Splitter())
+ self.openDB()
+ r1 = self.db.open().root()
+ r1['l'] = self.l
+ get_transaction().commit()
+
+ r2 = self.db.open().root()
+ copy = r2['l']
+ # Make sure the data is loaded
+ list(copy._wids.items())
+ list(copy._words.items())
+ copy.length()
+
+ self.assertEqual(self.l._p_serial, copy._p_serial)
+
+ self.l.sourceToWordIds('mary had a little lamb')
+ get_transaction().commit()
+
+ copy.sourceToWordIds('whose fleece was')
+ copy.sourceToWordIds('white as snow')
+ get_transaction().commit()
+ self.assertEqual(copy.length(), 11)
+ self.assertEqual(copy.length(), len(copy._words))
def test_suite():
- return makeSuite(Test)
+ suite = TestSuite()
+ suite.addTest(makeSuite(Test))
+ suite.addTest(makeSuite(TestLexiconConflict))
+ return suite
if __name__=='__main__':
main(defaultTest='test_suite')
=== Zope/lib/python/Products/ZCTextIndex/tests/testStopper.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testStopper.py:1.2 Wed May 22 12:44:54 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testStopper.py Thu Jun 5 16:37:05 2003
@@ -1,3 +1,16 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
"""Tests for the C version of the StopWordRemover."""
import unittest
=== Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py 1.36 => 1.36.6.1 ===
--- Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py:1.36 Wed Aug 14 18:25:14 2002
+++ Zope/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py Thu Jun 5 16:37:05 2003
@@ -331,7 +331,7 @@
self._checkAbsoluteScores()
def _checkAbsoluteScores(self):
- self.assertEqual(self.index._totaldoclen, 6)
+ self.assertEqual(self.index._totaldoclen(), 6)
# So the mean doc length is 2. We use that later.
r, num = self.zc_index.query("one")