[Zope-Checkins] SVN: Zope/trunk/ LP #142478: normalize terms passed to ``PLexicon.queryLexicon``
Tres Seaver
tseaver at palladion.com
Mon Apr 12 08:44:02 EDT 2010
Log message for revision 110743:
LP #142478: normalize terms passed to ``PLexicon.queryLexicon``
o Use the lexicon's pipeline (e.g., case flattening, stop word removal).
o Forward-port from 2.12 branch.
Changed:
U Zope/trunk/doc/CHANGES.rst
U Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py
U Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py
-=-
Modified: Zope/trunk/doc/CHANGES.rst
===================================================================
--- Zope/trunk/doc/CHANGES.rst 2010-04-12 12:26:43 UTC (rev 110742)
+++ Zope/trunk/doc/CHANGES.rst 2010-04-12 12:44:01 UTC (rev 110743)
@@ -153,6 +153,9 @@
Bugs Fixed
++++++++++
+- LP #142478: normalize terms passed to ``PLexicon.queryLexicon`` using
+ the lexicon's pipeline (e.g., case flattening, stop word removal, etc.)
+
- LP #143604: Removed top-level database-quota-size from zope.conf, some
storages support a quota option instead.
Modified: Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py 2010-04-12 12:26:43 UTC (rev 110742)
+++ Zope/trunk/src/Products/ZCTextIndex/ZCTextIndex.py 2010-04-12 12:44:01 UTC (rev 110743)
@@ -358,7 +358,7 @@
"""
if words:
wids = []
- for word in words:
+ for word in self.parseTerms(words):
wids.extend(self.globToWordIds(word))
words = [self.get_word(wid) for wid in wids]
else:
@@ -384,17 +384,21 @@
columns.append(words[i:i + rows])
i += rows
- return self._queryLexicon(self, REQUEST,
- page=page,
- rows=rows,
- cols=cols,
- start_word=start+1,
- end_word=end,
- word_count=word_count,
- page_count=page_count,
- page_range=xrange(page_count),
- page_columns=columns)
+ info = dict(page=page,
+ rows=rows,
+ cols=cols,
+ start_word=start+1,
+ end_word=end,
+ word_count=word_count,
+ page_count=page_count,
+ page_range=xrange(page_count),
+ page_columns=columns)
+ if REQUEST is not None:
+ return self._queryLexicon(self, REQUEST, **info)
+
+ return info
+
security.declareProtected(LexiconMgmtPerm, 'manage_main')
manage_main = DTMLFile('dtml/manageLexicon', globals())
Modified: Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py 2010-04-12 12:26:43 UTC (rev 110742)
+++ Zope/trunk/src/Products/ZCTextIndex/tests/testZCTextIndex.py 2010-04-12 12:44:01 UTC (rev 110743)
@@ -245,6 +245,7 @@
nbest, total = self.zc_index.query(w)
self.assertEqual(total, 0, "did not expect to find %s" % w)
+
class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):
# A fairly involved test of the ranking calculations based on
@@ -566,15 +567,146 @@
class PLexiconTests(unittest.TestCase):
- def test_z3interfaces(self):
+ def _getTargetClass(self):
+ from Products.ZCTextIndex.ZCTextIndex import PLexicon
+ return PLexicon
+
+ def _makeOne(self, id='testing', title='Testing', *pipeline):
+ return self._getTargetClass()(id, title, *pipeline)
+
+ def test_class_conforms_to_ILexicon(self):
from Products.ZCTextIndex.interfaces import ILexicon
+ from zope.interface.verify import verifyClass
+ verifyClass(ILexicon, self._getTargetClass())
+
+ def test_instance_conforms_to_ILexicon(self):
+ from Products.ZCTextIndex.interfaces import ILexicon
+ from zope.interface.verify import verifyObject
+ verifyObject(ILexicon, self._makeOne())
+
+ def test_class_conforms_to_IZCLexicon(self):
from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyClass
+ verifyClass(IZCLexicon, self._getTargetClass())
- verifyClass(ILexicon, PLexicon)
- verifyClass(IZCLexicon, PLexicon)
+ def test_instance_conforms_to_IZCLexicon(self):
+ from Products.ZCTextIndex.interfaces import IZCLexicon
+ from zope.interface.verify import verifyObject
+ verifyObject(IZCLexicon, self._makeOne())
+ def test_queryLexicon_defaults_empty(self):
+ lexicon = self._makeOne()
+ info = lexicon.queryLexicon(REQUEST=None, words=None)
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 20)
+ self.assertEqual(info['cols'], 4)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 0)
+ self.assertEqual(info['word_count'], 0)
+ self.assertEqual(list(info['page_range']), [])
+ self.assertEqual(info['page_columns'], [])
+ def test_queryLexicon_defaults_non_empty(self):
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne()
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=None)
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 20)
+ self.assertEqual(info['cols'], 4)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 7)
+ self.assertEqual(info['word_count'], 7)
+ self.assertEqual(list(info['page_range']), [0])
+ self.assertEqual(info['page_columns'], [WORDS])
+
+ def test_queryLexicon_row_breaks(self):
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne()
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=None, rows=4)
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 4)
+ self.assertEqual(info['cols'], 4)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 7)
+ self.assertEqual(info['word_count'], 7)
+ self.assertEqual(list(info['page_range']), [0])
+ self.assertEqual(info['page_columns'], [WORDS[0:4], WORDS[4:]])
+
+ def test_queryLexicon_page_breaks(self):
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne()
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=None, rows=2, cols=2)
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 2)
+ self.assertEqual(info['cols'], 2)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 4)
+ self.assertEqual(info['word_count'], 7)
+ self.assertEqual(list(info['page_range']), [0, 1])
+ self.assertEqual(info['page_columns'], [WORDS[0:2], WORDS[2:4]])
+
+ def test_queryLexicon_page_break_not_first(self):
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne()
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=None,
+ page=1, rows=2, cols=2)
+ self.assertEqual(info['page'], 1)
+ self.assertEqual(info['rows'], 2)
+ self.assertEqual(info['cols'], 2)
+ self.assertEqual(info['start_word'], 5)
+ self.assertEqual(info['end_word'], 7)
+ self.assertEqual(info['word_count'], 7)
+ self.assertEqual(list(info['page_range']), [0, 1])
+ self.assertEqual(info['page_columns'], [WORDS[4:6], WORDS[6:]])
+
+ def test_queryLexicon_words_no_globbing(self):
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne()
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=['aaa', 'bbb'])
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 20)
+ self.assertEqual(info['cols'], 4)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 2)
+ self.assertEqual(info['word_count'], 2)
+ self.assertEqual(list(info['page_range']), [0])
+ self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
+
+ def test_queryLexicon_words_w_globbing(self):
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne()
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=['aa*', 'bbb*'])
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 20)
+ self.assertEqual(info['cols'], 4)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 2)
+ self.assertEqual(info['word_count'], 2)
+ self.assertEqual(list(info['page_range']), [0])
+ self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
+
+ def test_queryLexicon_uses_pipeline_for_normalization(self):
+ from Products.ZCTextIndex.Lexicon import CaseNormalizer
+ WORDS = 'aaa bbb ccc ddd eee fff ggg'.split()
+ lexicon = self._makeOne('test', 'Testing', CaseNormalizer())
+ lexicon.sourceToWordIds(WORDS)
+ info = lexicon.queryLexicon(REQUEST=None, words=['AA*', 'Bbb*'])
+ self.assertEqual(info['page'], 0)
+ self.assertEqual(info['rows'], 20)
+ self.assertEqual(info['cols'], 4)
+ self.assertEqual(info['start_word'], 1)
+ self.assertEqual(info['end_word'], 2)
+ self.assertEqual(info['word_count'], 2)
+ self.assertEqual(list(info['page_range']), [0])
+ self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
+
+
def test_suite():
s = unittest.TestSuite()
for klass in (CosineIndexTests, OkapiIndexTests,
More information about the Zope-Checkins
mailing list