[Zope-Checkins] SVN: Zope/trunk/ - Collector #1815: ZCTextIndex
accepts (again) sequences of strings to be indexed.
Andreas Jung
andreas at andreas-jung.com
Mon Jul 4 13:53:52 EDT 2005
Log message for revision 30995:
- Collector #1815: ZCTextIndex accepts (again) sequences of strings to
be indexed.
Changed:
U Zope/trunk/doc/CHANGES.txt
U Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py
U Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py
U Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
-=-
Modified: Zope/trunk/doc/CHANGES.txt
===================================================================
--- Zope/trunk/doc/CHANGES.txt 2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/doc/CHANGES.txt 2005-07-04 17:53:52 UTC (rev 30995)
@@ -34,6 +34,9 @@
Bugs fixed
+ - Collector #1815: ZCTextIndex accepts (again) sequences of strings to
+ be indexed.
+
- Collector #1812: Fixed key error in ZSQL ZMI/Test
- Fixed CMFBTreeFolder for CMF 1.5+
Modified: Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py
===================================================================
--- Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py 2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py 2005-07-04 17:53:52 UTC (rev 30995)
@@ -68,6 +68,9 @@
"""Add a document with the specified id and text to the index. If a
document by that id already exists, replace its text with the new
text provided
+ text may be either a string (Unicode or otherwise) or a list
+ of strings from which to extract the terms under which to
+ index the source document.
"""
def unindex_doc(docid):
Modified: Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py
===================================================================
--- Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py 2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py 2005-07-04 17:53:52 UTC (rev 30995)
@@ -152,8 +152,15 @@
## Pluggable Index APIs ##
def index_object(self, documentId, obj, threshold=None):
- """ wrapper to handle indexing of multiple attributes """
+ """Wrapper for index_doc() handling indexing of multiple attributes.
+ Enter the document with the specified documentId in the index
+ under the terms extracted from the indexed text attributes,
+ each of which should yield either a string or a list of
+ strings (Unicode or otherwise) to be passed to index_doc().
+ """
+ # XXX We currently ignore subtransaction threshold
+
# needed for backward compatibility
try: fields = self._indexed_attrs
except: fields = [ self._fieldname ]
@@ -168,12 +175,22 @@
text = text()
if text is None:
continue
- all_texts.append(text)
+ # To index each attribute separately, we could use the
+ # following line, but we have preferred to make a single
+ # call to index_doc() for all attributes together.
+ # res += self.index.index_doc(documentId, text)
+ if text:
+ if isinstance(text, (list, tuple, )):
+ all_texts.extend(text)
+ else:
+ all_texts.append(text)
- if all_texts:
- return self.index.index_doc(documentId, ' '.join(all_texts))
- else:
- return 0
+ # Check that we're sending only strings
+ all_texts = filter(lambda text: isinstance(text, basestring), \
+ all_texts)
+ if all_texts:
+ return self.index.index_doc(documentId, all_texts)
+ return res
def unindex_object(self, docid):
if self.index.has_doc(docid):
Modified: Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
===================================================================
--- Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py 2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py 2005-07-04 17:53:52 UTC (rev 30995)
@@ -151,6 +151,29 @@
nbest, total = zc_index.query('foo alpha gamma')
self.assertEqual(len(nbest), 0)
+ def testListAttributes(self):
+ lexicon = PLexicon('lexicon', '',
+ Splitter(),
+ CaseNormalizer(),
+ StopWordRemover())
+ caller = LexiconHolder(self.lexicon)
+ zc_index = ZCTextIndex('name',
+ None,
+ caller,
+ self.IndexFactory,
+ 'text1,text2',
+ 'lexicon')
+ doc = Indexable2('Hello Tim', \
+ ['Now is the winter of our discontent',
+ 'Made glorious summer by this sun of York', ])
+ zc_index.index_object(1, doc)
+ nbest, total = zc_index.query('glorious')
+ self.assertEqual(len(nbest), 1)
+ nbest, total = zc_index.query('York Tim')
+ self.assertEqual(len(nbest), 1)
+ nbest, total = zc_index.query('Tuesday Tim York')
+ self.assertEqual(len(nbest), 0)
+
def testStopWords(self):
# the only non-stopword is question
text = ("to be or not to be "
More information about the Zope-Checkins
mailing list