[Zope-CVS] CVS: Products/ZCTextIndex - StopDict.py:1.1.2.1 ZCTextIndex.py:1.1.2.7
Jeremy Hylton
jeremy@zope.com
Thu, 2 May 2002 12:42:14 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv15019
Modified Files:
Tag: TextIndexDS9-branch
ZCTextIndex.py
Added Files:
Tag: TextIndexDS9-branch
StopDict.py
Log Message:
Add a very short list of stop words (based on the Lucene list).
Change the text of documents in testIndex because they contained
stopwords that screwed up the tests.
=== Added File Products/ZCTextIndex/StopDict.py ===
"""Provide a default list of stop words for the index.
The specific splitter and lexicon are customizable, but the default
ZCTextIndex should do something useful.
"""
def get_stopdict():
    """Return a dictionary of stopwords.

    The keys are the stopwords listed in ``_words``; every value is 1.
    A fresh copy is returned on each call, so a caller that adds or
    removes entries does not alter the module-level default that later
    callers receive.
    """
    return _dict.copy()


# This list of English stopwords comes from Lucene
_words = [
    "a", "and", "are", "as", "at", "be", "but", "by",
    "for", "if", "in", "into", "is", "it",
    "no", "not", "of", "on", "or", "such",
    "that", "the", "their", "then", "there", "these",
    "they", "this", "to", "was", "will", "with"
]


def _make_stopdict(words):
    """Return a dict mapping each word in *words* to 1."""
    stopdict = {}
    for word in words:
        stopdict[word] = 1
    return stopdict


# Built inside a helper so that no loop variable is left behind in the
# module namespace (it would otherwise be exported by ``import *``).
_dict = _make_stopdict(_words)
=== Products/ZCTextIndex/ZCTextIndex.py 1.1.2.6 => 1.1.2.7 ===
from Products.ZCTextIndex.Splitter import Splitter
+from Products.ZCTextIndex.StopDict import get_stopdict
+
from Products.PluginIndexes.TextIndex.Splitter.ZopeSplitter.ZopeSplitter \
import ZopeSplitter
@@ -18,7 +20,7 @@
__implements__ = PluggableIndexInterface
def __init__(self, doc_attr="text"):
- self.lexicon = Lexicon(Splitter(ZopeSplitter))
+ self.lexicon = Lexicon(Splitter(ZopeSplitter, get_stopdict()))
self.engine = QueryEngine()
self.index = Index(self.lexicon, doc_attr)
self.parser = QueryParser()