[Zope-CVS] CVS: Products/ZCTextIndex - ZCTextIndex.py:1.1.2.9 Splitter.py:NONE
Guido van Rossum
guido@python.org
Fri, 3 May 2002 13:10:11 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv9684
Modified Files:
Tag: TextIndexDS9-branch
ZCTextIndex.py
Removed Files:
Tag: TextIndexDS9-branch
Splitter.py
Log Message:
Don't use the cumbersome and broken ZopeSplitter.
re.findall(r"\w+") is fast enough.
=== Products/ZCTextIndex/ZCTextIndex.py 1.1.2.8 => 1.1.2.9 ===
-from Products.ZCTextIndex.Lexicon import Lexicon
-from Products.ZCTextIndex.NBest import NBest
-from Products.ZCTextIndex.QueryEngine import QueryEngine
-from Products.ZCTextIndex.QueryParser import QueryParser
-from Products.ZCTextIndex.Splitter import Splitter
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
-from Products.ZCTextIndex.StopDict import get_stopdict
+import re
-from Products.PluginIndexes.TextIndex.Splitter.ZopeSplitter.ZopeSplitter \
- import ZopeSplitter
+import ZODB
+from Persistence import Persistent
from Products.PluginIndexes.common.PluggableIndex \
import PluggableIndexInterface
-import ZODB
-from Persistence import Persistent
+from Products.ZCTextIndex.Index import Index
+from Products.ZCTextIndex.Lexicon import Lexicon
+from Products.ZCTextIndex.NBest import NBest
+from Products.ZCTextIndex.QueryEngine import QueryEngine
+from Products.ZCTextIndex.QueryParser import QueryParser
+from Products.ZCTextIndex.StopDict import get_stopdict
class ZCTextIndex(Persistent):
__implements__ = PluggableIndexInterface
def __init__(self, doc_attr="text"):
self._fieldname = doc_attr
- self.lexicon = Lexicon(Splitter(ZopeSplitter, get_stopdict(),
- index_numbers=1))
+ self.lexicon = Lexicon(Splitter(),
+ [CaseNormalizer(), StopWordRemover()])
self.engine = QueryEngine()
self.index = Index(self.lexicon)
self.parser = QueryParser()
@@ -52,3 +63,23 @@
return x()
else:
return x
+
+# Trivial pipeline elements
+
+class Splitter:
+
+ def process(self, text):
+ return re.findall(r"\w+", text)
+
+class CaseNormalizer:
+
+ def process(self, lst):
+ return [w.lower() for w in lst]
+
+class StopWordRemover:
+
+ dict = get_stopdict()
+
+ def process(self, lst):
+ d = self.dict
+ return [w for w in lst if len(w) > 1 and not d.has_key(w)]
=== Removed File Products/ZCTextIndex/Splitter.py ===