[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.55
Andreas Jung
andreas@digicool.com
Sun, 17 Mar 2002 12:25:07 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv26061
Modified Files:
Tag: ajung-textindexng-branch
TextIndexNG.py
Log Message:
Performing normalization, for performance reasons,
at the start of the processing pipeline. If normalization is disabled,
we convert the string to unicode instead.
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.54 => 1.2.2.55 ===
try:
encoding = getattr(obj, self.id+'_encoding')
- if callable(encoding ):
+ if callable(encoding):
encoding = str(encoding())
else:
encoding = str(encoding)
@@ -333,6 +333,15 @@
T('converter')
+ # Normalization
+
+ if self._normalizer:
+ source = self._normalizer.normalize(source)
+ else:
+ souce = unicode(source, encoding)
+
+ T("Normalizer")
+
# Split the text into a list of words
SP = Splitter.getSplitter(self.useSplitter)
@@ -371,19 +380,10 @@
T("Stemmer")
-
- # Normalization
-
- if self._normalizer:
- words = self._normalizer.normalize(words)
-
- T("Normalizer")
-
# We pass the list of words to the corresponding lexicon
# and obtain a list of wordIds. The "old" TextIndex iterated
# over every single words (overhead).
-
widLst = self._v_getWordIdList(words)
assert len(widLst)==len(words)
T("Widlist")
@@ -403,6 +403,8 @@
"""
self.WordDocStorage.removeDocument(documentId)
+ if self.useSimilarity:
+ self.SimilarityWordDocStorage.removeDocument(documentId)
def getLexicon(self):