[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.55

Sun, 17 Mar 2002 12:25:07 -0500

Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv26061

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
Perform normalization at the start of the processing pipeline
for performance reasons. If normalization is disabled,
we convert the string to unicode instead.


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.54 => 1.2.2.55 ===
         try:
             encoding = getattr(obj, self.id+'_encoding')
-            if callable(encoding ):
+            if callable(encoding):
                 encoding = str(encoding())
             else:
                 encoding = str(encoding)
@@ -333,6 +333,15 @@
 
         T('converter')
 
+        # Normalization
+
+        if self._normalizer:
+            source = self._normalizer.normalize(source)    
+        else:
+            souce = unicode(source, encoding)
+ 
+        T("Normalizer")
+
         # Split the text into a list of words
 
         SP = Splitter.getSplitter(self.useSplitter)
@@ -371,19 +380,10 @@
 
         T("Stemmer")
 
-
-        # Normalization
-
-        if self._normalizer:
-            words = self._normalizer.normalize(words)    
-        
-        T("Normalizer")
-
         # We pass the list of words to the corresponding lexicon
         # and obtain a list of wordIds. The "old" TextIndex iterated
         # over every single words (overhead).
 
-
         widLst = self._v_getWordIdList(words)
         assert len(widLst)==len(words)
         T("Widlist")
@@ -403,6 +403,8 @@
         """
         
         self.WordDocStorage.removeDocument(documentId)
+        if self.useSimilarity:
+            self.SimilarityWordDocStorage.removeDocument(documentId)
        
 
     def getLexicon(self):