[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.43

Andreas Jung andreas@digicool.com
Sun, 24 Feb 2002 14:43:17 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv31987/lib/python/Products/PluginIndexes/TextIndexNG

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
- NEAR search now works with stemmer support enabled
- minor code cleanup
- added more tests


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.42 => 1.2.2.43 ===
         self.createShortcuts()
         
-        # HACK !
-        # We store references to the object for testing purposes
-        # only. A later implementation must be more clever
-
-        #self.__OBJECTS[documentId] = obj
-
         T = Timer(self.timed_statistics)
 
         try:
@@ -452,7 +446,6 @@
         # over every single word (overhead).
 
 
-
         widLst = self._v_getWordIdList(words)
         assert len(widLst)==len(words)
         T("Widlist")
@@ -680,7 +673,6 @@
     def txNear(self, *sets, **kw):
         """ perform near search on results sets """
 
-
         distance = kw.get('distance',self.nearDistance)
         bidirectional = kw.get('bidirectional',1)
         
@@ -694,7 +686,7 @@
         r = self.txIntersection(*sets)
         docIds = r.docIds()
 
-        debug('txNear (%s)' % str(kw))
+        debug('txNear (kw=%s)' % str(kw))
 
         # Now we determine for every document the positions of all
         # the words inside the document. Then we compare all the positions
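
The comparison hinted at above is not part of this hunk. As a rough
sketch only: assuming each query word has been mapped to a sorted list
of its positions in the document, a check honouring the `distance` and
`bidirectional` keywords could look like the following (the helper name
near_match and its exact semantics are illustrative assumptions, not
the code in TextIndexNG.py):

    def near_match(pos_lists, distance=5, bidirectional=1):
        """ return 1 if every pair of consecutive query words has an
            occurrence of the second word within `distance` positions
            of the first (in either direction if bidirectional)
        """
        for i in range(len(pos_lists) - 1):
            first, second = pos_lists[i], pos_lists[i + 1]
            found = 0
            for p1 in first:
                for p2 in second:
                    delta = p2 - p1
                    if not bidirectional and delta < 0:
                        continue
                    if abs(delta) <= distance:
                        found = 1
                        break
                if found:
                    break
            if not found:
                return 0
        return 1

    # 'quick' at positions [3, 17], 'fox' at [5, 40], distance 3:
    # positions 3 and 5 are close enough, so the document matches.
    near_match([[3, 17], [5, 40]], distance=3)    # -> 1
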
@@ -767,8 +759,6 @@
     def getDataFromObject(self, docId): 
         """ get data from a object (used for near search) """
 
-        # HACK: we must ask the catalog for the path for a given 
-        # docId :-(
         path = self.catalog.paths[docId]
 
         # retrieve the object
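
The retrieval itself is cut off by the hunk, but the idea is: look up
the path the catalog recorded for the docId, traverse to the object and
pull the indexed text out of it. A minimal sketch, assuming an
unrestrictedTraverse-style lookup is available and that the indexed
attribute is either a string or a callable returning one (both of these
are assumptions, not the real method body):

    def getDataFromObject(self, docId):
        """ get data from an object (used for near search) """
        path = self.catalog.paths[docId]

        # resolve the cataloged path back to the object
        # (assumption: the catalog supports unrestrictedTraverse)
        obj = self.catalog.unrestrictedTraverse(path)

        # fetch the indexed attribute; call it if it is a method
        data = getattr(obj, self.id)
        if callable(data):
            data = data()

        return str(data)
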
@@ -793,20 +783,18 @@
         res = []
 
         # obtain object from ZCatalog
-        # THis is a bad hack !
         data = self.getDataFromObject( docId )         
 
         # Split retrieved document and obtain list of word positions
 
-        SP = Splitter.getSplitter( self.useSplitter )( data )
+        word_lst = Splitter.getSplitter( self.useSplitter )( data ).split()
 
-        for word in words:
+        # apply stemmer if necessary
+        if self.useStemmer:
+            word_lst = Stemmer.Stemmer(self.useStemmer).stem(word_lst)
 
-            # TODO: this only works with disabled stemmer support
-            # For enabled stemmer support we must stem all words
-            # from the splitter run and search for the stemed word !
-            
-            posLst = SP.indexes(word)        
+        for word in words:
+            posLst = indexsupport.listIndexes(word_lst, word)        
             res.append( (word, IISet(posLst)) )
 
         return res
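
The position lookup now goes through indexsupport.listIndexes instead
of the splitter's own indexes() method, so the same (possibly stemmed)
word list is used both for splitting and for the position search. A
pure-Python stand-in for that lookup, with made-up example data, might
look like this (the name list_indexes and the sample words are
illustrative only):

    def list_indexes(word_lst, word):
        """ return all positions at which word occurs in word_lst """
        res = []
        for pos in range(len(word_lst)):
            if word_lst[pos] == word:
                res.append(pos)
        return res

    # With stemming enabled the document words and the query words are
    # both stemmed, so e.g. 'running' in the source text is reduced to
    # 'run' before the position lookup and can match the query term.
    word_lst = ['the', 'fox', 'run', 'fast']    # already stemmed
    list_indexes(word_lst, 'run')               # -> [2]
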