[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.43
Andreas Jung
andreas@digicool.com
Sun, 24 Feb 2002 14:43:17 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv31987/lib/python/Products/PluginIndexes/TextIndexNG
Modified Files:
Tag: ajung-textindexng-branch
TextIndexNG.py
Log Message:
- NEAR search now works when stemmer support is enabled
- minor code cleanup
- added more tests
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.42 => 1.2.2.43 ===
self.createShortcuts()
- # HACK !
- # We store references to the object for testing purposes
- # only. A later implementation must be more clever
-
- #self.__OBJECTS[documentId] = obj
-
T = Timer(self.timed_statistics)
try:
@@ -452,7 +446,6 @@
# over every single words (overhead).
-
widLst = self._v_getWordIdList(words)
assert len(widLst)==len(words)
T("Widlist")
@@ -680,7 +673,6 @@
def txNear(self, *sets, **kw):
""" perform near search on results sets """
-
distance = kw.get('distance',self.nearDistance)
bidirectional = kw.get('bidirectional',1)
@@ -694,7 +686,7 @@
r = self.txIntersection(*sets)
docIds = r.docIds()
- debug('txNear (%s)' % str(kw))
+ debug('txNear (kw=%s)' % str(kw))
# Now we determine for every document the positions of all
# the words inside the document. then we compare all the positions
@@ -767,8 +759,6 @@
def getDataFromObject(self, docId):
""" get data from a object (used for near search) """
- # HACK: we must ask the catalog for the path for a given
- # docId :-(
path = self.catalog.paths[docId]
# retrieve the object
@@ -793,20 +783,18 @@
res = []
# obtain object from ZCatalog
- # THis is a bad hack !
data = self.getDataFromObject( docId )
# Split retrieved document and obtain list of word positions
- SP = Splitter.getSplitter( self.useSplitter )( data )
+ word_lst = Splitter.getSplitter( self.useSplitter )( data ).split()
- for word in words:
+ # apply stemmer if necessary
+ if self.useStemmer:
+ word_lst = Stemmer.Stemmer(self.useStemmer).stem(word_lst)
- # TODO: this only works with disabled stemmer support
- # For enabled stemmer support we must stem all words
- # from the splitter run and search for the stemed word !
-
- posLst = SP.indexes(word)
+ for word in words:
+ posLst = indexsupport.listIndexes(word_lst, word)
res.append( (word, IISet(posLst)) )
return res