[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseStopWords.py:1.1.2.1 BaseThesaurus.py:1.1.2.1 BaseProximityLexicon.py:1.1.2.2 StopWords.py:1.1.2.2 TextIndexNG.py:1.2.2.5
Andreas Jung
andreas@zope.com
Sun, 6 Jan 2002 11:13:55 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv2663
Modified Files:
Tag: ajung-textindexng-branch
BaseProximityLexicon.py StopWords.py TextIndexNG.py
Added Files:
Tag: ajung-textindexng-branch
BaseStopWords.py BaseThesaurus.py
Log Message:
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseStopWords.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
from types import DictType, StringType
from defaultStopWords import stop_word_dict
class BaseStopWords:
def __init__(self, words=stop_word_dict):
if isinstance(words,DictType):
self._words = words.copy()
elif isinstance(words,StringType):
self._words = self.readStopWords(words)
else:
raise ValueError
self.keys = self._words.keys
self.values = self._words.values
self.items = self._words.items
self.has_key= self._words.has_key
self.get = self._words.get
def __len__(self): return len(self._words)
def readStopWords(self, fname):
d = {}
lines = open(fname).readlines()
for l in lines:
l = l.strip()
if l:
d[ l ] = None
return d.copy()
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseThesaurus.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
from types import DictType, StringType
from BTrees.OOBTree import OOBTree
class BaseThesaurus:
    """ Thesaurus that maps a key to a list of values read from a text
        file. keys(), values(), items(), has_key() and get() are
        forwarded to the underlying OOBTree.
    """

    def __init__(self, filename):
        """ Create the thesaurus from the file 'filename'. """

        self.clear()
        self.readThesaurus(filename)

    def clear(self):
        """ Throw away all entries by starting with a fresh OOBTree. """
        self._thesaurus = OOBTree()

    def __len__(self):
        return len(self._thesaurus)

    # The accessors look up self._thesaurus on every call. clear() and
    # readThesaurus() replace that object, so bound methods stored once
    # in __init__ would keep answering from the discarded tree.

    def keys(self, *args):
        return self._thesaurus.keys(*args)

    def values(self, *args):
        return self._thesaurus.values(*args)

    def items(self, *args):
        return self._thesaurus.items(*args)

    def has_key(self, key):
        return self._thesaurus.has_key(key)

    def get(self, key, default=None):
        return self._thesaurus.get(key, default)

    def readThesaurus(self, fname, append=0):
        """ Just a lame parser for a line-by-line thesaurus. Different
            thesaurus formats require a different parser.

            Every line has the form 'key value1, value2, ...': the key
            is the text up to the first blank, the rest is split at the
            commas. Lines without a second field are skipped. A key
            that occurs more than once keeps the values of its last line.

            Unless 'append' is true all existing entries are removed
            before the file is read.
        """

        if not append: self.clear()

        f = open(fname)
        try:
            lines = f.readlines()
        finally:
            # always release the file handle, even if reading fails
            f.close()

        for l in lines:
            l = l.strip()

            fields = l.split(' ', 1)
            if len(fields) != 2: continue

            key = fields[0]
            values = [ x.strip() for x in fields[1].split(',') ]

            self._thesaurus[key] = values
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseProximityLexicon.py 1.1.2.1 => 1.1.2.2 ===
def __init__(self, algorithm):
- if algorithm=='metaphone':
- self._v_proximity = Proximity.metaphone
-
- elif algorithm=='soundex':
- self._v_proximity = Proximity.soundex
-
+ if algorithm in Proximity.availableAlgorithms():
+ self._v_proximity = getattr(Proximity,algorithm)
else:
raise ValueError,'unsupported proximity algorithm "%s"' % algorithm
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/StopWords.py 1.1.2.1 => 1.1.2.2 ===
-#
-# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
+# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
@@ -9,52 +9,16 @@
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
-#############################################################################
-
-from types import DictType, StringType
-from defaultStopWords import stop_word_dict
-
-
-class StopWords:
-
- def __init__(self, words=stop_word_dict):
-
- if isinstance(words,DictType):
-
- self._words = words.copy()
-
- elif isinstance(words,StringType):
-
- self._words = self.readStopWords(words)
-
- else:
- raise ValueError
-
- self.keys = self._words.keys
- self.values = self._words.values
- self.items = self._words.items
- self.has_key= self._words.has_key
- self.get = self._words.get
-
-
- def __len__(self): return len(self._words)
-
-
- def readStopWords(self, fname):
-
- d = {}
+##############################################################################
- lines = open(fname).readlines()
- for l in lines:
-
- l = l.strip()
- if l:
- d[ l ] = None
+__doc__=""" same as BaseStopWords but usable for Zope """
- return d.copy()
+from Persistence import Persistent
+from Acquisition import Implicit
+from BaseStopWords import BaseStopWords
+class StopWords(BaseStopWords, Persistent, Implicit):
+ pass
-if __name__=='__main__':
- test()
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.4 => 1.2.2.5 ===
widLst = []
-
- for i in range(len(words)):
- word = words[i]
+
+ pos = 0
+ for word in words:
# stem the single word
if self._stemmerfunc:
@@ -219,7 +219,9 @@
# and insert the wordId, its position and the documentId
# in the index
- self.insertForwardEntry(wid,i,documentId)
+ self.insertForwardEntry(wid,pos,documentId)
+
+ pos+=1
self.insertBackwardEntries(self,widLst,documentId)
@@ -485,7 +487,7 @@
REQUEST=None,RESPONSE=None,URL2=None):
""" preferences of TextIndex """
- for x in ('useOperator','useGlobbing',\
+ for x in ('useOperator','useGlobbing','useProximity',\
'useNearSearch','useSplitter','useStemmer'):
changed = 0