[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseStopWords.py:1.1.2.1 BaseThesaurus.py:1.1.2.1 BaseProximityLexicon.py:1.1.2.2 StopWords.py:1.1.2.2 TextIndexNG.py:1.2.2.5

Andreas Jung andreas@zope.com
Sun, 6 Jan 2002 11:13:55 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv2663

Modified Files:
      Tag: ajung-textindexng-branch
	BaseProximityLexicon.py StopWords.py TextIndexNG.py 
Added Files:
      Tag: ajung-textindexng-branch
	BaseStopWords.py BaseThesaurus.py 
Log Message:



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseStopWords.py ===
##############################################################################
# 
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
#############################################################################

from types import DictType, StringType
from defaultStopWords import stop_word_dict


class BaseStopWords:
    """ Read-only, dictionary-like container for a set of stop words.

        The stop words are kept as the keys of the private dictionary
        '_words'. The usual dictionary accessors (keys, values, items,
        has_key, get) and len() are forwarded to that dictionary.
    """

    def __init__(self, words=stop_word_dict):
        """ Initialize the stop word container.

            'words' is either a dictionary whose keys are the stop words
            (a shallow copy is stored, so the caller's dictionary is not
            shared) or a string holding the name of a file that is parsed
            by readStopWords().

            Raises ValueError if 'words' is neither a dictionary nor a
            string.
        """

        if isinstance(words, DictType):
            self._words = words.copy()

        elif isinstance(words, StringType):
            self._words = self.readStopWords(words)

        else:
            raise ValueError('words must be a dictionary or a filename, '
                             'got %s' % repr(words))


    def __len__(self):
        """ Return the number of stop words. """
        return len(self._words)


    # The accessors below are real methods that look up self._words on
    # every call, instead of bound methods of the dictionary stored as
    # instance attributes. This keeps the instance __dict__ limited to
    # plain data.
    # NOTE(review): the Persistent subclass StopWords presumably has its
    # instance __dict__ pickled, and built-in bound methods are not
    # expected to pickle under Python 2 -- confirm.

    def keys(self):
        """ Return the list of stop words. """
        return self._words.keys()

    def values(self):
        """ Return the values stored for the stop words. """
        return self._words.values()

    def items(self):
        """ Return the (word, value) pairs. """
        return self._words.items()

    def has_key(self, key):
        """ Return true if 'key' is a stop word. """
        return self._words.has_key(key)

    def get(self, key, default=None):
        """ Return the value stored for 'key' or 'default' if 'key' is
            not a stop word.
        """
        return self._words.get(key, default)


    def readStopWords(self, fname):
        """ Parse the file 'fname' containing one stop word per line.

            Leading and trailing whitespace is stripped from every line
            and empty lines are ignored. Returns a new dictionary that
            maps every stop word to None.
        """

        d = {}

        fp = open(fname)
        try:
            lines = fp.readlines()
        finally:
            # always release the file handle, even if reading fails
            fp.close()

        for l in lines:

            l = l.strip()
            if l:
                d[ l ] = None

        return d





=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseThesaurus.py ===
##############################################################################
# 
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
#############################################################################

from types import DictType, StringType
from BTrees.OOBTree import OOBTree

class BaseThesaurus:
    """ Simple thesaurus mapping a term to a list of related terms.

        The mapping is kept in the private attribute '_thesaurus', which
        clear() sets to an OOBTree. The usual mapping accessors (keys,
        values, items, has_key, get) and len() are forwarded to it.
    """

    def __init__(self, filename):
        """ Create the thesaurus and fill it from the file 'filename'
            (see readThesaurus() for the expected format).
        """

        self.clear()
        self.readThesaurus(filename)


    def clear(self):
        """ Drop all entries by starting with a fresh, empty OOBTree. """
        self._thesaurus = OOBTree()

    def __len__(self):
        """ Return the number of terms in the thesaurus. """
        return len(self._thesaurus)


    # The accessors below look up self._thesaurus on every call. They
    # must not be cached as bound methods on the instance: clear() (and
    # therefore readThesaurus() without 'append') replaces
    # self._thesaurus, so cached bound methods would keep answering from
    # the old, discarded mapping.

    def keys(self, *args, **kw):
        """ Return the terms; arguments are passed through unchanged to
            the keys() method of the underlying mapping.
        """
        return self._thesaurus.keys(*args, **kw)

    def values(self, *args, **kw):
        """ Return the lists of related terms; arguments are passed
            through unchanged to the underlying mapping.
        """
        return self._thesaurus.values(*args, **kw)

    def items(self, *args, **kw):
        """ Return the (term, related terms) pairs; arguments are passed
            through unchanged to the underlying mapping.
        """
        return self._thesaurus.items(*args, **kw)

    def has_key(self, key):
        """ Return true if 'key' is a term of the thesaurus. """
        return self._thesaurus.has_key(key)

    def get(self, key, default=None):
        """ Return the related terms of 'key' or 'default' if 'key' is
            not in the thesaurus.
        """
        return self._thesaurus.get(key, default)


    def readThesaurus(self, fname,append=0):
        """ Just a lame parser for a line-by-line thesaurus. Different
            thesaurus formats require a different parser.

            Every line has the form

                term related1, related2, ...

            The term is separated from the comma-separated related terms
            by the first blank. Lines without such a blank (including
            empty lines) are skipped. A term that occurs more than once
            keeps the related terms of its last occurrence.

            Unless 'append' is true the current entries are dropped
            before the file is read.
        """

        if not append: self.clear()

        fp = open(fname)
        try:
            lines = fp.readlines()
        finally:
            # always release the file handle, even if reading fails
            fp.close()

        for l in lines:

            l = l.strip()

            fields = l.split(' ',1)
            if len(fields) != 2: continue

            key = fields[0]
            values = [ x.strip() for x in fields[1].split(',') ]

            self._thesaurus[key] = values



=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseProximityLexicon.py 1.1.2.1 => 1.1.2.2 ===
     def __init__(self, algorithm):
 
-        if algorithm=='metaphone':
-            self._v_proximity = Proximity.metaphone
-
-        elif algorithm=='soundex':
-            self._v_proximity = Proximity.soundex
-
+        if algorithm in Proximity.availableAlgorithms():
+            self._v_proximity = getattr(Proximity,algorithm)
         else:
             raise ValueError,'unsupported proximity algorithm "%s"' % algorithm
         


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/StopWords.py 1.1.2.1 => 1.1.2.2 ===
-# 
-# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
 #
+# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
+# 
 # This software is subject to the provisions of the Zope Public License,
 # Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
@@ -9,52 +9,16 @@
 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
 # FOR A PARTICULAR PURPOSE
 # 
-#############################################################################
-
-from types import DictType, StringType
-from defaultStopWords import stop_word_dict
-
-
-class StopWords:
-
-    def __init__(self, words=stop_word_dict):
-
-        if isinstance(words,DictType):
-
-            self._words = words.copy()
-
-        elif isinstance(words,StringType):
-
-            self._words = self.readStopWords(words)
-
-        else: 
-            raise ValueError
-
-        self.keys   = self._words.keys
-        self.values = self._words.values
-        self.items  = self._words.items
-        self.has_key= self._words.has_key
-        self.get    = self._words.get
-
-
-    def __len__(self):  return len(self._words)
-
-
-    def readStopWords(self, fname):
-
-        d = {}
+##############################################################################
 
-        lines = open(fname).readlines()
-        for l in lines: 
-     
-            l = l.strip()
-            if l:
-                d[ l ] = None
+__doc__=""" same as BaseProximityLexicon but usable for Zope """
 
-        return d.copy()
+from Persistence import Persistent
+from Acquisition import Implicit
 
+from BaseStopWords import BaseStopWords
 
 
+class StopWords(BaseStopWords, Persistent, Implicit):
+    pass
 
-if __name__=='__main__':
-    test()


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.4 => 1.2.2.5 ===
 
         widLst = []
-    
-        for i in range(len(words)):
-            word = words[i]
+
+        pos = 0    
+        for word in words:
 
             # stem the single word        
             if self._stemmerfunc:
@@ -219,7 +219,9 @@
 
             # and insert the wordId, its position and the documentId 
             # in the index
-            self.insertForwardEntry(wid,i,documentId)
+            self.insertForwardEntry(wid,pos,documentId)
+
+            pos+=1
 
         self.insertBackwardEntries(self,widLst,documentId)
 
@@ -485,7 +487,7 @@
                                REQUEST=None,RESPONSE=None,URL2=None):
         """ preferences of TextIndex """
 
-        for x in ('useOperator','useGlobbing',\
+        for x in ('useOperator','useGlobbing','useProximity',\
                     'useNearSearch','useSplitter','useStemmer'):
 
             changed = 0