[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseSimilarityLexicon.py:1.1.2.1 SimilarityLexicon.py:1.1.2.1

Andreas Jung andreas@digicool.com
Tue, 12 Feb 2002 20:39:09 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv28243

Added Files:
      Tag: ajung-textindexng-branch
	BaseSimilarityLexicon.py SimilarityLexicon.py 
Log Message:
renamed all 'Proximity' stuff to 'Similarity'


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseSimilarityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################

__doc__=""" This is the base class for a Zope lexicon that
supports storage of strings based on their encoding using a 
Similarity algorithm (metaphone, soundex).
"""

from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet

from Products.PluginIndexes.TextIndex.randid import randid
from types import StringType, UnicodeType

import Proximity as Similarity # we should change the Python module name

class BaseSimilarityLexicon:

    """Map similarity-encoded words to integer word ids and back.

    Every word handed to the lexicon is first run through the encoder
    function selected at construction time (one of the algorithms the
    Similarity module lists in availableAlgorithms()).  The encoder's
    result, not the original word, is the key that gets stored, so all
    words producing the same code share a single word id.

    Instance state:

      _algorithm     name of the selected encoder; a regular attribute
                     so the encoder can be looked up again on demand
      _v_Similarity  cached encoder function (see _encoder())
      _lexicon       OIBTree, encoded word -> word id
      _inverseLex    IOBTree, word id -> encoded word
    """

    def __init__(self, algorithm):
        """Create an empty lexicon using the encoder named 'algorithm'.

        Raises ValueError when 'algorithm' is not one of the names
        returned by Similarity.availableAlgorithms().
        """

        if algorithm not in Similarity.availableAlgorithms():
            raise ValueError(
                'unsupported Similarity algorithm "%s"' % algorithm)

        # Keep the name under a non-volatile attribute.  The function
        # itself is only cached under a '_v_' name, and '_v_' attributes
        # are not stored by the persistence machinery, so a persistent
        # subclass would otherwise lose the encoder after a reload.
        self._algorithm = algorithm
        self._v_Similarity = getattr(Similarity, algorithm)

        self.clear()

    def clear(self):
        """Drop all entries by installing fresh, empty BTrees."""
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _encoder(self):
        """Return the encoder function, re-resolving it when the cached
        '_v_Similarity' attribute is missing."""
        encoder = getattr(self, '_v_Similarity', None)
        if encoder is None:
            encoder = getattr(Similarity, self._algorithm)
            self._v_Similarity = encoder
        return encoder

    def _encode(self, word):
        """Return the lexicon key for 'word'.

        The word is passed to the encoder.  If the encoder raises
        TypeError and the word is a unicode string, every character is
        masked to 7 bits and the encoder is tried once more on the
        resulting plain string.  Any other input the encoder rejects
        with TypeError is returned unchanged (historical best-effort
        behaviour, kept so existing callers see no difference).
        """
        encoder = self._encoder()
        try:
            return encoder(word)
        except TypeError:
            if isinstance(word, UnicodeType):
                folded = ''.join([chr(ord(ch) & 127) for ch in word])
                return encoder(folded)
            return word

    def getWordIdList(self, words):
        """ return a list of word ids for a list of words """
        return [self.getWordId(word) for word in words]

    def getWordId(self, word):
        """Return the word id of 'word'.

        The word is encoded first; if the encoded form is not in the
        lexicon yet, a new id is assigned to it and returned.
        """
        key = self._encode(word)

        wid = self._lexicon.get(key, None)
        if wid is None:
            wid = self.assignWordId(key)

        return wid

    # 'set' is kept as a second name for getWordId.
    set = getWordId

    def getWord(self, wid):
        """Return the stored (encoded) word for 'wid', or None when the
        id is not in the inverse lexicon."""
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assign a new word id to 'word' and return it.

        'word' is used as the key exactly as given; no encoding is
        applied here.  When the key is already present its existing id
        is returned instead of a new one.
        """

        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        inverse = self._inverseLex

        # Draw random ids until insert() reports success, i.e. until an
        # id is found that is not yet used in the inverse lexicon.
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, StringType):
            # Plain strings are interned before being used as keys
            # (presumably to share storage between equal keys).
            self._lexicon[intern(word)] = wid
        else:
            self._lexicon[word] = wid

        return wid

    def get(self, word, default=None):
        """Return an IISet with the word id stored for 'word'.

        The word is encoded before the lookup.  The set is empty when
        the encoded word is unknown and 'default' is None; a non-None
        'default' is inserted into the set in place of the missing id.
        """
        key = self._encode(word)

        result = IISet()
        wid = self._lexicon.get(key, default)
        if wid is not None:
            result.insert(wid)
        return result

    def __getitem__(self, key):
        """Same as get(key): return an IISet, never raise KeyError."""
        return self.get(key)

    def __len__(self):
        """Return the number of distinct encoded words stored."""
        return len(self._lexicon)

    def query_hook(self, q):
        """Return the query 'q' unchanged; no query rewriting is done."""
        return q

    def __call__(self, word):
        """Return the raw encoder result for 'word'.

        Unlike getWordId() and get(), no unicode fallback is applied
        here; a TypeError raised by the encoder propagates.
        """
        return self._encoder()(word)



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/SimilarityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################

__doc__=""" same as BaseSimilarityLexicon but usable for Zope """

from Persistence import Persistent
from Acquisition import Implicit

from BaseSimilarityLexicon import BaseSimilarityLexicon


class SimilarityLexicon(BaseSimilarityLexicon, Persistent, Implicit):
    # Adds no behaviour of its own: this class only combines
    # BaseSimilarityLexicon with the Persistent and Implicit base classes
    # imported at the top of this module, which (per the module docstring)
    # is what makes the lexicon usable inside Zope.
    pass