[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseSimilarityLexicon.py:1.1.2.1 SimilarityLexicon.py:1.1.2.1
   
    Andreas Jung
     
    andreas@digicool.com
       
    Tue, 12 Feb 2002 20:39:09 -0500
    
    
  
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv28243
Added Files:
      Tag: ajung-textindexng-branch
	BaseSimilarityLexicon.py SimilarityLexicon.py 
Log Message:
renamed all 'Proximity' stuff to 'Similarity'
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseSimilarityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################
# Module docstring, assigned explicitly to __doc__.
__doc__=""" This is the base class for a Zope lexicon that
supports storage of strings based on their encoding using a 
Similarity algorithm (metaphone, soundex).
"""
# OIBTree backs the encoded-word -> word-id mapping (the lexicon proper).
from BTrees.OIBTree import OIBTree
# IOBTree backs the inverse mapping, word-id -> encoded word.
from BTrees.IOBTree import IOBTree
# IISet is the container type returned by BaseSimilarityLexicon.get().
from BTrees.IIBTree import IISet
# randid() supplies candidate word ids when a new word is registered.
from Products.PluginIndexes.TextIndex.randid import randid
from types import StringType, UnicodeType
# The encoder functions are looked up by name on this module.
# TODO: the Python module itself is still called 'Proximity' and should
# be renamed to match.
import Proximity as Similarity # we should change the Python module name
class BaseSimilarityLexicon:
    """Lexicon that stores words under their Similarity encoding.

    Every word is first passed through the configured Similarity
    algorithm (one of ``Similarity.availableAlgorithms()``).  The
    resulting code -- not the word itself -- is the key that receives a
    word id, so all words that encode identically share one word id.

    ``_lexicon`` maps encoded word -> word id and ``_inverseLex`` maps
    word id -> encoded word.
    """

    def __init__(self, algorithm):
        """Create an empty lexicon using the named Similarity algorithm.

        algorithm -- name of a function in the Similarity module; it must
                     be listed by Similarity.availableAlgorithms().

        Raises ValueError when the algorithm is not available.
        """
        if algorithm not in Similarity.availableAlgorithms():
            raise ValueError(
                'unsupported Similarity algorithm "%s"' % (algorithm,))

        # The name is kept in a regular attribute so that the encoder can
        # be looked up again: '_v_' attributes are volatile for Persistent
        # subclasses and are gone once the object is reloaded.
        self._algorithm = algorithm
        self._v_Similarity = getattr(Similarity, algorithm)

        self.clear()

    # NOTE(review): left without a docstring, as in the original.
    # ZPublisher requires a docstring before it publishes a method over
    # the web -- confirm the security setup before adding one here.
    def clear(self):
        # Replace both mappings with fresh, empty BTrees.
        self._lexicon    = OIBTree()
        self._inverseLex = IOBTree()

    def _getSimilarity(self):
        """Return the encoder function, re-resolving it if it was lost."""
        encoder = getattr(self, '_v_Similarity', None)
        if encoder is None:
            # Volatile cache is empty: look the function up by its
            # stored name and cache it again.
            encoder = getattr(Similarity, self._algorithm)
            self._v_Similarity = encoder
        return encoder

    def _encode(self, word):
        """Return the Similarity encoding of 'word'.

        If the encoder rejects the word with a TypeError and the word is
        a unicode string, every character is masked down to 7 bits and
        the encoding is retried on the resulting plain string.  Any other
        TypeError is re-raised so that an unencoded value never ends up
        as a lexicon key.
        """
        encoder = self._getSimilarity()
        try:
            return encoder(word)
        except TypeError:
            if not isinstance(word, UnicodeType):
                raise
            masked = ''.join([chr(ord(x) & 127) for x in word])
            return encoder(masked)

    def getWordIdList(self, words):
        """ return a list of wordIds for a list of words """
        return [self.getWordId(word) for word in words]

    def getWordId(self, word):
        """ return the word id of 'word', assigning a new one if the
        encoded word is not in the lexicon yet """
        word = self._encode(word)

        wid = self._lexicon.get(word, None)
        if wid is None:
            wid = self.assignWordId(word)
        return wid

    # 'set' is an alias of getWordId.
    set = getWordId

    def getWord(self, wid):
        """ return word from inverse lexicon by its wordId
        (None if the id is unknown) """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it.

        'word' is stored as given; it is not encoded here.  If the word
        already has an id, that id is returned unchanged.
        """
        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        inverse = self._inverseLex

        # insert() returns a false value when the id is already taken,
        # so keep drawing random ids until one is free.
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, StringType):
            # plain strings are interned before being used as keys
            self._lexicon[intern(word)] = wid
        else:
            self._lexicon[word] = wid

        return wid

    def get(self, word, default=None):
        """Return an IISet with the word id matching 'word'.

        The word is encoded first.  When the encoded word is unknown,
        'default' takes the place of the word id; nothing is inserted
        into the result if that value is None.
        """
        word = self._encode(word)

        r = IISet()
        wid = self._lexicon.get(word, default)
        if wid is not None:
            r.insert(wid)
        return r

    def __getitem__(self, key):
        """Same as get(key)."""
        return self.get(key)

    def __len__(self):
        """Number of entries in the lexicon."""
        return len(self._lexicon)

    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q

    def __call__(self, word):
        """Return the Similarity encoding of 'word' without storing it."""
        return self._encode(word)
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/SimilarityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################
# Module docstring, assigned explicitly to __doc__.
__doc__=""" same as BaseSimilarityLexicon but usable for Zope """
# Mix-in base classes used by SimilarityLexicon below.
from Persistence import Persistent
from Acquisition import Implicit
# The plain lexicon implementation that the Zope class wraps.
from BaseSimilarityLexicon import BaseSimilarityLexicon
# Zope flavour of the lexicon: BaseSimilarityLexicon combined with the
# Persistent and Implicit mix-ins.  The class adds no attributes or
# methods of its own; all lexicon behaviour comes from the base class.
class SimilarityLexicon(BaseSimilarityLexicon, Persistent, Implicit):
    pass