[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseProximityLexicon.py:1.1.2.1 ProximityLexicon.py:1.1.2.1

Andreas Jung andreas@zope.com
Sat, 5 Jan 2002 13:32:39 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv16126

Added Files:
      Tag: ajung-textindexng-branch
	BaseProximityLexicon.py ProximityLexicon.py 
Log Message:
added


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseProximityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################

__doc__=""" This is the base class for a Zope lexicon that
supports storage of strings based on their encoding using a 
proximity algorithm (metaphone, soundex).
"""

from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet

from Products.PluginIndexes.TextIndex.randid import randid
from types import StringType

import Proximity

class BaseProximityLexicon:

    """Maps proximity-encoded words to word ids and back.

    Every word handed to getWordId() or assignWordId() is first reduced
    to its proximity code (metaphone or soundex, selected when the
    lexicon is created).  The code -- not the original word -- is what
    is stored: '_lexicon' maps code -> word id and '_inverseLex' maps
    word id -> code, so all words sharing a code share one word id.

    """

    # default for older objects (and for instances built without
    # __init__): no algorithm name has been recorded yet
    _algorithm = None

    def __init__(self, algorithm):
        """ Create an empty lexicon using the proximity 'algorithm'.

        'algorithm' must be 'metaphone' or 'soundex'; any other value
        raises ValueError.
        """

        # Resolve first so that an unsupported name raises before any
        # state has been set on the instance.
        proximity = self._lookupProximity(algorithm)

        # '_v_' attributes are volatile under ZODB persistence (they are
        # not pickled), so a Persistent subclass loses '_v_proximity'
        # when it is reloaded.  The algorithm name is kept in a regular
        # attribute so that _encode() can restore the function.
        self._algorithm = algorithm
        self._v_proximity = proximity

        self.clear()


    def _lookupProximity(self, algorithm):
        """ return the encoding function from the Proximity module for
            'algorithm'; raises ValueError for unsupported names """

        if algorithm=='metaphone':
            return Proximity.metaphone

        elif algorithm=='soundex':
            return Proximity.soundex

        raise ValueError('unsupported proximity algorithm "%s"'
                         % (algorithm,))


    def _encode(self, word):
        """ return the proximity code of 'word' """

        proximity = getattr(self, '_v_proximity', None)

        if proximity is None:
            # Volatile attribute is missing: rebuild it from the stored
            # algorithm name (ValueError if no usable name is stored).
            proximity = self._lookupProximity(self._algorithm)
            self._v_proximity = proximity

        return proximity(word)


    def clear(self):
        """ drop all entries by installing fresh, empty mappings """
        self._lexicon    = OIBTree()
        self._inverseLex = IOBTree()


    def getWordId(self, word):
        """ return the word id of 'word', assigning a new id when the
            proximity code of 'word' is not in the lexicon yet """

        code = self._encode(word)

        wid = self._lexicon.get(code, None)

        if wid is None:
            # Hand over the code, not the word: encoding a second time
            # would file the entry under a different key than the one
            # looked up above.
            wid = self._assignEncoded(code)

        return wid

    set = getWordId


    def getWord(self, wid):
        """ return the stored (encoded) word for 'wid' from the inverse
            lexicon, or None if 'wid' is unknown """
        return self._inverseLex.get(wid, None)


    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it.

        'word' is the plain word; it is encoded here.  If its code is
        already in the lexicon the existing word id is returned.
        """

        return self._assignEncoded(self._encode(word))


    def _assignEncoded(self, code):
        """ return the word id for the already encoded word 'code',
            creating and storing a new id if there is none """

        # First make sure it's not already in there
        wid = self._lexicon.get(code, None)
        if wid is not None:
            return wid

        inverse = self._inverseLex

        # insert() reports failure when the key is already taken, so
        # keep drawing random ids until an unused one is found.
        wid = randid()
        while not inverse.insert(wid, code):
            wid = randid()

        # intern() accepts plain strings only
        if isinstance(code, StringType):
            self._lexicon[intern(code)] = wid
        else:
            self._lexicon[code] = wid

        return wid


    def get(self, key, default=None):
        """Return the matched word against the key.

        The result is an IISet holding the word id stored under 'key',
        or holding 'default' when 'key' is missing and 'default' is not
        None; otherwise the set is empty.
        """

        # NOTE(review): 'key' is looked up as given, without proximity
        # encoding, while the lexicon is keyed by encoded words -- confirm
        # that callers pass encoded keys.
        r=IISet()
        wid=self._lexicon.get(key, default)
        if wid is not None: r.insert(wid)
        return r


    def __getitem__(self, key):
        """ same as get(key) """
        return self.get(key)


    def __len__(self):
        """ number of distinct encoded words in the lexicon """
        return len(self._lexicon)


    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q


def test():
    """ Smoke test: for each supported algorithm build a lexicon, feed
        it a sample sentence and print every word together with its
        word id and the value stored for that id. """

    sample = 'the quick brown fox jumps over the '\
             'lazy dog brown fox'
    tokens = sample.split()

    for algorithm in ('metaphone','soundex'):
        print('Algorithm: %s' % algorithm)

        lexicon = BaseProximityLexicon(algorithm)

        for token in tokens:
            wid = lexicon.getWordId(token)
            stored = lexicon.getWord(wid)
            print('%s %s %s' % (token, wid, stored))


# Run the smoke test when this module is executed as a script.
if __name__ == '__main__':
    test()


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/ProximityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################

__doc__=""" same as BaseProximityLexicon but usable for Zope """

from Persistence import Persistent
from Acquisition import Implicit

from BaseProximityLexicon import BaseProximityLexicon


# Zope flavour of the lexicon: it defines no behaviour of its own and
# only mixes the Persistent and Implicit base classes into
# BaseProximityLexicon.
class ProximityLexicon(BaseProximityLexicon, Persistent, Implicit):
    pass