[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseProximityLexicon.py:1.1.2.1 ProximityLexicon.py:1.1.2.1
Andreas Jung
andreas@zope.com
Sat, 5 Jan 2002 13:32:39 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv16126
Added Files:
Tag: ajung-textindexng-branch
BaseProximityLexicon.py ProximityLexicon.py
Log Message:
added
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseProximityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" This is the base class for a Zope lexicon that
supports storage of strings based on their encoding using a
proximity algorithm (metaphone, soundex).
"""
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet
from Products.PluginIndexes.TextIndex.randid import randid
from types import StringType
import Proximity
class BaseProximityLexicon:
    """Map phonetically encoded words to integer word ids and back.

    Every word is reduced to a code by a proximity algorithm
    ('metaphone' or 'soundex', both taken from the Proximity module)
    before it is stored, so all words sharing a code share one word id.

    Instance state:

      _lexicon     -- OIBTree mapping encoded word -> word id
      _inverseLex  -- IOBTree mapping word id -> encoded word
      _algorithm   -- name of the proximity algorithm in use
      _v_proximity -- cached encoder callable for '_algorithm'
    """

    # Default for older objects that were stored before '_algorithm' was
    # recorded: resolving it raises the usual "unsupported proximity
    # algorithm" ValueError instead of an AttributeError.
    _algorithm = None

    # NOTE: methods that had no docstring are documented with comments
    # only.  Zope's publisher is documented to expose only objects that
    # carry a docstring, so adding one could change what is reachable
    # through the web.

    def __init__(self, algorithm):
        # 'algorithm' must be 'metaphone' or 'soundex'; anything else
        # raises ValueError before any state is created.
        self._v_proximity = self._resolveProximity(algorithm)
        # The name is stored in a regular attribute because '_v_'
        # attributes are volatile for Persistent subclasses (they are not
        # saved with the object), see _encode().
        self._algorithm = algorithm
        self.clear()

    def _resolveProximity(self, algorithm):
        """Return the Proximity encoder for 'algorithm'.

        Raises ValueError for anything but 'metaphone' and 'soundex'.
        """
        if algorithm == 'metaphone':
            return Proximity.metaphone
        if algorithm == 'soundex':
            return Proximity.soundex
        raise ValueError('unsupported proximity algorithm "%s"' % algorithm)

    def _encode(self, word):
        """Return the proximity code of 'word'.

        The encoder is looked up again from '_algorithm' whenever the
        cached '_v_proximity' is missing, e.g. after the object has been
        reloaded from the object database.
        """
        encoder = getattr(self, '_v_proximity', None)
        if encoder is None:
            encoder = self._resolveProximity(self._algorithm)
            self._v_proximity = encoder
        return encoder(word)

    def clear(self):
        # Drop all words by installing fresh, empty BTrees.
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def getWordId(self, word):
        """ return the word id of 'word', assigning a new id if needed """
        wid = self._lexicon.get(self._encode(word), None)
        if wid is None:
            # Hand over the *raw* word: assignWordId() does the encoding
            # itself.  Passing the code would encode it a second time and
            # store the word under a key that lookups never use.
            wid = self.assignWordId(word)
        return wid

    # Alias for getWordId.
    set = getWordId

    def getWord(self, wid):
        """ return the encoded word stored for 'wid', or None if unknown """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it.

        'word' is the raw (unencoded) word.  If its code is already in
        the lexicon the existing id is returned instead of a new one.
        """
        code = self._encode(word)

        # First make sure it's not already in there
        known = self._lexicon.get(code, None)
        if known is not None:
            return known

        # Draw random ids until one is free; insert() is expected to
        # return a false value when the id is already taken.
        inverse = self._inverseLex
        wid = randid()
        while not inverse.insert(wid, code):
            wid = randid()

        if isinstance(code, StringType):
            # Plain strings are interned before being used as keys.
            self._lexicon[intern(code)] = wid
        else:
            self._lexicon[code] = wid
        return wid

    def get(self, key, default=None):
        """Return an IISet holding the word id stored under 'key'.

        The set is empty when 'key' is unknown and 'default' is None; a
        non-None 'default' is inserted into the set instead.

        NOTE(review): 'key' is looked up verbatim, it is not run through
        the proximity encoder -- callers presumably pass an encoded word;
        confirm against the index code that uses this lexicon.
        """
        result = IISet()
        wid = self._lexicon.get(key, default)
        if wid is not None:
            result.insert(wid)
        return result

    def __getitem__(self, key):
        # lexicon[key] is get(key): an unknown key yields an empty IISet
        # rather than a KeyError.
        return self.get(key)

    def __len__(self):
        # Number of distinct encoded words.
        return len(self._lexicon)

    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q
def test():
    """Smoke test: print word, word id and stored code for a sample
    sentence, once per supported proximity algorithm."""
    sentence = 'the quick brown fox jumps over the lazy dog brown fox'
    for algorithm in ('metaphone', 'soundex'):
        print('Algorithm: %s' % algorithm)
        lexicon = BaseProximityLexicon(algorithm)
        for token in sentence.split():
            wid = lexicon.getWordId(token)
            # getWord() gives back what was stored for the id
            print('%s %s %s' % (token, wid, lexicon.getWord(wid)))


if __name__ == '__main__':
    test()
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/ProximityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" same as BaseProximityLexicon but usable for Zope """
from Persistence import Persistent
from Acquisition import Implicit
from BaseProximityLexicon import BaseProximityLexicon
class ProximityLexicon(BaseProximityLexicon, Persistent, Implicit):
    # BaseProximityLexicon combined with the Persistent and Implicit
    # (acquisition) base classes; it adds no behavior of its own.
    # NOTE(review): documented with a comment rather than a docstring on
    # purpose -- Zope's publisher is documented to expose only objects
    # that have a docstring, so adding one could change web visibility.
    pass