[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - BaseSimilarityLexicon.py:1.1.2.1 SimilarityLexicon.py:1.1.2.1
Andreas Jung
andreas@digicool.com
Tue, 12 Feb 2002 20:39:09 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv28243
Added Files:
Tag: ajung-textindexng-branch
BaseSimilarityLexicon.py SimilarityLexicon.py
Log Message:
renamed all 'Proximity' stuff to 'Similarity'
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseSimilarityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" This is the base class for a Zope lexicon that
supports storage of strings based on their encoding using a
Similarity algorithm (metaphone, soundex).
"""
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet
from Products.PluginIndexes.TextIndex.randid import randid
from types import StringType, UnicodeType
import Proximity as Similarity # we should change the Python module name
class BaseSimilarityLexicon:
    """Lexicon that files words under their similarity encoding.

    Every word handed to the lexicon is first run through the encoding
    function selected at construction time (looked up by name on the
    'Similarity' module).  The encoded form, not the raw word, is what
    gets mapped to a word id, so words with the same encoding share one
    word id.

    Instance attributes:

      _lexicon      -- OIBTree, encoded word -> word id
      _inverseLex   -- IOBTree, word id -> encoded word
      _algorithm    -- name of the encoding function on 'Similarity'
      _v_Similarity -- the encoding function itself
    """

    def __init__(self, algorithm):
        """Create an empty lexicon that uses the named algorithm.

        algorithm -- name of an encoding function; it must be listed by
                     Similarity.availableAlgorithms().

        Raises ValueError when the name is not listed.
        """
        if algorithm not in Similarity.availableAlgorithms():
            raise ValueError(
                'unsupported Similarity algorithm "%s"' % (algorithm,))

        # Keep the name next to the function so that the function can be
        # looked up again later (see _encoder).
        self._algorithm = algorithm
        self._v_Similarity = getattr(Similarity, algorithm)
        self.clear()

    def clear(self):
        """Throw away all stored words by installing fresh, empty trees."""
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _encoder(self):
        """Return the encoding function, looking it up again if missing.

        Raises AttributeError when the function is missing and the
        instance never recorded an algorithm name.
        """
        encoder = getattr(self, '_v_Similarity', None)
        if encoder is None:
            # NOTE(review): ZODB does not store attributes whose name
            # starts with '_v_', so an instance of a persistent subclass
            # that was loaded from the database presumably arrives here
            # without one -- confirm.  Resolve it from the recorded name.
            encoder = getattr(Similarity, self._algorithm)
            self._v_Similarity = encoder
        return encoder

    def _encode(self, word):
        """Return the similarity encoding of 'word'.

        When the encoding function raises TypeError for a unicode word,
        the word is reduced to a plain string by masking every code
        point to its low 7 bits and encoded again.  A TypeError for any
        other kind of input is raised again by the second attempt.
        """
        encoder = self._encoder()
        try:
            return encoder(word)
        except TypeError:
            if isinstance(word, UnicodeType):
                word = ''.join([chr(ord(x) & 127) for x in word])
            return encoder(word)

    def getWordIdList(self, words):
        """ return a list of wordIds for a list of words """
        return [self.getWordId(word) for word in words]

    def getWordId(self, word):
        """ return the word id of 'word'

        The word is encoded first; an encoding that is not in the
        lexicon yet gets a new word id assigned.
        """
        word = self._encode(word)
        wid = self._lexicon.get(word, None)
        if wid is None:
            wid = self.assignWordId(word)
        return wid

    # 'set' is a second name for getWordId.
    set = getWordId

    def getWord(self, wid):
        """ return word from inverse lexicon by its wordId

        Returns None for an unknown word id.  The stored value is the
        encoded form that was passed to assignWordId().
        """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it.

        'word' is stored exactly as given; no encoding is applied here.
        When the word is already in the lexicon its existing id is
        returned instead.
        """
        # First make sure it's not already in there
        wid = self._lexicon.get(word, None)
        if wid is not None:
            return wid

        # insert() reports failure when the id is already taken, so keep
        # drawing random ids until one is accepted.
        inverse = self._inverseLex
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, StringType):
            word = intern(word)
        self._lexicon[word] = wid
        return wid

    def get(self, word, default=None):
        """Return an IISet holding the word id stored for 'word'.

        The word is encoded before the lookup.  The set is empty when
        the encoding is unknown and 'default' is None; a 'default' other
        than None is itself inserted into the set in that case.
        """
        word = self._encode(word)
        r = IISet()
        wid = self._lexicon.get(word, default)
        if wid is not None:
            r.insert(wid)
        return r

    def __getitem__(self, key):
        """Same as get(key)."""
        return self.get(key)

    def __len__(self):
        """Return the number of distinct encodings stored."""
        return len(self._lexicon)

    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q

    def __call__(self, word):
        """Return the raw encoding of 'word' (no unicode fallback)."""
        return self._encoder()(word)
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/SimilarityLexicon.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" same as BaseSimilarityLexicon but usable for Zope """
from Persistence import Persistent
from Acquisition import Implicit
from BaseSimilarityLexicon import BaseSimilarityLexicon
class SimilarityLexicon(BaseSimilarityLexicon, Persistent, Implicit):
    # Zope flavour of BaseSimilarityLexicon: the same lexicon combined
    # with the Persistent and Implicit (acquisition) base classes.
    # It adds no attributes or methods of its own.
    pass