[Zope-CVS] CVS: Products/ZCTextIndex - IPipelineElement.py:1.1.2.1 Lexicon.py:1.1.2.1

Barry Warsaw barry@wooz.org
Wed, 1 May 2002 10:56:37 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv12384

Added Files:
      Tag: TextIndexDS9-branch
	IPipelineElement.py Lexicon.py 
Log Message:
The interfaces for pipeline elements and the lexicon.

An implementation of the ILexicon.

Test cases.


=== Added File Products/ZCTextIndex/IPipelineElement.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################
"""Interface for text-processing pipeline elements.

Revision information:
$Id: IPipelineElement.py,v 1.1.2.1 2002/05/01 14:56:36 bwarsaw Exp $
"""

from Interface import Base as Interface

class IPipelineElement(Interface):
    """Interface for one step of a text-processing pipeline.

    An object providing this interface is callable: it is handed a
    sequence of words and hands back a sequence of words.
    """

    # NOTE(review): the signature has no 'self'.  Presumably this follows
    # the Interface package's convention of declaring methods the way
    # callers invoke them -- confirm against the Interface documentation.
    def __call__(source):
        """Provide a text processing step.

        Process a source sequence of words into a result sequence.

        source -- the sequence of words to process.

        Returns the resulting sequence of words.
        """


=== Added File Products/ZCTextIndex/Lexicon.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################
"""Lexicon mapping words to integer word ids.

Revision information:
$Id: Lexicon.py,v 1.1.2.1 2002/05/01 14:56:36 bwarsaw Exp $
"""

from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree

class Lexicon:
    """Assign integer word ids (wids) to the words found in text.

    Text is first broken into words by the splitter, then passed through
    each pipeline element in order.  Two mappings are maintained: word to
    wid, and wid to word.
    """

    def __init__(self, splitter, pipeline=()):
        """Create an empty lexicon.

        splitter -- callable taking a text and returning a sequence of
                    words.
        pipeline -- sequence of callables; each is called with the word
                    sequence produced by the previous step and returns a
                    new word sequence.
        """
        self.__splitter = splitter
        self.__pipeline = pipeline
        # word -> wid
        self.__wids = OIBTree()
        # wid -> word
        self.__words = IOBTree()
        # Despite its name, this holds the next wid to hand out.
        # XXX we're reserving wid 0, but that might be yagni
        self.__lastwid = 1

    def sourceToWordIds(self, text):
        """Return the list of wids for the words in text.

        Words not yet in the lexicon are added and given a new wid.
        """
        return [self._getWordIdCreate(word)
                for word in self.__run_pipeline(text)]

    def termToWordIds(self, text):
        """Return the list of wids for the already-known words in text.

        Words that are not in the lexicon are skipped; nothing is added.
        """
        lookup = self.__wids.get
        found = []
        for word in self.__run_pipeline(text):
            wid = lookup(word)
            if wid is None:
                continue
            found.append(wid)
        return found

    def _getWordIdCreate(self, word):
        """Return the wid for word, registering the word if it is new."""
        known = self.__wids.get(word)
        if known is not None:
            return known
        fresh = self.__new_wid()
        self.__wids[word] = fresh
        self.__words[fresh] = word
        return fresh

    def __new_wid(self):
        """Return the next unused wid and advance the counter."""
        allocated = self.__lastwid
        self.__lastwid = allocated + 1
        return allocated

    def __run_pipeline(self, text):
        """Split text and feed the words through every pipeline element."""
        words = self.__splitter(text)
        for step in self.__pipeline:
            words = step(words)
        return words