[Zope-CVS] CVS: Products/ZCTextIndex - IPipelineElement.py:1.1.2.1 Lexicon.py:1.1.2.1
Barry Warsaw
barry@wooz.org
Wed, 1 May 2002 10:56:37 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv12384
Added Files:
Tag: TextIndexDS9-branch
IPipelineElement.py Lexicon.py
Log Message:
The interfaces for pipeline elements, the lexicon.
An implementation of the ILexicon.
Test cases.
=== Added File Products/ZCTextIndex/IPipelineElement.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""
Revision information:
$Id: IPipelineElement.py,v 1.1.2.1 2002/05/01 14:56:36 bwarsaw Exp $
"""
from Interface import Base as Interface
class IPipelineElement(Interface):
    """Interface for one callable step in a text processing pipeline."""

    # No `self` in the signature: this declares an interface method,
    # it is not an implementation.
    def __call__(source):
        """Run this text processing step over `source`.

        `source` is a sequence of words; the return value is the
        resulting sequence of words.
        """
=== Added File Products/ZCTextIndex/Lexicon.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""
Revision information:
$Id: Lexicon.py,v 1.1.2.1 2002/05/01 14:56:36 bwarsaw Exp $
"""
from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree
class Lexicon:
    """Two-way mapping between words and integer word ids (wids).

    Text is turned into words by calling the splitter and then feeding the
    result through every pipeline element in order.  The word -> wid
    mapping lives in an OIBTree and the wid -> word mapping in an IOBTree.
    """

    def __init__(self, splitter, pipeline=()):
        """Create an empty lexicon.

        splitter -- callable; called with the text, returns an iterable
                    of words.
        pipeline -- sequence of callables; each is called with the output
                    of the previous step (the splitter for the first one)
                    and returns the input for the next step.
        """
        self.__wids = OIBTree()    # word -> wid
        self.__words = IOBTree()   # wid -> word
        # XXX we're reserving wid 0, but that might be yagni
        # Despite its name, this holds the *next* wid to hand out.
        self.__lastwid = 1
        self.__pipeline = pipeline
        self.__splitter = splitter

    def sourceToWordIds(self, text):
        """Return the list of wids for the words in `text`.

        Words not yet in the lexicon are added and given a new wid, so
        the result has one entry per word, in order.
        """
        return [self._getWordIdCreate(word)
                for word in self._applyPipeline(text)]

    def termToWordIds(self, text):
        """Return the list of wids for the words in `text` that are known.

        The lexicon is not modified; words that have no wid are left out
        of the result.
        """
        lookup = self.__wids.get
        found = [lookup(word) for word in self._applyPipeline(text)]
        return [wid for wid in found if wid is not None]

    def _applyPipeline(self, text):
        """Split `text` and pass the result through each pipeline element."""
        last = self.__splitter(text)
        for element in self.__pipeline:
            last = element(last)
        return last

    def _getWordIdCreate(self, word):
        """Return the wid for `word`, registering the word if it is new."""
        wid = self.__wids.get(word)
        if wid is None:
            wid = self.__new_wid()
            # Keep both directions of the mapping in step.
            self.__wids[word] = wid
            self.__words[wid] = word
        return wid

    def __new_wid(self):
        """Return the next unused wid and advance the counter."""
        wid = self.__lastwid
        self.__lastwid += 1
        return wid