[Zope-CVS] CVS: Products/ZCTextIndex - IPipelineElementFactory.py:1.1 PipelineFactory.py:1.1 HTMLSplitter.py:1.7 Lexicon.py:1.13 ZCTextIndex.py:1.20 __init__.py:1.6
Casey Duncan
casey@zope.com
Wed, 22 May 2002 13:13:09 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv28796
Modified Files:
HTMLSplitter.py Lexicon.py ZCTextIndex.py __init__.py
Added Files:
IPipelineElementFactory.py PipelineFactory.py
Log Message:
Improved Zope integration
* A pipeline factory registry now allows registration of possible
pipeline elements for use by Zope lexicons.
* ZMI constructor form for lexicon uses pipeline registry to generate form
fields
* ZMI constructor form for ZCTextIndex allows you to choose between
Okapi and Cosine relevance algorithms
=== Added File Products/ZCTextIndex/IPipelineElementFactory.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from Interface import Base as Interface
class IPipelineElementFactory(Interface):
    """Registry contract for building pipeline elements from a name."""

    def registerFactory(name, factory):
        """Register factory under the given name.

        A name may be registered only once; registering the same name
        a second time raises a ValueError.
        """

    def getFactoryNames():
        """Return the registered pipeline factory names as a sorted list."""

    def instantiate(name):
        """Instantiate the pipeline element registered under name.

        Raises a KeyError if name has not been registered.
        """
=== Added File Products/ZCTextIndex/PipelineFactory.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from Products.ZCTextIndex.IPipelineElementFactory \
import IPipelineElementFactory
class PipelineElementFactory:
    """Name-keyed registry of pipeline element factories.

    Implements IPipelineElementFactory on top of a plain dictionary that
    maps each registered name to its factory callable.
    """

    __implements__ = IPipelineElementFactory

    def __init__(self):
        # Maps registered name -> factory callable.
        self._elements = {}

    def registerFactory(self, name, factory):
        """Register factory under name.

        Raises ValueError if name is already registered; the message
        includes the offending name.
        """
        if name in self._elements:
            # Worded generically because the same class backs both the
            # splitter registry and the general element registry.
            raise ValueError(
                'ZCTextIndex pipeline element named "%s" already registered'
                % name)
        self._elements[name] = factory

    def getFactoryNames(self):
        """Return a sorted list of the registered names."""
        # Copy into a real list first so the in-place sort also works
        # where keys() returns a view rather than a list.
        names = list(self._elements.keys())
        names.sort()
        return names

    def instantiate(self, name):
        """Call the factory registered under name and return its result.

        Raises KeyError if name is not registered.
        """
        return self._elements[name]()
# Module-level registries, created once at import time and shared by every
# module that imports them. splitter_factory holds splitter classes;
# element_factory holds the other pipeline elements (for example the case
# normalizer and stop word remover).
splitter_factory = PipelineElementFactory()
element_factory = PipelineElementFactory()
=== Products/ZCTextIndex/HTMLSplitter.py 1.6 => 1.7 ===
from Products.ZCTextIndex.ISplitter import ISplitter
+from Products.ZCTextIndex.PipelineFactory import splitter_factory
import re
@@ -43,6 +44,8 @@
rx = re.compile("[A-Za-z]")
return [word for word in text.split()
if len(word) > 1 and rx.search(word)]
+
+splitter_factory.registerFactory('HTML Word Splitter', HTMLWordSplitter)
if __name__ == "__main__":
import sys
=== Products/ZCTextIndex/Lexicon.py 1.12 => 1.13 ===
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict
+from PipelineFactory import splitter_factory, element_factory
class Lexicon:
@@ -140,11 +141,15 @@
for s in lst:
result += self.rxGlob.findall(s)
return result
+
+splitter_factory.registerFactory('Regex Splitter', Splitter)
class CaseNormalizer:
def process(self, lst):
return [w.lower() for w in lst]
+
+element_factory.registerFactory('Case Normalizer', CaseNormalizer)
class StopWordRemover:
@@ -161,3 +166,6 @@
else:
def process(self, lst):
return self._process(self.dict, lst)
+
+
+element_factory.registerFactory('Stop Word Remover', StopWordRemover)
=== Products/ZCTextIndex/ZCTextIndex.py 1.19 => 1.20 ===
from Products.PluginIndexes.common.util import parseIndexRequest
-from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.Lexicon import \
Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser
+from PipelineFactory import splitter_factory, element_factory
+
+from Products.ZCTextIndex.CosineIndex import CosineIndex
+from Products.ZCTextIndex.OkapiIndex import OkapiIndex
+index_types = {'Okapi BM25 Rank':OkapiIndex,
+ 'Cosine Measure':CosineIndex}
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent TextIndex"""
@@ -50,7 +55,7 @@
## Constructor ##
- def __init__(self, id, extra, caller, index_factory=OkapiIndex):
+ def __init__(self, id, extra, caller, index_factory=None):
self.id = id
self._fieldname = extra.doc_attr
lexicon = getattr(caller, extra.lexicon_id, None)
@@ -64,7 +69,15 @@
% lexicon.getId()
self.lexicon = lexicon
- self._index_factory = index_factory
+
+ if index_factory is None:
+ if extra.index_type not in index_types.keys():
+ raise ValueError, 'Invalid index type "%s"' % extra.index_type
+ self._index_factory = index_types[extra.index_type]
+ self._index_type = extra.index_type
+ else:
+ self._index_factory = index_factory
+
self.clear()
## External methods not in the Pluggable Index API ##
@@ -144,6 +157,10 @@
## User Interface Methods ##
manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
+
+ def getIndexType(self):
+ """Return index type string"""
+ return getattr(self, '_index_type', self._index_factory.__name__)
InitializeClass(ZCTextIndex)
@@ -157,29 +174,39 @@
manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())
-def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
- stopwords=None, REQUEST=None):
+def manage_addLexicon(self, id, title='', splitter_name=None,
+ element_names=None, REQUEST=None):
"""Add ZCTextIndex Lexicon"""
- elements = []
- if splitter:
- elements.append(Splitter())
- if normalizer:
- elements.append(CaseNormalizer())
- if stopwords:
- elements.append(StopWordRemover())
+
+ elements = [element_factory.instantiate(name) for name in element_names]
+
+ if splitter_name:
+ elements.insert(0, splitter_factory.instantiate(splitter_name))
+
lexicon = PLexicon(id, title, *elements)
self._setObject(id, lexicon)
if REQUEST is not None:
return self.manage_main(self, REQUEST, update_menu=1)
class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
- """Persistent Lexcion for ZCTextIndex"""
+ """Persistent Lexicon for ZCTextIndex"""
meta_type = 'ZCTextIndex Lexicon'
+
+ manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
+ SimpleItem.manage_options
def __init__(self, id, title='', *pipeline):
self.id = str(id)
self.title = str(title)
PLexicon.inheritedAttribute('__init__')(self, *pipeline)
+
+ ## User Interface Methods ##
+
+ def getPipelineNames(self):
+ """Return list of names of pipeline element classes"""
+ return [element.__class__.__name__ for element in self._pipeline]
+
+ manage_main = DTMLFile('dtml/manageLexicon', globals())
InitializeClass(PLexicon)
=== Products/ZCTextIndex/__init__.py 1.5 => 1.6 ===
"""
+from PipelineFactory import splitter_factory, element_factory
+from Products.ZCTextIndex import ZCTextIndex, HTMLSplitter
+
def initialize(context):
- from Products.ZCTextIndex import ZCTextIndex
context.registerClass(
ZCTextIndex.ZCTextIndex,
permission = 'Add Pluggable Index',
constructors = (ZCTextIndex.manage_addZCTextIndexForm,
- ZCTextIndex.manage_addZCTextIndex),
+ ZCTextIndex.manage_addZCTextIndex,
+ getIndexTypes),
icon='www/index.gif',
visibility=None
)
@@ -32,6 +35,19 @@
ZCTextIndex.PLexicon,
permission = 'Add Vocabularies',
constructors = (ZCTextIndex.manage_addLexiconForm,
- ZCTextIndex.manage_addLexicon),
+ ZCTextIndex.manage_addLexicon,
+ getSplitterNames, getElementNames),
icon='www/lexicon.gif'
)
+
+## Functions below are for use in the ZMI constructor forms ##
+
+def getSplitterNames(self):
+ return splitter_factory.getFactoryNames()
+
+def getElementNames(self):
+ return element_factory.getFactoryNames()
+
+def getIndexTypes(self):
+ return ZCTextIndex.index_types.keys()
+