[Zope-CVS] CVS: Products/ZCTextIndex - HTMLSplitter.py:1.10 IPipelineElementFactory.py:1.3 Lexicon.py:1.17 PipelineFactory.py:1.3 ZCTextIndex.py:1.23 __init__.py:1.8
Tim Peters
tim.one@comcast.net
Thu, 23 May 2002 11:05:34 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv17499
Modified Files:
HTMLSplitter.py IPipelineElementFactory.py Lexicon.py
PipelineFactory.py ZCTextIndex.py __init__.py
Log Message:
Whitespace normalization.
=== Products/ZCTextIndex/HTMLSplitter.py 1.9 => 1.10 ===
text = re.sub(pat, " ", text)
return re.findall(wordpat, text)
-
-element_factory.registerFactory('Word Splitter',
+
+element_factory.registerFactory('Word Splitter',
'HTML aware splitter',
HTMLWordSplitter)
=== Products/ZCTextIndex/IPipelineElementFactory.py 1.2 => 1.3 ===
def registerFactory(group, name, factory):
"""Registers a pipeline factory by name and element group.
-
- Each name can be registered only once for a given group. Duplicate
+
+ Each name can be registered only once for a given group. Duplicate
registrations will raise a ValueError
"""
-
+
def getFactoryGroups():
"""Returns a sorted list of element group names
"""
-
+
def getFactoryNames(group):
"""Returns a sorted list of registered pipeline factory names
in the specified element group
"""
-
+
def instantiate(group, name):
- """Instantiates a pipeline element by group and name. If name is not
+ """Instantiates a pipeline element by group and name. If name is not
registered raise a KeyError.
"""
=== Products/ZCTextIndex/Lexicon.py 1.16 => 1.17 ===
result += self.rxGlob.findall(s)
return result
-
-element_factory.registerFactory('Word Splitter',
- 'Whitespace splitter',
+
+element_factory.registerFactory('Word Splitter',
+ 'Whitespace splitter',
Splitter)
class CaseNormalizer:
def process(self, lst):
return [w.lower() for w in lst]
-
+
element_factory.registerFactory('Case Normalizer',
- 'Case Normalizer',
+ 'Case Normalizer',
CaseNormalizer)
-element_factory.registerFactory('Stop Words',
- ' Don\'t remove stop words',
+element_factory.registerFactory('Stop Words',
+ ' Don\'t remove stop words',
None)
class StopWordRemover:
@@ -202,8 +202,8 @@
def process(self, lst):
return self._process(self.dict, lst)
-element_factory.registerFactory('Stop Words',
- 'Remove listed stop words only',
+element_factory.registerFactory('Stop Words',
+ 'Remove listed stop words only',
StopWordRemover)
class StopWordAndSingleCharRemover(StopWordRemover):
@@ -211,7 +211,7 @@
dict = get_stopdict().copy()
for c in range(255):
dict[chr(c)] = None
-
-element_factory.registerFactory('Stop Words',
- 'Remove listed and single char words',
+
+element_factory.registerFactory('Stop Words',
+ 'Remove listed and single char words',
StopWordAndSingleCharRemover)
=== Products/ZCTextIndex/PipelineFactory.py 1.2 => 1.3 ===
from Products.ZCTextIndex.IPipelineElementFactory \
import IPipelineElementFactory
-
+
class PipelineElementFactory:
-
+
__implements__ = IPipelineElementFactory
-
+
def __init__(self):
self._groups = {}
-
+
def registerFactory(self, group, name, factory):
if self._groups.has_key(group) and \
self._groups[group].has_key(name):
raise ValueError('ZCTextIndex lexicon element "%s" '
- 'already registered in group "%s"'
+ 'already registered in group "%s"'
% (name, group))
-
+
elements = self._groups.get(group)
if elements is None:
elements = self._groups[group] = {}
elements[name] = factory
-
+
def getFactoryGroups(self):
groups = self._groups.keys()
groups.sort()
return groups
-
+
def getFactoryNames(self, group):
names = self._groups[group].keys()
names.sort()
return names
-
+
def instantiate(self, group, name):
factory = self._groups[group][name]
if factory is not None:
=== Products/ZCTextIndex/ZCTextIndex.py 1.22 => 1.23 ===
from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
-index_types = {'Okapi BM25 Rank':OkapiIndex,
+index_types = {'Okapi BM25 Rank':OkapiIndex,
'Cosine Measure':CosineIndex}
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
@@ -77,7 +77,7 @@
self._index_type = extra.index_type
else:
self._index_factory = index_factory
-
+
self.clear()
## External methods not in the Pluggable Index API ##
@@ -157,7 +157,7 @@
## User Interface Methods ##
manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
-
+
def getIndexType(self):
"""Return index type string"""
return getattr(self, '_index_type', self._index_factory.__name__)
@@ -176,10 +176,10 @@
def manage_addLexicon(self, id, title='', elements=[], REQUEST=None):
"""Add ZCTextIndex Lexicon"""
-
+
pipeline = []
for el_record in elements:
- if not hasattr(el_record, 'name'):
+ if not hasattr(el_record, 'name'):
continue # Skip over records that only specify element group
element = element_factory.instantiate(el_record.group, el_record.name)
if element is not None:
@@ -199,7 +199,7 @@
"""Lexicon for ZCTextIndex"""
meta_type = 'ZCTextIndex Lexicon'
-
+
manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
SimpleItem.manage_options
@@ -207,13 +207,13 @@
self.id = str(id)
self.title = str(title)
PLexicon.inheritedAttribute('__init__')(self, *pipeline)
-
+
## User Interface Methods ##
-
+
def getPipelineNames(self):
"""Return list of names of pipeline element classes"""
return [element.__class__.__name__ for element in self._pipeline]
-
+
manage_main = DTMLFile('dtml/manageLexicon', globals())
InitializeClass(PLexicon)
=== Products/ZCTextIndex/__init__.py 1.7 => 1.8 ===
icon='www/lexicon.gif'
)
-
+
## Functions below are for use in the ZMI constructor forms ##
-
+
def getElementGroups(self):
return element_factory.getFactoryGroups()
-
+
def getElementNames(self, group):
return element_factory.getFactoryNames(group)
-
+
def getIndexTypes(self):
return ZCTextIndex.index_types.keys()
-