[Zope-CVS] CVS: Products/ZCTextIndex - HTMLSplitter.py:1.5 Lexicon.py:1.6 RiceCode.py:1.3 ZCTextIndex.py:1.8
Tim Peters
tim.one@comcast.net
Tue, 14 May 2002 23:50:07 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv17116
Modified Files:
HTMLSplitter.py Lexicon.py RiceCode.py ZCTextIndex.py
Log Message:
Whitespace normalization.
=== Products/ZCTextIndex/HTMLSplitter.py 1.4 => 1.5 ===
for t in text:
splat += self._split(t)
- return splat
+ return splat
- def _split(self, text):
+ def _split(self, text):
text = text.lower()
remove = ["<[^>]*>",
"&[A-Za-z]+;",
=== Products/ZCTextIndex/Lexicon.py 1.5 => 1.6 ===
wids.append(wid)
return wids
-
+
def get_word(self, wid):
"""Return the word for the given word id"""
return self._words[wid]
=== Products/ZCTextIndex/RiceCode.py 1.2 => 1.3 ===
Based on a Java implementation by Glen McCluskey described in a Usenix
- ;login: article at
+ ;login: article at
http://www.usenix.org/publications/login/2000-4/features/java.html
McCluskey's article explains the approach as follows. The encoding
@@ -33,7 +33,7 @@
def __getitem__(self, i):
byte, offset = divmod(i, 8)
- mask = 2 ** offset
+ mask = 2 ** offset
if self.bytes[byte] & mask:
return 1
else:
@@ -41,12 +41,12 @@
def __setitem__(self, i, val):
byte, offset = divmod(i, 8)
- mask = 2 ** offset
+ mask = 2 ** offset
if val:
self.bytes[byte] |= mask
else:
self.bytes[byte] &= ~mask
-
+
def __len__(self):
return self.nbits
@@ -78,7 +78,7 @@
def init(self, m):
self.m = m
- self.lower = (1 << m) - 1
+ self.lower = (1 << m) - 1
self.mask = 1 << (m - 1)
def append(self, val):
@@ -123,7 +123,7 @@
def tostring(self):
"""Return a binary string containing the encoded data.
-
+
The binary string may contain some extra zeros at the end.
"""
return self.bits.tostring()
=== Products/ZCTextIndex/ZCTextIndex.py 1.7 => 1.8 ===
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent TextIndex"""
-
+
__implements__ = PluggableIndexInterface
-
+
meta_type = 'ZCTextIndex'
-
+
manage_options= (
{'label': 'Settings', 'action': 'manage_main'},
)
-
+
query_options = ['query']
def __init__(self, id, extra, caller, index_factory=Index):
self.id = id
self._fieldname = extra.doc_attr
lexicon = getattr(caller, extra.lexicon_id, None)
-
+
if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id
-
+
if not ILexicon.isImplementedBy(lexicon):
raise ValueError, \
'Object "%s" does not implement lexicon interface' \
@@ -63,7 +63,7 @@
self.lexicon = lexicon
self.index = index_factory(self.lexicon)
self.parser = QueryParser()
-
+
## Pluggable Index APIs ##
def index_object(self, docid, obj, threshold=None):
@@ -78,7 +78,7 @@
def _apply_index(self, request, cid=''):
"""Apply query specified by request, a mapping containing the query.
-
+
Returns two objects on success, the resultSet containing the
matching record numbers and a tuple containing the names of
the fields used
@@ -86,7 +86,7 @@
Returns None if request is not valid for this index.
"""
record = parseIndexRequest(request, self.id, self.query_options)
- if record.keys is None:
+ if record.keys is None:
return None
query_str = ' '.join(record.keys)
tree = self.parser.parseQuery(query_str)
@@ -100,11 +100,11 @@
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest()
-
+
def numObjects(self):
"""Return number of objects indexed"""
return self.index.length()
-
+
def getEntryForObject(self, documentId, default=None):
"""Return the list of words indexed for documentId"""
try:
@@ -113,28 +113,28 @@
return default
get_word = self.lexicon.get_word
return [get_word(wid) for wid in word_ids]
-
+
def clear(self):
"""reinitialize the index"""
self.index = Index(self.lexicon)
-
+
def _get_object_text(self, obj):
x = getattr(obj, self._fieldname)
if callable(x):
return x()
else:
return x
-
+
## User Interface Methods ##
-
+
manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
InitializeClass(ZCTextIndex)
-def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
+def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
RESPONSE=None):
"""Add a text index"""
- return self.manage_addIndex(id, 'ZCTextIndex', extra,
+ return self.manage_addIndex(id, 'ZCTextIndex', extra,
REQUEST, RESPONSE, REQUEST.URL3)
manage_addZCTextIndexForm = DTMLFile('dtml/addZCTextIndex', globals())
@@ -155,17 +155,15 @@
self._setObject(id, lexicon)
if REQUEST is not None:
return self.manage_main(self, REQUEST, update_menu=1)
-
+
class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent Lexicon for ZCTextIndex"""
-
+
meta_type = 'ZCTextIndex Lexicon'
-
+
def __init__(self, id, title='', *pipeline):
self.id = str(id)
self.title = str(title)
PLexicon.inheritedAttribute('__init__')(self, *pipeline)
-
+
InitializeClass(PLexicon)
-
-