[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.3 Lexicon.py:1.3 ZCTextIndex.py:1.3 __init__.py:1.3
Casey Duncan
casey@zope.com
Tue, 14 May 2002 15:09:28 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv5974
Modified Files:
Index.py Lexicon.py ZCTextIndex.py __init__.py
Log Message:
Integration with Zope is complete. ZCTextIndex is now a bona fide Plug-in index.
Some additional plug-in index APIs were added to ZCTextIndex, and supporting APIs were added to Index and Lexicon.
_apply_index does not use NBest, since ZCatalog has an incompatible strategy for finding the top results. NBest might be factored out of this product for general consumption in application code.
=== Products/ZCTextIndex/Index.py 1.2 => 1.3 ===
from Products.ZCTextIndex import WidCode
+import ZODB
+from Persistence import Persistent
+
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
@@ -39,7 +42,7 @@
# expensive.
return int(f * scale + 0.5)
-class Index:
+class Index(Persistent):
__implements__ = IIndex
@@ -59,6 +62,10 @@
def length(self):
"""Return the number of documents in the index."""
return len(self._docwords)
+
+ def get_words(self, docid):
+ """Returns the wordids for a given docid"""
+ return WidCode.decode(self._docwords[docid])
# Most of the computation for computing a relevance score for the
# document occurs in the search() method. The code currently
@@ -97,6 +104,7 @@
self._add_wordinfo(uniqwids[i], freqs[i], docid)
self._docweight[docid] = docweight
self._add_undoinfo(docid, wids)
+ return len(wids)
def unindex_doc(self, docid):
for wid in self._get_undoinfo(docid):
=== Products/ZCTextIndex/Lexicon.py 1.2 => 1.3 ===
wids.append(wid)
return wids
+
+ def get_word(self, wid):
+ """Return the word for the given word id"""
+ return self.__words[wid]
def globToWordIds(self, pattern):
if not re.match("^\w+\*$", pattern):
=== Products/ZCTextIndex/ZCTextIndex.py 1.2 => 1.3 ===
from Products.PluginIndexes.common.PluggableIndex \
import PluggableIndexInterface
+from Products.PluginIndexes.common.util import parseIndexRequest
from Products.ZCTextIndex.Index import Index
from Products.ZCTextIndex.ILexicon import ILexicon
+from Products.ZCTextIndex.Lexicon \
+ import Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser
-from Globals import DTMLFile
+from Globals import DTMLFile, InitializeClass
from Interface import verify_class_implementation
+from AccessControl.SecurityInfo import ClassSecurityInfo
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
+ """Persistent TextIndex"""
+
__implements__ = PluggableIndexInterface
meta_type = 'ZCTextIndex'
@@ -37,6 +43,8 @@
manage_options= (
{'label': 'Settings', 'action': 'manage_main'},
)
+
+ query_options = ['query', 'nbest']
def __init__(self, id, extra, caller):
self.id = id
@@ -45,23 +53,46 @@
if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id
-
- verify_class_implementation(ILexicon, lexicon.__class__)
-
+
+ if not ILexicon.isImplementedBy(lexicon):
+ raise ValueError, \
+ 'Object "%s" does not implement lexicon interface' \
+ % lexicon.getId()
+
self.lexicon = lexicon
self.index = Index(self.lexicon)
self.parser = QueryParser()
+
+ ## Pluggable Index APIs ##
- def index_object(self, docid, obj):
- self.index.index_doc(docid, self._get_object_text(obj))
+ def index_object(self, docid, obj, threshold=None):
+ # XXX We currently ignore subtransaction threshold
+ count = self.index.index_doc(docid, self._get_object_text(obj))
self._p_changed = 1 # XXX
+ return count
def unindex_object(self, docid):
self.index.unindex_doc(docid)
self._p_changed = 1 # XXX
- def _apply_index(self, req):
- pass # XXX
+ def _apply_index(self, request, cid=''):
+ """Apply the query specified by request which is a mapping
+ containing the query
+
+ Returns two object on success, the resultSet containing the
+ matching record numbers and a tuple containing the names of the
+ fields used
+
+ Returns None if request is not valid for this index.
+ """
+ record = parseIndexRequest(request, self.id, self.query_options)
+ if record.keys==None:
+ return None
+ query_str = ' '.join(record.keys)
+ tree = self.parser.parseQuery(query_str)
+ results = tree.executeQuery(self.index)
+ return results, (self._fieldname,)
+
def query(self, query, nbest=10):
# returns a mapping from docids to scores
@@ -70,7 +101,20 @@
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest()
-
+
+ def numObjects(self):
+ """Return number of object indexed"""
+ return self.index.length()
+
+ def getEntryForObject(self, documentId, default=None):
+ """Return the list of words indexed for documentId"""
+ try:
+ word_ids = self.index.get_words(documentId)
+ except KeyError:
+ return default
+ get_word = self.lexicon.get_word
+ return [get_word(wid) for wid in word_ids]
+
def _get_object_text(self, obj):
x = getattr(obj, self._fieldname)
if callable(x):
@@ -82,6 +126,8 @@
manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
+InitializeClass(ZCTextIndex)
+
def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
RESPONSE=None):
"""Add a text index"""
@@ -93,15 +139,30 @@
manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())
def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
- stopword=None, REQUEST=None):
+ stopwords=None, REQUEST=None):
+ """Add ZCTextIndex Lexicon"""
elements = []
if splitter:
- elements.append(Lexicon.Splitter())
+ elements.append(Splitter())
if normalizer:
elements.append(CaseNormalizer())
if stopwords:
elements.append(StopWordRemover())
- lexicon = Lexicon(*elements)
+ lexicon = PLexicon(id, title, *elements)
self._setObject(id, lexicon)
if REQUEST is not None:
return self.manage_main(self, REQUEST, update_menu=1)
+
+class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
+ """Persistent Lexcion for ZCTextIndex"""
+
+ meta_type = 'ZCTextIndex Lexicon'
+
+ def __init__(self, id, title='', *pipeline):
+ self.id = str(id)
+ self.title = str(title)
+ PLexicon.inheritedAttribute('__init__')(self, *pipeline)
+
+InitializeClass(PLexicon)
+
+
=== Products/ZCTextIndex/__init__.py 1.2 => 1.3 ===
context.registerClass(
ZCTextIndex.ZCTextIndex,
- permission='Add Pluggable Index',
- constructors=(ZCTextIndex.manage_addZCTextIndexForm,
+ permission = 'Add Pluggable Index',
+ constructors = (ZCTextIndex.manage_addZCTextIndexForm,
ZCTextIndex.manage_addZCTextIndex),
visibility=None
+ )
+
+ context.registerClass(
+ ZCTextIndex.PLexicon,
+ permission = 'Add Vocabularies',
+ constructors = (ZCTextIndex.manage_addLexiconForm,
+ ZCTextIndex.manage_addLexicon),
)