[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.3 Lexicon.py:1.3 ZCTextIndex.py:1.3 __init__.py:1.3

Casey Duncan casey@zope.com
Tue, 14 May 2002 15:09:28 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv5974

Modified Files:
	Index.py Lexicon.py ZCTextIndex.py __init__.py 
Log Message:
Integration with Zope complete. ZCTextIndex is now a bona fide plug-in index.

Some additional plug-in index APIs were added to ZCTextIndex, and supporting APIs were added to Index and Lexicon.

_apply_index does not use NBest, since ZCatalog has an incompatible strategy for finding the top results. NBest might later be factored out of this product for general use in application code.


=== Products/ZCTextIndex/Index.py 1.2 => 1.3 ===
 from Products.ZCTextIndex import WidCode
 
+import ZODB
+from Persistence import Persistent
+
 # Instead of storing floats, we generally store scaled ints.  Binary pickles
 # can store those more efficiently.  The default SCALE_FACTOR of 1024
 # is large enough to get about 3 decimal digits of fractional info, and
@@ -39,7 +42,7 @@
     # expensive.
     return int(f * scale + 0.5)
 
-class Index:
+class Index(Persistent):
 
     __implements__ = IIndex
 
@@ -59,6 +62,10 @@
     def length(self):
         """Return the number of documents in the index."""
         return len(self._docwords)
+        
+    def get_words(self, docid):
+        """Returns the wordids for a given docid"""
+        return WidCode.decode(self._docwords[docid])
 
     # Most of the computation for computing a relevance score for the
     # document occurs in the search() method.  The code currently
@@ -97,6 +104,7 @@
             self._add_wordinfo(uniqwids[i], freqs[i], docid)
         self._docweight[docid] = docweight
         self._add_undoinfo(docid, wids)
+        return len(wids)
 
     def unindex_doc(self, docid):
         for wid in self._get_undoinfo(docid):


=== Products/ZCTextIndex/Lexicon.py 1.2 => 1.3 ===
                 wids.append(wid)
         return wids
+        
+    def get_word(self, wid):
+        """Return the word for the given word id"""
+        return self.__words[wid]
 
     def globToWordIds(self, pattern):
         if not re.match("^\w+\*$", pattern):


=== Products/ZCTextIndex/ZCTextIndex.py 1.2 => 1.3 ===
 from Products.PluginIndexes.common.PluggableIndex \
      import PluggableIndexInterface
+from Products.PluginIndexes.common.util import parseIndexRequest
 
 from Products.ZCTextIndex.Index import Index
 from Products.ZCTextIndex.ILexicon import ILexicon
+from Products.ZCTextIndex.Lexicon \
+     import Lexicon, Splitter, CaseNormalizer, StopWordRemover
 from Products.ZCTextIndex.NBest import NBest
 from Products.ZCTextIndex.QueryParser import QueryParser
-from Globals import DTMLFile
+from Globals import DTMLFile, InitializeClass
 from Interface import verify_class_implementation
+from AccessControl.SecurityInfo import ClassSecurityInfo
 
 class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
+    """Persistent TextIndex"""
+    
     __implements__ = PluggableIndexInterface
     
     meta_type = 'ZCTextIndex'
@@ -37,6 +43,8 @@
     manage_options= (
         {'label': 'Settings', 'action': 'manage_main'},
     )
+    
+    query_options = ['query', 'nbest']
 
     def __init__(self, id, extra, caller):
         self.id = id
@@ -45,23 +53,46 @@
         
         if lexicon is None:
             raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id
-            
-        verify_class_implementation(ILexicon, lexicon.__class__)
-            
+        
+        if not ILexicon.isImplementedBy(lexicon):
+            raise ValueError, \
+                'Object "%s" does not implement lexicon interface' \
+                % lexicon.getId()
+
         self.lexicon = lexicon
         self.index = Index(self.lexicon)
         self.parser = QueryParser()
+        
+    ## Pluggable Index APIs ##
 
-    def index_object(self, docid, obj):
-        self.index.index_doc(docid, self._get_object_text(obj))
+    def index_object(self, docid, obj, threshold=None):
+        # XXX We currently ignore subtransaction threshold
+        count = self.index.index_doc(docid, self._get_object_text(obj))
         self._p_changed = 1 # XXX
+        return count
 
     def unindex_object(self, docid):
         self.index.unindex_doc(docid)
         self._p_changed = 1 # XXX
 
-    def _apply_index(self, req):
-        pass # XXX
+    def _apply_index(self, request, cid=''):
+        """Apply the query specified by request which is a mapping
+           containing the query 
+           
+           Returns two object on success, the resultSet containing the
+           matching record numbers and a tuple containing the names of the
+           fields used
+
+           Returns None if request is not valid for this index.
+        """
+        record = parseIndexRequest(request, self.id, self.query_options)
+        if record.keys==None: 
+            return None
+        query_str = ' '.join(record.keys)
+        tree = self.parser.parseQuery(query_str)
+        results = tree.executeQuery(self.index)
+        return  results, (self._fieldname,)
+        
 
     def query(self, query, nbest=10):
         # returns a mapping from docids to scores
@@ -70,7 +101,20 @@
         chooser = NBest(nbest)
         chooser.addmany(results.items())
         return chooser.getbest()
-
+    
+    def numObjects(self):
+        """Return number of object indexed"""
+        return self.index.length()
+        
+    def getEntryForObject(self, documentId, default=None):
+        """Return the list of words indexed for documentId"""
+        try:
+            word_ids = self.index.get_words(documentId)
+        except KeyError:
+            return default
+        get_word = self.lexicon.get_word
+        return [get_word(wid) for wid in word_ids]
+        
     def _get_object_text(self, obj):
         x = getattr(obj, self._fieldname)
         if callable(x):
@@ -82,6 +126,8 @@
     
     manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
 
+InitializeClass(ZCTextIndex)
+
 def manage_addZCTextIndex(self, id, extra=None, REQUEST=None, 
                           RESPONSE=None):
     """Add a text index"""
@@ -93,15 +139,30 @@
 manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())
 
 def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
-                      stopword=None, REQUEST=None):
+                      stopwords=None, REQUEST=None):
+    """Add ZCTextIndex Lexicon"""
     elements = []
     if splitter:
-        elements.append(Lexicon.Splitter())
+        elements.append(Splitter())
     if normalizer:
         elements.append(CaseNormalizer())
     if stopwords:
         elements.append(StopWordRemover())
-    lexicon = Lexicon(*elements)
+    lexicon = PLexicon(id, title, *elements)
     self._setObject(id, lexicon)
     if REQUEST is not None:
         return self.manage_main(self, REQUEST, update_menu=1)
+        
+class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
+    """Persistent Lexcion for ZCTextIndex"""
+    
+    meta_type = 'ZCTextIndex Lexicon'
+    
+    def __init__(self, id, title='', *pipeline):
+        self.id = str(id)
+        self.title = str(title)
+        PLexicon.inheritedAttribute('__init__')(self, *pipeline)
+        
+InitializeClass(PLexicon)
+    
+    


=== Products/ZCTextIndex/__init__.py 1.2 => 1.3 ===
     context.registerClass(
         ZCTextIndex.ZCTextIndex,
-        permission='Add Pluggable Index',
-        constructors=(ZCTextIndex.manage_addZCTextIndexForm,
+        permission = 'Add Pluggable Index',
+        constructors = (ZCTextIndex.manage_addZCTextIndexForm,
                       ZCTextIndex.manage_addZCTextIndex),
         visibility=None
+    )
+
+    context.registerClass(
+        ZCTextIndex.PLexicon,
+        permission = 'Add Vocabularies',
+        constructors = (ZCTextIndex.manage_addLexiconForm,
+                        ZCTextIndex.manage_addLexicon),
     )