[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/PathIndex - PathIndex.py:1.35.2.1

Thu Sep 18 10:16:45 EDT 2003

Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/PathIndex
In directory cvs.zope.org:/tmp/cvs-serv6537/lib/python/Products/PluginIndexes/PathIndex

Modified Files:
      Tag: Zope-2_7-branch
	PathIndex.py 
Log Message:
Merge from HEAD:

     - PathIndex and TopicIndex now use a counter to track the number
       of indexed objects instead of a very expensive calculation
       based on the keys of their indexes.


=== Zope/lib/python/Products/PluginIndexes/PathIndex/PathIndex.py 1.35 => 1.35.2.1 ===

--- Zope/lib/python/Products/PluginIndexes/PathIndex/PathIndex.py:1.35	Tue Jun 17 15:01:07 2003
+++ Zope/lib/python/Products/PluginIndexes/PathIndex/PathIndex.py	Thu Sep 18 10:16:14 2003
@@ -13,24 +13,24 @@
 
 __version__ = '$Id$'
 
-from Products.PluginIndexes import PluggableIndex
-from Products.PluginIndexes.common.util import parseIndexRequest
-from Products.PluginIndexes.common import safe_callable
+import warnings
+from types import StringType, ListType, TupleType
 
 from Globals import Persistent, DTMLFile
-from Acquisition import Implicit
-
+from OFS.SimpleItem import SimpleItem
 from BTrees.IOBTree import IOBTree
 from BTrees.OOBTree import OOBTree
 from BTrees.IIBTree import IITreeSet, IISet, intersection, union
-from OFS.SimpleItem import SimpleItem
+from BTrees.Length import Length
 from zLOG import LOG, ERROR
-from types import StringType, ListType, TupleType
-import warnings
+
+from Products.PluginIndexes import PluggableIndex
+from Products.PluginIndexes.common.util import parseIndexRequest
+from Products.PluginIndexes.common import safe_callable
 
 _marker = []
 
-class PathIndex(Persistent, Implicit, SimpleItem):
+class PathIndex(Persistent, SimpleItem):
     """ A path index stores all path components of the physical
     path of an object:
 
@@ -41,7 +41,7 @@
     - every component is kept as a  key of a OOBTree in self._indexes
 
     - the value is a mapping 'level of the path component' to
-      'all documentIds with this path component on this level'
+      'all docids with this path component on this level'
 
     """
 
@@ -55,33 +55,26 @@
          'help': ('PathIndex','PathIndex_Settings.stx')},
     )
 
-    query_options = ["query", "level", "operator"]
-
+    query_options = ("query", "level", "operator")
 
     def __init__(self,id,caller=None):
         self.id = id
-
-        # experimental code for specifing the operator
-        self.operators = ['or','and']
+        self.operators = ('or','and')
         self.useOperator = 'or'
-
         self.clear()
 
-
     def clear(self):
-        """ clear everything """
-
-        self._depth   = 0
-        self._index   = OOBTree()
+        self._depth = 0
+        self._index = OOBTree()
         self._unindex = IOBTree()
-
+        self._length = Length(0)
 
     def insertEntry(self, comp, id, level):
         """Insert an entry.
 
-        comp is a path component (generated by splitPath() )
-        id is the documentId
-        level is the level of the component inside the path
+           comp is a path component 
+           id is the docid
+           level is the level of the component inside the path
         """
 
         if not self._index.has_key(comp):
@@ -94,16 +87,11 @@
         if level > self._depth:
             self._depth = level
 
-
-    def index_object(self, documentId, obj ,threshold=100):
+    def index_object(self, docid, obj ,threshold=100):
         """ hook for (Z)Catalog """
 
-        # first we check if the object provide an attribute or
-        # method to be used as hook for the PathIndex
-
-        if hasattr(obj, self.id):
-            f = getattr(obj, self.id)
-
+        f = getattr(obj, self.id, None)
+        if f is not None:
             if safe_callable(f):
                 try:
                     path = f()
@@ -112,8 +100,7 @@
             else:
                 path = f
 
-            if not (isinstance(path, StringType) or
-                    isinstance(path, TupleType)):
+            if not isinstance(path, (StringType, TupleType)):
                 raise TypeError('path value must be string or tuple of strings')
         else:
             try:
@@ -121,36 +108,34 @@
             except AttributeError:
                 return 0
 
-        if type(path) in (ListType, TupleType):
+        if isinstance(path, (ListType, TupleType)):
             path = '/'+ '/'.join(path[1:])
-
-        comps = self.splitPath(path, obj)
+        comps = filter(None, path.split('/'))
+       
+        if not self._unindex.has_key(docid):
+            self._length.change(1)
 
         for i in range(len(comps)):
-            self.insertEntry(comps[i], documentId, i)
-
-        self._unindex[documentId] = path
-
+            self.insertEntry(comps[i], docid, i)
+        self._unindex[docid] = path
         return 1
 
-
-    def unindex_object(self, documentId):
+    def unindex_object(self, docid):
         """ hook for (Z)Catalog """
 
-        if not self._unindex.has_key(documentId):
+        if not self._unindex.has_key(docid):
             LOG(self.__class__.__name__, ERROR,
                 'Attempt to unindex nonexistent document'
-                ' with id %s' % documentId)
+                ' with id %s' % docid)
             return
 
-        path = self._unindex[documentId]
-        comps = path.split('/')
+        comps =  self._unindex[docid].split('/')
 
         for level in range(len(comps[1:])):
             comp = comps[level+1]
 
             try:
-                self._index[comp][level].remove(documentId)
+                self._index[comp][level].remove(docid)
 
                 if not self._index[comp][level]:
                     del self._index[comp][level]
@@ -160,34 +145,10 @@
             except KeyError:
                 LOG(self.__class__.__name__, ERROR,
                     'Attempt to unindex document'
-                    ' with id %s failed' % documentId)
-
-        del self._unindex[documentId]
-
-
-    def printIndex(self):
-        for k,v in self._index.items():
-            print "-"*78
-            print k
-            for k1,v1 in v.items():
-                print k1,v1,
-
-            print
-
-
-    def splitPath(self, path, obj=None):
-        """ split physical path of object. If the object has
-        as function splitPath() we use this user-defined function
-        to split the path
-        """
-
-        if hasattr(obj, "splitPath"):
-            comps = obj.splitPath(path)
-        else:
-            comps = filter(None, path.split('/'))
-
-        return comps
+                    ' with id %s failed' % docid)
 
+        self._length.change(-1)
+        del self._unindex[docid]
 
     def search(self, path, default_level=0):
         """
@@ -199,95 +160,61 @@
         level <  0  not implemented yet
         """
 
-        if isinstance(path,StringType):
+        if isinstance(path, StringType):
             level = default_level
         else:
             level = int(path[1])
             path  = path[0]
 
-        comps = self.splitPath(path)
+        comps = filter(None, path.split('/'))
 
         if len(comps) == 0:
             return IISet(self._unindex.keys())
 
         if level >= 0:
-
             results = []
             for i in range(len(comps)):
                 comp = comps[i]
-
                 if not self._index.has_key(comp): return IISet()
                 if not self._index[comp].has_key(level+i): return IISet()
-
                 results.append( self._index[comp][level+i] )
 
             res = results[0]
-
             for i in range(1,len(results)):
                 res = intersection(res,results[i])
-
             return res
 
         else:
-
             results = IISet()
-
             for level in range(0,self._depth + 1):
-
                 ids = None
                 error = 0
-
                 for cn in range(0,len(comps)):
                     comp = comps[cn]
-
                     try:
                         ids = intersection(ids,self._index[comp][level+cn])
                     except KeyError:
                         error = 1
-
                 if error==0:
                     results = union(results,ids)
-
             return results
 
-
-
-    def __len__(self):
-        """ len """
-        # XXX REALLY inefficient
-        return len(self._index)
-
-
     def numObjects(self):
         """ return the number of indexed objects"""
-        # XXX REALLY inefficient
-        return len(self._unindex)
-
-
-    def keys(self):
-        """ return list of all path components """
-        # XXX Could this be lazy, does it need to be a list?
-        return list(self._index.keys())
-
-
-    def values(self):
-        # XXX Could this be lazy, does it need to be a list?
-        return list(self._index.values())
-
-
-    def items(self):
-        """ mapping path components : documentIds """
-        # XXX Could this be lazy, does it need to be a list?
-        return list(self._index.items())
-
+        try:
+            return self._length()
+        except AttributeError:        # backward compatibility
+            l = len(self._unindex)
+            self._length = Length(l)
+            return l
 
     def _apply_index(self, request, cid=''):
         """ hook for (Z)Catalog
-        request   mapping type (usually {"path": "..." }
-                  additionaly a parameter "path_level" might be passed
-                  to specify the level (see search())
+            'request' --  mapping type (usually {"path": "..." }
+             additionaly a parameter "path_level" might be passed
+             to specify the level (see search())
 
-        cid      ???
+            'cid' -- ???
         """
 
         record = parseIndexRequest(request,self.id,self.query_options)
@@ -299,19 +226,14 @@
                           "Please use a mapping object and the "
                           "'level' key to specify the operator." % cid)
 
-
-        # get the level parameter
         level    = record.get("level",0)
-
-        # experimental code for specifing the operator
         operator = record.get('operator',self.useOperator).lower()
 
         # depending on the operator we use intersection of union
-        if operator=="or":  set_func = union
-        else:               set_func = intersection
+        if operator == "or":  set_func = union
+        else: set_func = intersection
 
         res = None
-
         for k in record.keys:
             rows = self.search(k,level)
             res = set_func(res,rows)
@@ -325,26 +247,23 @@
         """has unique values for column name"""
         return name == self.id
 
-
     def uniqueValues(self, name=None, withLength=0):
         """ needed to be consistent with the interface """
         return self._index.keys()
 
-
     def getIndexSourceNames(self):
         """ return names of indexed attributes """
         return ('getPhysicalPath', )
 
-
-    def getEntryForObject(self, documentId, default=_marker):
-        """ Takes a document ID and returns all the information we have
-        on that specific object. """
+    def getEntryForObject(self, docid, default=_marker):
+        """ Takes a document ID and returns all the information 
+            we have on that specific object. 
+        """
         try:
-            return self._unindex[documentId]
+            return self._unindex[docid]
         except KeyError:
             # XXX Why is default ignored?
             return None
-
 
     index_html = DTMLFile('dtml/index', globals())
     manage_workspace = DTMLFile('dtml/managePathIndex', globals())