[Zope-Checkins] CVS: Zope2 - UnIndex.py:1.29

Tue, 17 Apr 2001 13:05:49 -0400

Update of /cvs-repository/Zope2/lib/python/SearchIndex
In directory serenade.digicool.com:/home/chrism/sandboxes/testtrunk/lib/python/SearchIndex

Modified Files:
	UnIndex.py 
Log Message:
Merging into trunk.

--- Updated File UnIndex.py in package Zope2 --
--- UnIndex.py	2001/03/17 00:48:59	1.28
+++ UnIndex.py	2001/04/17 17:05:49	1.29
@@ -91,12 +91,11 @@
 from Acquisition import Implicit
 import BTree
 import IOBTree
-import operator
-import string, pdb
+import string
 from zLOG import LOG, ERROR
-from types import *
+from types import StringType, ListType, IntType, TupleType
 
-from BTrees.OOBTree import OOBTree
+from BTrees.OOBTree import OOBTree, OOSet
 from BTrees.IOBTree import IOBTree
 from BTrees.IIBTree import IITreeSet, IISet, union
 import BTrees.Length
@@ -105,15 +104,6 @@
 
 _marker = []
 
-def nonEmpty(s):
-    "returns true if a non-empty string or any other (nonstring) type"
-    if type(s) is StringType:
-        if s: return 1
-        else: return 0
-    else:
-        return 1
-
-
 class UnIndex(Persistent, Implicit):
     """UnIndex object interface"""
 
@@ -132,6 +122,10 @@
         self._index = {datum:[documentId1, documentId2]}
         self._unindex = {documentId:datum}
 
+        If any item in self._index has a length-one value, the value is an
+        integer, and not a set.  There are special cases in the code to deal
+        with this.
+
         The arguments are:
 
           'id' -- the name of the item attribute to index.  This is
@@ -207,8 +201,12 @@
         elements found at each point in the index."""
 
         histogram = {}
-        for (key, value) in self._index.items():
-            entry = len(value)
+        for item in self._index.items():
+            if type(item) is IntType:
+                entry = 1 # "set" length is 1
+            else:
+                key, value = item
+                entry = len(value)
             histogram[entry] = histogram.get(entry, 0) + 1
 
         return histogram
@@ -329,28 +327,45 @@
                 ' with id %s' % documentId)
 
     def _apply_index(self, request, cid='', type=type, None=None): 
-        """Apply the index to query parameters given in the argument,
-        request
+        """Apply the index to query parameters given in the request arg.
+
+        The request argument should be a mapping object.
+
+        If the request does not have a key which matches the "id" of
+        the index instance, then None is returned.
+
+        If the request *does* have a key which matches the "id" of
+        the index instance, one of a few things can happen:
 
-        The argument should be a mapping object.
+          - if the value is a blank string, None is returned (in
+            order to support requests from web forms, where a field
+            left blank cannot be told apart from an omitted one).
 
-        If the request does not contain the needed parameters, then
-        None is returned.
+          - if the value is a nonblank string, turn the value into
+            a single-element sequence, and proceed.
 
+          - if the value is a sequence, return a union search.
+
         If the request contains a parameter with the name of the
         column + '_usage', it is sniffed for information on how to
         handle applying the index.
 
-        Otherwise two objects are returned.  The first object is a
+        If None is not returned as a result of the abovementioned
+        constraints, two objects are returned.  The first object is a
         ResultSet containing the record numbers of the matching
         records.  The second object is a tuple containing the names of
         all data fields used.
 
+        FAQ answer:  to search a Field Index for documents that
+        have a blank string as their value, wrap the request value
+        up in a tuple, a la: request = {'id':('',)}
+
         """
         id = self.id              #name of the column
 
         cidid = "%s/%s" % (cid,id)
 
+        # i have no f'ing clue what this cidid stuff is for - chrism
         if request.has_key(cidid):
             keys = request[cidid]
         elif request.has_key(id):
@@ -359,60 +374,47 @@
             return None
 
         if type(keys) not in (ListType, TupleType):
-            keys = [keys]
+            if keys == '':
+                return None
+            else:
+                keys = [keys]
 
         index = self._index
         r = None
-        anyTrue = 0
         opr = None
-        IntType=type(1)
 
         if request.has_key(id+'_usage'):
             # see if any usage params are sent to field
             opr=string.split(string.lower(request[id+"_usage"]),':')
             opr, opr_args=opr[0], opr[1:]
 
-        if opr=="range":
+        if opr=="range":   # range search
             if 'min' in opr_args: lo = min(keys)
             else: lo = None
             if 'max' in opr_args: hi = max(keys)
             else: hi = None
-
-            anyTrue=1
-            try:
-                if hi:
-                    setlist = index.items(lo,hi)
-                else:
-                    setlist = index.items(lo)
-
-                for k, set in setlist:
-                    r = union(r, set)
-
-            except KeyError:
-                pass
+            if hi:
+                setlist = index.items(lo,hi)
+            else:
+                setlist = index.items(lo)
 
-        else:           #not a range
-            get = index.get
+            for k, set in setlist:
+                if type(set) is IntType:
+                    set = IISet((set,))
+                r = union(r, set)
+        else: # not a range search
             for key in keys:
-                if nonEmpty(key):
-                    anyTrue = 1
-                set=get(key, None)
+                set=index.get(key, None)
                 if set is not None:
+                    if type(set) is IntType:
+                        set = IISet((set,))
                     r = union(r, set)
 
-        if type(r) is IntType: r=IISet((r,))
-        if r:
-            return r, (id,)
-
-                
+        if type(r) is IntType:  r=IISet((r,))
         if r is None:
-            if anyTrue:
-                r=IISet()
-            else:
-                return None
-
-        return r, (id,)
-
+            return IISet(), (id,)
+        else:
+            return r, (id,)
 
     def hasUniqueValuesFor(self, name):
         ' has unique values for column NAME '
@@ -434,18 +436,27 @@
         elif name != self.id:
             return []
 
-        if not withLengths: return tuple(
-            filter(nonEmpty, self._index.keys())
-            )
+        if not withLengths:
+            return tuple(self._index.keys())
         else: 
             rl=[]
             for i in self._index.keys():
-                if not nonEmpty(i): continue
-                else: rl.append((i, len(self._index[i])))
+                set = self._index[i]
+                if type(set) is IntType:
+                    l = 1
+                else:
+                    l = len(set)
+                rl.append((i, l))
             return tuple(rl)
 
     def keyForDocument(self, id):
         return self._unindex[id]
 
-    def items(self): return self._index.items()
+    def items(self):
+        items = []
+        for k,v in self._index.items():
+            if type(v) is IntType:
+                v = IISet((v,))
+            items.append((k, v))
+        return items