[Zope] Searching a FieldIndex for prefix or OR
Chris McDonough
chrism@digicool.com
Mon, 02 Apr 2001 23:56:55 -0400
This is a multi-part message in MIME format.
--------------1A24B64A8F3095D7EA7650D5
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
These are probably pretty crufty at this point (especially the patch; I
have no idea what version of ZCatalog it patches). Also, the intSets used
by the PathIndex have been superseded by the new BTree set classes (see
lib/python/BTrees in 2.3.1+).
"Randall F. Kern" wrote:
>
> Sure, I'd love a place to start.
>
> -Randy
>
> > -----Original Message-----
> > From: Chris McDonough [mailto:chrism@digicool.com]
> > Sent: Monday, April 02, 2001 8:51 PM
> > To: Randall F. Kern
> > Cc: zope@zope.org
> > Subject: Re: [Zope] Searching a FieldIndex for prefix or OR
> >
> >
> > "Randall F. Kern" wrote:
> > >
> > > Is it possible to search a catalog on a field index for
> > either a given
> > > prefix, or any of a list of values?
> > >
> > > Maybe I'm going about this wrong; I want to add the ability
> > to only show
> > > objects found below a specific place on my site, and to do that I
> > > created a field index on the path to each object. Then at
> > query time I
> > > would like to search for /foo/bar/*, or failing that create
> > a list of
> > > all paths below /foo/bar and create an OR query.
> > >
> > > The solution I'm using now is to post-process the search
> > results, but
> > > that seems lame :)
> >
> > Yes, it is lame. And no, it's not possible with a FieldIndex. ;-)
> >
> > There's a proposal floating around verbally at DC to create a
> > "PathIndex" that has this behavior. I think Tres actually
> > sent me some
> > code for it at some point. You want that I should look it up and send
> > it to you?
> >
> > - C
> >
>
> _______________________________________________
> Zope maillist - Zope@zope.org
> http://lists.zope.org/mailman/listinfo/zope
> ** No cross posts or HTML encoding! **
> (Related lists -
> http://lists.zope.org/mailman/listinfo/zope-announce
> http://lists.zope.org/mailman/listinfo/zope-dev )
--------------1A24B64A8F3095D7EA7650D5
Content-Type: text/plain; charset=us-ascii;
name="PathIndex.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="PathIndex.patch"
? PathIndex.patch
? PathIndex.py
? tests
Index: Catalog.py
===================================================================
RCS file: /cvs-repository/Zope2/lib/python/Products/ZCatalog/Catalog.py,v
retrieving revision 1.60
diff -u -r1.60 Catalog.py
--- Catalog.py 2001/01/15 21:45:47 1.60
+++ Catalog.py 2001/01/17 04:21:43
@@ -100,6 +100,7 @@
from Lazy import LazyMap, LazyFilter, LazyCat
from CatalogBrains import AbstractCatalogBrain, NoBrainer
+import PathIndex
class KWMultiMapping(MultiMapping):
def has_key(self, name):
@@ -169,6 +170,10 @@
self._v_brains = brains
self.updateBrains()
+
+ indexes = self.indexes
+ indexes[ 'path' ] = PathIndex.PathIndex( self )
+ self.indexes = indexes
def updateBrains(self):
self.useBrains(self._v_brains)
--------------1A24B64A8F3095D7EA7650D5
Content-Type: text/plain; charset=us-ascii;
name="PathIndex.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="PathIndex.py"
"""
"""
from intSet import intSet
from Acquisition import Implicit
from string import split, lower
class PathIndex( Implicit ):
    """
    Implement the index searching protocol as a search against the
    host catalog's keys (which are paths).

    The index keeps no data structure of its own: every query is
    answered from the 'uids' mapping of the host catalog, so the
    indexing, unindexing, clearing and reindexing hooks are no-ops.
    """

    id = INDEX_ID = 'path'
    meta_type = 'Path Index'

    def __init__( self, host_catalog=None ):
        """
        Remember the catalog whose 'uids' mapping backs this index.

        'host_catalog' defaults to None because, for backward
        compatibility, __init__ has to be callable with zero arguments.
        """
        self._host_catalog = host_catalog

    # for backward compatibility
    _init = __init__

    def __len__( self ):
        """
        Return the number of entries in the host catalog's 'uids'.
        """
        return len( self._host_catalog.uids )

    def hasUniqueValuesFor( self, name ):
        """
        Does this index have unique values for column 'name'?

        True only when 'name' is this index's own id.
        """
        return name == self.INDEX_ID

    def uniqueValues( self, name=None, withLengths=0 ):
        """
        Return unique values for 'name'.

        An empty list is returned when 'name' is neither None nor this
        index's id.  Otherwise the keys of the host catalog's 'uids'
        are returned; if 'withLengths' is true, each key is paired
        with a count of 1.
        """
        if name not in ( None, self.INDEX_ID ):
            return []

        paths = self._host_catalog.uids.keys()

        if withLengths:
            return map( lambda x: ( x, 1 ), paths )
        return paths

    def clear( self ):
        """
        "Empty" the index.

        As this "index" has no datastructure of its own, this is a NOOP.
        """
        pass #NOOP

    def _reindex( self, start=0 ):
        """
        Recompute index data for data with ids >= start.

        As this "index" has no datastructure of its own, this is a NOOP.
        """
        pass #NOOP

    def index_object( self, i, obj=None, threshold=None ):
        """
        "Index" object.

        As this "index" has no datastructure of its own, this is a NOOP
        that always returns 1.
        """
        return 1 #NOOP

    def unindex_object( self, i, obj=None, threshold=None ):
        """
        "Unindex" object.

        As this "index" has no datastructure of its own, this is a NOOP.
        """
        pass #NOOP

    def _apply_index( self, request, cid='' ):
        """
        Apply the index to query parameters given in the argument,
        request.

        The argument should be a mapping object.  The query value is
        read from the key '<cid>/path' if present, else from 'path'.

        If the request does not contain the needed parameters, then
        None is returned.  None is also returned when a non-range
        query matches nothing.

        If the request contains a parameter with the name of the
        column + '_usage', it is sniffed for information on how to
        handle applying the index.  A usage of 'range', optionally
        followed by ':min' and/or ':max', turns the query into a range
        search bounded by the smallest and/or largest query value.

        Otherwise two objects are returned.  The first object is an
        intSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.
        """
        index_id = self.INDEX_ID    # name of the column
        usage = '%s_usage' % index_id
        cidid = "%s/%s" % ( cid, index_id )

        has_key = request.has_key
        if has_key( cidid ):
            keys = request[ cidid ]
        elif has_key( index_id ):
            keys = request[ index_id ]
        else:
            return None

        # Normalise the query value to a list.  A lone value is wrapped
        # rather than passed to list(), which would split a string
        # into its individual characters.
        if type( keys ) is type( () ):
            keys = list( keys )
        elif type( keys ) is not type( [] ):
            keys = [ keys ]

        index = self._host_catalog.uids
        r = intSet()
        anyTrue = 0
        opr = None

        if has_key( usage ):
            # see if any usage params are sent to field
            opr = split( lower( request[ usage ] ), ':' )
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":
            lo = hi = None
            if 'min' in opr_args:
                lo = min( keys )
            if 'max' in opr_args:
                hi = max( keys )

            # A range query always yields a result set, even if empty.
            anyTrue = 1

            try:
                # Use .values() instead of .items(), because the values
                # are the record ids.  Compare 'hi' against None
                # explicitly so that a falsy upper bound (e.g. '') is
                # still honoured instead of widening the range.
                if hi is not None:
                    matches = index.values( lo, hi )
                else:
                    matches = index.values( lo )
                for idx in matches:
                    r.insert( idx )
            except KeyError:
                # Best effort: a KeyError from the lookup leaves the
                # result set as collected so far.
                pass

        else: #not a range
            get = index.get
            for key in keys:
                if key:
                    i = get( key )
                    if i is not None:
                        anyTrue = 1
                        r.insert( i )    # reuse the lookup just done

        if not anyTrue:
            return None

        return r, ( index_id, )
--------------1A24B64A8F3095D7EA7650D5--