[Zope-Checkins] CVS: Zope2 - Catalog.py:1.60.2.9.2.1
chrism@serenade.digicool.com
chrism@serenade.digicool.com
Tue, 17 Apr 2001 02:43:33 -0400
Update of /cvs-repository/Zope2/lib/python/Products/ZCatalog
In directory serenade.digicool.com:/home/chrism/sandboxes/CatalogForNow/lib/python/Products/ZCatalog
Modified Files:
Tag: chrism-CatalogForNow-branch
Catalog.py
Log Message:
Broke out body of _indexedSearch method into several functions. Most things that instantiate a Lazy object now try to pass in the length. "sort_on" and "sort-on" are now removed from the query before the query is passed to indexes. Fixed a bug in the searchResults method which may have led to people believing they could sort on a nontext or nonkeyword index.
Miserable.
--- Updated File Catalog.py in package Zope2 --
--- Catalog.py 2001/03/23 20:50:03 1.60.2.9
+++ Catalog.py 2001/04/17 06:43:33 1.60.2.9.2.1
@@ -98,7 +98,7 @@
from Lazy import LazyMap, LazyFilter, LazyCat
from CatalogBrains import AbstractCatalogBrain, NoBrainer
-from BTrees.IIBTree import intersection, weightedIntersection
+from BTrees.IIBTree import intersection, weightedIntersection, IISet
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
import BTrees.Length
@@ -527,85 +527,127 @@
## Searching engine. You don't really have to worry about what goes
## on below here... Most of this stuff came from ZTables with tweaks.
+## ^^^^ in some warped fantasy land, the prior comment might even be true. ;-)
+## i'm leaving it in here for entertainment value - chrism
+
def _indexedSearch(self, args, sort_index, append, used):
"""
Iterate through the indexes, applying the query to each one.
"""
-
- rs=None
- data=self.data
-
+ resultSet = None
if used is None: used={}
- for i in self.indexes.keys():
- index = self.indexes[i].__of__(self)
- if hasattr(index,'_apply_index'):
- r=index._apply_index(args)
- if r is not None:
- r, u = r
- for name in u:
+
+ for index in self.indexes.values():
+ if hasattr(index, '_apply_index'):
+ index = index.__of__(self)
+ indexResult=index._apply_index(args)
+ if indexResult is not None:
+ indexResult, indexUsed = indexResult
+ for name in indexUsed:
used[name]=1
- w, rs = weightedIntersection(rs, r)
+ weight, resultSet=weightedIntersection(resultSet,
+ indexResult)
- #assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
- if rs is None:
- # return everything
- if sort_index is None:
- rs=data.items()
- append(LazyMap(self.instantiate, rs, len(self)))
- else:
- try:
- for k, intset in sort_index.items():
- append((k,LazyMap(self.__getitem__, intset)))
- except AttributeError:
- raise ValueError, (
- "Incorrect index name passed as"
- " 'sort_on' parameter. Note that you may only"
- " sort on values for which there is a matching"
- " index available.")
- elif rs:
- # this is reached by having an empty result set (ie non-None)
- if sort_index is None and hasattr(rs, 'values'):
- # having a 'values' means we have a data structure with
- # scores. Build a new result set, sort it by score, reverse
- # it, compute the normalized score, and Lazify it.
- rset = rs.byValue(0) # sort it by score
- max = float(rset[0][0])
- rs = []
- for score, key in rset:
- # compute normalized scores
- rs.append(( int((score/max)*100), score, key))
- append(LazyMap(self.__getitem__, rs))
-
- elif sort_index is None and not hasattr(rs, 'values'):
- # no scores? Just Lazify.
- if hasattr(rs, 'keys'): rs=rs.keys()
- append(LazyMap(self.__getitem__, rs))
+ if resultSet is None:
+ # Case 1: resultSet came back as None
+ #
+ # none of the indexes were remotely interested in the args,
+ # so return everything. we return everything instead of
+ # returning nothing for hysterical reasons.
+ self._appendAllResults(sort_index, append)
+
+ elif resultSet:
+ # Case 2: nonempty resultSet
+ #
+ # this is reached by having a non-empty, non-None result set,
+ # meaning that at least one of the indexes was interested
+ # in the args and had data that matched the query.
+ self._appendSpecifiedResults(sort_index, append, resultSet)
+
+ # Case 3: empty resultSet (implied)
+ #
+ # this is reached by having an empty result set, meaning that
+ # at least one of the indexes was interested in the args, but
+ # the query matched none of the documents in any of the indexes.
+ # We do nothing in this case.
+
+ # return the names used by the indexes for an unknown reason ;-)
+ return used
+
+ def _appendAllResults(self, sort_index, append):
+ # this method is internal and is meant to be called only
+ # by _indexedSearch!
+ if sort_index is None:
+ # we don't have a sort index, just return everything
+ # in an undetermined order.
+ resultSet=self.data.items()
+ append(LazyMap(self.instantiate, resultSet, len(self)))
+ else:
+ # we have a sort index, return stuff sorted by sort index.
+ for k, intSet in sort_index.items():
+ append((k,LazyMap(self.__getitem__, intSet, len(intSet))))
+
+ def _appendSpecifiedResults(self, sort_index, append, resultSet):
+ # this method is internal and is meant to be called only
+ # by _indexedSearch!
+
+ haveScores = hasattr(resultSet, 'byValue')
+
+ if sort_index is None and haveScores:
+ # we have no sort_index, but we do have scores to sort by.
+ # having a 'byValue' means we have a data structure with
+ # scores. Build a new result set using byValue, compute
+ # the normalized score, and Lazify it.
+ scoreSorted = resultSet.byValue(0)
+ # scoreSorted is now a list of two-tuples where the first
+ # element of the tuple is a score and the second is a docId.
+ # The two-tuples are sorted highest-score-first.
+ max = float(scoreSorted[0][0]) or 1.0
+ sortedSet = []
+ length = 0
+ for score, key in scoreSorted:
+ # compute normalized scores
+ normScore = int((score/max) * 100)
+ sortedSet.append((normScore, score, key))
+ length = length + 1
+ append(LazyMap(self.__getitem__, sortedSet, length))
+
+ elif sort_index is None and not haveScores:
+ # we don't have a 'byValue' which means there are no
+ # scores in the resultSet. We want to just lazify the
+ # results and return them. If we have a dictionaryish
+ # mapping object, get just its keys for use by LazyMap.
+ if hasattr(resultSet, 'keys'):
+ resultSet=resultSet.keys()
+ length = len(resultSet)
+ append(LazyMap(self.__getitem__, resultSet, length))
+
+ else:
+ # We have a sort_index.
+ # We do nothing with scores.
+ if ((len(resultSet) / 4) > len(sort_index)):
+ # if the sort index has a quarter as many keys as
+ # the result set
+ for k, intSet in sort_index.items():
+ # We have an index that has a set of values for
+ # each sort key, so we interset with each set and
+ # get a sorted sequence of the intersections.
+
+ # This only makes sense if the number of
+ # keys is much less then the number of results.
+ intSet = intersection(resultSet, intSet)
+ if intSet:
+ if hasattr(intSet, 'keys'):
+ intSet=intSet.keys()
+ length = len(intSet)
+ append((k, LazyMap(self.__getitem__,intSet,length)))
else:
- # sort. If there are scores, then this block is not
- # reached, therefor 'sort-on' does not happen in the
- # context of text index query. This should probably
- # sort by relevance first, then the 'sort-on' attribute.
- if ((len(rs) / 4) > len(sort_index)):
- # if the sorted index has a quarter as many keys as
- # the result set
- for k, intset in sort_index.items():
- # We have an index that has a set of values for
- # each sort key, so we interset with each set and
- # get a sorted sequence of the intersections.
-
- # This only makes sense if the number of
- # keys is much less then the number of results.
- intset = intersection(rs, intset)
- if intset:
- if hasattr(intset, 'keys'): intset=intset.keys()
- append((k,LazyMap(self.__getitem__, intset)))
- else:
- if hasattr(rs, 'keys'): rs=rs.keys()
- for did in rs:
- append((sort_index.keyForDocument(did),
- LazyMap(self.__getitem__,[did])))
+ if hasattr(resultSet, 'keys'):
+ resultSet=resultSet.keys()
+ for docId in resultSet:
+ append((sort_index.keyForDocument(docId),
+ LazyMap(self.__getitem__,[docId])))
- return used
def searchResults(self, REQUEST=None, used=None,
query_map={
@@ -635,16 +677,24 @@
# Compute "sort_index", which is a sort index, or none:
if kw.has_key('sort-on'):
sort_index=kw['sort-on']
+ del kw['sort-on']
elif hasattr(self, 'sort-on'):
sort_index=getattr(self, 'sort-on')
elif kw.has_key('sort_on'):
sort_index=kw['sort_on']
+ del kw['sort_on']
else: sort_index=None
sort_order=''
- if sort_index is not None and self.indexes.has_key(sort_index):
- sort_index=self.indexes[sort_index]
- if not hasattr(sort_index, 'keyForDocument'):
- raise CatalogError('Invalid sort index')
+ if sort_index is not None:
+ if self.indexes.has_key(sort_index):
+ sort_index=self.indexes[sort_index]
+ if not hasattr(sort_index, 'keyForDocument'):
+ raise CatalogError(
+ 'The index chosen for sort_on is not capable of being'
+ ' used as a sort index.'
+ )
+ else:
+ raise CatalogError('Unknown sort_on index %s' % sort_index)
# Perform searches with indexes and sort_index
r=[]