[Zope-Checkins] CVS: Zope/lib/python/Products/ZCatalog - Catalog.py:1.102
Casey Duncan
casey@zope.com
Mon, 9 Dec 2002 17:37:01 -0500
Update of /cvs-repository/Zope/lib/python/Products/ZCatalog
In directory cvs.zope.org:/tmp/cvs-serv544
Modified Files:
Catalog.py
Log Message:
More sorting improvements:
* Changed logic for activating first sort algorithm to elminate bad performance with large result sets (20k+). The full sort is now faster for a larger proportion of cases. This algorithm is also skipped now if a sort limit value is passed.
* Full sort now handles sort limits where the limit is 25% or greater of the total result where N-Best performance degrades. This allows the application to always apply a sort limit up to and beyond the result set length.
* Added an "N-worst" sort handler to deal with forward sort limits (previously only reverse limits worked properly).
* Small optimizations to N-best/worst to wring out a few more CPU cycles.
=== Zope/lib/python/Products/ZCatalog/Catalog.py 1.101 => 1.102 ===
--- Zope/lib/python/Products/ZCatalog/Catalog.py:1.101 Fri Dec 6 16:53:20 2002
+++ Zope/lib/python/Products/ZCatalog/Catalog.py Mon Dec 9 17:37:00 2002
@@ -22,6 +22,7 @@
from Lazy import LazyMap, LazyFilter, LazyCat, LazyValues
from CatalogBrains import AbstractCatalogBrain, NoBrainer
+from sorters import buildSortableResults
from BTrees.IIBTree import intersection, weightedIntersection, IISet
from BTrees.OIBTree import OIBTree
@@ -524,24 +525,29 @@
def sortResults(self, rs, sort_index, reverse=0, limit=None, merge=1):
# Sort a result set using a sort index. Return a lazy
# result set in sorted order if merge is true otherwise
- # returns a list of (sortkey, results) tuples
+ # returns a list of (sortkey, uid, getter_function) tuples
#
# The two 'for' loops in here contribute a significant
# proportion of the time to perform an indexed search.
# Try to avoid all non-local attribute lookup inside
# those loops.
+ assert limit is None or limit > 0, 'Limit value must be 1 or greater'
_lazymap = LazyMap
_intersection = intersection
_self__getitem__ = self.__getitem__
index_key_map = sort_index.documentToKeyMap()
_None = None
+ _keyerror = KeyError
result = []
append = result.append
+ if hasattr(rs, 'keys'):
+ rs = rs.keys()
+ rlen = len(rs)
- if (len(rs) > (len(sort_index) * 4)):
+ if limit is None and (rlen > (len(sort_index) * (rlen / 100 + 1))):
# The result set is much larger than the sorted index,
# so iterate over the sorted index for speed.
- # This is almost never exercised in practice...
+ # This is rarely exercised in practice...
length = 0
@@ -550,8 +556,6 @@
except TypeError:
# rs is not an object in the IIBTree family.
# Try to turn rs into an IISet.
- if hasattr(rs, 'keys'):
- rs = rs.keys()
rs = IISet(rs)
for k, intset in sort_index.items():
@@ -575,64 +579,87 @@
result = LazyCat(LazyValues(result), length)
else:
return result
- else:
+ elif limit is None or (limit * 4 > rlen):
# Iterate over the result set getting sort keys from the index
- if hasattr(rs, 'keys'):
- rs = rs.keys()
- _keyerror = KeyError
- if limit is None:
- for did in rs:
- try:
- key = index_key_map[did]
- except _keyerror:
- # This document is not in the sort key index.
- # skip it.
- pass
- else:
- append((key, did, _self__getitem__))
- # The reference back to __getitem__ is used in case
- # we do not merge now and need to intermingle the
- # results with those of other catalogs while avoiding
- # the cost of instantiating a LazyMap per result
-
- if merge:
- result.sort()
- if reverse:
- result.reverse()
- result = LazyValues(result)
+ for did in rs:
+ try:
+ key = index_key_map[did]
+ except _keyerror:
+ # This document is not in the sort key index, skip it.
+ pass
+ else:
+ append((key, did, _self__getitem__))
+ # The reference back to __getitem__ is used in case
+ # we do not merge now and need to intermingle the
+ # results with those of other catalogs while avoiding
+ # the cost of instantiating a LazyMap per result
+ if merge:
+ result.sort()
+ if reverse:
+ result.reverse()
+ if limit is not None:
+ result = result[:limit]
+ result = LazyValues(result)
+ else:
+ return result
+ elif reverse:
+ # Limit/sort results using N-Best algorithm
+ # This is faster for large sets then a full sort
+ # And uses far less memory
+ keys = []
+ n = 0
+ worst = None
+ for did in rs:
+ try:
+ key = index_key_map[did]
+ except _keyerror:
+ # This document is not in the sort key index, skip it.
+ pass
else:
- return result
- else:
- # Limit/sort results using N-Best algorithm
- # This is faster for large sets then a full sort
- # And uses far less memory
- keys = []
- n = 0
- for did in rs:
- try:
- key = index_key_map[did]
- except _keyerror:
- # This document is not in the sort key index.
- # skip it.
- pass
+ if n >= limit and key <= worst:
+ continue
+ i = bisect(keys, key)
+ keys.insert(i, key)
+ result.insert(i, (key, did, _self__getitem__))
+ if n == limit:
+ del keys[0], result[0]
else:
- if n >= limit and key <= keys[0]:
- continue
- i = bisect(keys, key)
- keys.insert(i, key)
- result.insert(i, (key, did, _self__getitem__))
- if n == limit:
- del keys[0], result[0]
- else:
- n += 1
- result.reverse()
- if merge:
- result = LazyValues(result)
+ n += 1
+ worst = keys[0]
+ result.reverse()
+ if merge:
+ result = LazyValues(result)
+ else:
+ return result
+ elif not reverse:
+ # Limit/sort results using N-Best algorithm in reverse (N-Worst?)
+ keys = []
+ n = 0
+ best = None
+ for did in rs:
+ try:
+ key = index_key_map[did]
+ except _keyerror:
+ # This document is not in the sort key index, skip it.
+ pass
else:
- return result
+ if n >= limit and key >= best:
+ continue
+ i = bisect(keys, key)
+ keys.insert(i, key)
+ result.insert(i, (key, did, _self__getitem__))
+ if n == limit:
+ del keys[-1], result[-1]
+ else:
+ n += 1
+ best = keys[-1]
+ if merge:
+ result = LazyValues(result)
+ else:
+ return result
result = LazyMap(self.__getitem__, result, len(result))
- result.actual_result_count = len(rs)
+ result.actual_result_count = rlen
return result
def _get_sort_attr(self, attr, kw):
@@ -743,9 +770,14 @@
# Concatenate the catalog results into one list and sort it
# Each result record consists of a list of tuples with three values:
# (sortkey, docid, catalog__getitem__)
- all = []
- for r in results:
- all.extend(r)
+ if len(results) > 1:
+ all = []
+ for r in results:
+ all.extend(r)
+ elif len(results) == 1:
+ all = results[0]
+ else:
+ return []
all.sort()
if reverse:
all.reverse()