[Zope-Checkins] SVN: Zope/trunk/ More catalog work, merge in the non-queryplan and non-btree changes of queryplan and optimize more of date range index internals
Hanno Schlichting
hannosch at hannosch.eu
Sun Jul 25 18:03:14 EDT 2010
Log message for revision 115092:
More catalog work, merge in the non-queryplan and non-btree changes of queryplan and optimize more of date range index internals
Changed:
U Zope/trunk/doc/CHANGES.rst
U Zope/trunk/src/Products/PluginIndexes/DateIndex/DateIndex.py
U Zope/trunk/src/Products/PluginIndexes/DateRangeIndex/DateRangeIndex.py
U Zope/trunk/src/Products/PluginIndexes/common/UnIndex.py
U Zope/trunk/src/Products/PluginIndexes/interfaces.py
U Zope/trunk/src/Products/ZCatalog/Catalog.py
-=-
Modified: Zope/trunk/doc/CHANGES.rst
===================================================================
--- Zope/trunk/doc/CHANGES.rst 2010-07-25 18:09:26 UTC (rev 115091)
+++ Zope/trunk/doc/CHANGES.rst 2010-07-25 22:03:14 UTC (rev 115092)
@@ -56,6 +56,17 @@
Features Added
++++++++++++++
+- Various optimizations to indexes _apply_index and the catalog's search
+ method inspired by experimental.catalogqueryplan.
+
+- Added a new ILimitedResultIndex to Products.PluginIndexes and made most
+ built-in indexes compatible with it. This allows indexes to consider the
+ already calculated result set inside their own calculations.
+
+- Changed the internals of the DateRangeIndex to always use IITreeSet and do
+ an inline migration from IISet. Some datum tend to have large number of
+ documents, for example when using default floor or ceiling dates.
+
- Added a new reporting tab to `Products.ZCatalog` instances. You can use this
to get an overview of slow catalog queries, as specified by a configurable
threshold value. The reports are per running Zope process.
Modified: Zope/trunk/src/Products/PluginIndexes/DateIndex/DateIndex.py
===================================================================
--- Zope/trunk/src/Products/PluginIndexes/DateIndex/DateIndex.py 2010-07-25 18:09:26 UTC (rev 115091)
+++ Zope/trunk/src/Products/PluginIndexes/DateIndex/DateIndex.py 2010-07-25 22:03:14 UTC (rev 115092)
@@ -157,7 +157,7 @@
return returnStatus
- def _apply_index(self, request):
+ def _apply_index(self, request, resultset=None):
"""Apply the index to query parameters given in the argument
Normalize the 'query' arguments into integer values at minute
@@ -176,7 +176,7 @@
#experimental code for specifing the operator
operator = record.get( 'operator', self.useOperator )
if not operator in self.operators :
- raise RuntimeError, "operator not valid: %s" % operator
+ raise RuntimeError("operator not valid: %s" % operator)
# depending on the operator we use intersection or union
if operator=="or":
@@ -223,6 +223,9 @@
if set is not None:
if isinstance(set, int):
set = IISet((set,))
+ else:
+ # set can't be bigger than resultset
+ set = intersection(set, resultset)
r = set_func(r, set)
if isinstance(r, int):
Modified: Zope/trunk/src/Products/PluginIndexes/DateRangeIndex/DateRangeIndex.py
===================================================================
--- Zope/trunk/src/Products/PluginIndexes/DateRangeIndex/DateRangeIndex.py 2010-07-25 18:09:26 UTC (rev 115091)
+++ Zope/trunk/src/Products/PluginIndexes/DateRangeIndex/DateRangeIndex.py 2010-07-25 22:03:14 UTC (rev 115092)
@@ -28,7 +28,6 @@
from BTrees.IIBTree import IITreeSet
from BTrees.IIBTree import intersection
from BTrees.IIBTree import multiunion
-from BTrees.IIBTree import union
from BTrees.IOBTree import IOBTree
from BTrees.Length import Length
from DateTime.DateTime import DateTime
@@ -242,7 +241,7 @@
return tuple( result )
- def _apply_index(self, request):
+ def _apply_index(self, request, resultset=None):
"""
Apply the index to query parameters given in 'request', which
should be a mapping object.
@@ -265,34 +264,17 @@
# Aggregate sets for each bucket separately, to avoid
# large-small union penalties.
#
- #until_only = IISet()
- #map( until_only.update, self._until_only.values( term ) )
- # XXX use multi-union
until_only = multiunion( self._until_only.values( term ) )
-
- #since_only = IISet()
- #map( since_only.update, self._since_only.values( None, term ) )
- # XXX use multi-union
since_only = multiunion( self._since_only.values( None, term ) )
-
- #until = IISet()
- #map( until.update, self._until.values( term ) )
- # XXX use multi-union
until = multiunion( self._until.values( term ) )
- #since = IISet()
- #map( since.update, self._since.values( None, term ) )
- # XXX use multi-union
- since = multiunion( self._since.values( None, term ) )
+ # Total result is bound by resultset
+ until = intersection(resultset, until)
+ since = multiunion(self._since.values(None, term))
+ bounded = intersection(until, since)
- bounded = intersection( until, since )
-
# Merge from smallest to largest.
- #result = union( self._always, until_only )
- result = union( bounded, until_only )
- result = union( result, since_only )
- #result = union( result, bounded )
- result = union( result, self._always )
+ result = multiunion([bounded, until_only, since_only, self._always])
return result, ( self._since_field, self._until_field )
@@ -314,8 +296,8 @@
if set is None:
self._until_only[ until ] = documentId
else:
- if isinstance(set, int):
- set = self._until_only[ until ] = IISet((set, documentId))
+ if isinstance(set, (int, IISet)):
+ set = self._until_only[until] = IITreeSet((set, documentId))
else:
set.insert( documentId )
elif until is None:
@@ -324,8 +306,8 @@
if set is None:
self._since_only[ since ] = documentId
else:
- if isinstance(set, int):
- set = self._since_only[ since ] = IISet((set, documentId))
+ if isinstance(set, (int, IISet)):
+ set = self._since_only[since] = IITreeSet((set, documentId))
else:
set.insert( documentId )
@@ -335,8 +317,8 @@
if set is None:
self._since[ since ] = documentId
else:
- if isinstance(set, int):
- set = self._since[ since ] = IISet((set, documentId))
+ if isinstance(set, (int, IISet)):
+ set = self._since[since] = IITreeSet((set, documentId))
else:
set.insert( documentId )
@@ -344,8 +326,8 @@
if set is None:
self._until[ until ] = documentId
else:
- if isinstance(set, int):
- set = self._until[ until ] = IISet((set, documentId))
+ if isinstance(set, (int, IISet)):
+ set = self._until[until] = IITreeSet((set, documentId))
else:
set.insert( documentId )
Modified: Zope/trunk/src/Products/PluginIndexes/common/UnIndex.py
===================================================================
--- Zope/trunk/src/Products/PluginIndexes/common/UnIndex.py 2010-07-25 18:09:26 UTC (rev 115091)
+++ Zope/trunk/src/Products/PluginIndexes/common/UnIndex.py 2010-07-25 22:03:14 UTC (rev 115092)
@@ -21,7 +21,7 @@
from BTrees.IIBTree import intersection
from BTrees.IIBTree import IITreeSet
from BTrees.IIBTree import IISet
-from BTrees.IIBTree import union
+from BTrees.IIBTree import multiunion
from BTrees.IOBTree import IOBTree
from BTrees.Length import Length
from BTrees.OOBTree import OOBTree
@@ -31,7 +31,7 @@
from Products.PluginIndexes.common import safe_callable
from Products.PluginIndexes.common.util import parseIndexRequest
-from Products.PluginIndexes.interfaces import IPluggableIndex
+from Products.PluginIndexes.interfaces import ILimitedResultIndex
from Products.PluginIndexes.interfaces import ISortIndex
from Products.PluginIndexes.interfaces import IUniqueValueIndex
@@ -43,7 +43,7 @@
"""Simple forward and reverse index.
"""
- implements(IPluggableIndex, IUniqueValueIndex, ISortIndex)
+ implements(ILimitedResultIndex, IUniqueValueIndex, ISortIndex)
def __init__(
self, id, ignore_ex=None, call_methods=None, extra=None, caller=None):
@@ -302,7 +302,7 @@
LOG.debug('Attempt to unindex nonexistent document'
' with id %s' % documentId,exc_info=True)
- def _apply_index(self, request):
+ def _apply_index(self, request, resultset=None):
"""Apply the index to query parameters given in the request arg.
The request argument should be a mapping object.
@@ -348,12 +348,8 @@
# experimental code for specifing the operator
operator = record.get('operator',self.useOperator)
if not operator in self.operators :
- raise RuntimeError,"operator not valid: %s" % escape(operator)
+ raise RuntimeError("operator not valid: %s" % escape(operator))
- # depending on the operator we use intersection or union
- if operator=="or": set_func = union
- else: set_func = intersection
-
# Range parameter
range_parm = record.get('range',None)
if range_parm:
@@ -375,24 +371,84 @@
if 'max' in opr_args: hi = max(record.keys)
else: hi = None
if hi:
- setlist = index.items(lo,hi)
+ setlist = index.values(lo,hi)
else:
- setlist = index.items(lo)
+ setlist = index.values(lo)
- for k, set in setlist:
- if isinstance(set, int):
- set = IISet((set,))
- r = set_func(r, set)
+ # If we only use one key, intersect and return immediately
+ if len(setlist) == 1:
+ result = setlist[0]
+ if isinstance(result, int):
+ result = IISet((result,))
+ return result, (self.id,)
+
+ if operator == 'or':
+ tmp = []
+ for s in setlist:
+ if isinstance(s, int):
+ s = IISet((s,))
+ tmp.append(s)
+ r = multiunion(tmp)
+ else:
+ # For intersection, sort with smallest data set first
+ tmp = []
+ for s in setlist:
+ if isinstance(s, int):
+ s = IISet((s,))
+ tmp.append(s)
+ if len(tmp) > 2:
+ setlist = sorted(tmp, key=len)
+ else:
+ setlist = tmp
+ r = resultset
+ for s in setlist:
+ # the result is bound by the resultset
+ r = intersection(r, s)
+
else: # not a range search
- for key in record.keys:
- set=index.get(key, None)
- if set is None:
- set = IISet(())
- elif isinstance(set, int):
- set = IISet((set,))
- r = set_func(r, set)
+ # Filter duplicates
+ setlist = []
+ for k in record.keys:
+ s = index.get(k, None)
+ # If None, try to bail early
+ if s is None:
+ if operator == 'or':
+ # If union, we can't possibly get a bigger result
+ continue
+ # If intersection, we can't possibly get a smaller result
+ return IISet(), (self.id,)
+ elif isinstance(s, int):
+ s = IISet((s,))
+ setlist.append(s)
- if isinstance(r, int): r=IISet((r,))
+ # If we only use one key return immediately
+ if len(setlist) == 1:
+ result = setlist[0]
+ if isinstance(result, int):
+ result = IISet((result,))
+ return result, (self.id,)
+
+ if operator == 'or':
+ # If we already get a small result set passed in, intersecting
+ # the various indexes with it and doing the union later is
+ # faster than creating a multiunion first.
+ if resultset is not None and len(resultset) < 200:
+ smalllist = []
+ for s in setlist:
+ smalllist.append(intersection(resultset, s))
+ r = multiunion(smalllist)
+ else:
+ r = multiunion(setlist)
+ else:
+ # For intersection, sort with smallest data set first
+ if len(setlist) > 2:
+ setlist = sorted(setlist, key=len)
+ r = resultset
+ for s in setlist:
+ r = intersection(r, s)
+
+ if isinstance(r, int):
+ r = IISet((r, ))
if r is None:
return IISet(), (self.id,)
else:
Modified: Zope/trunk/src/Products/PluginIndexes/interfaces.py
===================================================================
--- Zope/trunk/src/Products/PluginIndexes/interfaces.py 2010-07-25 18:09:26 UTC (rev 115091)
+++ Zope/trunk/src/Products/PluginIndexes/interfaces.py 2010-07-25 22:03:14 UTC (rev 115092)
@@ -85,6 +85,15 @@
"""Empty the index"""
+class ILimitedResultIndex(IPluggableIndex):
+
+ def _apply_index(request, resultset=None):
+ """Same as IPluggableIndex' _apply_index method. The additional
+ resultset argument contains the resultset, as already calculated by
+ ZCatalog's search method.
+ """
+
+
class IUniqueValueIndex(IPluggableIndex):
"""An index which can return lists of unique values contained in it"""
Modified: Zope/trunk/src/Products/ZCatalog/Catalog.py
===================================================================
--- Zope/trunk/src/Products/ZCatalog/Catalog.py 2010-07-25 18:09:26 UTC (rev 115091)
+++ Zope/trunk/src/Products/ZCatalog/Catalog.py 2010-07-25 22:03:14 UTC (rev 115092)
@@ -22,6 +22,7 @@
import ExtensionClass
from Missing import MV
from Persistence import Persistent
+from Products.PluginIndexes.interfaces import ILimitedResultIndex
import BTrees.Length
from BTrees.IIBTree import intersection, weightedIntersection, IISet
@@ -475,6 +476,10 @@
query[iid] = value
return query
+ def _sorted_search_indexes(self, query):
+ # Simple implementation doing no ordering.
+ return self.indexes.keys()
+
def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
"""Iterate through the indexes, applying the query to each one. If
merge is true then return a lazy result set (sorted if appropriate)
@@ -497,25 +502,44 @@
# Canonicalize the request into a sensible query before passing it on
query = self.make_query(query)
+ query_keys = query.keys()
cr = self.getCatalogReport(query)
cr.start()
- for i in self.indexes.keys():
+ for i in self._sorted_search_indexes(query):
+ if i not in query_keys:
+ # Do not ask indexes to restrict the result, which aren't
+ # part of the query
+ continue
+
index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None)
if _apply_index is None:
continue
+ limit_result = False
+ if ILimitedResultIndex.providedBy(index):
+ limit_result = True
+
cr.split(i)
- r = _apply_index(query)
+ if limit_result:
+ r = _apply_index(query, rs)
+ else:
+ r = _apply_index(query)
cr.split(i)
if r is not None:
r, u = r
+ # Short circuit if empty result
+ # BBB: We can remove the "r is not None" check in Zope 2.14
+ # once we don't need to support the "return everything" case
+ # anymore
+ if r is not None and not r:
+ return LazyCat([])
w, rs = weightedIntersection(rs, r)
if not rs:
- break
+ break
cr.stop()
More information about the Zope-Checkins
mailing list