[Checkins] SVN: zope.index/trunk/ Move FieldIndex's sorting functionality to a mixin class so it can be reused by zc.catalog's ValueIndex.
Dan Korostelev
nadako at gmail.com
Fri Feb 27 05:33:42 EST 2009
Log message for revision 97337:
Move FieldIndex's sorting functionality to a mixin class so it can be reused by zc.catalog's ValueIndex.
Changed:
U zope.index/trunk/CHANGES.txt
U zope.index/trunk/src/zope/index/field/index.py
A zope.index/trunk/src/zope/index/field/sorting.py
-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt 2009-02-27 10:28:19 UTC (rev 97336)
+++ zope.index/trunk/CHANGES.txt 2009-02-27 10:33:42 UTC (rev 97337)
@@ -15,6 +15,9 @@
- Don't modify given query dictionary in the KeywordIndex.apply method.
+- Move FieldIndex's sorting functionality to a mixin class so it can
+ be reused by zc.catalog's ValueIndex.
+
3.5.0 (2008-12-30)
------------------
Modified: zope.index/trunk/src/zope/index/field/index.py
===================================================================
--- zope.index/trunk/src/zope/index/field/index.py 2009-02-27 10:28:19 UTC (rev 97336)
+++ zope.index/trunk/src/zope/index/field/index.py 2009-02-27 10:33:42 UTC (rev 97337)
@@ -15,24 +15,20 @@
$Id$
"""
-import heapq
-import bisect
-from itertools import islice
-
import BTrees
import persistent
import zope.interface
from BTrees.Length import Length
from zope.index import interfaces
+from zope.index.field.sorting import SortingIndexMixin
-class FieldIndex(persistent.Persistent):
+class FieldIndex(SortingIndexMixin, persistent.Persistent):
zope.interface.implements(
interfaces.IInjection,
interfaces.IStatistics,
interfaces.IIndexSearch,
- interfaces.IIndexSort,
)
family = BTrees.family32
@@ -109,102 +105,3 @@
raise TypeError("two-length tuple expected", query)
return self.family.IF.multiunion(
self._fwd_index.values(*query))
-
- def sort(self, docids, reverse=False, limit=None):
- if (limit is not None) and (limit < 1):
- raise ValueError('limit value must be 1 or greater')
-
- numdocs = self._num_docs.value
- if not numdocs:
- raise StopIteration
-
- if not isinstance(docids, (self.family.IF.Set, self.family.IF.TreeSet)):
- docids = self.family.IF.Set(docids)
- if not docids:
- raise StopIteration
-
- rlen = len(docids)
-
- fwd_index = self._fwd_index
- rev_index = self._rev_index
- getValue = rev_index.get
- marker = object()
-
- # use_lazy and use_nbest computations lifted wholesale from
- # Zope2 catalog without questioning reasoning
- use_lazy = rlen > numdocs * (rlen / 100 + 1)
- use_nbest = limit and limit * 4 < rlen
-
- # overrides for unit tests
- if getattr(self, '_use_lazy', False):
- use_lazy = True
- if getattr(self, '_use_nbest', False):
- use_nbest = True
-
- if use_nbest:
- # this is a sort with a limit that appears useful, try to
- # take advantage of the fact that we can keep a smaller
- # set of simultaneous values in memory; use generators
- # and heapq functions to do so.
-
- def nsort():
- for docid in docids:
- val = getValue(docid, marker)
- if val is not marker:
- yield (val, docid)
-
- iterable = nsort()
-
- if reverse:
- # we use a generator as an iterable in the reverse
- # sort case because the nlargest implementation does
- # not manifest the whole thing into memory at once if
- # we do so.
- for val in heapq.nlargest(limit, iterable):
- yield val[1]
- else:
- # lifted from heapq.nsmallest
- it = iter(iterable)
- result = sorted(islice(it, 0, limit))
- if not result:
- raise StopIteration
- insort = bisect.insort
- pop = result.pop
- los = result[-1] # los --> Largest of the nsmallest
- for elem in it:
- if los <= elem:
- continue
- insort(result, elem)
- pop()
- los = result[-1]
- for val in result:
- yield val[1]
-
- else:
- if use_lazy and not reverse:
- # Since this the sort is not reversed, and the number
- # of results in the search result set is much larger
- # than the number of items in this index, we assume it
- # will be fastest to iterate over all of our forward
- # BTree's items instead of using a full sort, as our
- # forward index is already sorted in ascending order
- # by value. The Zope 2 catalog implementation claims
- # that this case is rarely exercised in practice.
- n = 0
- for stored_docids in fwd_index.values():
- for docid in self.family.IF.intersection(docids, stored_docids):
- n += 1
- yield docid
- if limit and n >= limit:
- raise StopIteration
- else:
- # If the result set is not much larger than the number
- # of documents in this index, or if we need to sort in
- # reverse order, use a non-lazy sort.
- n = 0
- for docid in sorted(docids, key=getValue, reverse=reverse):
- if getValue(docid, marker) is not marker:
- n += 1
- yield docid
- if limit and n >= limit:
- raise StopIteration
Added: zope.index/trunk/src/zope/index/field/sorting.py
===================================================================
--- zope.index/trunk/src/zope/index/field/sorting.py (rev 0)
+++ zope.index/trunk/src/zope/index/field/sorting.py 2009-02-27 10:33:42 UTC (rev 97337)
@@ -0,0 +1,130 @@
+##############################################################################
+#
+# Copyright (c) 2009 Zope Foundation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""A sorting mixin class for FieldIndex-like indexes.
+
+$Id$
+"""
+import heapq
+import bisect
+from itertools import islice
+
+from zope.interface import implements
+from zope.index.interfaces import IIndexSort
+
+class SortingIndexMixin(object):
+
+ implements(IIndexSort)
+
+ _sorting_num_docs_attr = '_num_docs' # Length object
+ _sorting_fwd_index_attr = '_fwd_index' # forward BTree index
+ _sorting_rev_index_attr = '_rev_index' # reverse BTree index
+
+ def sort(self, docids, reverse=False, limit=None):
+ if (limit is not None) and (limit < 1):
+ raise ValueError('limit value must be 1 or greater')
+
+ numdocs = getattr(self, self._sorting_num_docs_attr).value
+ if not numdocs:
+ raise StopIteration
+
+ if not isinstance(docids, (self.family.IF.Set, self.family.IF.TreeSet)):
+ docids = self.family.IF.Set(docids)
+ if not docids:
+ raise StopIteration
+
+ rlen = len(docids)
+
+ fwd_index = getattr(self, self._sorting_fwd_index_attr)
+ rev_index = getattr(self, self._sorting_rev_index_attr)
+ getValue = rev_index.get
+ marker = object()
+
+ # use_lazy and use_nbest computations lifted wholesale from
+ # Zope2 catalog without questioning reasoning
+ use_lazy = rlen > numdocs * (rlen / 100 + 1)
+ use_nbest = limit and limit * 4 < rlen
+
+ # overrides for unit tests
+ if getattr(self, '_use_lazy', False):
+ use_lazy = True
+ if getattr(self, '_use_nbest', False):
+ use_nbest = True
+
+ if use_nbest:
+ # this is a sort with a limit that appears useful, try to
+ # take advantage of the fact that we can keep a smaller
+ # set of simultaneous values in memory; use generators
+ # and heapq functions to do so.
+
+ def nsort():
+ for docid in docids:
+ val = getValue(docid, marker)
+ if val is not marker:
+ yield (val, docid)
+
+ iterable = nsort()
+
+ if reverse:
+ # we use a generator as an iterable in the reverse
+ # sort case because the nlargest implementation does
+ # not manifest the whole thing into memory at once if
+ # we do so.
+ for val in heapq.nlargest(limit, iterable):
+ yield val[1]
+ else:
+ # lifted from heapq.nsmallest
+ it = iter(iterable)
+ result = sorted(islice(it, 0, limit))
+ if not result:
+ raise StopIteration
+ insort = bisect.insort
+ pop = result.pop
+ los = result[-1] # los --> Largest of the nsmallest
+ for elem in it:
+ if los <= elem:
+ continue
+ insort(result, elem)
+ pop()
+ los = result[-1]
+ for val in result:
+ yield val[1]
+
+ else:
+ if use_lazy and not reverse:
+ # Since this the sort is not reversed, and the number
+ # of results in the search result set is much larger
+ # than the number of items in this index, we assume it
+ # will be fastest to iterate over all of our forward
+ # BTree's items instead of using a full sort, as our
+ # forward index is already sorted in ascending order
+ # by value. The Zope 2 catalog implementation claims
+ # that this case is rarely exercised in practice.
+ n = 0
+ for stored_docids in fwd_index.values():
+ for docid in self.family.IF.intersection(docids, stored_docids):
+ n += 1
+ yield docid
+ if limit and n >= limit:
+ raise StopIteration
+ else:
+ # If the result set is not much larger than the number
+ # of documents in this index, or if we need to sort in
+ # reverse order, use a non-lazy sort.
+ n = 0
+ for docid in sorted(docids, key=getValue, reverse=reverse):
+ if getValue(docid, marker) is not marker:
+ n += 1
+ yield docid
+ if limit and n >= limit:
+ raise StopIteration
Property changes on: zope.index/trunk/src/zope/index/field/sorting.py
___________________________________________________________________
Added: svn:keywords
+ Id
More information about the Checkins
mailing list