[Zope-CVS] CVS: Packages/pypes/pypes - order.py:1.1
exceptions.py:1.9 expression.py:1.20
Casey Duncan
casey at zope.com
Fri Aug 20 16:21:54 EDT 2004
Update of /cvs-repository/Packages/pypes/pypes
In directory cvs.zope.org:/tmp/cvs-serv3318
Modified Files:
exceptions.py expression.py
Added Files:
order.py
Log Message:
Initial sort implementation, not quite ready for primetime
=== Added File Packages/pypes/pypes/order.py ===
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Order and group by support for queries
$Id: order.py,v 1.1 2004/08/20 20:21:23 caseman Exp $"""
import itertools
from BTrees.OOBTree import OOBTree
from pypes.exceptions import SortError
from pypes.expression import combineOrderExprs
try:
from itertools import reverse
except ImportError:
def reverse(sequence):
"""Reverse iterator"""
index = -1
try:
while 1:
yield sequence[index]
index -= 1
except (IndexError, KeyError):
raise StopIteration
class Sort:
"""Encapsulates a sort run which creates an ordered iterable from a set.
The sorted set may be iterated multiple times efficiently
"""
def __init__(self, set, *order_exprs):
"""Create and execute the sort run.
The order expressions are one or more IOrderExpression objects. Each
must have one free variable which is assumed to refer to the member of
the set. The order expression is used to derive the sort key value by
performing the equivilant of binding each member of the set in turn to
the expressions free name and evaluating it. If any expression is not
acceptable, raise SortError.
"""
self._order_exprs = order_exprs
if len(order_exprs) == 1:
self._sorted, self._groups = _sort_single(set, order_exprs[0])
elif order_exprs:
self._sorted, self._groups = _sort_multiple(set, *order_exprs)
else:
raise SortError('expected order expression second argument')
def __iter__(self):
"""Iterate the members of the input set in sorted order"""
depth = 0
order_exprs = self._order_exprs
max_depth = len(order_exprs) - 1
if order_exprs[0].order > 0:
# ascending
sortiter = iter(self._sorted.items())
else:
# descending
sortiter = reverse(self._sorted.items())
groups = self._groups
stack = []
while 1:
if groups:
if depth == max_depth:
for key, value in sortiter:
if key in groups:
for obj in value:
yield obj
else:
yield value
else:
for key, value in sortiter:
if key in groups:
stack.append((sortiter, groups))
sorted, groups = groups[key]
depth += 1
if order_exprs[depth].order > 0:
sortiter = iter(sorted.items())
else:
sortiter = reverse(sorted.items())
break
else:
yield value
else:
if depth:
sortiter, groups = stack.pop()
depth -= 1
else:
return
continue
else:
# All sort keys are unique, no need to check groups
for key, value in sortiter:
yield value
if depth:
sortiter, groups = stack.pop()
depth -= 1
else:
return
def _sort_single(iterable, expr):
"""Sort by a single order expression
This sort uses a btree and dict to preform the sort. This performs
better than the usual decorate-sort-undecorate algorithm, especially
when the number of sort keys is small compared to size of the iterable.
It's worst-case behavior is when the number of sort keys are roughly
half the size of the iterable. In this case it performs only slightly
worse then the d-s-u pattern.
If any sort keys are not hashable, then the dict is abandoned, which
slows things down a bit.
"""
varnames = expr.freeNames()
if len(varnames) == 1:
sortkey = expr.makeFunction(varnames)
else:
raise SortError('order expression must have one free variable name')
groups = {} # sort key => group list
sort_tree = OOBTree() # sort key => object or group list
for obj in iterable:
key = sortkey(obj)
try:
key_in_groups = key in groups
except TypeError:
if isinstance(groups, dict):
# Key is not hashable, switch to a btree
# which does not require hashable keys
groups = OOBTree(groups)
key_in_groups = key in groups
else:
raise
if key_in_groups:
# Already seen multiple objects for key append to the group
groups[key].append(obj)
elif not sort_tree.insert(key, obj):
# Already seen one other object for key, create a group
groups[key] = sort_tree[key] = [sort_tree[key], obj]
return sort_tree, groups
def _sort_multiple(iterable, *exprs):
"""Sort by multiple sort expressions
Algorithm is similar to the single sort and allows multiple simultaneous
ordering. As with the single sort, the algorithm has performance
advantages over the decorate-sort-undecorate algorithm when there are
relatively few sort keys. Otherwise performance is similar to d-s-u.
If possible, the sort is performed in two levels. This may be accomplished
by combining sort expressions so they can be evaluated together.
"""
# Try to consolidate sort expressions where possible
lastorder = None
combined = []
for expr in exprs:
if expr.order == lastorder:
sameorder.append(expr)
else:
sameorder = [expr]
lastorder = expr.order
combined.append(sameorder)
exprs = []
for orders in combined:
if len(orders) > 1:
exprs.append(combineOrderExprs(*orders))
else:
exprs.append(orders[0])
if len(exprs) == 1:
return _sort_single(iterable, exprs[0])
sortfuncs = _make_sort_funcs(exprs)
top_groups = {} # sort key => group trees
top_ordered = OOBTree() # sort key => object or group tree
deferred = []
for obj in itertools.chain(iterable, deferred):
groups = top_groups
ordered = top_ordered
for sortkey in sortfuncs:
key = sortkey(obj)
try:
ingroup = key in groups
except TypeError:
# Key is not hashable, switch to BTree
if isinstance(groups, dict):
if groups is top_groups:
top_groups = groups = OOBTree(groups)
else:
groups = OOBTree(groups)
else:
raise
if ingroup:
groups, ordered = groups[key]
elif ordered.insert(key, obj):
break
else:
depth = sortfuncs.index(sortkey) + 1
firstobj = ordered[key]
try:
sortkey = sortfuncs[depth]
except IndexError:
groups[key] = ordered[key] = [firstobj, obj]
else:
old_groups = groups
groups = {}
old_groups[key] = ordered[key] = groups, OOBTree()
firstkey = sortkey(firstobj)
if firstkey != key:
ordered[firstkey] = firstobj
else:
# come back for this one later
deferred.append(firstobj)
ordered[key] = obj
break
else:
if key in groups:
groups[key].append(obj)
else:
groups[key] = ordered[key] = [ordered[key], obj]
return top_ordered, top_groups
def _make_sort_funcs(exprs):
"""Return a list of functions accepting a single argument that execute the
input expressions and return the result
"""
sortfuncs = []
for expr in exprs:
varnames = expr.freeNames()
if len(varnames) == 1:
sortfuncs.append(expr.makeFunction(varnames))
else:
raise SortError(
'order expression must have one free variable name')
return sortfuncs
=== Packages/pypes/pypes/exceptions.py 1.8 => 1.9 ===
--- Packages/pypes/pypes/exceptions.py:1.8 Thu Jul 1 23:12:25 2004
+++ Packages/pypes/pypes/exceptions.py Fri Aug 20 16:21:23 2004
@@ -65,3 +65,7 @@
class MultipleResultsError(QueryError):
"""Multiple results found for query, expected one"""
+
+class SortError(QueryError):
+ """Error during sort phase of query"""
+
=== Packages/pypes/pypes/expression.py 1.19 => 1.20 ===
--- Packages/pypes/pypes/expression.py:1.19 Fri Jul 23 01:03:09 2004
+++ Packages/pypes/pypes/expression.py Fri Aug 20 16:21:23 2004
@@ -233,7 +233,7 @@
tree = self.ast().getChildNodes()[0]
tree = ast.Stmt([ast.Return(tree)])
tree = ast.Function(
- '__func__', args, defaults=[], flags=0, doc=None, code=tree)
+ '__func__', tuple(args), defaults=[], flags=0, doc=None, code=tree)
tree = ast.Module(None, ast.Stmt([tree]))
misc.set_filename('<pypes expression>', tree)
ns = self._bindings.copy()
@@ -278,7 +278,7 @@
ast.And((left, right)), self._combineBindings(other))
def __str__(self):
- return "Expression('%s')" % self._expr
+ return "%s('%s')" % (self.__class__.__name__, self._expr)
__repr__ = __str__
More information about the Zope-CVS
mailing list