[Zope-CVS] CVS: Packages/pypes/pypes - order.py:1.1 exceptions.py:1.9 expression.py:1.20

Casey Duncan casey at zope.com
Fri Aug 20 16:21:54 EDT 2004


Update of /cvs-repository/Packages/pypes/pypes
In directory cvs.zope.org:/tmp/cvs-serv3318

Modified Files:
	exceptions.py expression.py 
Added Files:
	order.py 
Log Message:
Initial sort implementation, not quite ready for primetime


=== Added File Packages/pypes/pypes/order.py ===
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Order and group by support for queries

$Id: order.py,v 1.1 2004/08/20 20:21:23 caseman Exp $"""

import itertools
from BTrees.OOBTree import OOBTree
from pypes.exceptions import SortError
from pypes.expression import combineOrderExprs
try:
    from itertools import reverse
except ImportError:
    def reverse(sequence):
        """Reverse iterator"""
        index = -1
        try:
            while 1:
                yield sequence[index]
                index -= 1
        except (IndexError, KeyError):
            raise StopIteration
                
class Sort:
    """Encapsulates a sort run which creates an ordered iterable from a set.
    
    The sorted set may be iterated multiple times efficiently
    """
    
    def __init__(self, set, *order_exprs): 
        """Create and execute the sort run.

        The order expressions are one or more IOrderExpression objects. Each
        must  have one free variable which is assumed to refer to the member of
        the set. The order expression is used to derive the sort key value by 
        performing the equivilant of binding each member of the set in turn to
        the expressions free name and evaluating it. If any expression is not 
        acceptable, raise SortError.
        """
        self._order_exprs = order_exprs
        if len(order_exprs) == 1:
            self._sorted, self._groups = _sort_single(set, order_exprs[0])
        elif order_exprs:
            self._sorted, self._groups = _sort_multiple(set, *order_exprs)
        else:
            raise SortError('expected order expression second argument')
    
    def __iter__(self):
        """Iterate the members of the input set in sorted order"""
        depth = 0
        order_exprs = self._order_exprs
        max_depth = len(order_exprs) - 1
        if order_exprs[0].order > 0:
            # ascending
            sortiter = iter(self._sorted.items())
        else:
            # descending
            sortiter = reverse(self._sorted.items())
        groups = self._groups
        stack = []
        while 1:
            if groups:
                if depth == max_depth:
                    for key, value in sortiter:
                        if key in groups:
                            for obj in value:
                                yield obj
                        else:
                            yield value
                else:
                    for key, value in sortiter:
                        if key in groups:
                            stack.append((sortiter, groups))
                            sorted, groups = groups[key]
                            depth += 1
                            if order_exprs[depth].order > 0:
                                sortiter = iter(sorted.items())
                            else:
                                sortiter = reverse(sorted.items())
                            break
                        else:
                            yield value
                    else:
                        if depth:
                            sortiter, groups = stack.pop()
                            depth -= 1
                        else:
                            return
                    continue
            else:
                # All sort keys are unique, no need to check groups
                for key, value in sortiter:
                    yield value
            if depth:
                sortiter, groups = stack.pop()
                depth -= 1
            else:
                return


def _sort_single(iterable, expr):
    """Sort by a single order expression
    
    This sort uses a btree and dict to preform the sort. This performs
    better than the usual decorate-sort-undecorate algorithm, especially
    when the number of sort keys is small compared to size of the iterable.
    It's worst-case behavior is when the number of sort keys are roughly
    half the size of the iterable. In this case it performs only slightly
    worse then the d-s-u pattern.
    
    If any sort keys are not hashable, then the dict is abandoned, which
    slows things down a bit.
    """
    varnames = expr.freeNames()
    if len(varnames) == 1:
        sortkey = expr.makeFunction(varnames)
    else:
        raise SortError('order expression must have one free variable name')
    groups = {} # sort key => group list
    sort_tree = OOBTree() # sort key => object or group list
    for obj in iterable:
        key = sortkey(obj)
        try:
            key_in_groups = key in groups
        except TypeError:
            if isinstance(groups, dict):
                # Key is not hashable, switch to a btree
                # which does not require hashable keys
                groups = OOBTree(groups)
                key_in_groups = key in groups
            else:
                raise
        if key_in_groups:
            # Already seen multiple objects for key append to the group
            groups[key].append(obj)                 
        elif not sort_tree.insert(key, obj):
            # Already seen one other object for key, create a group
            groups[key] = sort_tree[key] = [sort_tree[key], obj]
    return sort_tree, groups
            

def _sort_multiple(iterable, *exprs):
    """Sort by multiple sort expressions
    
    Algorithm is similar to the single sort and allows multiple simultaneous
    ordering. As with the single sort, the algorithm has performance
    advantages over the decorate-sort-undecorate algorithm when there are
    relatively few sort keys. Otherwise performance is similar to d-s-u.
    
    If possible, the sort is performed in two levels. This may be accomplished
    by combining sort expressions so they can be evaluated together.
    """
    # Try to consolidate sort expressions where possible
    lastorder = None
    combined = []
    for expr in exprs:
        if expr.order == lastorder:
            sameorder.append(expr)
        else:
            sameorder = [expr]
            lastorder = expr.order
            combined.append(sameorder)
    exprs = []
    for orders in combined:
        if len(orders) > 1:
            exprs.append(combineOrderExprs(*orders))
        else:
            exprs.append(orders[0])
    if len(exprs) == 1:
        return _sort_single(iterable, exprs[0])
    sortfuncs = _make_sort_funcs(exprs)
    top_groups = {} # sort key => group trees
    top_ordered = OOBTree() # sort key => object or group tree
    deferred = []
    for obj in itertools.chain(iterable, deferred):
        groups = top_groups
        ordered = top_ordered
        for sortkey in sortfuncs:
            key = sortkey(obj)
            try:
                ingroup = key in groups
            except TypeError:
                # Key is not hashable, switch to BTree
                if isinstance(groups, dict):
                    if groups is top_groups:
                        top_groups = groups = OOBTree(groups)
                    else:
                        groups = OOBTree(groups)
                else:
                    raise
            if ingroup:
                groups, ordered = groups[key]
            elif ordered.insert(key, obj):
                break
            else:
                depth = sortfuncs.index(sortkey) + 1
                firstobj = ordered[key]
                try:
                    sortkey = sortfuncs[depth]
                except IndexError:
                    groups[key] = ordered[key] = [firstobj, obj]
                else:
                    old_groups = groups
                    groups = {}
                    old_groups[key] = ordered[key] = groups, OOBTree()
                    firstkey = sortkey(firstobj)
                    if firstkey != key:
                        ordered[firstkey] = firstobj
                    else:
                        # come back for this one later
                        deferred.append(firstobj)
                    ordered[key] = obj
                break
        else:
            if key in groups:
                groups[key].append(obj)
            else:
                groups[key] = ordered[key] = [ordered[key], obj]
    return top_ordered, top_groups


def _make_sort_funcs(exprs):
    """Return a list of functions accepting a single argument that execute the 
    input expressions and return the result
    """
    sortfuncs = []
    for expr in exprs:
        varnames = expr.freeNames()
        if len(varnames) == 1:
            sortfuncs.append(expr.makeFunction(varnames))
        else:
            raise SortError(
                'order expression must have one free variable name')
    return sortfuncs
            
                
              


=== Packages/pypes/pypes/exceptions.py 1.8 => 1.9 ===
--- Packages/pypes/pypes/exceptions.py:1.8	Thu Jul  1 23:12:25 2004
+++ Packages/pypes/pypes/exceptions.py	Fri Aug 20 16:21:23 2004
@@ -65,3 +65,7 @@
 
 class MultipleResultsError(QueryError):
     """Multiple results found for query, expected one"""
+
+class SortError(QueryError):
+    """Error during sort phase of query"""
+


=== Packages/pypes/pypes/expression.py 1.19 => 1.20 ===
--- Packages/pypes/pypes/expression.py:1.19	Fri Jul 23 01:03:09 2004
+++ Packages/pypes/pypes/expression.py	Fri Aug 20 16:21:23 2004
@@ -233,7 +233,7 @@
         tree = self.ast().getChildNodes()[0]
         tree = ast.Stmt([ast.Return(tree)])
         tree = ast.Function(
-            '__func__', args, defaults=[], flags=0, doc=None, code=tree)
+            '__func__', tuple(args), defaults=[], flags=0, doc=None, code=tree)
         tree = ast.Module(None, ast.Stmt([tree]))
         misc.set_filename('<pypes expression>', tree)
         ns = self._bindings.copy()
@@ -278,7 +278,7 @@
             ast.And((left, right)), self._combineBindings(other))
     
     def __str__(self):
-        return "Expression('%s')" % self._expr
+        return "%s('%s')" % (self.__class__.__name__, self._expr)
     
     __repr__ = __str__
 



More information about the Zope-CVS mailing list