[Zope-CVS] CVS: Packages/pypes/pypes - queryops.py:1.1 query.py:1.20

Thu Jul 1 23:19:27 EDT 2004

Update of /cvs-repository/Packages/pypes/pypes
In directory cvs.zope.org:/tmp/cvs-serv17543

Modified Files:
	query.py 
Added Files:
	queryops.py 
Log Message:
Factor out query primitives into separate queryops module
Implement query results and queries (sans sorting right now)


=== Added File Packages/pypes/pypes/queryops.py ===
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Query operation primitives

$Id: queryops.py,v 1.1 2004/07/02 03:18:56 caseman Exp $"""

from operator import mul
from sets import Set
from itertools import imap, ifilter
from compiler import ast
from BTrees.OOBTree import OOBTree, OOTreeSet
from BTrees.IIBTree import IITreeSet
from zope.interface import implements
from pypes import services
from pypes.identity import IdentitySet
from pypes.interfaces import IPartialResult, IActualizedPartialResult
from pypes.exceptions import PypesLookupError, QueryInputError, CantProcess
from pypes.exceptions import QueryError
from pypes.expression import Expression

_empty_iter = iter(())

def union_input_maps(input1, input2):
    """Create an input map dict whose values are the union of the cooresponding
    values of input1 and input2 which are mappings of name => input set. Both
    input1 and input2 should have the same name keys
    """
    assert Set(input1) == Set(input2), \
        'cannot union results with different inputs'
    if input1 is input2:
        return input1.copy()
    else:
        inmap = {}
        for name, set1 in input1.items():
            set2 = input2[name]
            if set1 is not set2:
                inmap[name] = set1.union(set2)
            else:
                inmap[name] = set1
        return inmap                    


def intersect_input_maps(input1, input2):
    """Create an input map dict whose values are the intersection of the
    cooresponding values of input1 and input2 which are mappings of name =>
    input set. Both input1 and input2 should have the same name keys 
    """
    assert Set(input1) == Set(input2), \
        'cannot union results with different inputs'
    if input1 is input2:
        return input1.copy()
    else:
        inmap = {}
        for name, set1 in input1.items():
            set2 = input2[name]
            if set1 is not set2:
                inmap[name] = set1.intersection(set2)
            else:
                inmap[name] = set1
        return inmap


class PartialResult:
    """Abstract partial result class"""
    
    _inputs = None # Input map
    _criteria = None # Criteria expression
    
    def inputMap(self):
        return self._inputs.copy()
        
    def criteriaExpr(self):
        return self._criteria
    
    def namesUsed(self):
        return self._criteria.freeNames(self._inputs.keys())
    
    def union(self, other):
        raise NotImplementedError
    
    def intersection(self, other):
        raise NotImplementedError
    
    def iterResult(self, *names, **fixed):
        raise NotImplementedError
        
    def resultSet(self, name):
        """Return a set of objects from the input named which satisfy the
        criteria.
        """
        if name not in self._inputs:
            raise QueryError('no input named "%s"' % name)
        if self._criteria is None:
            return self._inputs[name]
        if name in self._criteria.freeNames(self._inputs.keys()):
            # Input is referenced in criteria, create set from product result
            maxlength = len(self._inputs[name])
            length = 0
            idset = IITreeSet()
            insert = idset.insert
            for obj in self.iterResult(name):
                try:
                    pypesid = obj._pypes_id_
                except AttributeError:
                    # object not identified
                    idset = None
                    break
                else:
                    if insert(pypesid):
                        length += 1
                    elif length == maxlen:
                        # All objects from the input were found
                        return self._inputs[name]
            if idset is not None:
                dbconn = services.accesspointFrom(self._inputs[name])
                return IdentitySet.fromIdSet(
                    idset, dbconn=dbconn, length=length)
            else:
                # can't use identity set
                # XXX won't work with unhashable objects
                return Set(self.iterResult(name))
        else:
            # Input not referenced from criteria, if any results are
            # generated, then we can return the entire input set. If
            # not return an empty set.
            try:
                self.iterResult(name).next()
            except StopIteration:
                return IdentitySet()
            else:
                return self._inputs[name]


class CartProduct(PartialResult):
    """Cartesian product of one or more inputs to a query. Allows lazy 
    evaluation, and efficient combinatorial operations
    """
    
    implements(IPartialResult)

    def __init__(self, input_map, criteria=None):
        """Construct the product
        
        input_map -- a mapping object mapping names => inputs.
        
        criteria -- optional criteria expression, an IExpression object. 
        Used to select the input items to create a partial filtered product.
        If omitted then a full product is generated.
        """
        self._inputs = input_map
        self._criteria = criteria
    
    def namesUsed(self):
        if self._criteria is not None:
            return self._criteria.freeNames(self._inputs.keys())
        else:
            return Set()
        
    def magnitude(self, *names):
        """Return the maximum integer length of the product. This value 
        is calculated by multiplying the lengths of the inputs together.
        This value matches the actual length for a full product.
        
        This method assumes all the inputs support __len__
        """
        if not names:
            names = self._inputs.keys()
        return reduce(mul, [len(self._inputs[nm]) for nm in names])
        
    def union(self, other):
        inputs = union_input_maps(self.inputMap(), other.inputMap())
        criteria = self.criteriaExpr() | other.criteriaExpr()
        return self.__class__(inputs, criteria)
    
    def intersection(self, other):
        inputs = intersect_input_maps(self.inputMap(), other.inputMap())
        criteria = self.criteriaExpr() & other.criteriaExpr()
        return self.__class__(inputs, criteria)
    
    def iterResult(self, *names, **fixed):
        """Iterate the cartesian product yielding items from the inputs named
        where the criteria is satisfied. If a single name is specified, yield
        each matching item from the input. If multiple names are specified,
        yield tuples of the matching items from the cooresponding named inputs.
        
        Fixed values may be passed as keyword args. The arg name matches an
        input name, and the value must be a member of the input set. Static
        inputs are not iterated, but are returned in every result. They are
        useful when using a cart product inside of another iterator which
        handles iterating those values itself.
        """
        for name, obj in fixed.items():
            if name not in self._inputs:
                raise QueryError(
                    'fixed input %s not part of cart. product' % name)
            if obj not in self._inputs[name]:
                raise StopIteration
        criteria = self._criteria
        # Prime the input iterators and get the first row
        row = {}
        input_iters = []
        for name, ipt in self._inputs.items():
            if name not in fixed:
                obj_iter = iter(ipt)
                input_iters.append((name, obj_iter))
                row[name] = obj_iter.next()
            else:
                input_iters.append(None)
                row[name] = fixed[name]
        # Create output factory from input names
        if len(names) == 1:
            out, = names
            output = lambda row: row[out]
        elif len(names) == 2:
            out1, out2 = names
            output = lambda row: (row[out1], row[out2])
        else:
            output = lambda row: tuple([row[n] for n in names])
        if criteria is None or criteria(row):
            yield output(row)
        # Get subsequent rows by combining inputs
        i = 0
        while True:
            try:
                name, obj_iter = input_iters[i]
            except TypeError:
                # Static value, skip it
                i += 1
                continue
            except IndexError:
                break
            while True:
                try:
                    row[name] = obj_iter.next()
                except StopIteration:
                    obj_iter = iter(self._inputs[name])
                    input_iters[i] = name, obj_iter
                    row[name] = obj_iter.next()
                    i += 1
                    break   
                if criteria is None or criteria(row):
                    yield output(row)
                if i > 0:
                    i = 0
                    break


def equijoin(left_iter, left_expr, right_iter, right_expr):
    """Join the values of the left_iter and right_iter iterables where the
    value of left_expr equals the value of right_expr.
    
    left_expr and right_expr are callables accepting the members of their
    respective iterable input as a single argument which derive the values
    to be compared.
    
    Generate the resulting (left, right) tuple pairs where a join is made.
    The resulting tuples are yielded in arbitrary order.
    
    Complexity: O(N+M) where N=len(left), M=len(right)
    """
    left_by_val = {}
    for obj in left_iter:
        val = left_expr(obj)
        try:
            left_by_val[val].append(obj)
        except KeyError:
            left_by_val[val] = [obj]
        except TypeError:
            if isinstance(left_by_val, dict):
                # Likely val is not hashable
                # switch to using BTree which does not require hashable keys
                # (but lookup is slower)
                left_by_val = OOBTree(left_by_val)
                left_by_val[val] = left_by_val.get(val, []) + [obj]
            else:
                raise
    
    for right in right_iter:
        val = right_expr(right)
        try:
            left_matches = left_by_val[val]
        except KeyError:
            continue # No matching left objects
        else:
            for left in left_matches:
                yield left, right

def injoin(left_iter, left_expr, right_iter, right_expr):
    """Join the values of the left_iter and right_iter iterables where the
    value of left_expr is contained in the value of right_expr.
    
    left_expr and right_expr are callables accepting the members of their
    respective iterable input as a single argument which derive the values
    to be compared.
    
    Generate the resulting (left, right) tuple pairs where a join is made.
    The resulting tuples are yielded in arbitrary order.
    """
    right_by_val = {}
    for obj in right_iter:
        for val in right_expr(obj):
            try:
                right_by_val[val].insert(obj)
            except KeyError:
                # Use of OOTreeSet allows for non-hashable objects
                right_by_val[val] = OOTreeSet((obj,))
            except TypeError:
                if isinstance(right_by_val, dict):
                    # Likely val is not hashable switch to using a
                    # BTree which does not require hashable keys
                    # (but lookup is slower)
                    right_by_val = OOBTree(right_by_val)
                    right_by_val[val] = right_by_val.get(val, []) + [obj]
                else:
                    raise
                    
    for obj in left_iter:
        left = left_expr(obj)
        try:
            right_matches = right_by_val[left]
        except KeyError:
            continue # No matching right objects
        else:
            for right in right_matches:
                yield left, right

def greaterjoin(left_iter, left_expr, right_iter, right_expr):
    """Join the values of the left_iter and right_iter iterables where the
    value of left_expr is greater than the value of right_expr.
    
    left_expr and right_expr are callables accepting the members of their
    respective iterable input as a single argument which derive the values
    to be compared.
    
    Generate the resulting (left, right) tuple pairs where a join is made.
    The resulting tuples are yielded in arbitrary order.
    """
    left_by_val = OOBTree() # (ordered) map of values => left members
    for obj in left_iter:
        left_val = left_expr(obj)
        try:
            left_by_val[left_val].append(obj)
        except KeyError:
            left_by_val[left_val] = [obj]
    for right in right_iter:
        right_val = right_expr(right)        
        for left_val, left_matches in left_by_val.items(right_val, None):
            if left_val > right_val:
                for left in left_matches:
                    yield left, right

class Join(PartialResult):
    """Join of multiple inputs using a criteria expression.
    
    This implementation handles joins of two inputs each on a separate side
    of a comparison operation (i.e., a.foo == b.bar). The following operators
    are supported: ==, <, >, in.
    """
    
    implements(IPartialResult)
    
    _join_ops = {
        '==': equijoin,
        '>': greaterjoin,
        'in': injoin}
    
    def __init__(self, input_map, criteria):
        """Construct the join
        
        input_map -- a mapping object mapping names => inputs.
        
        criteria -- criteria expression, an IExpression object. Supported
        criteria compare two inputs (i.e., a.foo == b.bar). The following 
        operators are supported: ==, <, >, in.
        
        If an unsupported criteria is specified, a CantProcess exception
        is raised.
        """
        self._inputs = input_map
        self._criteria = criteria
        if len(criteria.freeNames()) != 2:
            raise CantProcess, 'join criteria must have 2 free variables'
        compare_node = criteria.ast().getChildNodes()[0]
        if not (isinstance(compare_node, ast.Compare) and
            len(compare_node.getChildNodes()) == 2):
            raise CantProcess, ('join criteria must be single comparison '
                                'between two terms')
        left, operator, right = compare_node.getChildren()
        if operator == '<':
            # Only greater join is directly supported
            operator = '>'
            left, right = right, left
            self.reverse = True
        else:
            self.reverse = False
        if operator not in self._join_ops.keys():
            raise CantProcess, (
                'operator %s not supported for join' % operator)
        bindings = criteria.bindings()
        left_expr = Expression.fromAstNode(left, bindings)
        right_expr = Expression.fromAstNode(right, bindings)
        self._operator = operator
        left_names = list(left_expr.freeNames())
        right_names = list(right_expr.freeNames())
        if (len(left_names) != 1 
            or left_names[0] not in input_map
            or len(right_names) != 1 
            or right_names[0] not in input_map):
            raise CantProcess, ('left and right join criteria operands '
                                'must contain a single input term')
        self._left_name, = left_names
        self._left_func = left_expr.makeFunction(left_names)
        self._right_name, = right_names
        self._right_func = right_expr.makeFunction(right_names)
        
    def union(self, other):
        inputs = union_input_maps(self.inputMap(), other.inputMap())
        criteria = self.criteriaExpr() | other.criteriaExpr()
        return CartProduct(inputs, criteria)
    
    def intersection(self, other):
        inputs = intersect_input_maps(self.inputMap(), other.inputMap())
        criteria = self.criteriaExpr() & other.criteriaExpr()
        return CartProduct(inputs, criteria)
        
    def _iterJoin(self, reverse=False):
        # Generates (left, right) pairs of joined terms
        join = self._join_ops[self._operator]
        if reverse:
            return join(
                self._inputs[self._right_name], self._right_func,
                self._inputs[self._left_name], self._left_func)
        else:
            return join(
                self._inputs[self._left_name], self._left_func,
                self._inputs[self._right_name], self._right_func)

    def _iterJoinOtherProduct(self, names, joined):
        # Yield iterators of products for each join pair
        product = CartProduct(self._inputs, self._criteria)
        joined = {}
        has_left = self._left_name in joined
        has_right = self._right_name in joined
        # Iterate the joined results and pass them as fixed values
        # to the full product's results
        for left, right in self._iterJoin(self.reverse):
            if has_left:
                joined[self._left_name] = left
            if has_right:
                joined[self._right_name] = right
            for row in product.iterResult(*names, **joined):
                yield row
        
    def iterResult(self, *names, **fixed):
        if fixed:
            for name, obj in fixed.items():
                if name not in self._inputs:
                    raise QueryError(
                        'fixed input %s not part of query' % name)
                if obj not in self._inputs[name]:
                    return _empty_iter
            # See if fixed values contain possible outputs. If not, bail early
            if (self._left_name in fixed
                and not self._left_func(fixed[self._left_name])):
                return _empty_iter
            if (self._right_name in fixed
                and not self._right_func(fixed[self._right_name])):
                return _empty_iter
            if self._left_name in fixed or self._right_name in fixed:
                # One or both of the joined inputs are fixed 
                # don't bother using full join operations
                return CartProduct(
                    self._inputs, self._criteria).iterResult(*names, **fixed)
        if len(names) == 2 and names == (self._left_name, self._right_name):
            # Special case, names match join terms in order exactly
            # No transform of the raw join is required
            # This case is expected to be relatively common         
            return self._iterJoin(self.reverse)
        joined_names = []
        other_names = []
        free_names = self._criteria.freeNames()
        for n in names:
            if n in free_names:
                joined_names.append(n)
            else:
                other_names.append(n)
        if not other_names:
            # No names other than the joined names were selected
            if names == (self._left_name,):
                output = lambda row: row[0]
            elif names == (self._right_name,):
                output = lambda row: row[1]
            elif names == (self._right_name, self._left_name):
                output = lambda row: (row[1], row[0])
            else:
                # Ok, something is wrong with this picture
                raise RuntimeError('Bug in join result tranformation')
            return imap(output, self._iterJoin(self.reverse))
        else:
            return self._iterJoinOtherProduct(names, joined_names)


class ActualizedResult(PartialResult):
    """Partial result with greedy evaluation.
    
    When instantiated, the inputs are processed immediately against the
    criteria. The criteria may consist of one or more terms each filtering
    exactly one input. Multiple terms are contained in a single AND
    boolean operation.
    """
    
    implements(IActualizedPartialResult)
    
    def __init__(self, input_map, criteria=None):
        """Construct the actualized result
        
        input_map -- a mapping object mapping names => inputs.
        
        criteria -- criteria expression, an IExpression object. Supported
        criteria consists of one or more terms that each refer to a single
        input. If criteria is omitted, then the actualized result is the
        same as the inputs provided.
        
        If the criteria is not supported, raise CantProcess
        """
        self._criteria = criteria
        if criteria is not None:
            node = criteria.ast().getChildNodes()[0]
            if (len(criteria.freeNames(input_map.keys())) > 1 
                and isinstance(node, ast.And)):
                terms = node.getChildNodes()
            else:
                terms = (node,)
            bindings = criteria.bindings()
            filters = {} # Map input name => filter expression
            for term in terms:
                term_expr = Expression.fromAstNode(term, bindings)
                try:
                    input_name, = term_expr.freeNames()
                except ValueError:
                    raise CantProcess, \
                        'cannot actualize result for criteria containing join'
                if input_name not in input_map:
                    raise QueryInputError, \
                        'name %s in criteria not found in inputs'
                if input_name in filters:
                    filters[input_name] = filters[input_name] & term_expr
                else:
                    filters[input_name] = term_expr

            # Filter the inputs as results
            self._inputs = {}
            for name, ipt in input_map.items():
                if name in filters:
                    self._inputs[name] = ipt = Set(
                        ifilter(filters[name].makeFunction(name), ipt))
                else:
                    self._inputs[name] = ipt
                if not ipt:
                    # If one of the inputs is empty, they all are
                    empty = IdentitySet()
                    for name in input_map:
                        self._inputs[name] = empty
                    break
        else:
            self._inputs = input_map.copy()
        
    def union(self, other):
        """A union with an actualized result returns a new partial result
        the same class as other, with inputs unioned with self
        """
        if isinstance(other, ActualizedResult):
            # Actualized results have already realized the criteria,
            # Drop the criteria here to avoid redundant processing
            criteria = None
        else:
            criteria = other.criteriaExpr()
        inputs = union_input_maps(self.inputMap(), other.inputMap())
        return other.__class__(inputs, criteria)
    
    def intersection(self, other):
        """An intersection with an actualized result returns a new partial 
        result the same class as other, with inputs intersected with self
        """
        if isinstance(other, ActualizedResult):
            # Actualized results have already realized the criteria,
            # Drop the criteria here to avoid redundant processing
            criteria = None
        else:
            criteria = other.criteriaExpr()
        inputs = intersect_input_maps(self.inputMap(), other.inputMap())
        return other.__class__(inputs, criteria)
    
    def iterResult(self, *names, **fixed):
        if len(names) == 1:
            name = names[0]
            ipt = self._inputs[name]
            if not fixed:
                return iter(ipt)
            elif name in fixed:
                if fixed[name] in ipt:
                    return iter((fixed[name],))
                else:
                    return _empty_iter
        if len(names) == len(fixed) and Set(names) == Set(fixed):
            # Optimization for when all names are fixed
            # as when testing for a single result row
            row = []
            inputs = self._inputs
            for name in names:
                if name not in inputs:
                    raise QueryError(
                        'fixed input "%s" not in query' % name)
                col = fixed[name]
                if col in inputs[name]:
                    row.append(col)
                else:
                    return _empty_iter
            return iter((tuple(row),))
        return CartProduct(self._inputs).iterResult(*names, **fixed)
    
    def resultSet(self, name):
        if name not in self._inputs:
            raise QueryError('no input named "%s"' % name)
        return self._inputs[name]
    
    def __len__(self):
        return reduce(mul, map(len, self._inputs.values()))


def sort(iterable, expression, order='ascending', limit=None):
    """Return a sequence containing the elements of iterable sorted in order
    by the values returned by applying expression, a callable object accepting
    a single argument, to each element. 
    
    order may be the constants ascending and descending to specify the
    resulting sort order. limit, if specified is an integer value representing
    the  number of elements to sort. When limit is specified, a partial sort
    will be performed and sort will return as soon as the limit count is
    reached.
    
    In order for the sort to perform properly, the expression should generate
    a homogenous set of values that support total ordering.
    """
    sortable = [(expression(i), i) for i in iterable]
    sortable.sort()
    if order == 'descending':
        sortable.reverse()
    if limit is not None:
        # XXX Implement N-Best algorithm for this
        sortable = sortable[:limit]
    # XXX We should be make this lazy
    return [i for key, i in sortable]


=== Packages/pypes/pypes/query.py 1.19 => 1.20 ===

--- Packages/pypes/pypes/query.py:1.19	Thu Jun 10 00:44:24 2004
+++ Packages/pypes/pypes/query.py	Thu Jul  1 23:18:56 2004
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2002 Zope Corporation and Contributors.
+# Copyright (c) 2004 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -40,640 +40,260 @@
 
 $Id$"""
 
-from operator import mul
 from sets import Set
-from itertools import imap, ifilter
-from zope.interface import implements
 from compiler import ast
-from BTrees.OOBTree import OOBTree, OOTreeSet
-from BTrees.IIBTree import IITreeSet
-from pypes import services
-from pypes.identity import IdentitySet
-from pypes.interfaces import IPartialResult, IExtent
-from pypes.exceptions import PypesLookupError, PypesQueryInputError, CantProcess
-from pypes.exceptions import PypesQueryError
+from operator import and_, or_
+from zope.interface import implements, classProvides
+from pypes.interfaces import IQuery, IBasicQueryFactory, IQueryResult
+from pypes.interfaces import ISet, IExtent, IOrderExpression
+from pypes.interfaces import IActualizedPartialResult
+from pypes.exceptions import QueryError, QueryInputError, CantProcess
+from pypes.exceptions import NoResultsError, MultipleResultsError
 from pypes.expression import Expression
+from pypes.queryops import CartProduct, Join, ActualizedResult
 
-## Exported constants ##
-
-descending = 'descending'
-ascending = 'ascending'
-
-## Query Primitives ##
-
-_empty_iter = iter(())
-
-def union_input_maps(input1, input2):
-    """Create an input map dict whose values are the union of the cooresponding
-    values of input1 and input2 which are mappings of name => input set. Both
-    input1 and input2 should have the same name keys
-    """
-    assert Set(input1) == Set(input2), \
-        'cannot union results with different inputs'
-    if input1 is input2:
-        return input1.copy()
-    else:
-        inmap = {}
-        for name, set1 in input1.items():
-            set2 = input2[name]
-            if set1 is not set2:
-                inmap[name] = set1.union(set2)
-            else:
-                inmap[name] = set1
-        return inmap                    
 
+class QueryResult:
+    """Result from query selection"""
+    
+    implements(IQueryResult)
+    
+    def __init__(self, query, names, partial_result):
+        self._query = query
+        self._names = names
+        self._partial_result = partial_result
+        self._len_cache = None
+        self._result_cache = None
+    
+    def names(self):
+        """Return names selected by query"""
+        return self._names
+        
+    def query(self):
+        """Return query that generated the result"""
+        return self._query
+    
+    def evaluated(self):
+        """Return true if results and length are cached"""
+        return self._len_cache is not None
+    
+    def _cache_results(self):
+        """Evaluate the query results and cache results and length"""
+        if IActualizedPartialResult.isImplementedBy(self._partial_result):
+            self._len_cache = len(self._partial_result)
+            # No need to cache actualized result
+            self._result_cache = None
+        else:
+            self._result_cache = list(
+                self._partial_result.iterResult(*self._names))
+            self._len_cache = len(self._result_cache)            
+    
+    def __len__(self):
+        if self._len_cache is None:
+            self._cache_results()
+        return self._len_cache
+    
+    def __iter__(self):
+        if self._result_cache is None:
+            return self._partial_result.iterResult(*self._names)
+        else:
+            return iter(self._result_cache)
+    
+    def __contains__(self, item):
+        if len(self._names) == 1:
+            item = (item,)
+        elif not isinstance(item, tuple):
+            return False
+        if len(item) != len(self._names):
+            # Item wrong length, can't be in result
+            return False
+        item_map = dict(zip(self._names, item))
+        try:
+            self._partial_result.iterResult(*self._names, **item_map).next()
+        except StopIteration:
+            return False
+        else:
+            return True
 
-def intersect_input_maps(input1, input2):
-    """Create an input map dict whose values are the intersection of the
-    cooresponding values of input1 and input2 which are mappings of name =>
-    input set. Both input1 and input2 should have the same name keys 
+class Query:
+    """Pypes query declaration.
+    
+    Serves as a basic query factory or can be subclassed as the basis for
+    application-specific query types
     """
-    assert Set(input1) == Set(input2), \
-        'cannot union results with different inputs'
-    if input1 is input2:
-        return input1.copy()
-    else:
-        inmap = {}
-        for name, set1 in input1.items():
-            set2 = input2[name]
-            if set1 is not set2:
-                inmap[name] = set1.intersection(set2)
-            else:
-                inmap[name] = set1
-        return inmap                    
-
-class PartialResult:
-    """Abstract partial result class"""
     
-    _inputs = None # Input map
-    _criteria = None # Criteria expression
+    classProvides(IBasicQueryFactory)
+    implements(IQuery)
     
-    def inputMap(self):
-        return self._inputs.copy()
-        
-    def criteriaExpr(self):
-        return self._criteria
+    ## Default query attributes ##
+    
+    # may be overridden to provide default values for the constructor arguments
+    inputs = None # mapping of names => inputs
+    input_names = None # sequence of input names in the default order
+    criteria = None # IExpression object
+    order = None # sequence of IOrderExpression objects
+    limit = None # integer maximum result size
+    
+    def __init__(self, inputs=None, criteria=None, order=None, limit=None):
+        """Construct the query. 
+        
+        inputs, criteria, order and limit may all be provided either by 
+        arguments or by values specified by the class. At a minimum a query
+        must define inputs or QueryInputError is raised.
+        """
+        if inputs is not None:
+            self._setInputs(inputs)
+        if criteria is not None:
+            self._setCriteria(criteria)
+        if order is not None:
+            self._setOrder(order, limit)
+        if self.inputs is None:
+            raise QueryInputError('no inputs provided to query')
     
-    def namesUsed(self):
-        return self._criteria.freeNames(self._inputs.keys())
+    ## Values used to affect the query plan ##
     
-    def union(self, other):
-        raise NotImplementedError
+    # Partial results below this size will not be processed with indices
+    scan_threshold = 50
     
-    def intersection(self, other):
-        raise NotImplementedError
+    ## Private method hooks, intended to be overridden as desired ##
     
-    def iterResult(self, *names, **fixed):
-        raise NotImplementedError
+    def _setInputs(self, inputs):
+        """Process the inputs and set the value for inputs and input_names.
         
-    def resultSet(self, name):
-        """Return a set of objects from the input named which satisfy the
-        criteria.
+        inputs -- A named set, a sequence of named sets, or a mapping of
+        input names to sets.
         """
-        if name not in self._inputs:
-            raise PypesQueryError('no input named "%s"' % name)
-        if self._criteria is None:
-            return self._inputs[name]
-        if name in self._criteria.freeNames(self._inputs.keys()):
-            # Input is referenced in criteria, create set from product result
-            maxlength = len(self._inputs[name])
-            length = 0
-            idset = IITreeSet()
-            insert = idset.insert
-            for obj in self.iterResult(name):
+        if ISet.isImplementedBy(inputs) or isinstance(inputs, Set):
+            inputs = (inputs,)
+        if hasattr(inputs, 'items') and hasattr(inputs, 'keys'):
+            # Mapping of input names to inputs
+            self.input_names = list(inputs.keys())
+            # XXX maybe we should enforce string keys?
+            self.inputs = dict(inputs)
+        else:
+            # Sequence of inputs
+            self.input_names = []
+            self.inputs = {}
+            for ipt in inputs:
                 try:
-                    pypesid = obj._pypes_id_
+                    name = ipt.__name__
                 except AttributeError:
-                    # object not identified
-                    idset = None
-                    break
+                    raise QueryInputError('unnamed query input')
                 else:
-                    if insert(pypesid):
-                        length += 1
-                    elif length == maxlen:
-                        # All objects from the input were found
-                        return self._inputs[name]
-            if idset is not None:
-                dbconn = services.accesspointFrom(self._inputs[name])
-                return IdentitySet.fromIdSet(
-                    idset, dbconn=dbconn, length=length)
-            else:
-                # can't use identity set
-                # XXX won't work with unhashable objects
-                return Set(self.iterResult(name))
-        else:
-            # Input not referenced from criteria, if any results are
-            # generated, then we can return the entire input set. If
-            # not return an empty set.
-            try:
-                self.iterResult(name).next()
-            except StopIteration:
-                return IdentitySet()
-            else:
-                return self._inputs[name]
-
-
-class CartProduct(PartialResult):
-    """Cartesian product of one or more inputs to a query. Allows lazy 
-    evaluation, and efficient combinatorial operations
-    """
+                    self.input_names.append(name)
+                    if name not in self.inputs:
+                        self.inputs[name] = ipt
+                    else:
+                        raise QueryInputError(
+                            'duplicate query input name "%s"' % name)
     
-    implements(IPartialResult)
-
-    def __init__(self, input_map, criteria=None):
-        """Construct the product
+    def _setCriteria(self, criteria):
+        """Process the criteria and set the criteria instance value.
         
-        input_map -- a mapping object mapping names => inputs.
-        
-        criteria -- optional criteria expression, an IExpression object. 
-        Used to select the input items to create a partial filtered product.
-        If omitted then a full product is generated.
+        Optimize the query based on the inputs. The inputs must already be set
         """
-        self._inputs = input_map
-        self._criteria = criteria
+        criteria.optimize(self.input_names)
+        self.criteria = criteria
     
-    def namesUsed(self):
-        if self._criteria is not None:
-            return self._criteria.freeNames(self._inputs.keys())
-        else:
-            return Set()
-        
-    def magnitude(self, *names):
-        """Return the maximum integer length of the product. This value 
-        is calculated by multiplying the lengths of the inputs together.
-        This value matches the actual length for a full product.
-        
-        This method assumes all the inputs support __len__
-        """
-        if not names:
-            names = self._inputs.keys()
-        return reduce(mul, [len(self._inputs[nm]) for nm in names])
-        
-    def union(self, other):
-        inputs = union_input_maps(self.inputMap(), other.inputMap())
-        criteria = self.criteriaExpr() | other.criteriaExpr()
-        return self.__class__(inputs, criteria)
-    
-    def intersection(self, other):
-        inputs = intersect_input_maps(self.inputMap(), other.inputMap())
-        criteria = self.criteriaExpr() & other.criteriaExpr()
-        return self.__class__(inputs, criteria)
-    
-    def iterResult(self, *names, **fixed):
-        """Iterate the cartesian product yielding items from the inputs named
-        where the criteria is satisfied. If a single name is specified, yield
-        each matching item from the input. If multiple names are specified,
-        yield tuples of the matching items from the cooresponding named inputs.
-        
-        Fixed values may be passed as keyword args. The arg name matches an
-        input name, and the value must be a member of the input set. Static
-        inputs are not iterated, but are returned in every result. They are
-        useful when using a cart product inside of another iterator which
-        handles iterating those values itself.
+    def _setOrder(self, order, limit=None):
+        """Set the order attribute for the query"""
+        if limit is not None:
+            if not isinstance(limit, int):
+                raise QueryInputError('expected integer limit')
+            if limit < 1:
+                raise QueryInputError('limit must be 1 or greater')
+            self.limit = limit
+        if IOrderExpression.isImplementedBy(order):
+            self.order = (order,)
+        else:
+            # XXX Should we try to test for sequence-ness?
+            self.order = order
+    
+    def _query(self, input_map, criteria):
+        """Workhorse, process the criteria against the input_map and return
+        an IPartialResult object
         """
-        for name, obj in fixed.items():
-            if name not in self._inputs:
-                raise PypesQueryError(
-                    'fixed input %s not part of cart. product' % name)
-            if obj not in self._inputs[name]:
-                raise StopIteration
-        criteria = self._criteria
-        # Prime the input iterators and get the first row
-        row = {}
-        input_iters = []
-        for name, ipt in self._inputs.items():
-            if name not in fixed:
-                obj_iter = iter(ipt)
-                input_iters.append((name, obj_iter))
-                row[name] = obj_iter.next()
-            else:
-                input_iters.append(None)
-                row[name] = fixed[name]
-        # Create output factory from input names
-        if len(names) == 1:
-            out, = names
-            output = lambda row: row[out]
-        elif len(names) == 2:
-            out1, out2 = names
-            output = lambda row: (row[out1], row[out2])
-        else:
-            output = lambda row: tuple([row[n] for n in names])
-        if criteria is None or criteria(row):
-            yield output(row)
-        # Get subsequent rows by combining inputs
-        i = 0
-        while True:
-            try:
-                name, obj_iter = input_iters[i]
-            except TypeError:
-                # Static value, skip it
-                i += 1
-                continue
-            except IndexError:
-                break
-            while True:
-                try:
-                    row[name] = obj_iter.next()
-                except StopIteration:
-                    obj_iter = iter(self._inputs[name])
-                    input_iters[i] = name, obj_iter
-                    row[name] = obj_iter.next()
-                    i += 1
-                    break   
-                if criteria is None or criteria(row):
-                    yield output(row)
-                if i > 0:
-                    i = 0
-                    break
-
-
-def equijoin(left_iter, left_expr, right_iter, right_expr):
-    """Join the values of the left_iter and right_iter iterables where the
-    value of left_expr equals the value of right_expr.
-    
-    left_expr and right_expr are callables accepting the members of their
-    respective iterable input as a single argument which derive the values
-    to be compared.
-    
-    Generate the resulting (left, right) tuple pairs where a join is made.
-    The resulting tuples are yielded in arbitrary order.
-    
-    Complexity: O(N+M) where N=len(left), M=len(right)
-    """
-    left_by_val = {}
-    for obj in left_iter:
-        val = left_expr(obj)
         try:
-            left_by_val[val].append(obj)
-        except KeyError:
-            left_by_val[val] = [obj]
-        except TypeError:
-            if isinstance(left_by_val, dict):
-                # Likely val is not hashable
-                # switch to using BTree which does not require hashable keys
-                # (but lookup is slower)
-                left_by_val = OOBTree(left_by_val)
-                left_by_val[val] = left_by_val.get(val, []) + [obj]
-            else:
-                raise
-    
-    for right in right_iter:
-        val = right_expr(right)
+            return self._ActualizedResult(input_map, criteria)
+        except CantProcess:
+            pass
         try:
-            left_matches = left_by_val[val]
-        except KeyError:
-            continue # No matching left objects
-        else:
-            for left in left_matches:
-                yield left, right
-
-def injoin(left_iter, left_expr, right_iter, right_expr):
-    """Join the values of the left_iter and right_iter iterables where the
-    value of left_expr is contained in the value of right_expr.
-    
-    left_expr and right_expr are callables accepting the members of their
-    respective iterable input as a single argument which derive the values
-    to be compared.
-    
-    Generate the resulting (left, right) tuple pairs where a join is made.
-    The resulting tuples are yielded in arbitrary order.
-    """
-    right_by_val = {}
-    for obj in right_iter:
-        for val in right_expr(obj):
+            return self._Join(input_map, criteria)
+        except CantProcess:
+            pass
+        # Try to divide criteria on boolean operators and process each
+        top_node, = criteria.ast().getChildren()
+        if isinstance(top_node, (ast.Or, ast.And)):
+            bindings = criteria.bindings()
+            subresults = [
+                self._query(input_map, Expression.fromAstNode(node, bindings))
+                for node in top_node.nodes]
             try:
-                right_by_val[val].insert(obj)
-            except KeyError:
-                # Use of OOTreeSet allows for non-hashable objects
-                right_by_val[val] = OOTreeSet((obj,))
-            except TypeError:
-                if isinstance(right_by_val, dict):
-                    # Likely val is not hashable switch to using a
-                    # BTree which does not require hashable keys
-                    # (but lookup is slower)
-                    right_by_val = OOBTree(right_by_val)
-                    right_by_val[val] = right_by_val.get(val, []) + [obj]
+                if isinstance(top_node, ast.Or):
+                    return reduce(or_, subresults)
                 else:
-                    raise
-                    
-    for obj in left_iter:
-        left = left_expr(obj)
-        try:
-            right_matches = right_by_val[left]
-        except KeyError:
-            continue # No matching right objects
-        else:
-            for right in right_matches:
-                yield left, right
-
-def greaterjoin(left_iter, left_expr, right_iter, right_expr):
-    """Join the values of the left_iter and right_iter iterables where the
-    value of left_expr is greater than the value of right_expr.
-    
-    left_expr and right_expr are callables accepting the members of their
-    respective iterable input as a single argument which derive the values
-    to be compared.
-    
-    Generate the resulting (left, right) tuple pairs where a join is made.
-    The resulting tuples are yielded in arbitrary order.
-    """
-    left_by_val = OOBTree() # (ordered) map of values => left members
-    for obj in left_iter:
-        left_val = left_expr(obj)
-        try:
-            left_by_val[left_val].append(obj)
-        except KeyError:
-            left_by_val[left_val] = [obj]
-    for right in right_iter:
-        right_val = right_expr(right)        
-        for left_val, left_matches in left_by_val.items(right_val, None):
-            if left_val > right_val:
-                for left in left_matches:
-                    yield left, right
-
-class Join(PartialResult):
-    """Join of multiple inputs using a criteria expression.
-    
-    This implementation handles joins of two inputs each on a separate side
-    of a comparison operation (i.e., a.foo == b.bar). The following operators
-    are supported: ==, <, >, in.
-    """
-    
-    implements(IPartialResult)
-    
-    _join_ops = {
-        '==': equijoin,
-        '>': greaterjoin,
-        'in': injoin}
-    
-    def __init__(self, input_map, criteria):
-        """Construct the join
-        
-        input_map -- a mapping object mapping names => inputs.
-        
-        criteria -- criteria expression, an IExpression object. Supported
-        criteria compare two inputs (i.e., a.foo == b.bar). The following 
-        operators are supported: ==, <, >, in.
-        
-        If an unsupported criteria is specified, a CantProcess exception
-        is raised.
+                    return reduce(and_, subresults)
+            except CantProcess:
+                pass
+        # Criteria cannot be divided
+        return self._CartProduct(input_map, criteria)
+    
+    def _checkNames(self, names):
+        """If any names are not a valid input name or a name is duplicated,
+        raise QueryError
         """
-        self._inputs = input_map
-        self._criteria = criteria
-        if len(criteria.freeNames()) != 2:
-            raise CantProcess, 'join criteria must have 2 free variables'
-        compare_node = criteria.ast().getChildNodes()[0]
-        if not (isinstance(compare_node, ast.Compare) and
-            len(compare_node.getChildNodes()) == 2):
-            raise CantProcess, ('join criteria must be single comparison '
-                                'between two terms')
-        left, operator, right = compare_node.getChildren()
-        if operator == '<':
-            # Only greater join is directly supported
-            operator = '>'
-            left, right = right, left
-            self.reverse = True
-        else:
-            self.reverse = False
-        if operator not in self._join_ops.keys():
-            raise CantProcess, (
-                'operator %s not supported for join' % operator)
-        bindings = criteria.bindings()
-        left_expr = Expression.fromAstNode(left, bindings)
-        right_expr = Expression.fromAstNode(right, bindings)
-        self._operator = operator
-        left_names = list(left_expr.freeNames())
-        right_names = list(right_expr.freeNames())
-        if (len(left_names) != 1 
-            or left_names[0] not in input_map
-            or len(right_names) != 1 
-            or right_names[0] not in input_map):
-            raise CantProcess, ('left and right join criteria operands '
-                                'must contain a single input term')
-        self._left_name, = left_names
-        self._left_func = left_expr.makeFunction(left_names)
-        self._right_name, = right_names
-        self._right_func = right_expr.makeFunction(right_names)
-        
-    def union(self, other):
-        inputs = union_input_maps(self.inputMap(), other.inputMap())
-        criteria = self.criteriaExpr() | other.criteriaExpr()
-        return CartProduct(inputs, criteria)
-    
-    def intersection(self, other):
-        inputs = intersect_input_maps(self.inputMap(), other.inputMap())
-        criteria = self.criteriaExpr() & other.criteriaExpr()
-        return CartProduct(inputs, criteria)
-        
-    def _iterJoin(self, reverse=False):
-        # Generates (left, right) pairs of joined terms
-        join = self._join_ops[self._operator]
-        if reverse:
-            return join(
-                self._inputs[self._right_name], self._right_func,
-                self._inputs[self._left_name], self._left_func)
-        else:
-            return join(
-                self._inputs[self._left_name], self._left_func,
-                self._inputs[self._right_name], self._right_func)
-
-    def _iterJoinOtherProduct(self, names, joined):
-        # Yield iterators of products for each join pair
-        product = CartProduct(self._inputs, self._criteria)
-        joined = {}
-        has_left = self._left_name in joined
-        has_right = self._right_name in joined
-        # Iterate the joined results and pass them as fixed values
-        # to the full product's results
-        for left, right in self._iterJoin(self.reverse):
-            if has_left:
-                joined[self._left_name] = left
-            if has_right:
-                joined[self._right_name] = right
-            for row in product.iterResult(*names, **joined):
-                yield row
-        
-    def iterResult(self, *names, **fixed):
-        if fixed:
-            for name, obj in fixed.items():
-                if name not in self._inputs:
-                    raise PypesQueryError(
-                        'fixed input %s not part of query' % name)
-                if obj not in self._inputs[name]:
-                    return _empty_iter
-            # See if fixed values contain possible outputs. If not, bail early
-            if (self._left_name in fixed
-                and not self._left_func(fixed[self._left_name])):
-                return _empty_iter
-            if (self._right_name in fixed
-                and not self._right_func(fixed[self._right_name])):
-                return _empty_iter
-            if self._left_name in fixed or self._right_name in fixed:
-                # One or both of the joined inputs are fixed 
-                # don't bother using full join operations
-                return CartProduct(
-                    self._inputs, self._criteria).iterResult(*names, **fixed)
-        if len(names) == 2 and names == (self._left_name, self._right_name):
-            # Special case, names match join terms in order exactly
-            # No transform of the raw join is required
-            # This case is expected to be relatively common         
-            return self._iterJoin(self.reverse)
-        joined_names = []
-        other_names = []
-        free_names = self._criteria.freeNames()
-        for n in names:
-            if n in free_names:
-                joined_names.append(n)
-            else:
-                other_names.append(n)
-        if not other_names:
-            # No names other than the joined names were selected
-            if names == (self._left_name,):
-                output = lambda row: row[0]
-            elif names == (self._right_name,):
-                output = lambda row: row[1]
-            elif names == (self._right_name, self._left_name):
-                output = lambda row: (row[1], row[0])
+        checked_names = Set()        
+        for name in names:
+            if name not in self.inputs:
+                raise QueryError('no query input named "%s" % name')
+            if name not in checked_names:
+                checked_names.add(name)
             else:
-                # Ok, something is wrong with this picture
-                raise RuntimeError('Bug in join result tranformation')
-            return imap(output, self._iterJoin(self.reverse))
-        else:
-            return self._iterJoinOtherProduct(names, joined_names)
-
-
-class ActualizedResult(PartialResult):
-    """Partial result with greedy evaluation.
-    
-    When instantiated, the inputs are processed immediately against the
-    criteria. The criteria may consist of one or more terms each filtering
-    exactly one input. Multiple terms are contained in a single AND
-    boolean operation.
-    """
-    
-    implements(IPartialResult)
-    
-    def __init__(self, input_map, criteria=None):
-        """Construct the actualized result
-        
-        input_map -- a mapping object mapping names => inputs.
+                # XXX are dupe names bad??
+                raise QueryError('duplicate output name "%s"' % name)
         
-        criteria -- criteria expression, an IExpression object. Supported
-        criteria consists of one or more terms that each refer to a single
-        input. If criteria is omitted, then the actualized result is the
-        same as the inputs provided.
-        
-        If the criteria is not supported, raise CantProcess
+    # Result factories, overridden for testing
+    _CartProduct = CartProduct
+    _Join = Join
+    _ActualizedResult = ActualizedResult
+    _QueryResult = QueryResult
+    
+    ## IQuery methods ##
+    
+    def select(self, *input_names):
+        """Process inputs with criteria and order expressions and return
+        and IQueryResult
         """
-        self._criteria = criteria
-        if criteria is not None:
-            node = criteria.ast().getChildNodes()[0]
-            if (len(criteria.freeNames(input_map.keys())) > 1 
-                and isinstance(node, ast.And)):
-                terms = node.getChildNodes()
-            else:
-                terms = (node,)
-            bindings = criteria.bindings()
-            filters = {} # Map input name => filter expression
-            for term in terms:
-                term_expr = Expression.fromAstNode(term, bindings)
-                try:
-                    input_name, = term_expr.freeNames()
-                except ValueError:
-                    raise CantProcess, \
-                        'cannot actualize result for criteria containing join'
-                if input_name not in input_map:
-                    raise PypesQueryInputError, \
-                        'name %s in criteria not found in inputs'
-                if input_name in filters:
-                    filters[input_name] = filters[input_name] & term_expr
-                else:
-                    filters[input_name] = term_expr
-
-            # Filter the inputs as results
-            self._inputs = {}
-            for name, ipt in input_map.items():
-                if name in filters:
-                    self._inputs[name] = ipt = Set(
-                        ifilter(filters[name].makeFunction(name), ipt))
-                else:
-                    self._inputs[name] = ipt
-                if not ipt:
-                    # If one of the inputs is empty, they all are
-                    empty = IdentitySet()
-                    for name in input_map:
-                        self._inputs[name] = empty
-                    break
-        else:
-            self._inputs = input_map.copy()
-        
-    def union(self, other):
-        """A union with an actualized result returns a new partial result
-        the same class as other, with inputs unioned with self
+        if not input_names:
+            input_names = self.input_names
+        self._checkNames(input_names)
+        return QueryResult(
+            self, input_names, self._query(self.inputs, self.criteria))
+    
+    def selectOne(self, *input_names):
+        """Process the inputs and criteria and return the single matching
+        result.
         """
-        if isinstance(other, ActualizedResult):
-            # Actualized results have already realized the criteria,
-            # Drop the criteria here to avoid redundant processing
-            criteria = None
-        else:
-            criteria = other.criteriaExpr()
-        inputs = union_input_maps(self.inputMap(), other.inputMap())
-        return other.__class__(inputs, criteria)
-    
-    def intersection(self, other):
-        """An intersection with an actualized result returns a new partial 
-        result the same class as other, with inputs intersected with self
-        """
-        if isinstance(other, ActualizedResult):
-            # Actualized results have already realized the criteria,
-            # Drop the criteria here to avoid redundant processing
-            criteria = None
-        else:
-            criteria = other.criteriaExpr()
-        inputs = intersect_input_maps(self.inputMap(), other.inputMap())
-        return other.__class__(inputs, criteria)
-    
-    def iterResult(self, *names, **fixed):
-        if len(names) == 1:
-            name = names[0]
-            ipt = self._inputs[name]
-            if not fixed:
-                return iter(ipt)
-            elif name in fixed:
-                if fixed[name] in ipt:
-                    return iter((fixed[name],))
-                else:
-                    return _empty_iter
-        return CartProduct(self._inputs).iterResult(*names, **fixed)
-    
+        if not input_names:
+            input_names = self.input_names
+        self._checkNames(input_names)
+        resultiter = self._query(
+            self.inputs, self.criteria).iterResult(*input_names)
+        try:
+            result = resultiter.next()
+        except StopIteration:
+            raise NoResultsError('no matching results')
+        try:
+            resultiter.next()
+        except StopIteration:
+            return result
+        else:
+            raise MultipleResultsError(
+                'multiple results found for single result selection')
+                
     def resultSet(self, name):
-        if name not in self._inputs:
-            raise PypesQueryError('no input named "%s"' % name)
-        return self._inputs[name]
-
-
-def sort(iterable, expression, order=ascending, limit=None):
-    """Return a sequence containing the elements of iterable sorted in order
-    by the values returned by applying expression, a callable object accepting
-    a single argument, to each element. 
-    
-    order may be the constants ascending and descending to specify the
-    resulting sort order. limit, if specified is an integer value representing
-    the  number of elements to sort. When limit is specified, a partial sort
-    will be performed and sort will return as soon as the limit count is
-    reached.
-    
-    In order for the sort to perform properly, the expression should generate
-    a homogenous set of values that support total ordering.
-    """
-    sortable = [(expression(i), i) for i in iterable]
-    sortable.sort()
-    if order == descending:
-        sortable.reverse()
-    if limit is not None:
-        # XXX Implement N-Best algorithm for this
-        sortable = sortable[:limit]
-    # XXX We should be make this lazy
-    return [i for key, i in sortable]
-
-    
+        """Return the set of objects resulting from querying the specified
+        input"""
+        self._checkNames((name,))
+        return self._query(self.inputs, self.criteria).resultSet(name)
+