[Zope-CVS] CVS: Packages/pypes/pypes - queryops.py:1.1 query.py:1.20
Casey Duncan
casey at zope.com
Thu Jul 1 23:19:27 EDT 2004
Update of /cvs-repository/Packages/pypes/pypes
In directory cvs.zope.org:/tmp/cvs-serv17543
Modified Files:
query.py
Added Files:
queryops.py
Log Message:
Factor out query primitives into separate queryops module
Implement query results and queries (sans sorting right now)
=== Added File Packages/pypes/pypes/queryops.py ===
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Query operation primitives
$Id: queryops.py,v 1.1 2004/07/02 03:18:56 caseman Exp $"""
from operator import mul
from sets import Set
from itertools import imap, ifilter
from compiler import ast
from BTrees.OOBTree import OOBTree, OOTreeSet
from BTrees.IIBTree import IITreeSet
from zope.interface import implements
from pypes import services
from pypes.identity import IdentitySet
from pypes.interfaces import IPartialResult, IActualizedPartialResult
from pypes.exceptions import PypesLookupError, QueryInputError, CantProcess
from pypes.exceptions import QueryError
from pypes.expression import Expression
_empty_iter = iter(())
def union_input_maps(input1, input2):
"""Create an input map dict whose values are the union of the cooresponding
values of input1 and input2 which are mappings of name => input set. Both
input1 and input2 should have the same name keys
"""
assert Set(input1) == Set(input2), \
'cannot union results with different inputs'
if input1 is input2:
return input1.copy()
else:
inmap = {}
for name, set1 in input1.items():
set2 = input2[name]
if set1 is not set2:
inmap[name] = set1.union(set2)
else:
inmap[name] = set1
return inmap
def intersect_input_maps(input1, input2):
"""Create an input map dict whose values are the intersection of the
cooresponding values of input1 and input2 which are mappings of name =>
input set. Both input1 and input2 should have the same name keys
"""
assert Set(input1) == Set(input2), \
'cannot union results with different inputs'
if input1 is input2:
return input1.copy()
else:
inmap = {}
for name, set1 in input1.items():
set2 = input2[name]
if set1 is not set2:
inmap[name] = set1.intersection(set2)
else:
inmap[name] = set1
return inmap
class PartialResult:
"""Abstract partial result class"""
_inputs = None # Input map
_criteria = None # Criteria expression
def inputMap(self):
return self._inputs.copy()
def criteriaExpr(self):
return self._criteria
def namesUsed(self):
return self._criteria.freeNames(self._inputs.keys())
def union(self, other):
raise NotImplementedError
def intersection(self, other):
raise NotImplementedError
def iterResult(self, *names, **fixed):
raise NotImplementedError
def resultSet(self, name):
"""Return a set of objects from the input named which satisfy the
criteria.
"""
if name not in self._inputs:
raise QueryError('no input named "%s"' % name)
if self._criteria is None:
return self._inputs[name]
if name in self._criteria.freeNames(self._inputs.keys()):
# Input is referenced in criteria, create set from product result
maxlength = len(self._inputs[name])
length = 0
idset = IITreeSet()
insert = idset.insert
for obj in self.iterResult(name):
try:
pypesid = obj._pypes_id_
except AttributeError:
# object not identified
idset = None
break
else:
if insert(pypesid):
length += 1
elif length == maxlen:
# All objects from the input were found
return self._inputs[name]
if idset is not None:
dbconn = services.accesspointFrom(self._inputs[name])
return IdentitySet.fromIdSet(
idset, dbconn=dbconn, length=length)
else:
# can't use identity set
# XXX won't work with unhashable objects
return Set(self.iterResult(name))
else:
# Input not referenced from criteria, if any results are
# generated, then we can return the entire input set. If
# not return an empty set.
try:
self.iterResult(name).next()
except StopIteration:
return IdentitySet()
else:
return self._inputs[name]
class CartProduct(PartialResult):
"""Cartesian product of one or more inputs to a query. Allows lazy
evaluation, and efficient combinatorial operations
"""
implements(IPartialResult)
def __init__(self, input_map, criteria=None):
"""Construct the product
input_map -- a mapping object mapping names => inputs.
criteria -- optional criteria expression, an IExpression object.
Used to select the input items to create a partial filtered product.
If omitted then a full product is generated.
"""
self._inputs = input_map
self._criteria = criteria
def namesUsed(self):
if self._criteria is not None:
return self._criteria.freeNames(self._inputs.keys())
else:
return Set()
def magnitude(self, *names):
"""Return the maximum integer length of the product. This value
is calculated by multiplying the lengths of the inputs together.
This value matches the actual length for a full product.
This method assumes all the inputs support __len__
"""
if not names:
names = self._inputs.keys()
return reduce(mul, [len(self._inputs[nm]) for nm in names])
def union(self, other):
inputs = union_input_maps(self.inputMap(), other.inputMap())
criteria = self.criteriaExpr() | other.criteriaExpr()
return self.__class__(inputs, criteria)
def intersection(self, other):
inputs = intersect_input_maps(self.inputMap(), other.inputMap())
criteria = self.criteriaExpr() & other.criteriaExpr()
return self.__class__(inputs, criteria)
def iterResult(self, *names, **fixed):
"""Iterate the cartesian product yielding items from the inputs named
where the criteria is satisfied. If a single name is specified, yield
each matching item from the input. If multiple names are specified,
yield tuples of the matching items from the cooresponding named inputs.
Fixed values may be passed as keyword args. The arg name matches an
input name, and the value must be a member of the input set. Static
inputs are not iterated, but are returned in every result. They are
useful when using a cart product inside of another iterator which
handles iterating those values itself.
"""
for name, obj in fixed.items():
if name not in self._inputs:
raise QueryError(
'fixed input %s not part of cart. product' % name)
if obj not in self._inputs[name]:
raise StopIteration
criteria = self._criteria
# Prime the input iterators and get the first row
row = {}
input_iters = []
for name, ipt in self._inputs.items():
if name not in fixed:
obj_iter = iter(ipt)
input_iters.append((name, obj_iter))
row[name] = obj_iter.next()
else:
input_iters.append(None)
row[name] = fixed[name]
# Create output factory from input names
if len(names) == 1:
out, = names
output = lambda row: row[out]
elif len(names) == 2:
out1, out2 = names
output = lambda row: (row[out1], row[out2])
else:
output = lambda row: tuple([row[n] for n in names])
if criteria is None or criteria(row):
yield output(row)
# Get subsequent rows by combining inputs
i = 0
while True:
try:
name, obj_iter = input_iters[i]
except TypeError:
# Static value, skip it
i += 1
continue
except IndexError:
break
while True:
try:
row[name] = obj_iter.next()
except StopIteration:
obj_iter = iter(self._inputs[name])
input_iters[i] = name, obj_iter
row[name] = obj_iter.next()
i += 1
break
if criteria is None or criteria(row):
yield output(row)
if i > 0:
i = 0
break
def equijoin(left_iter, left_expr, right_iter, right_expr):
"""Join the values of the left_iter and right_iter iterables where the
value of left_expr equals the value of right_expr.
left_expr and right_expr are callables accepting the members of their
respective iterable input as a single argument which derive the values
to be compared.
Generate the resulting (left, right) tuple pairs where a join is made.
The resulting tuples are yielded in arbitrary order.
Complexity: O(N+M) where N=len(left), M=len(right)
"""
left_by_val = {}
for obj in left_iter:
val = left_expr(obj)
try:
left_by_val[val].append(obj)
except KeyError:
left_by_val[val] = [obj]
except TypeError:
if isinstance(left_by_val, dict):
# Likely val is not hashable
# switch to using BTree which does not require hashable keys
# (but lookup is slower)
left_by_val = OOBTree(left_by_val)
left_by_val[val] = left_by_val.get(val, []) + [obj]
else:
raise
for right in right_iter:
val = right_expr(right)
try:
left_matches = left_by_val[val]
except KeyError:
continue # No matching left objects
else:
for left in left_matches:
yield left, right
def injoin(left_iter, left_expr, right_iter, right_expr):
"""Join the values of the left_iter and right_iter iterables where the
value of left_expr is contained in the value of right_expr.
left_expr and right_expr are callables accepting the members of their
respective iterable input as a single argument which derive the values
to be compared.
Generate the resulting (left, right) tuple pairs where a join is made.
The resulting tuples are yielded in arbitrary order.
"""
right_by_val = {}
for obj in right_iter:
for val in right_expr(obj):
try:
right_by_val[val].insert(obj)
except KeyError:
# Use of OOTreeSet allows for non-hashable objects
right_by_val[val] = OOTreeSet((obj,))
except TypeError:
if isinstance(right_by_val, dict):
# Likely val is not hashable switch to using a
# BTree which does not require hashable keys
# (but lookup is slower)
right_by_val = OOBTree(right_by_val)
right_by_val[val] = right_by_val.get(val, []) + [obj]
else:
raise
for obj in left_iter:
left = left_expr(obj)
try:
right_matches = right_by_val[left]
except KeyError:
continue # No matching right objects
else:
for right in right_matches:
yield left, right
def greaterjoin(left_iter, left_expr, right_iter, right_expr):
"""Join the values of the left_iter and right_iter iterables where the
value of left_expr is greater than the value of right_expr.
left_expr and right_expr are callables accepting the members of their
respective iterable input as a single argument which derive the values
to be compared.
Generate the resulting (left, right) tuple pairs where a join is made.
The resulting tuples are yielded in arbitrary order.
"""
left_by_val = OOBTree() # (ordered) map of values => left members
for obj in left_iter:
left_val = left_expr(obj)
try:
left_by_val[left_val].append(obj)
except KeyError:
left_by_val[left_val] = [obj]
for right in right_iter:
right_val = right_expr(right)
for left_val, left_matches in left_by_val.items(right_val, None):
if left_val > right_val:
for left in left_matches:
yield left, right
class Join(PartialResult):
"""Join of multiple inputs using a criteria expression.
This implementation handles joins of two inputs each on a separate side
of a comparison operation (i.e., a.foo == b.bar). The following operators
are supported: ==, <, >, in.
"""
implements(IPartialResult)
_join_ops = {
'==': equijoin,
'>': greaterjoin,
'in': injoin}
def __init__(self, input_map, criteria):
"""Construct the join
input_map -- a mapping object mapping names => inputs.
criteria -- criteria expression, an IExpression object. Supported
criteria compare two inputs (i.e., a.foo == b.bar). The following
operators are supported: ==, <, >, in.
If an unsupported criteria is specified, a CantProcess exception
is raised.
"""
self._inputs = input_map
self._criteria = criteria
if len(criteria.freeNames()) != 2:
raise CantProcess, 'join criteria must have 2 free variables'
compare_node = criteria.ast().getChildNodes()[0]
if not (isinstance(compare_node, ast.Compare) and
len(compare_node.getChildNodes()) == 2):
raise CantProcess, ('join criteria must be single comparison '
'between two terms')
left, operator, right = compare_node.getChildren()
if operator == '<':
# Only greater join is directly supported
operator = '>'
left, right = right, left
self.reverse = True
else:
self.reverse = False
if operator not in self._join_ops.keys():
raise CantProcess, (
'operator %s not supported for join' % operator)
bindings = criteria.bindings()
left_expr = Expression.fromAstNode(left, bindings)
right_expr = Expression.fromAstNode(right, bindings)
self._operator = operator
left_names = list(left_expr.freeNames())
right_names = list(right_expr.freeNames())
if (len(left_names) != 1
or left_names[0] not in input_map
or len(right_names) != 1
or right_names[0] not in input_map):
raise CantProcess, ('left and right join criteria operands '
'must contain a single input term')
self._left_name, = left_names
self._left_func = left_expr.makeFunction(left_names)
self._right_name, = right_names
self._right_func = right_expr.makeFunction(right_names)
def union(self, other):
inputs = union_input_maps(self.inputMap(), other.inputMap())
criteria = self.criteriaExpr() | other.criteriaExpr()
return CartProduct(inputs, criteria)
def intersection(self, other):
inputs = intersect_input_maps(self.inputMap(), other.inputMap())
criteria = self.criteriaExpr() & other.criteriaExpr()
return CartProduct(inputs, criteria)
def _iterJoin(self, reverse=False):
# Generates (left, right) pairs of joined terms
join = self._join_ops[self._operator]
if reverse:
return join(
self._inputs[self._right_name], self._right_func,
self._inputs[self._left_name], self._left_func)
else:
return join(
self._inputs[self._left_name], self._left_func,
self._inputs[self._right_name], self._right_func)
def _iterJoinOtherProduct(self, names, joined):
# Yield iterators of products for each join pair
product = CartProduct(self._inputs, self._criteria)
joined = {}
has_left = self._left_name in joined
has_right = self._right_name in joined
# Iterate the joined results and pass them as fixed values
# to the full product's results
for left, right in self._iterJoin(self.reverse):
if has_left:
joined[self._left_name] = left
if has_right:
joined[self._right_name] = right
for row in product.iterResult(*names, **joined):
yield row
def iterResult(self, *names, **fixed):
if fixed:
for name, obj in fixed.items():
if name not in self._inputs:
raise QueryError(
'fixed input %s not part of query' % name)
if obj not in self._inputs[name]:
return _empty_iter
# See if fixed values contain possible outputs. If not, bail early
if (self._left_name in fixed
and not self._left_func(fixed[self._left_name])):
return _empty_iter
if (self._right_name in fixed
and not self._right_func(fixed[self._right_name])):
return _empty_iter
if self._left_name in fixed or self._right_name in fixed:
# One or both of the joined inputs are fixed
# don't bother using full join operations
return CartProduct(
self._inputs, self._criteria).iterResult(*names, **fixed)
if len(names) == 2 and names == (self._left_name, self._right_name):
# Special case, names match join terms in order exactly
# No transform of the raw join is required
# This case is expected to be relatively common
return self._iterJoin(self.reverse)
joined_names = []
other_names = []
free_names = self._criteria.freeNames()
for n in names:
if n in free_names:
joined_names.append(n)
else:
other_names.append(n)
if not other_names:
# No names other than the joined names were selected
if names == (self._left_name,):
output = lambda row: row[0]
elif names == (self._right_name,):
output = lambda row: row[1]
elif names == (self._right_name, self._left_name):
output = lambda row: (row[1], row[0])
else:
# Ok, something is wrong with this picture
raise RuntimeError('Bug in join result tranformation')
return imap(output, self._iterJoin(self.reverse))
else:
return self._iterJoinOtherProduct(names, joined_names)
class ActualizedResult(PartialResult):
"""Partial result with greedy evaluation.
When instantiated, the inputs are processed immediately against the
criteria. The criteria may consist of one or more terms each filtering
exactly one input. Multiple terms are contained in a single AND
boolean operation.
"""
implements(IActualizedPartialResult)
def __init__(self, input_map, criteria=None):
"""Construct the actualized result
input_map -- a mapping object mapping names => inputs.
criteria -- criteria expression, an IExpression object. Supported
criteria consists of one or more terms that each refer to a single
input. If criteria is omitted, then the actualized result is the
same as the inputs provided.
If the criteria is not supported, raise CantProcess
"""
self._criteria = criteria
if criteria is not None:
node = criteria.ast().getChildNodes()[0]
if (len(criteria.freeNames(input_map.keys())) > 1
and isinstance(node, ast.And)):
terms = node.getChildNodes()
else:
terms = (node,)
bindings = criteria.bindings()
filters = {} # Map input name => filter expression
for term in terms:
term_expr = Expression.fromAstNode(term, bindings)
try:
input_name, = term_expr.freeNames()
except ValueError:
raise CantProcess, \
'cannot actualize result for criteria containing join'
if input_name not in input_map:
raise QueryInputError, \
'name %s in criteria not found in inputs'
if input_name in filters:
filters[input_name] = filters[input_name] & term_expr
else:
filters[input_name] = term_expr
# Filter the inputs as results
self._inputs = {}
for name, ipt in input_map.items():
if name in filters:
self._inputs[name] = ipt = Set(
ifilter(filters[name].makeFunction(name), ipt))
else:
self._inputs[name] = ipt
if not ipt:
# If one of the inputs is empty, they all are
empty = IdentitySet()
for name in input_map:
self._inputs[name] = empty
break
else:
self._inputs = input_map.copy()
def union(self, other):
"""A union with an actualized result returns a new partial result
the same class as other, with inputs unioned with self
"""
if isinstance(other, ActualizedResult):
# Actualized results have already realized the criteria,
# Drop the criteria here to avoid redundant processing
criteria = None
else:
criteria = other.criteriaExpr()
inputs = union_input_maps(self.inputMap(), other.inputMap())
return other.__class__(inputs, criteria)
def intersection(self, other):
"""An intersection with an actualized result returns a new partial
result the same class as other, with inputs intersected with self
"""
if isinstance(other, ActualizedResult):
# Actualized results have already realized the criteria,
# Drop the criteria here to avoid redundant processing
criteria = None
else:
criteria = other.criteriaExpr()
inputs = intersect_input_maps(self.inputMap(), other.inputMap())
return other.__class__(inputs, criteria)
def iterResult(self, *names, **fixed):
if len(names) == 1:
name = names[0]
ipt = self._inputs[name]
if not fixed:
return iter(ipt)
elif name in fixed:
if fixed[name] in ipt:
return iter((fixed[name],))
else:
return _empty_iter
if len(names) == len(fixed) and Set(names) == Set(fixed):
# Optimization for when all names are fixed
# as when testing for a single result row
row = []
inputs = self._inputs
for name in names:
if name not in inputs:
raise QueryError(
'fixed input "%s" not in query' % name)
col = fixed[name]
if col in inputs[name]:
row.append(col)
else:
return _empty_iter
return iter((tuple(row),))
return CartProduct(self._inputs).iterResult(*names, **fixed)
def resultSet(self, name):
if name not in self._inputs:
raise QueryError('no input named "%s"' % name)
return self._inputs[name]
def __len__(self):
return reduce(mul, map(len, self._inputs.values()))
def sort(iterable, expression, order='ascending', limit=None):
"""Return a sequence containing the elements of iterable sorted in order
by the values returned by applying expression, a callable object accepting
a single argument, to each element.
order may be the constants ascending and descending to specify the
resulting sort order. limit, if specified is an integer value representing
the number of elements to sort. When limit is specified, a partial sort
will be performed and sort will return as soon as the limit count is
reached.
In order for the sort to perform properly, the expression should generate
a homogenous set of values that support total ordering.
"""
sortable = [(expression(i), i) for i in iterable]
sortable.sort()
if order == 'descending':
sortable.reverse()
if limit is not None:
# XXX Implement N-Best algorithm for this
sortable = sortable[:limit]
# XXX We should be make this lazy
return [i for key, i in sortable]
=== Packages/pypes/pypes/query.py 1.19 => 1.20 ===
--- Packages/pypes/pypes/query.py:1.19 Thu Jun 10 00:44:24 2004
+++ Packages/pypes/pypes/query.py Thu Jul 1 23:18:56 2004
@@ -1,6 +1,6 @@
##############################################################################
#
-# Copyright (c) 2002 Zope Corporation and Contributors.
+# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
@@ -40,640 +40,260 @@
$Id$"""
-from operator import mul
from sets import Set
-from itertools import imap, ifilter
-from zope.interface import implements
from compiler import ast
-from BTrees.OOBTree import OOBTree, OOTreeSet
-from BTrees.IIBTree import IITreeSet
-from pypes import services
-from pypes.identity import IdentitySet
-from pypes.interfaces import IPartialResult, IExtent
-from pypes.exceptions import PypesLookupError, PypesQueryInputError, CantProcess
-from pypes.exceptions import PypesQueryError
+from operator import and_, or_
+from zope.interface import implements, classProvides
+from pypes.interfaces import IQuery, IBasicQueryFactory, IQueryResult
+from pypes.interfaces import ISet, IExtent, IOrderExpression
+from pypes.interfaces import IActualizedPartialResult
+from pypes.exceptions import QueryError, QueryInputError, CantProcess
+from pypes.exceptions import NoResultsError, MultipleResultsError
from pypes.expression import Expression
+from pypes.queryops import CartProduct, Join, ActualizedResult
-## Exported constants ##
-
-descending = 'descending'
-ascending = 'ascending'
-
-## Query Primitives ##
-
-_empty_iter = iter(())
-
-def union_input_maps(input1, input2):
- """Create an input map dict whose values are the union of the cooresponding
- values of input1 and input2 which are mappings of name => input set. Both
- input1 and input2 should have the same name keys
- """
- assert Set(input1) == Set(input2), \
- 'cannot union results with different inputs'
- if input1 is input2:
- return input1.copy()
- else:
- inmap = {}
- for name, set1 in input1.items():
- set2 = input2[name]
- if set1 is not set2:
- inmap[name] = set1.union(set2)
- else:
- inmap[name] = set1
- return inmap
+class QueryResult:
+ """Result from query selection"""
+
+ implements(IQueryResult)
+
+ def __init__(self, query, names, partial_result):
+ self._query = query
+ self._names = names
+ self._partial_result = partial_result
+ self._len_cache = None
+ self._result_cache = None
+
+ def names(self):
+ """Return names selected by query"""
+ return self._names
+
+ def query(self):
+ """Return query that generated the result"""
+ return self._query
+
+ def evaluated(self):
+ """Return true if results and length are cached"""
+ return self._len_cache is not None
+
+ def _cache_results(self):
+ """Evaluate the query results and cache results and length"""
+ if IActualizedPartialResult.isImplementedBy(self._partial_result):
+ self._len_cache = len(self._partial_result)
+ # No need to cache actualized result
+ self._result_cache = None
+ else:
+ self._result_cache = list(
+ self._partial_result.iterResult(*self._names))
+ self._len_cache = len(self._result_cache)
+
+ def __len__(self):
+ if self._len_cache is None:
+ self._cache_results()
+ return self._len_cache
+
+ def __iter__(self):
+ if self._result_cache is None:
+ return self._partial_result.iterResult(*self._names)
+ else:
+ return iter(self._result_cache)
+
+ def __contains__(self, item):
+ if len(self._names) == 1:
+ item = (item,)
+ elif not isinstance(item, tuple):
+ return False
+ if len(item) != len(self._names):
+ # Item wrong length, can't be in result
+ return False
+ item_map = dict(zip(self._names, item))
+ try:
+ self._partial_result.iterResult(*self._names, **item_map).next()
+ except StopIteration:
+ return False
+ else:
+ return True
-def intersect_input_maps(input1, input2):
- """Create an input map dict whose values are the intersection of the
- cooresponding values of input1 and input2 which are mappings of name =>
- input set. Both input1 and input2 should have the same name keys
+class Query:
+ """Pypes query declaration.
+
+ Serves as a basic query factory or can be subclassed as the basis for
+ application-specific query types
"""
- assert Set(input1) == Set(input2), \
- 'cannot union results with different inputs'
- if input1 is input2:
- return input1.copy()
- else:
- inmap = {}
- for name, set1 in input1.items():
- set2 = input2[name]
- if set1 is not set2:
- inmap[name] = set1.intersection(set2)
- else:
- inmap[name] = set1
- return inmap
-
-class PartialResult:
- """Abstract partial result class"""
- _inputs = None # Input map
- _criteria = None # Criteria expression
+ classProvides(IBasicQueryFactory)
+ implements(IQuery)
- def inputMap(self):
- return self._inputs.copy()
-
- def criteriaExpr(self):
- return self._criteria
+ ## Default query attributes ##
+
+ # may be overridden to provide default values for the constructor arguments
+ inputs = None # mapping of names => inputs
+ input_names = None # sequence of input names in the default order
+ criteria = None # IExpression object
+ order = None # sequence of IOrderExpression objects
+ limit = None # integer maximum result size
+
+ def __init__(self, inputs=None, criteria=None, order=None, limit=None):
+ """Construct the query.
+
+ inputs, criteria, order and limit may all be provided either by
+ arguments or by values specified by the class. At a minimum a query
+ must define inputs or QueryInputError is raised.
+ """
+ if inputs is not None:
+ self._setInputs(inputs)
+ if criteria is not None:
+ self._setCriteria(criteria)
+ if order is not None:
+ self._setOrder(order, limit)
+ if self.inputs is None:
+ raise QueryInputError('no inputs provided to query')
- def namesUsed(self):
- return self._criteria.freeNames(self._inputs.keys())
+ ## Values used to affect the query plan ##
- def union(self, other):
- raise NotImplementedError
+ # Partial results below this size will not be processed with indices
+ scan_threshold = 50
- def intersection(self, other):
- raise NotImplementedError
+ ## Private method hooks, intended to be overridden as desired ##
- def iterResult(self, *names, **fixed):
- raise NotImplementedError
+ def _setInputs(self, inputs):
+ """Process the inputs and set the value for inputs and input_names.
- def resultSet(self, name):
- """Return a set of objects from the input named which satisfy the
- criteria.
+ inputs -- A named set, a sequence of named sets, or a mapping of
+ input names to sets.
"""
- if name not in self._inputs:
- raise PypesQueryError('no input named "%s"' % name)
- if self._criteria is None:
- return self._inputs[name]
- if name in self._criteria.freeNames(self._inputs.keys()):
- # Input is referenced in criteria, create set from product result
- maxlength = len(self._inputs[name])
- length = 0
- idset = IITreeSet()
- insert = idset.insert
- for obj in self.iterResult(name):
+ if ISet.isImplementedBy(inputs) or isinstance(inputs, Set):
+ inputs = (inputs,)
+ if hasattr(inputs, 'items') and hasattr(inputs, 'keys'):
+ # Mapping of input names to inputs
+ self.input_names = list(inputs.keys())
+ # XXX maybe we should enforce string keys?
+ self.inputs = dict(inputs)
+ else:
+ # Sequence of inputs
+ self.input_names = []
+ self.inputs = {}
+ for ipt in inputs:
try:
- pypesid = obj._pypes_id_
+ name = ipt.__name__
except AttributeError:
- # object not identified
- idset = None
- break
+ raise QueryInputError('unnamed query input')
else:
- if insert(pypesid):
- length += 1
- elif length == maxlen:
- # All objects from the input were found
- return self._inputs[name]
- if idset is not None:
- dbconn = services.accesspointFrom(self._inputs[name])
- return IdentitySet.fromIdSet(
- idset, dbconn=dbconn, length=length)
- else:
- # can't use identity set
- # XXX won't work with unhashable objects
- return Set(self.iterResult(name))
- else:
- # Input not referenced from criteria, if any results are
- # generated, then we can return the entire input set. If
- # not return an empty set.
- try:
- self.iterResult(name).next()
- except StopIteration:
- return IdentitySet()
- else:
- return self._inputs[name]
-
-
-class CartProduct(PartialResult):
- """Cartesian product of one or more inputs to a query. Allows lazy
- evaluation, and efficient combinatorial operations
- """
+ self.input_names.append(name)
+ if name not in self.inputs:
+ self.inputs[name] = ipt
+ else:
+ raise QueryInputError(
+ 'duplicate query input name "%s"' % name)
- implements(IPartialResult)
-
- def __init__(self, input_map, criteria=None):
- """Construct the product
+ def _setCriteria(self, criteria):
+ """Process the criteria and set the criteria instance value.
- input_map -- a mapping object mapping names => inputs.
-
- criteria -- optional criteria expression, an IExpression object.
- Used to select the input items to create a partial filtered product.
- If omitted then a full product is generated.
+ Optimize the query based on the inputs. The inputs must already be set
"""
- self._inputs = input_map
- self._criteria = criteria
+ criteria.optimize(self.input_names)
+ self.criteria = criteria
- def namesUsed(self):
- if self._criteria is not None:
- return self._criteria.freeNames(self._inputs.keys())
- else:
- return Set()
-
- def magnitude(self, *names):
- """Return the maximum integer length of the product. This value
- is calculated by multiplying the lengths of the inputs together.
- This value matches the actual length for a full product.
-
- This method assumes all the inputs support __len__
- """
- if not names:
- names = self._inputs.keys()
- return reduce(mul, [len(self._inputs[nm]) for nm in names])
-
- def union(self, other):
- inputs = union_input_maps(self.inputMap(), other.inputMap())
- criteria = self.criteriaExpr() | other.criteriaExpr()
- return self.__class__(inputs, criteria)
-
- def intersection(self, other):
- inputs = intersect_input_maps(self.inputMap(), other.inputMap())
- criteria = self.criteriaExpr() & other.criteriaExpr()
- return self.__class__(inputs, criteria)
-
- def iterResult(self, *names, **fixed):
- """Iterate the cartesian product yielding items from the inputs named
- where the criteria is satisfied. If a single name is specified, yield
- each matching item from the input. If multiple names are specified,
- yield tuples of the matching items from the cooresponding named inputs.
-
- Fixed values may be passed as keyword args. The arg name matches an
- input name, and the value must be a member of the input set. Static
- inputs are not iterated, but are returned in every result. They are
- useful when using a cart product inside of another iterator which
- handles iterating those values itself.
+ def _setOrder(self, order, limit=None):
+ """Set the order attribute for the query"""
+ if limit is not None:
+ if not isinstance(limit, int):
+ raise QueryInputError('expected integer limit')
+ if limit < 1:
+ raise QueryInputError('limit must be 1 or greater')
+ self.limit = limit
+ if IOrderExpression.isImplementedBy(order):
+ self.order = (order,)
+ else:
+ # XXX Should we try to test for sequence-ness?
+ self.order = order
+
+ def _query(self, input_map, criteria):
+ """Workhorse, process the criteria against the input_map and return
+ an IPartialResult object
"""
- for name, obj in fixed.items():
- if name not in self._inputs:
- raise PypesQueryError(
- 'fixed input %s not part of cart. product' % name)
- if obj not in self._inputs[name]:
- raise StopIteration
- criteria = self._criteria
- # Prime the input iterators and get the first row
- row = {}
- input_iters = []
- for name, ipt in self._inputs.items():
- if name not in fixed:
- obj_iter = iter(ipt)
- input_iters.append((name, obj_iter))
- row[name] = obj_iter.next()
- else:
- input_iters.append(None)
- row[name] = fixed[name]
- # Create output factory from input names
- if len(names) == 1:
- out, = names
- output = lambda row: row[out]
- elif len(names) == 2:
- out1, out2 = names
- output = lambda row: (row[out1], row[out2])
- else:
- output = lambda row: tuple([row[n] for n in names])
- if criteria is None or criteria(row):
- yield output(row)
- # Get subsequent rows by combining inputs
- i = 0
- while True:
- try:
- name, obj_iter = input_iters[i]
- except TypeError:
- # Static value, skip it
- i += 1
- continue
- except IndexError:
- break
- while True:
- try:
- row[name] = obj_iter.next()
- except StopIteration:
- obj_iter = iter(self._inputs[name])
- input_iters[i] = name, obj_iter
- row[name] = obj_iter.next()
- i += 1
- break
- if criteria is None or criteria(row):
- yield output(row)
- if i > 0:
- i = 0
- break
-
-
-def equijoin(left_iter, left_expr, right_iter, right_expr):
- """Join the values of the left_iter and right_iter iterables where the
- value of left_expr equals the value of right_expr.
-
- left_expr and right_expr are callables accepting the members of their
- respective iterable input as a single argument which derive the values
- to be compared.
-
- Generate the resulting (left, right) tuple pairs where a join is made.
- The resulting tuples are yielded in arbitrary order.
-
- Complexity: O(N+M) where N=len(left), M=len(right)
- """
- left_by_val = {}
- for obj in left_iter:
- val = left_expr(obj)
try:
- left_by_val[val].append(obj)
- except KeyError:
- left_by_val[val] = [obj]
- except TypeError:
- if isinstance(left_by_val, dict):
- # Likely val is not hashable
- # switch to using BTree which does not require hashable keys
- # (but lookup is slower)
- left_by_val = OOBTree(left_by_val)
- left_by_val[val] = left_by_val.get(val, []) + [obj]
- else:
- raise
-
- for right in right_iter:
- val = right_expr(right)
+ return self._ActualizedResult(input_map, criteria)
+ except CantProcess:
+ pass
try:
- left_matches = left_by_val[val]
- except KeyError:
- continue # No matching left objects
- else:
- for left in left_matches:
- yield left, right
-
-def injoin(left_iter, left_expr, right_iter, right_expr):
- """Join the values of the left_iter and right_iter iterables where the
- value of left_expr is contained in the value of right_expr.
-
- left_expr and right_expr are callables accepting the members of their
- respective iterable input as a single argument which derive the values
- to be compared.
-
- Generate the resulting (left, right) tuple pairs where a join is made.
- The resulting tuples are yielded in arbitrary order.
- """
- right_by_val = {}
- for obj in right_iter:
- for val in right_expr(obj):
+ return self._Join(input_map, criteria)
+ except CantProcess:
+ pass
+ # Try to divide criteria on boolean operators and process each
+ top_node, = criteria.ast().getChildren()
+ if isinstance(top_node, (ast.Or, ast.And)):
+ bindings = criteria.bindings()
+ subresults = [
+ self._query(input_map, Expression.fromAstNode(node, bindings))
+ for node in top_node.nodes]
try:
- right_by_val[val].insert(obj)
- except KeyError:
- # Use of OOTreeSet allows for non-hashable objects
- right_by_val[val] = OOTreeSet((obj,))
- except TypeError:
- if isinstance(right_by_val, dict):
- # Likely val is not hashable switch to using a
- # BTree which does not require hashable keys
- # (but lookup is slower)
- right_by_val = OOBTree(right_by_val)
- right_by_val[val] = right_by_val.get(val, []) + [obj]
+ if isinstance(top_node, ast.Or):
+ return reduce(or_, subresults)
else:
- raise
-
- for obj in left_iter:
- left = left_expr(obj)
- try:
- right_matches = right_by_val[left]
- except KeyError:
- continue # No matching right objects
- else:
- for right in right_matches:
- yield left, right
-
-def greaterjoin(left_iter, left_expr, right_iter, right_expr):
- """Join the values of the left_iter and right_iter iterables where the
- value of left_expr is greater than the value of right_expr.
-
- left_expr and right_expr are callables accepting the members of their
- respective iterable input as a single argument which derive the values
- to be compared.
-
- Generate the resulting (left, right) tuple pairs where a join is made.
- The resulting tuples are yielded in arbitrary order.
- """
- left_by_val = OOBTree() # (ordered) map of values => left members
- for obj in left_iter:
- left_val = left_expr(obj)
- try:
- left_by_val[left_val].append(obj)
- except KeyError:
- left_by_val[left_val] = [obj]
- for right in right_iter:
- right_val = right_expr(right)
- for left_val, left_matches in left_by_val.items(right_val, None):
- if left_val > right_val:
- for left in left_matches:
- yield left, right
-
-class Join(PartialResult):
- """Join of multiple inputs using a criteria expression.
-
- This implementation handles joins of two inputs each on a separate side
- of a comparison operation (i.e., a.foo == b.bar). The following operators
- are supported: ==, <, >, in.
- """
-
- implements(IPartialResult)
-
- _join_ops = {
- '==': equijoin,
- '>': greaterjoin,
- 'in': injoin}
-
- def __init__(self, input_map, criteria):
- """Construct the join
-
- input_map -- a mapping object mapping names => inputs.
-
- criteria -- criteria expression, an IExpression object. Supported
- criteria compare two inputs (i.e., a.foo == b.bar). The following
- operators are supported: ==, <, >, in.
-
- If an unsupported criteria is specified, a CantProcess exception
- is raised.
+ return reduce(and_, subresults)
+ except CantProcess:
+ pass
+ # Criteria cannot be divided
+ return self._CartProduct(input_map, criteria)
+
+ def _checkNames(self, names):
+ """If any names are not a valid input name or a name is duplicated,
+ raise QueryError
"""
- self._inputs = input_map
- self._criteria = criteria
- if len(criteria.freeNames()) != 2:
- raise CantProcess, 'join criteria must have 2 free variables'
- compare_node = criteria.ast().getChildNodes()[0]
- if not (isinstance(compare_node, ast.Compare) and
- len(compare_node.getChildNodes()) == 2):
- raise CantProcess, ('join criteria must be single comparison '
- 'between two terms')
- left, operator, right = compare_node.getChildren()
- if operator == '<':
- # Only greater join is directly supported
- operator = '>'
- left, right = right, left
- self.reverse = True
- else:
- self.reverse = False
- if operator not in self._join_ops.keys():
- raise CantProcess, (
- 'operator %s not supported for join' % operator)
- bindings = criteria.bindings()
- left_expr = Expression.fromAstNode(left, bindings)
- right_expr = Expression.fromAstNode(right, bindings)
- self._operator = operator
- left_names = list(left_expr.freeNames())
- right_names = list(right_expr.freeNames())
- if (len(left_names) != 1
- or left_names[0] not in input_map
- or len(right_names) != 1
- or right_names[0] not in input_map):
- raise CantProcess, ('left and right join criteria operands '
- 'must contain a single input term')
- self._left_name, = left_names
- self._left_func = left_expr.makeFunction(left_names)
- self._right_name, = right_names
- self._right_func = right_expr.makeFunction(right_names)
-
- def union(self, other):
- inputs = union_input_maps(self.inputMap(), other.inputMap())
- criteria = self.criteriaExpr() | other.criteriaExpr()
- return CartProduct(inputs, criteria)
-
- def intersection(self, other):
- inputs = intersect_input_maps(self.inputMap(), other.inputMap())
- criteria = self.criteriaExpr() & other.criteriaExpr()
- return CartProduct(inputs, criteria)
-
- def _iterJoin(self, reverse=False):
- # Generates (left, right) pairs of joined terms
- join = self._join_ops[self._operator]
- if reverse:
- return join(
- self._inputs[self._right_name], self._right_func,
- self._inputs[self._left_name], self._left_func)
- else:
- return join(
- self._inputs[self._left_name], self._left_func,
- self._inputs[self._right_name], self._right_func)
-
- def _iterJoinOtherProduct(self, names, joined):
- # Yield iterators of products for each join pair
- product = CartProduct(self._inputs, self._criteria)
- joined = {}
- has_left = self._left_name in joined
- has_right = self._right_name in joined
- # Iterate the joined results and pass them as fixed values
- # to the full product's results
- for left, right in self._iterJoin(self.reverse):
- if has_left:
- joined[self._left_name] = left
- if has_right:
- joined[self._right_name] = right
- for row in product.iterResult(*names, **joined):
- yield row
-
- def iterResult(self, *names, **fixed):
- if fixed:
- for name, obj in fixed.items():
- if name not in self._inputs:
- raise PypesQueryError(
- 'fixed input %s not part of query' % name)
- if obj not in self._inputs[name]:
- return _empty_iter
- # See if fixed values contain possible outputs. If not, bail early
- if (self._left_name in fixed
- and not self._left_func(fixed[self._left_name])):
- return _empty_iter
- if (self._right_name in fixed
- and not self._right_func(fixed[self._right_name])):
- return _empty_iter
- if self._left_name in fixed or self._right_name in fixed:
- # One or both of the joined inputs are fixed
- # don't bother using full join operations
- return CartProduct(
- self._inputs, self._criteria).iterResult(*names, **fixed)
- if len(names) == 2 and names == (self._left_name, self._right_name):
- # Special case, names match join terms in order exactly
- # No transform of the raw join is required
- # This case is expected to be relatively common
- return self._iterJoin(self.reverse)
- joined_names = []
- other_names = []
- free_names = self._criteria.freeNames()
- for n in names:
- if n in free_names:
- joined_names.append(n)
- else:
- other_names.append(n)
- if not other_names:
- # No names other than the joined names were selected
- if names == (self._left_name,):
- output = lambda row: row[0]
- elif names == (self._right_name,):
- output = lambda row: row[1]
- elif names == (self._right_name, self._left_name):
- output = lambda row: (row[1], row[0])
+ checked_names = Set()
+ for name in names:
+ if name not in self.inputs:
+ raise QueryError('no query input named "%s" % name')
+ if name not in checked_names:
+ checked_names.add(name)
else:
- # Ok, something is wrong with this picture
- raise RuntimeError('Bug in join result tranformation')
- return imap(output, self._iterJoin(self.reverse))
- else:
- return self._iterJoinOtherProduct(names, joined_names)
-
-
-class ActualizedResult(PartialResult):
- """Partial result with greedy evaluation.
-
- When instantiated, the inputs are processed immediately against the
- criteria. The criteria may consist of one or more terms each filtering
- exactly one input. Multiple terms are contained in a single AND
- boolean operation.
- """
-
- implements(IPartialResult)
-
- def __init__(self, input_map, criteria=None):
- """Construct the actualized result
-
- input_map -- a mapping object mapping names => inputs.
+ # XXX are dupe names bad??
+ raise QueryError('duplicate output name "%s"' % name)
- criteria -- criteria expression, an IExpression object. Supported
- criteria consists of one or more terms that each refer to a single
- input. If criteria is omitted, then the actualized result is the
- same as the inputs provided.
-
- If the criteria is not supported, raise CantProcess
+ # Result factories, overridden for testing
+ _CartProduct = CartProduct
+ _Join = Join
+ _ActualizedResult = ActualizedResult
+ _QueryResult = QueryResult
+
+ ## IQuery methods ##
+
+ def select(self, *input_names):
+ """Process inputs with criteria and order expressions and return
+ and IQueryResult
"""
- self._criteria = criteria
- if criteria is not None:
- node = criteria.ast().getChildNodes()[0]
- if (len(criteria.freeNames(input_map.keys())) > 1
- and isinstance(node, ast.And)):
- terms = node.getChildNodes()
- else:
- terms = (node,)
- bindings = criteria.bindings()
- filters = {} # Map input name => filter expression
- for term in terms:
- term_expr = Expression.fromAstNode(term, bindings)
- try:
- input_name, = term_expr.freeNames()
- except ValueError:
- raise CantProcess, \
- 'cannot actualize result for criteria containing join'
- if input_name not in input_map:
- raise PypesQueryInputError, \
- 'name %s in criteria not found in inputs'
- if input_name in filters:
- filters[input_name] = filters[input_name] & term_expr
- else:
- filters[input_name] = term_expr
-
- # Filter the inputs as results
- self._inputs = {}
- for name, ipt in input_map.items():
- if name in filters:
- self._inputs[name] = ipt = Set(
- ifilter(filters[name].makeFunction(name), ipt))
- else:
- self._inputs[name] = ipt
- if not ipt:
- # If one of the inputs is empty, they all are
- empty = IdentitySet()
- for name in input_map:
- self._inputs[name] = empty
- break
- else:
- self._inputs = input_map.copy()
-
- def union(self, other):
- """A union with an actualized result returns a new partial result
- the same class as other, with inputs unioned with self
+ if not input_names:
+ input_names = self.input_names
+ self._checkNames(input_names)
+ return QueryResult(
+ self, input_names, self._query(self.inputs, self.criteria))
+
+ def selectOne(self, *input_names):
+ """Process the inputs and criteria and return the single matching
+ result.
"""
- if isinstance(other, ActualizedResult):
- # Actualized results have already realized the criteria,
- # Drop the criteria here to avoid redundant processing
- criteria = None
- else:
- criteria = other.criteriaExpr()
- inputs = union_input_maps(self.inputMap(), other.inputMap())
- return other.__class__(inputs, criteria)
-
- def intersection(self, other):
- """An intersection with an actualized result returns a new partial
- result the same class as other, with inputs intersected with self
- """
- if isinstance(other, ActualizedResult):
- # Actualized results have already realized the criteria,
- # Drop the criteria here to avoid redundant processing
- criteria = None
- else:
- criteria = other.criteriaExpr()
- inputs = intersect_input_maps(self.inputMap(), other.inputMap())
- return other.__class__(inputs, criteria)
-
- def iterResult(self, *names, **fixed):
- if len(names) == 1:
- name = names[0]
- ipt = self._inputs[name]
- if not fixed:
- return iter(ipt)
- elif name in fixed:
- if fixed[name] in ipt:
- return iter((fixed[name],))
- else:
- return _empty_iter
- return CartProduct(self._inputs).iterResult(*names, **fixed)
-
+ if not input_names:
+ input_names = self.input_names
+ self._checkNames(input_names)
+ resultiter = self._query(
+ self.inputs, self.criteria).iterResult(*input_names)
+ try:
+ result = resultiter.next()
+ except StopIteration:
+ raise NoResultsError('no matching results')
+ try:
+ resultiter.next()
+ except StopIteration:
+ return result
+ else:
+ raise MultipleResultsError(
+ 'multiple results found for single result selection')
+
def resultSet(self, name):
- if name not in self._inputs:
- raise PypesQueryError('no input named "%s"' % name)
- return self._inputs[name]
-
-
-def sort(iterable, expression, order=ascending, limit=None):
- """Return a sequence containing the elements of iterable sorted in order
- by the values returned by applying expression, a callable object accepting
- a single argument, to each element.
-
- order may be the constants ascending and descending to specify the
- resulting sort order. limit, if specified is an integer value representing
- the number of elements to sort. When limit is specified, a partial sort
- will be performed and sort will return as soon as the limit count is
- reached.
-
- In order for the sort to perform properly, the expression should generate
- a homogenous set of values that support total ordering.
- """
- sortable = [(expression(i), i) for i in iterable]
- sortable.sort()
- if order == descending:
- sortable.reverse()
- if limit is not None:
- # XXX Implement N-Best algorithm for this
- sortable = sortable[:limit]
- # XXX We should be make this lazy
- return [i for key, i in sortable]
-
-
+ """Return the set of objects resulting from querying the specified
+ input"""
+ self._checkNames((name,))
+ return self._query(self.inputs, self.criteria).resultSet(name)
+
More information about the Zope-CVS
mailing list