[Zodb-checkins] CVS: Zope3/lib/python/Persistence/BTrees - sorters.c:1.1.2.1 BucketTemplate.c:1.1.2.10 MergeTemplate.c:1.1.2.5 SetOpTemplate.c:1.1.2.3 _IIBTree.c:1.1.2.2

Tim Peters tim.one@comcast.net
Tue, 4 Jun 2002 17:09:04 -0400


Update of /cvs-repository/Zope3/lib/python/Persistence/BTrees
In directory cvs.zope.org:/tmp/cvs-serv19688/lib/python/Persistence/BTrees

Modified Files:
      Tag: Zope-3x-branch
	BucketTemplate.c MergeTemplate.c SetOpTemplate.c _IIBTree.c 
Added Files:
      Tag: Zope-3x-branch
	sorters.c 
Log Message:
Give the branch the trunk's multiunion() function.  This includes new C
code, a new test file, and giving a Bucket_grow() a new "desired size"
argument.


=== Added File Zope3/lib/python/Persistence/BTrees/sorters.c === (422/522 lines abridged)
/*****************************************************************************

  Copyright (c) 2002 Zope Corporation and Contributors.
  All Rights Reserved.

  This software is subject to the provisions of the Zope Public License,
  Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
  WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
  FOR A PARTICULAR PURPOSE

 ****************************************************************************/

/* Revision information: $Id: sorters.c,v 1.1.2.1 2002/06/04 21:09:03 tim_one Exp $ */

/* The only routine here intended to be used outside the file is
   size_t sort_int4_nodups(int *p, size_t n)

   Sort the array of n ints pointed at by p, in place, and also remove
   duplicates.  Return the number of unique elements remaining, which occupy
   a contiguous and monotonically increasing slice of the array starting at p.

   Example:  If the input array is [3, 1, 2, 3, 1, 5, 2], sort_int4_nodups
   returns 4, and the first 4 elements of the array are changed to
   [1, 2, 3, 5].  The content of the remaining array positions is not defined.

   Notes:

   + This is specific to 4-byte signed ints, with endianness natural to the
     platform.

   + 4*n bytes of available heap memory are required for best speed.
*/

#include <stdlib.h>
#include <stddef.h>
#include <malloc.h>
#include <memory.h>
#include <string.h>
#include <assert.h>

/* The type of array elements to be sorted.  Most of the routines don't
   care about the type, and will work fine for any scalar C type (provided
   they're recompiled with element_type appropriately redefined).  However,
   the radix sort has to know everything about the type's internal
   representation.
*/
typedef int element_type;


[-=- -=- -=- 422 lines omitted -=- -=- -=-]

		plo[1] = *pj;
		*pj = pivot;

		/* Subfiles are from plo to pj-1 inclusive, and pj+1 to phi
		   inclusive.  Push the larger one, and loop back to do the
		   smaller one directly.
		*/
		if (pj - plo >= phi - pj) {
			PUSH(plo, pj-1);
			plo = pj+1;
		}
		else {
			PUSH(pj+1, phi);
			phi = pj-1;
		}
	}

#undef PUSH
#undef SWAP
}

/* Sort p and remove duplicates, as fast as we can. */
static size_t
sort_int4_nodups(int *p, size_t n)
{
	size_t nunique;
	element_type *work;

	assert(sizeof(int) == sizeof(element_type));
	assert(p);

	/* Use quicksort if the array is small, OR if malloc can't find
	   enough temp memory for radixsort.
	*/
	work = NULL;
	if (n > QUICKSORT_BEATS_RADIXSORT)
		work = (element_type *)malloc(n * sizeof(element_type));

	if (work) {
		element_type *out = radixsort_int4(p, work, n);
		nunique = uniq(p, out, n);
		free(work);
	}
	else {
		quicksort(p, n);
		nunique = uniq(p, p, n);
	}

	return nunique;
}


=== Zope3/lib/python/Persistence/BTrees/BucketTemplate.c 1.1.2.9 => 1.1.2.10 ===
   return _bucket_get(self, key, 0);
 }
-
+  
+/*
+** Bucket_grow
+**
+** Resize a bucket.
+**
+** Arguments:   self    The bucket.
+**              newsize The new maximum capacity.  If < 0, double the
+**                      current size unless the bucket is currently empty,
+**                      in which case use MIN_BUCKET_ALLOC.
+**              noval   Boolean; if true, allocate only key space and not
+**                      value space
+**
+** Returns:     -1      on error, and MemoryError exception is set
+**               0      on success
+*/
 static int 
-Bucket_grow(Bucket *self, int noval)
+Bucket_grow(Bucket *self, int newsize, int noval)
 {
     KEY_TYPE *keys;
     VALUE_TYPE *values;
 
     if (self->size) {
-	keys = BTree_Realloc(self->keys, sizeof(KEY_TYPE) * self->size * 2);
-	if (keys == NULL)
-	    return -1;
-	if (!noval) {
-	    values = BTree_Realloc(self->values, 
-				   sizeof(VALUE_TYPE) * self->size * 2);
-	    if (values == NULL)
-		return -1;
-	    self->values = values;
-	}
-	self->keys = keys;
-	self->size *= 2;
-    } else {
-	self->keys = BTree_Malloc(sizeof(KEY_TYPE) * MIN_BUCKET_ALLOC);
-	if (self->keys == NULL)
-	    return -1;
-	if (!noval) {
-	    self->values = BTree_Malloc(sizeof(VALUE_TYPE) * MIN_BUCKET_ALLOC);
-	    if (self->values == NULL)
-		return -1;
-	}
-	self->size = MIN_BUCKET_ALLOC;
-    }
+        if (newsize < 0)
+            newsize = self->size * 2;
+        if (newsize < 0)    /* int overflow */
+            goto Overflow;
+        UNLESS (keys = BTree_Realloc(self->keys, sizeof(KEY_TYPE) * newsize))
+            return -1;
     
+        UNLESS (noval) {
+            UNLESS (values = BTree_Realloc(self->values,
+                                           sizeof(VALUE_TYPE) * newsize))
+                return -1;
+            self->values = values;
+        }
+        self->keys = keys;
+    }
+    else {
+        if (newsize < 0)
+            newsize = MIN_BUCKET_ALLOC;
+        UNLESS (self->keys = BTree_Malloc(sizeof(KEY_TYPE) * newsize))
+            return -1;
+        UNLESS (noval) {
+            UNLESS (self->values = BTree_Malloc(sizeof(VALUE_TYPE) * newsize))
+                return -1;
+        }
+    }
+    self->size = newsize;
     return 0;
+    
+Overflow:
+  PyErr_NoMemory();
+  return -1;  
 }
 
 /*
@@ -215,7 +236,7 @@
       goto err;
     }
 
-  if (self->len==self->size && Bucket_grow(self, noval) < 0) goto err;
+  if (self->len==self->size && Bucket_grow(self, -1, noval) < 0) goto err;
 
   if (max != i) i++;
 


=== Zope3/lib/python/Persistence/BTrees/MergeTemplate.c 1.1.2.4 => 1.1.2.5 ===
 merge_output(Bucket *r, SetIteration *i, int mapping)
 {
-    if (r->len >= r->size && Bucket_grow(r, !mapping) < 0) 
+    if (r->len >= r->size && Bucket_grow(r, -1, !mapping) < 0) 
 	return -1;
     COPY_KEY(r->keys[r->len], i->key);
     INCREF_KEY(r->keys[r->len]);


=== Zope3/lib/python/Persistence/BTrees/SetOpTemplate.c 1.1.2.2 => 1.1.2.3 ===
   while (i->position >= 0)
     {
-      if(r->len >= r->size && Bucket_grow(r, ! merge) < 0) return -1;
+      if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) return -1;
       COPY_KEY(r->keys[r->len], i->key);
       INCREF_KEY(r->keys[r->len]);
 
@@ -228,7 +228,7 @@
 	{
 	  if(c1)
 	    {
-	      if(r->len >= r->size && Bucket_grow(r, ! merge) < 0) goto err;
+	      if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
               COPY_KEY(r->keys[r->len], i1.key);
               INCREF_KEY(r->keys[r->len]);
               if (merge)
@@ -244,7 +244,7 @@
 	{
 	  if(c12)
 	    {
-	      if(r->len >= r->size && Bucket_grow(r, ! merge) < 0) goto err;
+	      if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
               COPY_KEY(r->keys[r->len], i1.key);
               INCREF_KEY(r->keys[r->len]);
               if (merge)
@@ -265,7 +265,7 @@
 	{
 	  if(c2)
 	    {
-	      if(r->len >= r->size && Bucket_grow(r, ! merge) < 0) goto err;
+	      if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
               COPY_KEY(r->keys[r->len], i2.key);
               INCREF_KEY(r->keys[r->len]);
               if (merge)
@@ -399,5 +399,101 @@
 
   return o1;
 }         
+
+#endif
+
+#ifdef MULTI_INT_UNION
+#include "sorters.c"
+
+/* Input is a sequence of integer sets (or convertible to sets by the
+   set iteration protocol).  Output is the union of the sets.  The point
+   is to run much faster than doing pairs of unions.
+*/
+static PyObject *
+multiunion_m(PyObject *ignored, PyObject *args)
+{
+    PyObject *seq;          /* input sequence */
+    int n;                  /* length of input sequence */
+    PyObject *set = NULL;   /* an element of the input sequence */
+    Bucket *result;         /* result set */
+    SetIteration setiter = {0, 0, 0};
+    int i;
+
+    UNLESS(PyArg_ParseTuple(args, "O", &seq))
+        return NULL;
+
+    n = PyObject_Length(seq);
+    if (n < 0)
+        return NULL;
+
+    /* Construct an empty result set. */
+    result = BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL));
+    if (result == NULL)
+        return NULL;
+
+    /* For each set in the input sequence, append its elements to the result
+       set.  At this point, we ignore the possibility of duplicates. */
+    for (i = 0; i < n; ++i) {
+        set = PySequence_GetItem(seq, i);
+        if (set == NULL)
+            goto Error;
+
+        /* If set is a bucket, do a straight resize + memcpy. */
+        if (set->ob_type == (PyTypeObject*)&SetType ||
+            set->ob_type == (PyTypeObject*)&BucketType) {
+
+            const int setsize = SIZED(set)->len;
+            int size_desired = result->len + setsize;
+            /* If there are more to come, overallocate by 25% (arbitrary). */
+            if (i < n-1)
+                size_desired += size_desired >> 2;
+            if (size_desired && size_desired > result->size) {
+                if (Bucket_grow(result, size_desired, 1) < 0)
+                    goto Error;
+            }
+            memcpy(result->keys + result->len,
+                   BUCKET(set)->keys,
+                   setsize * sizeof(KEY_TYPE));
+            result->len += setsize;
+        }
+        else {
+            /* No cheap way:  iterate over set's elements one at a time. */
+            int merge;  /* dummy needed for initSetIteration */
+
+            if (initSetIteration(&setiter, set, 1, &merge) < 0) goto Error;
+            if (setiter.next(&setiter) < 0) goto Error;
+            while (setiter.position >= 0) {
+                if (result->len >= result->size && Bucket_grow(result, -1, 1) < 0)
+                    goto Error;
+                COPY_KEY(result->keys[result->len], setiter.key);
+                ++result->len;
+                /* We know the key is an int, so no need to incref it. */
+                if (setiter.next(&setiter) < 0) goto Error;
+            }
+            Py_DECREF(setiter.set);
+            setiter.set = NULL;
+        }
+        Py_DECREF(set);
+        set = NULL;
+    }
+
+    /* Combine, sort, remove duplicates, and reset the result's len.
+       If the set shrinks (which happens if and only if there are
+       duplicates), no point to realloc'ing the set smaller, as we
+       expect the result set to be short-lived.
+    */
+    if (result->len > 0) {
+        size_t newlen;          /* number of elements in final result set */
+        newlen = sort_int4_nodups(result->keys, (size_t)result->len);
+        result->len = (int)newlen;
+    }
+    return (PyObject *)result;
+
+Error:
+    Py_DECREF(result);
+    Py_XDECREF(set);
+    Py_XDECREF(setiter.set);
+    return NULL;
+}
 
 #endif


=== Zope3/lib/python/Persistence/BTrees/_IIBTree.c 1.1.2.1 => 1.1.2.2 ===
 #define DEFAULT_MAX_BUCKET_SIZE 120
 #define DEFAULT_MAX_BTREE_SIZE 500
-                
+#define MULTI_INT_UNION 1
+
 #include "intkeymacros.h"
 #include "intvaluemacros.h"
 #ifndef EXCLUDE_INTSET_SUPPORT