[Zope3-checkins] SVN: Zope3/trunk/src/ apply the new BTrees integer-family goodness

Fred L. Drake, Jr. fdrake at gmail.com
Wed Apr 25 19:09:35 EDT 2007


Log message for revision 74770:
  apply the new BTrees integer-family goodness

Changed:
  _U  Zope3/trunk/src/
  U   Zope3/trunk/src/zope/app/catalog/catalog.py
  U   Zope3/trunk/src/zope/app/intid/__init__.py
  U   Zope3/trunk/src/zope/app/intid/tests.py
  U   Zope3/trunk/src/zope/index/field/index.py
  U   Zope3/trunk/src/zope/index/keyword/index.py
  U   Zope3/trunk/src/zope/index/keyword/tests.py
  U   Zope3/trunk/src/zope/index/text/baseindex.py
  U   Zope3/trunk/src/zope/index/text/cosineindex.py
  U   Zope3/trunk/src/zope/index/text/okapiindex.py
  U   Zope3/trunk/src/zope/index/text/parsetree.py
  U   Zope3/trunk/src/zope/index/text/setops.py
  U   Zope3/trunk/src/zope/index/text/tests/test_index.py
  U   Zope3/trunk/src/zope/index/text/tests/test_queryengine.py
  U   Zope3/trunk/src/zope/index/text/tests/test_setops.py
  U   Zope3/trunk/src/zope/index/topic/filter.py
  U   Zope3/trunk/src/zope/index/topic/index.py
  U   Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py

-=-

Property changes on: Zope3/trunk/src
___________________________________________________________________
Name: svn:externals
   - docutils       svn://svn.zope.org/repos/main/docutils/tags/0.4.0
ZConfig        svn://svn.zope.org/repos/main/ZConfig/trunk/ZConfig
BTrees         -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/BTrees
persistent     -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/persistent
ThreadedAsync  -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/ThreadedAsync
transaction    -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/transaction
ZEO            -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZEO
ZODB           -r 73816 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZODB
twisted        svn://svn.twistedmatrix.com/svn/Twisted/tags/releases/twisted-core-2.5.0/twisted
zdaemon        -r 40792 svn://svn.zope.org/repos/main/zdaemon/trunk/src/zdaemon


   + docutils       svn://svn.zope.org/repos/main/docutils/tags/0.4.0
ZConfig        svn://svn.zope.org/repos/main/ZConfig/trunk/ZConfig
BTrees         -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/BTrees
persistent     -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/persistent
ThreadedAsync  -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/ThreadedAsync
transaction    -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/transaction
ZEO            -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZEO
ZODB           -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZODB
twisted        svn://svn.twistedmatrix.com/svn/Twisted/tags/releases/twisted-core-2.5.0/twisted
zdaemon        -r 40792 svn://svn.zope.org/repos/main/zdaemon/trunk/src/zdaemon



Modified: Zope3/trunk/src/zope/app/catalog/catalog.py
===================================================================
--- Zope3/trunk/src/zope/app/catalog/catalog.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/app/catalog/catalog.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
 """
 __docformat__ = 'restructuredtext'
 
+import BTrees
+
 import zope.index.interfaces
 from zope.interface import implements
 from zope.annotation.interfaces import IAttributeAnnotatable
@@ -28,8 +30,8 @@
 from zope.app.intid.interfaces import IIntIds
 from zope.traversing.interfaces import IPhysicallyLocatable
 from zope.location import location
-from BTrees.IFBTree import weightedIntersection
 
+
 class ResultSet:
     """Lazily accessed set of objects."""
 
@@ -54,6 +56,13 @@
                zope.index.interfaces.IIndexSearch,
                )
 
+    family = BTrees.family32
+
+    def __init__(self, family=None):
+        super(Catalog, self).__init__()
+        if family is not None:
+            self.family = family
+
     def clear(self):
         for index in self.values():
             index.clear()
@@ -120,7 +129,7 @@
 
         _, result = results.pop(0)
         for _, r in results:
-            _, result = weightedIntersection(result, r)
+            _, result = self.family.IFModule.weightedIntersection(result, r)
 
         return result
 

Modified: Zope3/trunk/src/zope/app/intid/__init__.py
===================================================================
--- Zope3/trunk/src/zope/app/intid/__init__.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/app/intid/__init__.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -21,7 +21,8 @@
 $Id$
 """
 import random
-from BTrees import IOBTree, OIBTree
+import BTrees
+
 from ZODB.interfaces import IConnection
 from persistent import Persistent
 
@@ -54,10 +55,14 @@
 
     _randrange = random.randrange
 
-    def __init__(self):
-        self.ids = queryUtility(IFactory, 'OIBTree', OIBTree.OIBTree)()
-        self.refs = queryUtility(IFactory, 'IOBTree', IOBTree.IOBTree)()
+    family = BTrees.family32
 
+    def __init__(self, family=None):
+        if family is not None:
+            self.family = family
+        self.ids = self.family.OIModule.BTree()
+        self.refs = self.family.IOModule.BTree()
+
     def __len__(self):
         return len(self.ids)
 
@@ -102,7 +107,7 @@
         """
         while True:
             if self._v_nextid is None:
-                self._v_nextid = self._randrange(0, 2**31)
+                self._v_nextid = self._randrange(0, self.family.maxint)
             uid = self._v_nextid
             self._v_nextid += 1
             if uid not in self.refs:

Modified: Zope3/trunk/src/zope/app/intid/tests.py
===================================================================
--- Zope3/trunk/src/zope/app/intid/tests.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/app/intid/tests.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
 """
 import unittest
 
+import BTrees
+
 from persistent import Persistent
 from persistent.interfaces import IPersistent
 from ZODB.interfaces import IConnection
@@ -70,11 +72,13 @@
 
 class TestIntIds(ReferenceSetupMixin, unittest.TestCase):
 
+    createIntIds = IntIds
+
     def test_interface(self):
-        verifyObject(IIntIds, IntIds())
+        verifyObject(IIntIds, self.createIntIds())
 
     def test_non_keyreferences(self):
-        u = IntIds()
+        u = self.createIntIds()
         obj = object()
 
         self.assert_(u.queryId(obj) is None)
@@ -82,7 +86,7 @@
         self.assertRaises(KeyError, u.getId, obj)
 
     def test(self):
-        u = IntIds()
+        u = self.createIntIds()
         obj = P()
         
         obj._p_jar = ConnectionStub()
@@ -112,7 +116,7 @@
     def test_btree_long(self):
         # This is a somewhat arkward test, that *simulates* the border case
         # behaviour of the _generateId method
-        u = IntIds()
+        u = self.createIntIds()
         u._randrange = lambda x,y:int(2**31-1)
 
         # The chosen int is exactly the largest number possible that is
@@ -126,7 +130,7 @@
         self.failUnless(2**31-1 in tuple(u.refs.keys()))
 
     def test_len_items(self):
-        u = IntIds()
+        u = self.createIntIds()
         obj = P()
         obj._p_jar = ConnectionStub()
 
@@ -164,7 +168,7 @@
         self.assertEquals(u.items(), [])
 
     def test_getenrateId(self):
-        u = IntIds()
+        u = self.createIntIds()
         self.assertEquals(u._v_nextid, None)
         id1 = u._generateId()
         self.assert_(u._v_nextid is not None)
@@ -244,9 +248,17 @@
         self.assertEquals(events[0].original_event.object, parent_folder)
         self.assertEquals(events[0].object, folder)
 
+
+class TestIntIds64(TestIntIds):
+
+    def createIntIds(self):
+        return IntIds(family=BTrees.family64)
+
+
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(TestIntIds))
+    suite.addTest(unittest.makeSuite(TestIntIds64))
     suite.addTest(unittest.makeSuite(TestSubscribers))
     return suite
 

Modified: Zope3/trunk/src/zope/index/field/index.py
===================================================================
--- Zope3/trunk/src/zope/index/field/index.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/field/index.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,9 +17,9 @@
 """
 import persistent
 
-from BTrees.IOBTree import IOBTree
+import BTrees
+
 from BTrees.OOBTree import OOBTree
-from BTrees.IFBTree import IFTreeSet, multiunion
 from BTrees.Length import Length
 
 import zope.interface
@@ -34,7 +34,11 @@
         interfaces.IIndexSearch,
         )
 
-    def __init__(self):
+    family = BTrees.family32
+
+    def __init__(self, family=None):
+        if family is not None:
+            self.family = family
         self.clear()
 
     def clear(self):
@@ -42,7 +46,7 @@
         # The forward index maps indexed values to a sequence of docids
         self._fwd_index = OOBTree()
         # The reverse index maps a docid to its index value
-        self._rev_index = IOBTree()
+        self._rev_index = self.family.IOModule.BTree()
         self._num_docs = Length(0)
 
     def documentCount(self):
@@ -66,7 +70,7 @@
         # Insert into forward index.
         set = self._fwd_index.get(value)
         if set is None:
-            set = IFTreeSet()
+            set = self.family.IFModule.TreeSet()
             self._fwd_index[value] = set
         set.insert(docid)
 
@@ -102,4 +106,5 @@
     def apply(self, query):
         if len(query) != 2 or not isinstance(query, tuple):
             raise TypeError("two-length tuple expected", query)
-        return multiunion(self._fwd_index.values(*query))        
+        return self.family.IFModule.multiunion(
+            self._fwd_index.values(*query))

Modified: Zope3/trunk/src/zope/index/keyword/index.py
===================================================================
--- Zope3/trunk/src/zope/index/keyword/index.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/keyword/index.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,9 +17,9 @@
 """
 from persistent import Persistent
 
-from BTrees.IOBTree import IOBTree
+import BTrees
+
 from BTrees.OOBTree import OOBTree, OOSet, difference 
-from BTrees.IIBTree import IISet, union, intersection
 from BTrees.Length import Length
 
 from types import StringTypes
@@ -27,13 +27,17 @@
 from zope.index.keyword.interfaces import IKeywordQuerying
 from zope.interface import implements
 
+
 class KeywordIndex(Persistent):
     """ A case-insensitive keyword index """
 
+    family = BTrees.family32
     normalize = True
     implements(IInjection, IStatistics, IKeywordQuerying)
 
-    def __init__(self):
+    def __init__(self, family=None):
+        if family is not None:
+            self.family = family
         self.clear()
 
     def clear(self):
@@ -45,7 +49,7 @@
         # The reverse index maps a docid to its keywords
         # TODO: Using a vocabulary might be the better choice to store
         # keywords since it would allow use to use integers instead of strings
-        self._rev_index = IOBTree()
+        self._rev_index = self.family.IOModule.BTree()
         self._num_docs = Length(0)
 
     def documentCount(self):
@@ -115,7 +119,7 @@
         has_key = idx.has_key
         for word in words:
             if not has_key(word):
-                idx[word] = IISet()
+                idx[word] = self.family.IIModule.Set()
             idx[word].insert(docid)
 
     def _insert_reverse(self, docid, words):
@@ -132,17 +136,19 @@
         if self.normalize:
             query = [w.lower() for w in query]
 
-        f = {'and' : intersection, 'or' : union}[operator]
+        f = {'and' : self.family.IIModule.intersection,
+             'or' : self.family.IIModule.union,
+             }[operator]
     
         rs = None
         for word in query:
-            docids = self._fwd_index.get(word, IISet())
+            docids = self._fwd_index.get(word, self.family.IIModule.Set())
             rs = f(rs, docids)
             
         if rs:
             return rs
         else:
-            return IISet()
+            return self.family.IIModule.Set()
 
 class CaseSensitiveKeywordIndex(KeywordIndex):
     """ A case-sensitive keyword index """

Modified: Zope3/trunk/src/zope/index/keyword/tests.py
===================================================================
--- Zope3/trunk/src/zope/index/keyword/tests.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/keyword/tests.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -14,7 +14,8 @@
 
 from unittest import TestCase, TestSuite, main, makeSuite
 
-from BTrees.IIBTree import IISet
+import BTrees
+
 from zope.index.keyword.index import KeywordIndex
 from zope.index.interfaces import IInjection, IStatistics
 from zope.index.keyword.interfaces import IKeywordQuerying
@@ -22,6 +23,8 @@
 
 class KeywordIndexTest(TestCase):
 
+    from BTrees.IIBTree import IISet
+
     def setUp(self):
         self.index = KeywordIndex()
 
@@ -82,13 +85,13 @@
         self.index.index_doc(1,  ('foo', 'bar', 'doom'))
         self.index.index_doc(1,  ('bar', 'blabla'))
         self.assertEqual(self.index.documentCount(), 3)
-        self._search('quick',   IISet())
-        self._search('foo',   IISet())
-        self._search('bar',   IISet([1]))
-        self._search(['doom'],   IISet())
-        self._search(['blabla'],   IISet([1]))
-        self._search_and(('bar', 'blabla'),   IISet([1]))
-        self._search(['cmf'],   IISet([5]))
+        self._search('quick',   self.IISet())
+        self._search('foo',   self.IISet())
+        self._search('bar',   self.IISet([1]))
+        self._search(['doom'],   self.IISet())
+        self._search(['blabla'],   self.IISet([1]))
+        self._search_and(('bar', 'blabla'),   self.IISet([1]))
+        self._search(['cmf'],   self.IISet([5]))
 
     def test_hasdoc(self):
         self._populate_index()
@@ -101,31 +104,43 @@
 
     def test_simplesearch(self):
         self._populate_index()
-        self._search([''],      IISet())
-        self._search(['cmf'],   IISet([1, 5]))
-        self._search(['zope'],  IISet([1, 3]))
-        self._search(['zope3'], IISet([1]))
-        self._search(['foo'],   IISet())
+        self._search([''],      self.IISet())
+        self._search(['cmf'],   self.IISet([1, 5]))
+        self._search(['zope'],  self.IISet([1, 3]))
+        self._search(['zope3'], self.IISet([1]))
+        self._search(['foo'],   self.IISet())
 
     def test_search_and(self):
         self._populate_index()
-        self._search_and(('cmf', 'zope3'), IISet([1]))
-        self._search_and(('cmf', 'zope'),  IISet([1]))
-        self._search_and(('cmf', 'zope4'), IISet())
-        self._search_and(('zope', 'ZOPE'), IISet([1, 3]))
+        self._search_and(('cmf', 'zope3'), self.IISet([1]))
+        self._search_and(('cmf', 'zope'),  self.IISet([1]))
+        self._search_and(('cmf', 'zope4'), self.IISet())
+        self._search_and(('zope', 'ZOPE'), self.IISet([1, 3]))
 
     def test_search_or(self):
         self._populate_index()
-        self._search_or(('cmf', 'zope3'), IISet([1, 5]))
-        self._search_or(('cmf', 'zope'),  IISet([1, 3, 5]))
-        self._search_or(('cmf', 'zope4'), IISet([1, 5]))
-        self._search_or(('zope', 'ZOPE'), IISet([1,3]))
+        self._search_or(('cmf', 'zope3'), self.IISet([1, 5]))
+        self._search_or(('cmf', 'zope'),  self.IISet([1, 3, 5]))
+        self._search_or(('cmf', 'zope4'), self.IISet([1, 5]))
+        self._search_or(('zope', 'ZOPE'), self.IISet([1,3]))
 
     def test_index_input(self):
-        self.assertRaises(TypeError, self.index.index_doc, 1, "non-sequence-string")
+        self.assertRaises(
+            TypeError, self.index.index_doc, 1, "non-sequence-string")
 
+
+class KeywordIndexTest64(KeywordIndexTest):
+
+    from BTrees.LLBTree import LLSet as IISet
+
+    def setUp(self):
+        self.index = KeywordIndex(family=BTrees.family64)
+
+
 def test_suite():
-    return TestSuite((makeSuite(KeywordIndexTest), ))
+    return TestSuite((makeSuite(KeywordIndexTest),
+                      makeSuite(KeywordIndexTest64),
+                      ))
 
 if __name__=='__main__':
     main(defaultTest='test_suite')

Modified: Zope3/trunk/src/zope/index/text/baseindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/baseindex.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/baseindex.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -20,27 +20,27 @@
 from persistent import Persistent
 from zope.interface import implements
 
+import BTrees
+
+from BTrees import Length
 from BTrees.IOBTree import IOBTree
-from BTrees.IFBTree import IFBTree, IFTreeSet
-from BTrees.IFBTree import intersection, difference
-from BTrees import Length
 
 from zope.index.interfaces import IInjection, IStatistics
 
 from zope.index.text.interfaces import IExtendedQuerying
 from zope.index.text import widcode
 from zope.index.text.setops import mass_weightedIntersection, \
-                                  mass_weightedUnion
+                                   mass_weightedUnion
 
 
-def unique(L):
-    """Return a list of the unique elements in L."""
-    return IFTreeSet(L).keys()
-
 class BaseIndex(Persistent):
     implements(IInjection, IStatistics, IExtendedQuerying)
 
-    def __init__(self, lexicon):
+    family = BTrees.family32
+
+    def __init__(self, lexicon, family=None):
+        if family is not None:
+            self.family = family
         self._lexicon = lexicon
 
         # wid -> {docid -> weight}; t -> D -> w(D, t)
@@ -56,17 +56,19 @@
         # may introduce a lexicon word we've never seen.
         # A word is in-vocabulary for this index if and only if
         # _wordinfo.has_key(wid).  Note that wid 0 must not be a key.
+        # This does not use the BTree family since wids are always "I"
+        # flavor trees.
         self._wordinfo = IOBTree()
 
         # docid -> weight
         # Different indexers have different notions of doc weight, but we
         # expect each indexer to use ._docweight to map docids to its
         # notion of what a doc weight is.
-        self._docweight = IFBTree()
+        self._docweight = self.family.IFModule.BTree()
 
         # docid -> WidCode'd list of wids
         # Used for un-indexing, and for phrase search.
-        self._docwords = IOBTree()
+        self._docwords = self.family.IOModule.BTree()
 
         # Use a BTree length for efficient length computation w/o conflicts
         self.wordCount = Length.Length()
@@ -116,12 +118,13 @@
         old_wid2w, old_docw = self._get_frequencies(old_wids)
         new_wid2w, new_docw = self._get_frequencies(new_wids)
 
-        old_widset = IFTreeSet(old_wid2w.keys())
-        new_widset = IFTreeSet(new_wid2w.keys())
+        old_widset = self.family.IFModule.TreeSet(old_wid2w.keys())
+        new_widset = self.family.IFModule.TreeSet(new_wid2w.keys())
 
-        in_both_widset = intersection(old_widset, new_widset)
-        only_old_widset = difference(old_widset, in_both_widset)
-        only_new_widset = difference(new_widset, in_both_widset)
+        IFModule = self.family.IFModule
+        in_both_widset = IFModule.intersection(old_widset, new_widset)
+        only_old_widset = IFModule.difference(old_widset, in_both_widset)
+        only_new_widset = IFModule.difference(new_widset, in_both_widset)
         del old_widset, new_widset
 
         for wid in only_old_widset.keys():
@@ -161,7 +164,7 @@
     def unindex_doc(self, docid):
         if docid not in self._docwords:
             return
-        for wid in unique(self.get_words(docid)):
+        for wid in self.family.IFModule.TreeSet(self.get_words(docid)).keys():
             self._del_wordinfo(wid, docid)
         del self._docwords[docid]
         del self._docweight[docid]
@@ -171,25 +174,25 @@
         if not wids:
             return None # All docs match
         wids = self._remove_oov_wids(wids)
-        return mass_weightedUnion(self._search_wids(wids))
+        return mass_weightedUnion(self._search_wids(wids), self.family)
 
     def search_glob(self, pattern):
         wids = self._lexicon.globToWordIds(pattern)
         wids = self._remove_oov_wids(wids)
-        return mass_weightedUnion(self._search_wids(wids))
+        return mass_weightedUnion(self._search_wids(wids), self.family)
 
     def search_phrase(self, phrase):
         wids = self._lexicon.termToWordIds(phrase)
         cleaned_wids = self._remove_oov_wids(wids)
         if len(wids) != len(cleaned_wids):
             # At least one wid was OOV:  can't possibly find it.
-            return IFBTree()
+            return self.family.IFModule.BTree()
         scores = self._search_wids(wids)
-        hits = mass_weightedIntersection(scores)
+        hits = mass_weightedIntersection(scores, self.family)
         if not hits:
             return hits
         code = widcode.encode(wids)
-        result = IFBTree()
+        result = self.family.IFModule.BTree()
         for docid, weight in hits.items():
             docwords = self._docwords[docid]
             if docwords.find(code) >= 0:
@@ -254,7 +257,7 @@
             # len(IFBTree).
             if (isinstance(doc2score, type({})) and
                 len(doc2score) == self.DICT_CUTOFF):
-                doc2score = IFBTree(doc2score)
+                doc2score = self.family.IFModule.BTree(doc2score)
         doc2score[docid] = f
         self._wordinfo[wid] = doc2score # not redundant:  Persistency!
 
@@ -277,7 +280,7 @@
                 new_word_count += 1
             elif (isinstance(doc2score, dicttype) and
                   len(doc2score) == self.DICT_CUTOFF):
-                doc2score = IFBTree(doc2score)
+                doc2score = self.family.IFModule.BTree(doc2score)
             doc2score[docid] = weight
             self._wordinfo[wid] = doc2score # not redundant:  Persistency!
         self.wordCount.change(new_word_count)

Modified: Zope3/trunk/src/zope/index/text/cosineindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/cosineindex.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/cosineindex.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,14 +17,13 @@
 """
 import math
 
-from BTrees.IFBTree import IFBucket
-
 from zope.index.text.baseindex import BaseIndex, inverse_doc_frequency
 
+
 class CosineIndex(BaseIndex):
 
-    def __init__(self, lexicon):
-        BaseIndex.__init__(self, lexicon)
+    def __init__(self, lexicon, family=None):
+        BaseIndex.__init__(self, lexicon, family=family)
 
         # ._wordinfo for cosine is wid -> {docid -> weight};
         # t -> D -> w(d, t)/W(d)
@@ -74,7 +73,7 @@
             idf = inverse_doc_frequency(len(d2w), N)  # an unscaled float
             #print "idf = %.3f" % idf
             if isinstance(d2w, DictType):
-                d2w = IFBucket(d2w)
+                d2w = self.family.IFModule.Bucket(d2w)
             L.append((d2w, idf))
         return L
 

Modified: Zope3/trunk/src/zope/index/text/okapiindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/okapiindex.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/okapiindex.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -191,8 +191,6 @@
 
 $Id$
 """
-from BTrees.IFBTree import IFBucket
-
 from zope.index.text.baseindex import BaseIndex
 from zope.index.text.baseindex import inverse_doc_frequency
 
@@ -204,8 +202,8 @@
     assert K1 >= 0.0
     assert 0.0 <= B <= 1.0
 
-    def __init__(self, lexicon):
-        BaseIndex.__init__(self, lexicon)
+    def __init__(self, lexicon, family=None):
+        BaseIndex.__init__(self, lexicon, family=family)
 
         # ._wordinfo for Okapi is
         # wid -> {docid -> frequency}; t -> D -> f(D, t)
@@ -267,7 +265,7 @@
         for t in wids:
             d2f = self._wordinfo[t] # map {docid -> f(docid, t)}
             idf = inverse_doc_frequency(len(d2f), N)  # an unscaled float
-            result = IFBucket()
+            result = self.family.IFModule.Bucket()
             for docid, f in d2f.items():
                 lenweight = B_from1 + B * docid2len[docid] / meandoclen
                 tf = f * K1_plus1 / (f + K1 * lenweight)
@@ -311,7 +309,7 @@
         for t in wids:
             d2f = self._wordinfo[t] # map {docid -> f(docid, t)}
             idf = inverse_doc_frequency(len(d2f), N)  # an unscaled float
-            result = IFBucket()
+            result = self.family.IFModule.Bucket()
             score(result, d2f.items(), docid2len, idf, meandoclen)
             L.append((result, 1))
         return L

Modified: Zope3/trunk/src/zope/index/text/parsetree.py
===================================================================
--- Zope3/trunk/src/zope/index/text/parsetree.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/parsetree.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -15,8 +15,6 @@
 
 $Id$
 """
-from BTrees.IFBTree import difference
-
 from zope.index.text.interfaces import IQueryParseTree
 from zope.index.text.setops import mass_weightedIntersection
 from zope.index.text.setops import mass_weightedUnion
@@ -88,10 +86,10 @@
                 # included.
                 if r is not None:
                     L.append((r, 1))
-        set = mass_weightedIntersection(L)
+        set = mass_weightedIntersection(L, index.family)
         if Nots:
-            notset = mass_weightedUnion(Nots)
-            set = difference(set, notset)
+            notset = mass_weightedUnion(Nots, index.family)
+            set = index.family.IFModule.difference(set, notset)
         return set
 
 class OrNode(ParseTreeNode):
@@ -108,7 +106,7 @@
             # to act like plain real_word).
             if r is not None:
                 weighted.append((r, 1))
-        return mass_weightedUnion(weighted)
+        return mass_weightedUnion(weighted, index.family)
 
 class AtomNode(ParseTreeNode):
 

Modified: Zope3/trunk/src/zope/index/text/setops.py
===================================================================
--- Zope3/trunk/src/zope/index/text/setops.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/setops.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -15,29 +15,30 @@
 
 $Id$
 """
-from BTrees.IFBTree import IFBucket, weightedIntersection, weightedUnion
 
+import BTrees
+
 from zope.index.nbest import NBest
 
-def mass_weightedIntersection(L):
+def mass_weightedIntersection(L, family=BTrees.family32):
     "A list of (mapping, weight) pairs -> their weightedIntersection IFBucket."
     L = [(x, wx) for (x, wx) in L if x is not None]
     if len(L) < 2:
-        return _trivial(L)
+        return _trivial(L, family)
     # Intersect with smallest first.  We expect the input maps to be
     # IFBuckets, so it doesn't hurt to get their lengths repeatedly
     # (len(Bucket) is fast; len(BTree) is slow).
     L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
     (x, wx), (y, wy) = L[:2]
-    dummy, result = weightedIntersection(x, y, wx, wy)
+    dummy, result = family.IFModule.weightedIntersection(x, y, wx, wy)
     for x, wx in L[2:]:
-        dummy, result = weightedIntersection(result, x, 1, wx)
+        dummy, result = family.IFModule.weightedIntersection(result, x, 1, wx)
     return result
 
-def mass_weightedUnion(L):
+def mass_weightedUnion(L, family=BTrees.family32):
     "A list of (mapping, weight) pairs -> their weightedUnion IFBucket."
     if len(L) < 2:
-        return _trivial(L)
+        return _trivial(L, family)
     # Balance unions as closely as possible, smallest to largest.
     merge = NBest(len(L))
     for x, weight in L:
@@ -46,18 +47,19 @@
         # Merge the two smallest so far, and add back to the queue.
         (x, wx), dummy = merge.pop_smallest()
         (y, wy), dummy = merge.pop_smallest()
-        dummy, z = weightedUnion(x, y, wx, wy)
+        dummy, z = family.IFModule.weightedUnion(x, y, wx, wy)
         merge.add((z, 1), len(z))
     (result, weight), dummy = merge.pop_smallest()
     return result
 
-def _trivial(L):
+def _trivial(L, family):
     # L is empty or has only one (mapping, weight) pair.  If there is a
     # pair, we may still need to multiply the mapping by its weight.
     assert len(L) <= 1
     if len(L) == 0:
-        return IFBucket()
+        return family.IFModule.Bucket()
     [(result, weight)] = L
     if weight != 1:
-        dummy, result = weightedUnion(IFBucket(), result, 0, weight)
+        dummy, result = family.IFModule.weightedUnion(
+            family.IFModule.Bucket(), result, 0, weight)
     return result

Modified: Zope3/trunk/src/zope/index/text/tests/test_index.py
===================================================================
--- Zope3/trunk/src/zope/index/text/tests/test_index.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/tests/test_index.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
 """
 from unittest import TestCase, TestSuite, main, makeSuite
 
+import BTrees
+
 from zope.index.text.lexicon import Lexicon, Splitter
 from zope.index.text.cosineindex import CosineIndex
 from zope.index.text.okapiindex import OkapiIndex
@@ -169,6 +171,18 @@
 class OkapiIndexTest(IndexTest):
     IndexFactory = OkapiIndex
 
+class CosineIndexTest(IndexTest):
+
+    @staticmethod
+    def IndexFactory(*args, **kw):
+        return CosineIndex(family=BTrees.family64, *args, **kw)
+
+class OkapiIndexTest(IndexTest):
+
+    @staticmethod
+    def IndexFactory(*args, **kw):
+        return OkapiIndex(family=BTrees.family64, *args, **kw)
+
 def test_suite():
     return TestSuite((makeSuite(CosineIndexTest),
                       makeSuite(OkapiIndexTest),

Modified: Zope3/trunk/src/zope/index/text/tests/test_queryengine.py
===================================================================
--- Zope3/trunk/src/zope/index/text/tests/test_queryengine.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/tests/test_queryengine.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,7 +17,7 @@
 """
 import unittest
 
-from BTrees.IFBTree import IFBucket
+import BTrees
 
 from zope.index.text.queryparser import QueryParser
 from zope.index.text.parsetree import QueryError
@@ -25,8 +25,10 @@
 
 class FauxIndex(object):
 
+    family = BTrees.family32
+
     def search(self, term):
-        b = IFBucket()
+        b = self.family.IFModule.Bucket()
         if term == "foo":
             b[1] = b[3] = 1
         elif term == "bar":

Modified: Zope3/trunk/src/zope/index/text/tests/test_setops.py
===================================================================
--- Zope3/trunk/src/zope/index/text/tests/test_setops.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/tests/test_setops.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,7 +17,10 @@
 """
 from unittest import TestCase, main, makeSuite
 
+import BTrees
+
 from BTrees.IFBTree import IFBTree, IFBucket
+from BTrees.LFBTree import LFBucket
 
 from zope.index.text.setops import mass_weightedIntersection
 from zope.index.text.setops import mass_weightedUnion
@@ -28,6 +31,23 @@
         self.assertEqual(len(mass_weightedIntersection([])), 0)
         self.assertEqual(len(mass_weightedUnion([])), 0)
 
+    def testEmptyListsHonorFamily(self):
+        # family32
+        t = mass_weightedIntersection([], BTrees.family32)
+        self.assertEqual(len(t), 0)
+        self.assertEqual(t.__class__, IFBucket)
+        t = mass_weightedUnion([], BTrees.family32)
+        self.assertEqual(len(t), 0)
+        self.assertEqual(t.__class__, IFBucket)
+
+        # family64
+        t = mass_weightedIntersection([], BTrees.family64)
+        self.assertEqual(len(t), 0)
+        self.assertEqual(t.__class__, LFBucket)
+        t = mass_weightedUnion([], BTrees.family64)
+        self.assertEqual(len(t), 0)
+        self.assertEqual(t.__class__, LFBucket)
+
     def testIdentity(self):
         t = IFBTree([(1, 2)])
         b = IFBucket([(1, 2)])

Modified: Zope3/trunk/src/zope/index/topic/filter.py
===================================================================
--- Zope3/trunk/src/zope/index/topic/filter.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/topic/filter.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -15,7 +15,9 @@
 
 $Id$
 """
-from BTrees.IIBTree import IISet
+
+import BTrees
+
 from zope.index.topic.interfaces import ITopicFilteredSet
 from zope.interface import implements
 
@@ -29,13 +31,17 @@
 
     implements(ITopicFilteredSet)
 
-    def __init__(self, id, expr):
+    family = BTrees.family32
+
+    def __init__(self, id, expr, family=None):
+        if family is not None:
+            self.family = family
         self.id   = id
         self.expr = expr
         self.clear()
 
     def clear(self):
-        self._ids  = IISet()
+        self._ids = self.family.IIModule.Set()
 
     def index_doc(self, docid, context):
         raise NotImplementedError
@@ -66,10 +72,7 @@
     """ a topic filtered set to check a context against a Python expression """
 
     def index_doc(self, docid, context):
-
         try:
             if eval(self.expr): self._ids.insert(docid)
         except:
             pass  # ignore errors 
-
-

Modified: Zope3/trunk/src/zope/index/topic/index.py
===================================================================
--- Zope3/trunk/src/zope/index/topic/index.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/topic/index.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,27 +17,32 @@
 """
 from persistent import Persistent
 
+import BTrees
+
 from BTrees.OOBTree import OOBTree
-from BTrees.IIBTree import IISet, union, intersection
 
-from types import ListType, TupleType, StringTypes
 from zope.interface import implements
 
 from zope.index.interfaces import IInjection
 from zope.index.topic.interfaces import ITopicQuerying
 
+
 class TopicIndex(Persistent):
 
     implements(IInjection, ITopicQuerying)
 
-    def __init__(self):
+    family = BTrees.family32
+
+    def __init__(self, family=None):
+        if family is not None:
+            self.family = family
         self.clear()
 
     def clear(self):
         # mapping filter id -> filter
         self._filters = OOBTree()
 
-    def addFilter(self, f ):
+    def addFilter(self, f):
         """ Add filter 'f' with ID 'id' """
         self._filters[f.getId()] = f
 
@@ -58,12 +63,13 @@
             f.unindex_doc(docid)
 
     def search(self, query, operator='and'):
+        if isinstance(query, basestring): query = [query]
+        if not isinstance(query, (tuple, list)):
+            raise TypeError(
+                'query argument must be a list/tuple of filter ids')
 
-        if isinstance(query, StringTypes): query = [query]
-        if not isinstance(query, (TupleType, ListType)):
-            raise TypeError('query argument must be a list/tuple of filter ids')
-
-        f = {'and' : intersection, 'or' : union}[operator]
+        IIModule = self.family.IIModule
+        f = {'and': IIModule.intersection, 'or': IIModule.union}[operator]
     
         rs = None
         for id in self._filters.keys():
@@ -71,6 +77,5 @@
                 docids = self._filters[id].getIds()
                 rs = f(rs, docids)
             
-        if rs:  return rs
-        else: return IISet()
-        
+        if rs: return rs
+        else: return self.family.IIModule.Set()

Modified: Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py
===================================================================
--- Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py	2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py	2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
 """
 from unittest import TestCase, TestSuite, main, makeSuite 
 
+import BTrees
+
 from zope.index.topic.index import TopicIndex
 from zope.index.topic.filter import PythonFilteredSet
 from zope.interface.verify import verifyClass
@@ -30,14 +32,19 @@
 
 class TopicIndexTest(TestCase):
 
+    family = BTrees.family32
+
     def setUp(self):
-        self.index = TopicIndex()
-        self.index.addFilter(PythonFilteredSet('doc1',
-                                               "context.meta_type == 'doc1'"))
-        self.index.addFilter(PythonFilteredSet('doc2',
-                                               "context.meta_type == 'doc2'"))
-        self.index.addFilter(PythonFilteredSet('doc3',
-                                               "context.meta_type == 'doc3'"))
+        self.index = TopicIndex(family=self.family)
+        self.index.addFilter(
+            PythonFilteredSet('doc1', "context.meta_type == 'doc1'",
+                              self.family))
+        self.index.addFilter(
+            PythonFilteredSet('doc2', "context.meta_type == 'doc2'",
+                              self.family))
+        self.index.addFilter(
+            PythonFilteredSet('doc3', "context.meta_type == 'doc3'",
+                              self.family))
 
         self.index.index_doc(0 , O('doc0'))
         self.index.index_doc(1 , O('doc1'))
@@ -90,8 +97,15 @@
         self._search_and(['doc1','doc2'], [])
 
 
+class TopicIndexTest64(TopicIndexTest):
+
+    family = BTrees.family64
+
+
 def test_suite():
-    return TestSuite((makeSuite(TopicIndexTest), ))
+    return TestSuite((makeSuite(TopicIndexTest),
+                      makeSuite(TopicIndexTest64),
+                      ))
 
 if __name__=='__main__':
     main(defaultTest='test_suite')



More information about the Zope3-Checkins mailing list