[Zope3-checkins] SVN: Zope3/trunk/src/ apply the new BTrees integer-family goodness
Fred L. Drake, Jr.
fdrake at gmail.com
Wed Apr 25 19:09:35 EDT 2007
Log message for revision 74770:
apply the new BTrees integer-family goodness
Changed:
_U Zope3/trunk/src/
U Zope3/trunk/src/zope/app/catalog/catalog.py
U Zope3/trunk/src/zope/app/intid/__init__.py
U Zope3/trunk/src/zope/app/intid/tests.py
U Zope3/trunk/src/zope/index/field/index.py
U Zope3/trunk/src/zope/index/keyword/index.py
U Zope3/trunk/src/zope/index/keyword/tests.py
U Zope3/trunk/src/zope/index/text/baseindex.py
U Zope3/trunk/src/zope/index/text/cosineindex.py
U Zope3/trunk/src/zope/index/text/okapiindex.py
U Zope3/trunk/src/zope/index/text/parsetree.py
U Zope3/trunk/src/zope/index/text/setops.py
U Zope3/trunk/src/zope/index/text/tests/test_index.py
U Zope3/trunk/src/zope/index/text/tests/test_queryengine.py
U Zope3/trunk/src/zope/index/text/tests/test_setops.py
U Zope3/trunk/src/zope/index/topic/filter.py
U Zope3/trunk/src/zope/index/topic/index.py
U Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py
-=-
Property changes on: Zope3/trunk/src
___________________________________________________________________
Name: svn:externals
- docutils svn://svn.zope.org/repos/main/docutils/tags/0.4.0
ZConfig svn://svn.zope.org/repos/main/ZConfig/trunk/ZConfig
BTrees -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/BTrees
persistent -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/persistent
ThreadedAsync -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/ThreadedAsync
transaction -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/transaction
ZEO -r 73079 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZEO
ZODB -r 73816 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZODB
twisted svn://svn.twistedmatrix.com/svn/Twisted/tags/releases/twisted-core-2.5.0/twisted
zdaemon -r 40792 svn://svn.zope.org/repos/main/zdaemon/trunk/src/zdaemon
+ docutils svn://svn.zope.org/repos/main/docutils/tags/0.4.0
ZConfig svn://svn.zope.org/repos/main/ZConfig/trunk/ZConfig
BTrees -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/BTrees
persistent -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/persistent
ThreadedAsync -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/ThreadedAsync
transaction -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/transaction
ZEO -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZEO
ZODB -r 74768 svn://svn.zope.org/repos/main/ZODB/trunk/src/ZODB
twisted svn://svn.twistedmatrix.com/svn/Twisted/tags/releases/twisted-core-2.5.0/twisted
zdaemon -r 40792 svn://svn.zope.org/repos/main/zdaemon/trunk/src/zdaemon
Modified: Zope3/trunk/src/zope/app/catalog/catalog.py
===================================================================
--- Zope3/trunk/src/zope/app/catalog/catalog.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/app/catalog/catalog.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
"""
__docformat__ = 'restructuredtext'
+import BTrees
+
import zope.index.interfaces
from zope.interface import implements
from zope.annotation.interfaces import IAttributeAnnotatable
@@ -28,8 +30,8 @@
from zope.app.intid.interfaces import IIntIds
from zope.traversing.interfaces import IPhysicallyLocatable
from zope.location import location
-from BTrees.IFBTree import weightedIntersection
+
class ResultSet:
"""Lazily accessed set of objects."""
@@ -54,6 +56,13 @@
zope.index.interfaces.IIndexSearch,
)
+ family = BTrees.family32
+
+ def __init__(self, family=None):
+ super(Catalog, self).__init__()
+ if family is not None:
+ self.family = family
+
def clear(self):
for index in self.values():
index.clear()
@@ -120,7 +129,7 @@
_, result = results.pop(0)
for _, r in results:
- _, result = weightedIntersection(result, r)
+ _, result = self.family.IFModule.weightedIntersection(result, r)
return result
Modified: Zope3/trunk/src/zope/app/intid/__init__.py
===================================================================
--- Zope3/trunk/src/zope/app/intid/__init__.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/app/intid/__init__.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -21,7 +21,8 @@
$Id$
"""
import random
-from BTrees import IOBTree, OIBTree
+import BTrees
+
from ZODB.interfaces import IConnection
from persistent import Persistent
@@ -54,10 +55,14 @@
_randrange = random.randrange
- def __init__(self):
- self.ids = queryUtility(IFactory, 'OIBTree', OIBTree.OIBTree)()
- self.refs = queryUtility(IFactory, 'IOBTree', IOBTree.IOBTree)()
+ family = BTrees.family32
+ def __init__(self, family=None):
+ if family is not None:
+ self.family = family
+ self.ids = self.family.OIModule.BTree()
+ self.refs = self.family.IOModule.BTree()
+
def __len__(self):
return len(self.ids)
@@ -102,7 +107,7 @@
"""
while True:
if self._v_nextid is None:
- self._v_nextid = self._randrange(0, 2**31)
+ self._v_nextid = self._randrange(0, self.family.maxint)
uid = self._v_nextid
self._v_nextid += 1
if uid not in self.refs:
Modified: Zope3/trunk/src/zope/app/intid/tests.py
===================================================================
--- Zope3/trunk/src/zope/app/intid/tests.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/app/intid/tests.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
"""
import unittest
+import BTrees
+
from persistent import Persistent
from persistent.interfaces import IPersistent
from ZODB.interfaces import IConnection
@@ -70,11 +72,13 @@
class TestIntIds(ReferenceSetupMixin, unittest.TestCase):
+ createIntIds = IntIds
+
def test_interface(self):
- verifyObject(IIntIds, IntIds())
+ verifyObject(IIntIds, self.createIntIds())
def test_non_keyreferences(self):
- u = IntIds()
+ u = self.createIntIds()
obj = object()
self.assert_(u.queryId(obj) is None)
@@ -82,7 +86,7 @@
self.assertRaises(KeyError, u.getId, obj)
def test(self):
- u = IntIds()
+ u = self.createIntIds()
obj = P()
obj._p_jar = ConnectionStub()
@@ -112,7 +116,7 @@
def test_btree_long(self):
# This is a somewhat arkward test, that *simulates* the border case
# behaviour of the _generateId method
- u = IntIds()
+ u = self.createIntIds()
u._randrange = lambda x,y:int(2**31-1)
# The chosen int is exactly the largest number possible that is
@@ -126,7 +130,7 @@
self.failUnless(2**31-1 in tuple(u.refs.keys()))
def test_len_items(self):
- u = IntIds()
+ u = self.createIntIds()
obj = P()
obj._p_jar = ConnectionStub()
@@ -164,7 +168,7 @@
self.assertEquals(u.items(), [])
def test_getenrateId(self):
- u = IntIds()
+ u = self.createIntIds()
self.assertEquals(u._v_nextid, None)
id1 = u._generateId()
self.assert_(u._v_nextid is not None)
@@ -244,9 +248,17 @@
self.assertEquals(events[0].original_event.object, parent_folder)
self.assertEquals(events[0].object, folder)
+
+class TestIntIds64(TestIntIds):
+
+ def createIntIds(self):
+ return IntIds(family=BTrees.family64)
+
+
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestIntIds))
+ suite.addTest(unittest.makeSuite(TestIntIds64))
suite.addTest(unittest.makeSuite(TestSubscribers))
return suite
Modified: Zope3/trunk/src/zope/index/field/index.py
===================================================================
--- Zope3/trunk/src/zope/index/field/index.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/field/index.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,9 +17,9 @@
"""
import persistent
-from BTrees.IOBTree import IOBTree
+import BTrees
+
from BTrees.OOBTree import OOBTree
-from BTrees.IFBTree import IFTreeSet, multiunion
from BTrees.Length import Length
import zope.interface
@@ -34,7 +34,11 @@
interfaces.IIndexSearch,
)
- def __init__(self):
+ family = BTrees.family32
+
+ def __init__(self, family=None):
+ if family is not None:
+ self.family = family
self.clear()
def clear(self):
@@ -42,7 +46,7 @@
# The forward index maps indexed values to a sequence of docids
self._fwd_index = OOBTree()
# The reverse index maps a docid to its index value
- self._rev_index = IOBTree()
+ self._rev_index = self.family.IOModule.BTree()
self._num_docs = Length(0)
def documentCount(self):
@@ -66,7 +70,7 @@
# Insert into forward index.
set = self._fwd_index.get(value)
if set is None:
- set = IFTreeSet()
+ set = self.family.IFModule.TreeSet()
self._fwd_index[value] = set
set.insert(docid)
@@ -102,4 +106,5 @@
def apply(self, query):
if len(query) != 2 or not isinstance(query, tuple):
raise TypeError("two-length tuple expected", query)
- return multiunion(self._fwd_index.values(*query))
+ return self.family.IFModule.multiunion(
+ self._fwd_index.values(*query))
Modified: Zope3/trunk/src/zope/index/keyword/index.py
===================================================================
--- Zope3/trunk/src/zope/index/keyword/index.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/keyword/index.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,9 +17,9 @@
"""
from persistent import Persistent
-from BTrees.IOBTree import IOBTree
+import BTrees
+
from BTrees.OOBTree import OOBTree, OOSet, difference
-from BTrees.IIBTree import IISet, union, intersection
from BTrees.Length import Length
from types import StringTypes
@@ -27,13 +27,17 @@
from zope.index.keyword.interfaces import IKeywordQuerying
from zope.interface import implements
+
class KeywordIndex(Persistent):
""" A case-insensitive keyword index """
+ family = BTrees.family32
normalize = True
implements(IInjection, IStatistics, IKeywordQuerying)
- def __init__(self):
+ def __init__(self, family=None):
+ if family is not None:
+ self.family = family
self.clear()
def clear(self):
@@ -45,7 +49,7 @@
# The reverse index maps a docid to its keywords
# TODO: Using a vocabulary might be the better choice to store
# keywords since it would allow use to use integers instead of strings
- self._rev_index = IOBTree()
+ self._rev_index = self.family.IOModule.BTree()
self._num_docs = Length(0)
def documentCount(self):
@@ -115,7 +119,7 @@
has_key = idx.has_key
for word in words:
if not has_key(word):
- idx[word] = IISet()
+ idx[word] = self.family.IIModule.Set()
idx[word].insert(docid)
def _insert_reverse(self, docid, words):
@@ -132,17 +136,19 @@
if self.normalize:
query = [w.lower() for w in query]
- f = {'and' : intersection, 'or' : union}[operator]
+ f = {'and' : self.family.IIModule.intersection,
+ 'or' : self.family.IIModule.union,
+ }[operator]
rs = None
for word in query:
- docids = self._fwd_index.get(word, IISet())
+ docids = self._fwd_index.get(word, self.family.IIModule.Set())
rs = f(rs, docids)
if rs:
return rs
else:
- return IISet()
+ return self.family.IIModule.Set()
class CaseSensitiveKeywordIndex(KeywordIndex):
""" A case-sensitive keyword index """
Modified: Zope3/trunk/src/zope/index/keyword/tests.py
===================================================================
--- Zope3/trunk/src/zope/index/keyword/tests.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/keyword/tests.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -14,7 +14,8 @@
from unittest import TestCase, TestSuite, main, makeSuite
-from BTrees.IIBTree import IISet
+import BTrees
+
from zope.index.keyword.index import KeywordIndex
from zope.index.interfaces import IInjection, IStatistics
from zope.index.keyword.interfaces import IKeywordQuerying
@@ -22,6 +23,8 @@
class KeywordIndexTest(TestCase):
+ from BTrees.IIBTree import IISet
+
def setUp(self):
self.index = KeywordIndex()
@@ -82,13 +85,13 @@
self.index.index_doc(1, ('foo', 'bar', 'doom'))
self.index.index_doc(1, ('bar', 'blabla'))
self.assertEqual(self.index.documentCount(), 3)
- self._search('quick', IISet())
- self._search('foo', IISet())
- self._search('bar', IISet([1]))
- self._search(['doom'], IISet())
- self._search(['blabla'], IISet([1]))
- self._search_and(('bar', 'blabla'), IISet([1]))
- self._search(['cmf'], IISet([5]))
+ self._search('quick', self.IISet())
+ self._search('foo', self.IISet())
+ self._search('bar', self.IISet([1]))
+ self._search(['doom'], self.IISet())
+ self._search(['blabla'], self.IISet([1]))
+ self._search_and(('bar', 'blabla'), self.IISet([1]))
+ self._search(['cmf'], self.IISet([5]))
def test_hasdoc(self):
self._populate_index()
@@ -101,31 +104,43 @@
def test_simplesearch(self):
self._populate_index()
- self._search([''], IISet())
- self._search(['cmf'], IISet([1, 5]))
- self._search(['zope'], IISet([1, 3]))
- self._search(['zope3'], IISet([1]))
- self._search(['foo'], IISet())
+ self._search([''], self.IISet())
+ self._search(['cmf'], self.IISet([1, 5]))
+ self._search(['zope'], self.IISet([1, 3]))
+ self._search(['zope3'], self.IISet([1]))
+ self._search(['foo'], self.IISet())
def test_search_and(self):
self._populate_index()
- self._search_and(('cmf', 'zope3'), IISet([1]))
- self._search_and(('cmf', 'zope'), IISet([1]))
- self._search_and(('cmf', 'zope4'), IISet())
- self._search_and(('zope', 'ZOPE'), IISet([1, 3]))
+ self._search_and(('cmf', 'zope3'), self.IISet([1]))
+ self._search_and(('cmf', 'zope'), self.IISet([1]))
+ self._search_and(('cmf', 'zope4'), self.IISet())
+ self._search_and(('zope', 'ZOPE'), self.IISet([1, 3]))
def test_search_or(self):
self._populate_index()
- self._search_or(('cmf', 'zope3'), IISet([1, 5]))
- self._search_or(('cmf', 'zope'), IISet([1, 3, 5]))
- self._search_or(('cmf', 'zope4'), IISet([1, 5]))
- self._search_or(('zope', 'ZOPE'), IISet([1,3]))
+ self._search_or(('cmf', 'zope3'), self.IISet([1, 5]))
+ self._search_or(('cmf', 'zope'), self.IISet([1, 3, 5]))
+ self._search_or(('cmf', 'zope4'), self.IISet([1, 5]))
+ self._search_or(('zope', 'ZOPE'), self.IISet([1,3]))
def test_index_input(self):
- self.assertRaises(TypeError, self.index.index_doc, 1, "non-sequence-string")
+ self.assertRaises(
+ TypeError, self.index.index_doc, 1, "non-sequence-string")
+
+class KeywordIndexTest64(KeywordIndexTest):
+
+ from BTrees.LLBTree import LLSet as IISet
+
+ def setUp(self):
+ self.index = KeywordIndex(family=BTrees.family64)
+
+
def test_suite():
- return TestSuite((makeSuite(KeywordIndexTest), ))
+ return TestSuite((makeSuite(KeywordIndexTest),
+ makeSuite(KeywordIndexTest64),
+ ))
if __name__=='__main__':
main(defaultTest='test_suite')
Modified: Zope3/trunk/src/zope/index/text/baseindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/baseindex.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/baseindex.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -20,27 +20,27 @@
from persistent import Persistent
from zope.interface import implements
+import BTrees
+
+from BTrees import Length
from BTrees.IOBTree import IOBTree
-from BTrees.IFBTree import IFBTree, IFTreeSet
-from BTrees.IFBTree import intersection, difference
-from BTrees import Length
from zope.index.interfaces import IInjection, IStatistics
from zope.index.text.interfaces import IExtendedQuerying
from zope.index.text import widcode
from zope.index.text.setops import mass_weightedIntersection, \
- mass_weightedUnion
+ mass_weightedUnion
-def unique(L):
- """Return a list of the unique elements in L."""
- return IFTreeSet(L).keys()
-
class BaseIndex(Persistent):
implements(IInjection, IStatistics, IExtendedQuerying)
- def __init__(self, lexicon):
+ family = BTrees.family32
+
+ def __init__(self, lexicon, family=None):
+ if family is not None:
+ self.family = family
self._lexicon = lexicon
# wid -> {docid -> weight}; t -> D -> w(D, t)
@@ -56,17 +56,19 @@
# may introduce a lexicon word we've never seen.
# A word is in-vocabulary for this index if and only if
# _wordinfo.has_key(wid). Note that wid 0 must not be a key.
+ # This does not use the BTree family since wids are always "I"
+ # flavor trees.
self._wordinfo = IOBTree()
# docid -> weight
# Different indexers have different notions of doc weight, but we
# expect each indexer to use ._docweight to map docids to its
# notion of what a doc weight is.
- self._docweight = IFBTree()
+ self._docweight = self.family.IFModule.BTree()
# docid -> WidCode'd list of wids
# Used for un-indexing, and for phrase search.
- self._docwords = IOBTree()
+ self._docwords = self.family.IOModule.BTree()
# Use a BTree length for efficient length computation w/o conflicts
self.wordCount = Length.Length()
@@ -116,12 +118,13 @@
old_wid2w, old_docw = self._get_frequencies(old_wids)
new_wid2w, new_docw = self._get_frequencies(new_wids)
- old_widset = IFTreeSet(old_wid2w.keys())
- new_widset = IFTreeSet(new_wid2w.keys())
+ old_widset = self.family.IFModule.TreeSet(old_wid2w.keys())
+ new_widset = self.family.IFModule.TreeSet(new_wid2w.keys())
- in_both_widset = intersection(old_widset, new_widset)
- only_old_widset = difference(old_widset, in_both_widset)
- only_new_widset = difference(new_widset, in_both_widset)
+ IFModule = self.family.IFModule
+ in_both_widset = IFModule.intersection(old_widset, new_widset)
+ only_old_widset = IFModule.difference(old_widset, in_both_widset)
+ only_new_widset = IFModule.difference(new_widset, in_both_widset)
del old_widset, new_widset
for wid in only_old_widset.keys():
@@ -161,7 +164,7 @@
def unindex_doc(self, docid):
if docid not in self._docwords:
return
- for wid in unique(self.get_words(docid)):
+ for wid in self.family.IFModule.TreeSet(self.get_words(docid)).keys():
self._del_wordinfo(wid, docid)
del self._docwords[docid]
del self._docweight[docid]
@@ -171,25 +174,25 @@
if not wids:
return None # All docs match
wids = self._remove_oov_wids(wids)
- return mass_weightedUnion(self._search_wids(wids))
+ return mass_weightedUnion(self._search_wids(wids), self.family)
def search_glob(self, pattern):
wids = self._lexicon.globToWordIds(pattern)
wids = self._remove_oov_wids(wids)
- return mass_weightedUnion(self._search_wids(wids))
+ return mass_weightedUnion(self._search_wids(wids), self.family)
def search_phrase(self, phrase):
wids = self._lexicon.termToWordIds(phrase)
cleaned_wids = self._remove_oov_wids(wids)
if len(wids) != len(cleaned_wids):
# At least one wid was OOV: can't possibly find it.
- return IFBTree()
+ return self.family.IFModule.BTree()
scores = self._search_wids(wids)
- hits = mass_weightedIntersection(scores)
+ hits = mass_weightedIntersection(scores, self.family)
if not hits:
return hits
code = widcode.encode(wids)
- result = IFBTree()
+ result = self.family.IFModule.BTree()
for docid, weight in hits.items():
docwords = self._docwords[docid]
if docwords.find(code) >= 0:
@@ -254,7 +257,7 @@
# len(IFBTree).
if (isinstance(doc2score, type({})) and
len(doc2score) == self.DICT_CUTOFF):
- doc2score = IFBTree(doc2score)
+ doc2score = self.family.IFModule.BTree(doc2score)
doc2score[docid] = f
self._wordinfo[wid] = doc2score # not redundant: Persistency!
@@ -277,7 +280,7 @@
new_word_count += 1
elif (isinstance(doc2score, dicttype) and
len(doc2score) == self.DICT_CUTOFF):
- doc2score = IFBTree(doc2score)
+ doc2score = self.family.IFModule.BTree(doc2score)
doc2score[docid] = weight
self._wordinfo[wid] = doc2score # not redundant: Persistency!
self.wordCount.change(new_word_count)
Modified: Zope3/trunk/src/zope/index/text/cosineindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/cosineindex.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/cosineindex.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,14 +17,13 @@
"""
import math
-from BTrees.IFBTree import IFBucket
-
from zope.index.text.baseindex import BaseIndex, inverse_doc_frequency
+
class CosineIndex(BaseIndex):
- def __init__(self, lexicon):
- BaseIndex.__init__(self, lexicon)
+ def __init__(self, lexicon, family=None):
+ BaseIndex.__init__(self, lexicon, family=family)
# ._wordinfo for cosine is wid -> {docid -> weight};
# t -> D -> w(d, t)/W(d)
@@ -74,7 +73,7 @@
idf = inverse_doc_frequency(len(d2w), N) # an unscaled float
#print "idf = %.3f" % idf
if isinstance(d2w, DictType):
- d2w = IFBucket(d2w)
+ d2w = self.family.IFModule.Bucket(d2w)
L.append((d2w, idf))
return L
Modified: Zope3/trunk/src/zope/index/text/okapiindex.py
===================================================================
--- Zope3/trunk/src/zope/index/text/okapiindex.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/okapiindex.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -191,8 +191,6 @@
$Id$
"""
-from BTrees.IFBTree import IFBucket
-
from zope.index.text.baseindex import BaseIndex
from zope.index.text.baseindex import inverse_doc_frequency
@@ -204,8 +202,8 @@
assert K1 >= 0.0
assert 0.0 <= B <= 1.0
- def __init__(self, lexicon):
- BaseIndex.__init__(self, lexicon)
+ def __init__(self, lexicon, family=None):
+ BaseIndex.__init__(self, lexicon, family=family)
# ._wordinfo for Okapi is
# wid -> {docid -> frequency}; t -> D -> f(D, t)
@@ -267,7 +265,7 @@
for t in wids:
d2f = self._wordinfo[t] # map {docid -> f(docid, t)}
idf = inverse_doc_frequency(len(d2f), N) # an unscaled float
- result = IFBucket()
+ result = self.family.IFModule.Bucket()
for docid, f in d2f.items():
lenweight = B_from1 + B * docid2len[docid] / meandoclen
tf = f * K1_plus1 / (f + K1 * lenweight)
@@ -311,7 +309,7 @@
for t in wids:
d2f = self._wordinfo[t] # map {docid -> f(docid, t)}
idf = inverse_doc_frequency(len(d2f), N) # an unscaled float
- result = IFBucket()
+ result = self.family.IFModule.Bucket()
score(result, d2f.items(), docid2len, idf, meandoclen)
L.append((result, 1))
return L
Modified: Zope3/trunk/src/zope/index/text/parsetree.py
===================================================================
--- Zope3/trunk/src/zope/index/text/parsetree.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/parsetree.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -15,8 +15,6 @@
$Id$
"""
-from BTrees.IFBTree import difference
-
from zope.index.text.interfaces import IQueryParseTree
from zope.index.text.setops import mass_weightedIntersection
from zope.index.text.setops import mass_weightedUnion
@@ -88,10 +86,10 @@
# included.
if r is not None:
L.append((r, 1))
- set = mass_weightedIntersection(L)
+ set = mass_weightedIntersection(L, index.family)
if Nots:
- notset = mass_weightedUnion(Nots)
- set = difference(set, notset)
+ notset = mass_weightedUnion(Nots, index.family)
+ set = index.family.IFModule.difference(set, notset)
return set
class OrNode(ParseTreeNode):
@@ -108,7 +106,7 @@
# to act like plain real_word).
if r is not None:
weighted.append((r, 1))
- return mass_weightedUnion(weighted)
+ return mass_weightedUnion(weighted, index.family)
class AtomNode(ParseTreeNode):
Modified: Zope3/trunk/src/zope/index/text/setops.py
===================================================================
--- Zope3/trunk/src/zope/index/text/setops.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/setops.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -15,29 +15,30 @@
$Id$
"""
-from BTrees.IFBTree import IFBucket, weightedIntersection, weightedUnion
+import BTrees
+
from zope.index.nbest import NBest
-def mass_weightedIntersection(L):
+def mass_weightedIntersection(L, family=BTrees.family32):
"A list of (mapping, weight) pairs -> their weightedIntersection IFBucket."
L = [(x, wx) for (x, wx) in L if x is not None]
if len(L) < 2:
- return _trivial(L)
+ return _trivial(L, family)
# Intersect with smallest first. We expect the input maps to be
# IFBuckets, so it doesn't hurt to get their lengths repeatedly
# (len(Bucket) is fast; len(BTree) is slow).
L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
(x, wx), (y, wy) = L[:2]
- dummy, result = weightedIntersection(x, y, wx, wy)
+ dummy, result = family.IFModule.weightedIntersection(x, y, wx, wy)
for x, wx in L[2:]:
- dummy, result = weightedIntersection(result, x, 1, wx)
+ dummy, result = family.IFModule.weightedIntersection(result, x, 1, wx)
return result
-def mass_weightedUnion(L):
+def mass_weightedUnion(L, family=BTrees.family32):
"A list of (mapping, weight) pairs -> their weightedUnion IFBucket."
if len(L) < 2:
- return _trivial(L)
+ return _trivial(L, family)
# Balance unions as closely as possible, smallest to largest.
merge = NBest(len(L))
for x, weight in L:
@@ -46,18 +47,19 @@
# Merge the two smallest so far, and add back to the queue.
(x, wx), dummy = merge.pop_smallest()
(y, wy), dummy = merge.pop_smallest()
- dummy, z = weightedUnion(x, y, wx, wy)
+ dummy, z = family.IFModule.weightedUnion(x, y, wx, wy)
merge.add((z, 1), len(z))
(result, weight), dummy = merge.pop_smallest()
return result
-def _trivial(L):
+def _trivial(L, family):
# L is empty or has only one (mapping, weight) pair. If there is a
# pair, we may still need to multiply the mapping by its weight.
assert len(L) <= 1
if len(L) == 0:
- return IFBucket()
+ return family.IFModule.Bucket()
[(result, weight)] = L
if weight != 1:
- dummy, result = weightedUnion(IFBucket(), result, 0, weight)
+ dummy, result = family.IFModule.weightedUnion(
+ family.IFModule.Bucket(), result, 0, weight)
return result
Modified: Zope3/trunk/src/zope/index/text/tests/test_index.py
===================================================================
--- Zope3/trunk/src/zope/index/text/tests/test_index.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/tests/test_index.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
"""
from unittest import TestCase, TestSuite, main, makeSuite
+import BTrees
+
from zope.index.text.lexicon import Lexicon, Splitter
from zope.index.text.cosineindex import CosineIndex
from zope.index.text.okapiindex import OkapiIndex
@@ -169,6 +171,18 @@
class OkapiIndexTest(IndexTest):
IndexFactory = OkapiIndex
+class CosineIndexTest(IndexTest):
+
+ @staticmethod
+ def IndexFactory(*args, **kw):
+ return CosineIndex(family=BTrees.family64, *args, **kw)
+
+class OkapiIndexTest(IndexTest):
+
+ @staticmethod
+ def IndexFactory(*args, **kw):
+ return OkapiIndex(family=BTrees.family64, *args, **kw)
+
def test_suite():
return TestSuite((makeSuite(CosineIndexTest),
makeSuite(OkapiIndexTest),
Modified: Zope3/trunk/src/zope/index/text/tests/test_queryengine.py
===================================================================
--- Zope3/trunk/src/zope/index/text/tests/test_queryengine.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/tests/test_queryengine.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,7 +17,7 @@
"""
import unittest
-from BTrees.IFBTree import IFBucket
+import BTrees
from zope.index.text.queryparser import QueryParser
from zope.index.text.parsetree import QueryError
@@ -25,8 +25,10 @@
class FauxIndex(object):
+ family = BTrees.family32
+
def search(self, term):
- b = IFBucket()
+ b = self.family.IFModule.Bucket()
if term == "foo":
b[1] = b[3] = 1
elif term == "bar":
Modified: Zope3/trunk/src/zope/index/text/tests/test_setops.py
===================================================================
--- Zope3/trunk/src/zope/index/text/tests/test_setops.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/text/tests/test_setops.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,7 +17,10 @@
"""
from unittest import TestCase, main, makeSuite
+import BTrees
+
from BTrees.IFBTree import IFBTree, IFBucket
+from BTrees.LFBTree import LFBucket
from zope.index.text.setops import mass_weightedIntersection
from zope.index.text.setops import mass_weightedUnion
@@ -28,6 +31,23 @@
self.assertEqual(len(mass_weightedIntersection([])), 0)
self.assertEqual(len(mass_weightedUnion([])), 0)
+ def testEmptyListsHonorFamily(self):
+ # family32
+ t = mass_weightedIntersection([], BTrees.family32)
+ self.assertEqual(len(t), 0)
+ self.assertEqual(t.__class__, IFBucket)
+ t = mass_weightedUnion([], BTrees.family32)
+ self.assertEqual(len(t), 0)
+ self.assertEqual(t.__class__, IFBucket)
+
+ # family64
+ t = mass_weightedIntersection([], BTrees.family64)
+ self.assertEqual(len(t), 0)
+ self.assertEqual(t.__class__, LFBucket)
+ t = mass_weightedUnion([], BTrees.family64)
+ self.assertEqual(len(t), 0)
+ self.assertEqual(t.__class__, LFBucket)
+
def testIdentity(self):
t = IFBTree([(1, 2)])
b = IFBucket([(1, 2)])
Modified: Zope3/trunk/src/zope/index/topic/filter.py
===================================================================
--- Zope3/trunk/src/zope/index/topic/filter.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/topic/filter.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -15,7 +15,9 @@
$Id$
"""
-from BTrees.IIBTree import IISet
+
+import BTrees
+
from zope.index.topic.interfaces import ITopicFilteredSet
from zope.interface import implements
@@ -29,13 +31,17 @@
implements(ITopicFilteredSet)
- def __init__(self, id, expr):
+ family = BTrees.family32
+
+ def __init__(self, id, expr, family=None):
+ if family is not None:
+ self.family = family
self.id = id
self.expr = expr
self.clear()
def clear(self):
- self._ids = IISet()
+ self._ids = self.family.IIModule.Set()
def index_doc(self, docid, context):
raise NotImplementedError
@@ -66,10 +72,7 @@
""" a topic filtered set to check a context against a Python expression """
def index_doc(self, docid, context):
-
try:
if eval(self.expr): self._ids.insert(docid)
except:
pass # ignore errors
-
-
Modified: Zope3/trunk/src/zope/index/topic/index.py
===================================================================
--- Zope3/trunk/src/zope/index/topic/index.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/topic/index.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,27 +17,32 @@
"""
from persistent import Persistent
+import BTrees
+
from BTrees.OOBTree import OOBTree
-from BTrees.IIBTree import IISet, union, intersection
-from types import ListType, TupleType, StringTypes
from zope.interface import implements
from zope.index.interfaces import IInjection
from zope.index.topic.interfaces import ITopicQuerying
+
class TopicIndex(Persistent):
implements(IInjection, ITopicQuerying)
- def __init__(self):
+ family = BTrees.family32
+
+ def __init__(self, family=None):
+ if family is not None:
+ self.family = family
self.clear()
def clear(self):
# mapping filter id -> filter
self._filters = OOBTree()
- def addFilter(self, f ):
+ def addFilter(self, f):
""" Add filter 'f' with ID 'id' """
self._filters[f.getId()] = f
@@ -58,12 +63,13 @@
f.unindex_doc(docid)
def search(self, query, operator='and'):
+ if isinstance(query, basestring): query = [query]
+ if not isinstance(query, (tuple, list)):
+ raise TypeError(
+ 'query argument must be a list/tuple of filter ids')
- if isinstance(query, StringTypes): query = [query]
- if not isinstance(query, (TupleType, ListType)):
- raise TypeError('query argument must be a list/tuple of filter ids')
-
- f = {'and' : intersection, 'or' : union}[operator]
+ IIModule = self.family.IIModule
+ f = {'and': IIModule.intersection, 'or': IIModule.union}[operator]
rs = None
for id in self._filters.keys():
@@ -71,6 +77,5 @@
docids = self._filters[id].getIds()
rs = f(rs, docids)
- if rs: return rs
- else: return IISet()
-
+ if rs: return rs
+ else: return self.family.IIModule.Set()
Modified: Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py
===================================================================
--- Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py 2007-04-25 23:02:19 UTC (rev 74769)
+++ Zope3/trunk/src/zope/index/topic/tests/test_topicindex.py 2007-04-25 23:09:34 UTC (rev 74770)
@@ -17,6 +17,8 @@
"""
from unittest import TestCase, TestSuite, main, makeSuite
+import BTrees
+
from zope.index.topic.index import TopicIndex
from zope.index.topic.filter import PythonFilteredSet
from zope.interface.verify import verifyClass
@@ -30,14 +32,19 @@
class TopicIndexTest(TestCase):
+ family = BTrees.family32
+
def setUp(self):
- self.index = TopicIndex()
- self.index.addFilter(PythonFilteredSet('doc1',
- "context.meta_type == 'doc1'"))
- self.index.addFilter(PythonFilteredSet('doc2',
- "context.meta_type == 'doc2'"))
- self.index.addFilter(PythonFilteredSet('doc3',
- "context.meta_type == 'doc3'"))
+ self.index = TopicIndex(family=self.family)
+ self.index.addFilter(
+ PythonFilteredSet('doc1', "context.meta_type == 'doc1'",
+ self.family))
+ self.index.addFilter(
+ PythonFilteredSet('doc2', "context.meta_type == 'doc2'",
+ self.family))
+ self.index.addFilter(
+ PythonFilteredSet('doc3', "context.meta_type == 'doc3'",
+ self.family))
self.index.index_doc(0 , O('doc0'))
self.index.index_doc(1 , O('doc1'))
@@ -90,8 +97,15 @@
self._search_and(['doc1','doc2'], [])
+class TopicIndexTest64(TopicIndexTest):
+
+ family = BTrees.family64
+
+
def test_suite():
- return TestSuite((makeSuite(TopicIndexTest), ))
+ return TestSuite((makeSuite(TopicIndexTest),
+ makeSuite(TopicIndexTest64),
+ ))
if __name__=='__main__':
main(defaultTest='test_suite')
More information about the Zope3-Checkins mailing list