[Checkins] SVN: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py Use more efficient data structures for oidsets and refs db.
Jim Fulton
jim at zope.com
Sat May 30 18:20:51 EDT 2009
Log message for revision 100563:
Use more efficient data structures for oidsets and refs db.
Changed:
U zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
-=-
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py 2009-05-30 16:14:09 UTC (rev 100562)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py 2009-05-30 22:20:50 UTC (rev 100563)
@@ -12,9 +12,10 @@
#
##############################################################################
-from ZODB.utils import u64, z64, p64
-import BTrees.IIBTree
+from ZODB.utils import z64
+import BTrees.fsBTree
import BTrees.OOBTree
+import BTrees.LLBTree
import base64
import cPickle
import cStringIO
@@ -23,6 +24,7 @@
import optparse
import os
import shutil
+import struct
import sys
import tempfile
import time
@@ -32,6 +34,14 @@
import ZODB.FileStorage
import ZODB.TimeStamp
+def p64(v):
+ """Pack an integer or long into a 8-byte string"""
+ return struct.pack(">q", v)
+
+def u64(v):
+ """Unpack an 8-byte string into a 64-bit signed long integer."""
+ return struct.unpack(">q", v)[0]
+
logger = logging.getLogger(__name__)
def gc(conf, days=1, conf2=None, batch_size=10000):
@@ -209,24 +219,24 @@
self[name] = {}
def insert(self, name, oid):
- ioid1, ioid2 = divmod(u64(oid), 2147483648L)
- ioid2 = int(ioid2)
- data = self[name].get(ioid1)
+ prefix = oid[:6]
+ suffix = oid[6:]
+ data = self[name].get(prefix)
if data is None:
- data = self[name][ioid1] = BTrees.IIBTree.TreeSet()
- elif ioid2 in data:
+ data = self[name][prefix] = BTrees.fsBTree.TreeSet()
+ elif suffix in data:
return False
- data.insert(ioid2)
+ data.insert(suffix)
return True
def remove(self, name, oid):
- ioid1, ioid2 = divmod(u64(oid), 2147483648L)
- ioid2 = int(ioid2)
- data = self[name].get(ioid1)
- if data and ioid2 in data:
- data.remove(ioid2)
+ prefix = oid[:6]
+ suffix = oid[6:]
+ data = self[name].get(prefix)
+ if data and suffix in data:
+ data.remove(suffix)
if not data:
- del self[name][ioid1]
+ del self[name][prefix]
def __nonzero__(self):
for v in self.itervalues():
@@ -238,20 +248,19 @@
for name, data in self.iteritems():
if data:
break
- ioid1, s = data.iteritems().next()
- ioid2 = s.maxKey()
- s.remove(ioid2)
+ prefix, s = data.iteritems().next()
+ suffix = s.maxKey()
+ s.remove(suffix)
if not s:
- del data[ioid1]
- return name, p64(ioid1*2147483648L+ioid2)
+ del data[prefix]
+ return name, prefix+suffix
def has(self, name, oid):
- ioid1, ioid2 = divmod(u64(oid), 2147483648L)
try:
- data = self[name].get(ioid1)
+ data = self[name][oid[:6]]
except KeyError:
return False
- return bool(data and (int(ioid2) in data))
+ return oid[6:] in data
def iterator(self, name=None):
if name is None:
@@ -259,10 +268,9 @@
for oid in self.iterator(name):
yield name, oid
else:
- for ioid1, data in self[name].iteritems():
- ioid1 *= 2147483648L
- for ioid2 in data:
- yield p64(ioid1+ioid2)
+ for prefix, data in self[name].iteritems():
+ for suffix in data:
+ yield prefix+suffix
def gc_command(args=None):
if args is None:
@@ -311,12 +319,14 @@
shutil.rmtree(tempdir)
def _insert_ref(references, rname, roid, name, oid):
+ oid = u64(oid)
+ roid = u64(roid)
by_oid = references.get(name)
if not by_oid:
- by_oid = references[name] = BTrees.OOBTree.BTree()
+ by_oid = references[name] = BTrees.LOBTree.BTree()
by_rname = by_oid.get(oid)
if not by_rname:
- references = BTrees.OOBTree.TreeSet()
+ references = BTrees.LLBTree.TreeSet()
if rname == name:
by_oid[oid] = references
else:
@@ -324,9 +334,11 @@
elif isinstance(by_rname, dict):
references = by_rname.get(rname)
if not references:
- references = by_rname[rname] = BTrees.OOBTree.TreeSet()
+ references = by_rname[rname] = BTrees.LLBTree.TreeSet()
+ # trigger change since dict is not persistent:
+ by_oid[oid] = by_rname
elif rname != name:
- references = BTrees.OOBTree.TreeSet()
+ references = BTrees.LLBTree.TreeSet()
by_oid[oid] = {name: by_rname, rname: references}
else:
references = by_rname
@@ -335,13 +347,14 @@
def _get_referer(references, name, oid):
by_oid = references.get(name)
if by_oid:
+ oid = u64(oid)
by_rname = by_oid.get(oid)
if by_rname:
if isinstance(by_rname, dict):
rname = iter(by_rname).next()
- return rname, iter(by_rname[rname]).next()
+ return rname, p64(iter(by_rname[rname]).next())
else:
- return name, iter(by_rname).next()
+ return name, p64(iter(by_rname).next())
def check_(config, references):
db = ZODB.config.databaseFromFile(open(config))
More information about the Checkins
mailing list