[Checkins] SVN: zc.zodbdgc/branches/dev/src/zc/zodbdgc/ Refactored the ref method to try to use less memory.
Jim Fulton
jim at zope.com
Fri May 29 18:27:39 EDT 2009
Log message for revision 100545:
Refactored the ref method to try to use less memory.
As a useful side effect, added an option to get a references db that
lets you get the references to an object.
Changed:
U zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test
U zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
-=-
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test 2009-05-29 22:21:56 UTC (rev 100544)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test 2009-05-29 22:27:38 UTC (rev 100545)
@@ -326,12 +326,14 @@
>>> _ = [d.close() for d in db.databases.values()]
- >>> try: zc.zodbdgc.check_command([])
- ... except SystemExit: pass
- usage: test [options] config
- <BLANKLINE>
- options:
- -h, --help show this help message and exit
+>>> try: zc.zodbdgc.check_command([])
+... except SystemExit: pass
+usage: test [options] config
+<BLANKLINE>
+options:
+ -h, --help show this help message and exit
+ -r REFDB, --references-filestorage=REFDB
+ The name of a file-storage to save reference info in.
>>> zc.zodbdgc.check_command(['config'])
@@ -409,7 +411,7 @@
!!! db1 2216 db1 0
POSKeyError: 'No blob file'
!!! db2 2 db1 1
- KeyError: 'db2'
+ bad db
If a database is configured to not allow cross references, we complain
about cross references that we see:
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py 2009-05-29 22:21:56 UTC (rev 100544)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py 2009-05-29 22:27:38 UTC (rev 100545)
@@ -14,6 +14,7 @@
from ZODB.utils import u64, z64, p64
import BTrees.IIBTree
+import BTrees.OOBTree
import base64
import cPickle
import cStringIO
@@ -28,6 +29,7 @@
import transaction
import ZODB.blob
import ZODB.config
+import ZODB.FileStorage
import ZODB.TimeStamp
logger = logging.getLogger(__name__)
@@ -226,6 +228,23 @@
if not data:
del self[name][ioid1]
+ def __nonzero__(self):
+ for v in self.itervalues():
+ if v:
+ return True
+ return False
+
+ def pop(self):
+ for name, data in self.iteritems():
+ if data:
+ break
+ ioid1, s = data.iteritems().next()
+ ioid2 = s.maxKey()
+ s.remove(ioid2)
+ if not s:
+ del data[ioid1]
+ return name, p64(ioid1*2147483648L+ioid2)
+
def has(self, name, oid):
ioid1, ioid2 = divmod(u64(oid), 2147483648L)
try:
@@ -273,13 +292,68 @@
return gc(args[0], options.days, *args[1:])
-def check(config):
+
+
+def check(config, refdb=None):
+ tempdir = None
+ if refdb is None:
+ tempdir = tempfile.mkdtemp('check_refs')
+ refdb = os.path.join(tempdir, 'refs.fs')
+ fs = ZODB.FileStorage.FileStorage(refdb, create=True)
+ conn = ZODB.connection(fs)
+ references = conn.root.references = {}
+ try:
+ check_(config, references)
+ finally:
+ transaction.commit()
+ conn.close()
+ if tempdir:
+ shutil.rmtree(tempdir)
+
+def _insert_ref(references, rname, roid, name, oid):
+ by_oid = references.get(name)
+ if not by_oid:
+ by_oid = references[name] = BTrees.OOBTree.BTree()
+ by_rname = by_oid.get(oid)
+ if not by_rname:
+ references = BTrees.OOBTree.TreeSet()
+ if rname == name:
+ by_oid[oid] = references
+ else:
+ by_oid[oid] = {rname: references}
+ elif isinstance(by_rname, dict):
+ references = by_rname.get(rname)
+ if not references:
+ references = by_rname[rname] = BTrees.OOBTree.TreeSet()
+ elif rname != name:
+ references = BTrees.OOBTree.TreeSet()
+ by_oid[oid] = {name: by_rname, rname: references}
+ else:
+ references = by_rname
+ references.insert(roid)
+
+def _get_referer(references, name, oid):
+ by_oid = references.get(name)
+ if by_oid:
+ by_rname = by_oid.get(oid)
+ if by_rname:
+ if isinstance(by_rname, dict):
+ rname = iter(by_rname).next()
+ return rname, iter(by_rname[rname]).next()
+ else:
+ return name, iter(by_rname).next()
+
+def check_(config, references):
db = ZODB.config.databaseFromFile(open(config))
databases = db.databases
storages = dict((name, db.storage) for (name, db) in databases.iteritems())
- roots = set((name, z64) for name in databases)
- referers = {}
+
+ roots = oidset(databases)
+ for name in databases:
+ roots.insert(name, z64)
seen = oidset(databases)
+ nreferences = 0
+
while roots:
name, oid = roots.pop()
@@ -296,7 +370,8 @@
storages[name].loadBlob(oid, tid)
except:
print '!!!', name, u64(oid),
- referer = referers.pop((name, oid), None)
+
+ referer = _get_referer(references, name, oid)
if referer:
rname, roid = referer
print rname, u64(roid)
@@ -306,17 +381,26 @@
print "%s: %s" % (t.__name__, v)
continue
- referers.pop((name, oid), None)
-
for ref in getrefs(p, name):
if (ref[0] != name) and not databases[name].xrefs:
print 'bad xref', ref[0], u64(ref[1]), name, u64(oid)
+
+ _insert_ref(references, name, oid, *ref)
+ nreferences += 1
+
+ if nreferences > 10000:
+ transaction.commit()
+ nreferences = 0
+
+ if ref[0] not in databases:
+ print '!!!', ref[0], u64(ref[1]), name, u64(oid)
+ print 'bad db'
+ continue
if seen.has(*ref):
continue
- if ref in roots:
+ if roots.has(*ref):
continue
- roots.add(ref)
- referers[ref] = name, oid
+ roots.insert(*ref)
[d.close() for d in db.databases.values()]
@@ -326,10 +410,13 @@
logging.basicConfig(level=logging.WARNING)
parser = optparse.OptionParser("usage: %prog [options] config")
+ parser.add_option(
+ '-r', '--references-filestorage', dest='refdb',
+ help='The name of a file-storage to save reference info in.')
options, args = parser.parse_args(args)
if not args or len(args) > 1:
parser.parse_args(['-h'])
- check(args[0])
+ check(args[0], options.refdb)
More information about the Checkins
mailing list