[Checkins] SVN: zc.zodbdgc/branches/dev/src/zc/zodbdgc/ Refactored the ref method to try to use less memory.

Jim Fulton jim at zope.com
Fri May 29 18:27:39 EDT 2009


Log message for revision 100545:
  Refactored the ref method to try to use less memory.
  
  As a useful side effect, added an option to get a references db that
  lets you get the references to an object.
  

Changed:
  U   zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test
  U   zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py

-=-
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test	2009-05-29 22:21:56 UTC (rev 100544)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test	2009-05-29 22:27:38 UTC (rev 100545)
@@ -326,12 +326,14 @@
 
     >>> _ = [d.close() for d in db.databases.values()]
 
-    >>> try: zc.zodbdgc.check_command([])
-    ... except SystemExit: pass
-    usage: test [options] config
-    <BLANKLINE>
-    options:
-      -h, --help  show this help message and exit
+>>> try: zc.zodbdgc.check_command([])
+... except SystemExit: pass
+usage: test [options] config
+<BLANKLINE>
+options:
+  -h, --help            show this help message and exit
+  -r REFDB, --references-filestorage=REFDB
+                        The name of a file-storage to save reference info in.
 
     >>> zc.zodbdgc.check_command(['config'])
 
@@ -409,7 +411,7 @@
     !!! db1 2216 db1 0
     POSKeyError: 'No blob file'
     !!! db2 2 db1 1
-    KeyError: 'db2'
+    bad db
 
 If a database is configured to not allow cross references, we complain
 about cross references that we see:

Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py	2009-05-29 22:21:56 UTC (rev 100544)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py	2009-05-29 22:27:38 UTC (rev 100545)
@@ -14,6 +14,7 @@
 
 from ZODB.utils import u64, z64, p64
 import BTrees.IIBTree
+import BTrees.OOBTree
 import base64
 import cPickle
 import cStringIO
@@ -28,6 +29,7 @@
 import transaction
 import ZODB.blob
 import ZODB.config
+import ZODB.FileStorage
 import ZODB.TimeStamp
 
 logger = logging.getLogger(__name__)
@@ -226,6 +228,23 @@
             if not data:
                 del self[name][ioid1]
 
+    def __nonzero__(self):
+        for v in self.itervalues():
+            if v:
+                return True
+        return False
+
+    def pop(self):
+        for name, data in self.iteritems():
+            if data:
+               break
+        ioid1, s = data.iteritems().next()
+        ioid2 = s.maxKey()
+        s.remove(ioid2)
+        if not s:
+            del data[ioid1]
+        return name, p64(ioid1*2147483648L+ioid2)
+
     def has(self, name, oid):
         ioid1, ioid2 = divmod(u64(oid), 2147483648L)
         try:
@@ -273,13 +292,68 @@
 
     return gc(args[0], options.days, *args[1:])
 
-def check(config):
+
+
+def check(config, refdb=None):
+    tempdir = None
+    if refdb is None:
+        tempdir = tempfile.mkdtemp('check_refs')
+        refdb = os.path.join(tempdir, 'refs.fs')
+    fs = ZODB.FileStorage.FileStorage(refdb, create=True)
+    conn = ZODB.connection(fs)
+    references = conn.root.references = {}
+    try:
+        check_(config, references)
+    finally:
+        transaction.commit()
+        conn.close()
+        if tempdir:
+            shutil.rmtree(tempdir)
+
+def _insert_ref(references, rname, roid, name, oid):
+    by_oid = references.get(name)
+    if not by_oid:
+        by_oid = references[name] = BTrees.OOBTree.BTree()
+    by_rname = by_oid.get(oid)
+    if not by_rname:
+        references = BTrees.OOBTree.TreeSet()
+        if rname == name:
+            by_oid[oid] = references
+        else:
+            by_oid[oid] = {rname: references}
+    elif isinstance(by_rname, dict):
+        references = by_rname.get(rname)
+        if not references:
+            references = by_rname[rname] = BTrees.OOBTree.TreeSet()
+    elif rname != name:
+        references = BTrees.OOBTree.TreeSet()
+        by_oid[oid] = {name: by_rname, rname: references}
+    else:
+        references = by_rname
+    references.insert(roid)
+
+def _get_referer(references, name, oid):
+    by_oid = references.get(name)
+    if by_oid:
+        by_rname = by_oid.get(oid)
+        if by_rname:
+            if isinstance(by_rname, dict):
+                rname = iter(by_rname).next()
+                return rname, iter(by_rname[rname]).next()
+            else:
+                return name, iter(by_rname).next()
+
+def check_(config, references):
     db = ZODB.config.databaseFromFile(open(config))
     databases = db.databases
     storages = dict((name, db.storage) for (name, db) in databases.iteritems())
-    roots = set((name, z64) for name in databases)
-    referers = {}
+
+    roots = oidset(databases)
+    for name in databases:
+        roots.insert(name, z64)
     seen = oidset(databases)
+    nreferences = 0
+
     while roots:
         name, oid = roots.pop()
 
@@ -296,7 +370,8 @@
                 storages[name].loadBlob(oid, tid)
         except:
             print '!!!', name, u64(oid),
-            referer = referers.pop((name, oid), None)
+
+            referer = _get_referer(references, name, oid)
             if referer:
                 rname, roid = referer
                 print rname, u64(roid)
@@ -306,17 +381,26 @@
             print "%s: %s" % (t.__name__, v)
             continue
 
-        referers.pop((name, oid), None)
-
         for ref in getrefs(p, name):
             if (ref[0] != name) and not databases[name].xrefs:
                 print 'bad xref', ref[0], u64(ref[1]), name, u64(oid)
+
+            _insert_ref(references, name, oid, *ref)
+            nreferences += 1
+
+            if nreferences > 10000:
+                transaction.commit()
+                nreferences = 0
+
+            if ref[0] not in databases:
+                print '!!!', ref[0], u64(ref[1]), name, u64(oid)
+                print 'bad db'
+                continue
             if seen.has(*ref):
                 continue
-            if ref in roots:
+            if roots.has(*ref):
                 continue
-            roots.add(ref)
-            referers[ref] = name, oid
+            roots.insert(*ref)
 
     [d.close() for d in db.databases.values()]
 
@@ -326,10 +410,13 @@
         logging.basicConfig(level=logging.WARNING)
 
     parser = optparse.OptionParser("usage: %prog [options] config")
+    parser.add_option(
+        '-r', '--references-filestorage', dest='refdb',
+        help='The name of a file-storage to save reference info in.')
 
     options, args = parser.parse_args(args)
 
     if not args or len(args) > 1:
         parser.parse_args(['-h'])
 
-    check(args[0])
+    check(args[0], options.refdb)



More information about the Checkins mailing list