[Checkins] SVN: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py Don't use temporary files to keep track of bad references. Just use

Jim Fulton jim at zope.com
Sat May 30 18:54:41 EDT 2009


Log message for revision 100564:
  Don't use temporary files to keep track of bad references. Just use
  the database to get the refs again.  This requires beating on the
  database, but avoids a ton of temp files. :/
  
  When building the refs database, don't add refs that are already
  there.
  

Changed:
  U   zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py

-=-
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py	2009-05-30 22:20:50 UTC (rev 100563)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py	2009-05-30 22:54:40 UTC (rev 100564)
@@ -66,14 +66,13 @@
     bad = oidset(databases)
     both = good, bad
     deleted = oidset(databases)
-    baddir = tempfile.mkdtemp()
-    for name in storages:
-        os.mkdir(os.path.join(baddir, name))
 
     for name, storage in storages.iteritems():
         # Make sure we can get the roots
-        _ = storage.load(z64, '')
+        data, s = storage.load(z64, '')
         good.insert(name, z64)
+        for ref in getrefs(data, name):
+            good.insert(*ref)
 
         # All non-deleted new records are good
         for trans in storage.iterator(ptid):
@@ -109,28 +108,15 @@
                             if deleted.has(*ref):
                                 continue
                             if good.insert(*ref) and bad.has(*ref):
-                                bad_to_good(baddir, bad, good, *ref)
+                                bad_to_good(storages, bad, good, *ref)
                     else:
                         bad.insert(name, oid)
-                        refs = tuple(ref for ref in getrefs(data, name)
-                                     if not (good.has(*ref) or
-                                             deleted.has(*ref)
-                                             )
-                                     )
-                        if not refs:
-                            continue    # leaves are common
-                        f = open(bad_path(baddir, name, oid), 'ab')
-                        marshal.dump(refs, f)
-                        f.close()
                 else:
                     # deleted record
                     if good.has(name, oid):
                         good.remove(name, oid)
                     elif bad.has(name, oid):
                         bad.remove(name, oid)
-                        path = bad_path(baddir, name, oid)
-                        if os.path.exists(path):
-                            os.remove(path)
                     deleted.insert(name, oid)
 
     if conf2 is not None:
@@ -164,38 +150,25 @@
             t.abort()
         db.close()
 
-    shutil.rmtree(baddir)
-
     return bad
 
 def bad_path(baddir, name, oid):
     return os.path.join(baddir, name, base64.urlsafe_b64encode(oid))
 
-def bad_to_good(baddir, bad, good, name, oid):
+def bad_to_good(storages, bad, good, name, oid):
 
     to_do = [(name, oid)]
     while to_do:
         name, oid = to_do.pop()
         bad.remove(name, oid)
+        storage = storages[name]
 
-        path = bad_path(baddir, name, oid)
-        if not os.path.exists(path):
-            return
-
-        f = open(path , 'rb')
-        while 1:
-            try:
-                refs = marshal.load(f)
-            except EOFError:
-                break
-
-            for ref in refs:
+        for h in storage.history(oid, size=1<<99):
+            data = storage.loadSerial(oid, h['tid'])
+            for ref in getrefs(data, name):
                 if good.insert(*ref) and bad.has(*ref):
                     to_do.append(ref)
 
-        f.close()
-        os.remove(path)
-
 def getrefs(p, rname):
     refs = []
     u = cPickle.Unpickler(cStringIO.StringIO(p))
@@ -342,8 +315,12 @@
         by_oid[oid] = {name: by_rname, rname: references}
     else:
         references = by_rname
-    references.insert(roid)
 
+    if roid not in references:
+        references.insert(roid)
+        return True
+    return False
+
 def _get_referer(references, name, oid):
     by_oid = references.get(name)
     if by_oid:
@@ -398,8 +375,7 @@
             if (ref[0] != name) and not databases[name].xrefs:
                 print 'bad xref', ref[0], u64(ref[1]), name, u64(oid)
 
-            _insert_ref(references, name, oid, *ref)
-            nreferences += 1
+            nreferences += _insert_ref(references, name, oid, *ref)
 
             if nreferences > 10000:
                 transaction.commit()



More information about the Checkins mailing list