[Checkins] SVN: zc.zodbdgc/branches/jim-dev/src/zc/zodbdgc/__init__.py Only use bdb for tracking object references.

Jim Fulton jim at zope.com
Mon Jul 27 13:45:17 EDT 2009


Log message for revision 102351:
  Only use bdb for tracking object references.
  
  (Maybe I should use a file storage.)
  

Changed:
  U   zc.zodbdgc/branches/jim-dev/src/zc/zodbdgc/__init__.py

-=-
Modified: zc.zodbdgc/branches/jim-dev/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/branches/jim-dev/src/zc/zodbdgc/__init__.py	2009-07-27 17:45:15 UTC (rev 102350)
+++ zc.zodbdgc/branches/jim-dev/src/zc/zodbdgc/__init__.py	2009-07-27 17:45:16 UTC (rev 102351)
@@ -77,8 +77,9 @@
         )
 
     good = oidset(databases)
-    bad = Bad(databases)
-    deleted = Deleted(databases)
+    bad = oidset(databases)
+    badrefs = BadRefs(databases)
+    deleted = oidset(databases)
 
     for name, storage in storages:
         logger.info("%s: roots", name)
@@ -135,11 +136,16 @@
                             if good.insert(*ref) and bad.has(*ref):
                                 to_do = [ref]
                                 while to_do:
-                                    for ref in bad.pop(*to_do.pop()):
+                                    ref = to_do.pop()
+                                    bad.remove(*ref)
+                                    for ref in badrefs.pop(*ref):
                                         if good.insert(*ref) and bad.has(*ref):
                                             to_do.append(ref)
                     else:
-                        bad.insert(name, oid, set(getrefs(data, name, ignore)))
+                        bad.insert(name, oid)
+                        refs = set(getrefs(data, name, ignore))
+                        if refs:
+                            badrefs.insert(name, oid, refs)
                 else:
                     # deleted record
                     if good.has(name, oid):
@@ -148,6 +154,8 @@
                         bad.remove(name, oid)
                     deleted.insert(name, oid)
 
+    badrefs.close()
+
     if conf2 is not None:
         for db in db2.databases.itervalues():
             db.close()
@@ -203,16 +211,77 @@
             if ref[0] not in ignore:
                 yield ref[:2]
 
-class oidset:
+class oidset(dict):
+    """
+    {(name, oid)} implemented as:
 
-    type_ = 'good'
+       {name-> {oid[:6] -> {oid[-2:]}}}
+    """
+    def __init__(self, names):
+        for name in names:
+            self[name] = {}
 
+    def insert(self, name, oid):
+        prefix = oid[:6]
+        suffix = oid[6:]
+        data = self[name].get(prefix)
+        if data is None:
+            data = self[name][prefix] = BTrees.fsBTree.TreeSet()
+        elif suffix in data:
+            return False
+        data.insert(suffix)
+        return True
+
+    def remove(self, name, oid):
+        prefix = oid[:6]
+        suffix = oid[6:]
+        data = self[name].get(prefix)
+        if data and suffix in data:
+            data.remove(suffix)
+            if not data:
+                del self[name][prefix]
+
+    def __nonzero__(self):
+        for v in self.itervalues():
+            if v:
+                return True
+        return False
+
+    def pop(self):
+        for name, data in self.iteritems():
+            if data:
+               break
+        prefix, s = data.iteritems().next()
+        suffix = s.maxKey()
+        s.remove(suffix)
+        if not s:
+            del data[prefix]
+        return name, prefix+suffix
+
+    def has(self, name, oid):
+        try:
+            data = self[name][oid[:6]]
+        except KeyError:
+            return False
+        return oid[6:] in data
+
+    def iterator(self, name=None):
+        if name is None:
+            for name in self:
+                for oid in self.iterator(name):
+                    yield name, oid
+        else:
+            for prefix, data in self[name].iteritems():
+                for suffix in data:
+                    yield prefix+suffix
+
+class BadRefs:
+
     def __init__(self, names):
         self._dbs = {}
         self._paths = []
         for name in names:
-            fd, path = tempfile.mkstemp(
-                dir='.', prefix='db-'+name.strip()+'-', suffix=self.type_)
+            fd, path = tempfile.mkstemp(dir='.', prefix='db-'+name.strip()+'-')
             os.close(fd)
             self._dbs[name] = bsddb3.hashopen(path, cachesize=1<<24)
             self._paths.append(path)
@@ -223,24 +292,11 @@
         while self._paths:
             os.remove(self._paths.pop())
 
-    def insert(self, name, oid):
-        db = self._dbs[name]
-        if oid in db:
-            return False
-        db[oid] = ''
-        return True
-
     def remove(self, name, oid):
         db = self._dbs[name]
         if oid in db:
             del db[oid]
 
-    def pop(self):
-        for name, db in self._dbs.iteritems():
-            if db:
-                oid, _ = db.popitem()
-                return name, oid
-
     def __nonzero__(self):
         return sum(map(bool, self._dbs.itervalues()))
 
@@ -257,31 +313,22 @@
             for oid in self._dbs[name]:
                 yield oid
 
-class Deleted(oidset):
+    def insert(self, name, oid, refs):
+        if not refs:
+            return
 
-    type_ = 'deleted'
-
-class Bad(oidset):
-
-    type_ = 'bad'
-
-    def insert(self, name, oid, refs):
         db = self._dbs[name]
         old = db.get(oid)
-        if old is None:
-            db[oid] = refs and marshal.dumps(list(refs)) or ''
+        if old:
+            old = set(marshal.loads(old))
+            refs = old.union(refs)
+            if refs != old:
+                db[oid] = marshal.dumps(list(refs))
         else:
-            if old:
-                if refs:
-                    old = set(marshal.loads(old))
-                    refs = old.union(refs)
-                    if refs != old:
-                        db[oid] = marshal.dumps(list(refs))
-            elif refs:
-                db[oid] = marshal.dumps(list(refs))
+            db[oid] = marshal.dumps(list(refs))
 
     def pop(self, name, oid):
-        refs = self._dbs[name].pop(oid)
+        refs = self._dbs[name].pop(oid, ())
         if refs:
             return marshal.loads(refs)
         return ()



More information about the Checkins mailing list