[Zope3-checkins] CVS: Zope3/src/zodb/storage/file - pack.py:1.1.2.7

Jeremy Hylton jeremy@zope.com
Mon, 21 Apr 2003 13:18:32 -0400


Update of /cvs-repository/Zope3/src/zodb/storage/file
In directory cvs.zope.org:/tmp/cvs-serv19396

Modified Files:
      Tag: jeremy-new-pack-branch
	pack.py 
Log Message:
Implement a more space-efficient data structure to hold pack index.


=== Zope3/src/zodb/storage/file/pack.py 1.1.2.6 => 1.1.2.7 ===
--- Zope3/src/zodb/storage/file/pack.py:1.1.2.6	Fri Apr 18 12:17:12 2003
+++ Zope3/src/zodb/storage/file/pack.py	Mon Apr 21 13:18:31 2003
@@ -31,7 +31,29 @@
         self.packpos = None
         self.oid2curpos = {} # maps oid to current data record position
         self.oid2verpos = {} # maps oid to current version data
-        self.reachable = {} # maps oid to list of reachable data record pos
+
+        # The set of reachable revisions of each object.
+        #
+        # This set as managed using two data structures.  The first is
+        # an fsIndex mapping oids to one data record pos.  Since only
+        # a few objects will have more than one revision, we use this
+        # efficient data structure to handle the common case.  The
+        # second is a dictionary mapping objects to lists of
+        # positions; it is used to handle the same number of objects
+        # for which we must keep multiple revisions.
+        
+        self.reachable = fsIndex()
+        self.reach_ex = {}
+
+    def isReachable(self, oid, pos):
+        """Return True if revision of `oid` at `pos` is reachable."""
+
+        rpos = self.reachable.get(oid)
+        if rpos is None:
+            return False
+        if rpos == pos:
+            return True
+        return pos in self.reach_ex.get(oid, [])
 
     def findReachable(self):
         self.buildPackIndex()
@@ -82,7 +104,13 @@
                 L.append(pos)
                 todo.extend(self.findrefs(pos))
 
-            self.reachable[oid] = L
+            if not L:
+                continue
+
+            pos = L.pop()
+            self.reachable[oid] = pos
+            if L:
+                self.reach_ex[oid] = L
 
     def findReachableFromFuture(self):
         # In this pass, the roots are positions of object revisions.
@@ -102,17 +130,22 @@
                 dh = self._read_data_header(pos)
 
                 if dh.back and dh.back < self.packpos:
-                    L = self.reachable.setdefault(dh.oid, [])
-                    if dh.back not in L:
-                        L.append(dh.back)
-                        extra_roots.append(dh.back)
+                    if dh.oid in self.reachable:
+                        L = self.reach_ex.setdefault(dh.oid, [])
+                        if dh.back not in L:
+                            L.append(dh.back)
+                            extra_roots.append(dh.back)
+                    else:
+                        self.reachable[dh.oid] = dh.back
 
                 if dh.version:
-                    if dh.pnv and dh.pnv < self.packpos:
-                        L = self.reachable.setdefault(dh.oid, [])
+                    if dh.oid in self.reachable:
+                        L = self.reach_ex.setdefault(dh.oid, [])
                         if dh.pnv not in L:
                             L.append(dh.pnv)
                             extra_roots.append(dh.pnv)
+                    else:
+                        self.reachable[dh.oid] = dh.back
                         
                 pos += dh.recordlen()
 
@@ -296,7 +329,7 @@
         pos += th.headerlen()
         while pos < tend:
             h = self._read_data_header(pos)
-            if not pos in self.gc.reachable.get(h.oid, []):
+            if not self.gc.isReachable(h.oid, pos):
                 pos += h.recordlen()
                 continue
             pos += h.recordlen()