[Zope3-checkins] CVS: ZODB4/src/zodb/storage - bdbminimal.py:1.6

Barry Warsaw barry@wooz.org
Wed, 22 Jan 2003 14:29:43 -0500


Update of /cvs-repository/ZODB4/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv10364

Modified Files:
	bdbminimal.py 
Log Message:
Forward port from ZODB 3.2

- Get db (bsddb) and ZERO from the zodb.storage.base module

- Generalize storage-specific metadata (i.e. packtime -> info)

- Add _packing flag and implement fix for pack race condition.


=== ZODB4/src/zodb/storage/bdbminimal.py 1.5 => 1.6 ===
--- ZODB4/src/zodb/storage/bdbminimal.py:1.5	Wed Jan 15 18:28:03 2003
+++ ZODB4/src/zodb/storage/bdbminimal.py	Wed Jan 22 14:29:41 2003
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# Copyright (c) 2001 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -13,31 +13,19 @@
 ##############################################################################
 
 """Berkeley storage without undo or versioning.
-"""
-
-__version__ = '$Revision$'[-2:][0]
 
-# In Python 2.3, we can simply use the bsddb module, but for Python 2.2, we
-# need to use pybsddb3, a.k.a. bsddb3.
-try:
-    from bsddb import _db as db
-except ImportError:
-    from bsddb3 import db
+$Revision$
+"""
 
 from zodb import interfaces
 from zodb.utils import p64, u64
 from zodb.serialize import findrefs
 from zodb.conflict import ConflictResolvingStorage, ResolvedSerial
-
-# BerkeleyBase class provides some common functionality for BerkeleyDB-based
-# storages.  It in turn inherits from BaseStorage which itself provides some
-# common storage functionality.
-from zodb.storage.base import BerkeleyBase, PackStop, _WorkThread
+from zodb.storage.base import db, ZERO, BerkeleyBase, PackStop, _WorkThread
 
 ABORT = 'A'
 COMMIT = 'C'
 PRESENT = 'X'
-ZERO = '\0'*8
 
 
 
@@ -89,6 +77,13 @@
         #     no pending entry.  It is a database invariant that if the
         #     pending table is empty, the oids table must also be empty.
         #
+        # info -- {key -> value}
+        #     This table contains storage metadata information.  The keys and
+        #     values are simple strings of variable length.   Here are the
+        #     valid keys:
+        #
+        #         version - the version of the database (reserved for ZODB4)
+        #
         # packmark -- [oid]
         #     Every object reachable from the root during a classic pack
         #     operation will have its oid present in this table.
@@ -100,6 +95,8 @@
         #     references exist, such that the objects can be completely packed
         #     away.
         #
+        self._packing = False
+        self._info = self._setupDB('info')
         self._serials = self._setupDB('serials', db.DB_DUP)
         self._pickles = self._setupDB('pickles')
         self._refcounts = self._setupDB('refcounts')
@@ -181,6 +178,8 @@
                     if soid <> oid:
                         break
                     if stid <> tid:
+                        # This is the previous revision of the object, so
+                        # decref its referents and clean up its pickles.
                         cs.delete()
                         data = self._pickles.get(oid+stid, txn=txn)
                         assert data is not None
@@ -198,8 +197,16 @@
             if co: co.close()
             if cs: cs.close()
         # We're done with this table
-        self._oids.truncate(txn)
         self._pending.truncate(txn)
+        # If we're in the middle of a pack, we need to add to the packmark
+        # table any objects that were modified in this transaction.
+        # Otherwise, there's a race condition where mark might have happened,
+        # then the object is added, then sweep runs, deleting the object
+        # created in the interim.
+        if self._packing:
+            for oid in self._oids.keys():
+                self._packmark.put(oid, PRESENT, txn=txn)
+        self._oids.truncate(txn)
         # Now, to finish up, we need apply the refcount deltas to the
         # refcounts table, and do recursive collection of all refcount == 0
         # objects.
@@ -353,6 +360,7 @@
         # A simple wrapper around the bulk of packing, but which acquires a
         # lock that prevents multiple packs from running at the same time.
         self._packlock.acquire()
+        self._packing = True
         try:
             # We don't wrap this in _withtxn() because we're going to do the
             # operation across several Berkeley transactions, which allows
@@ -363,20 +371,11 @@
             # collect object revisions
             self._dopack()
         finally:
+            self._packing = False
             self._packlock.release()
         self.log('classic pack finished')
 
     def _dopack(self):
-        # XXX There is a potential race condition here that we need to address
-        # so we don't lose objects.  Say we've just completed the mark phase
-        # and another thread comes along and stores an object.  Now we enter
-        # the sweep phase and notice that that other object isn't in the
-        # marked list.  So we add it to the collection list.  Full is less
-        # vulnerable to this hole (although it's still there) because it
-        # usually won't pack all the way to the current time.  I want to try
-        # to write a test case for this situation before I fix it, but that's
-        # difficult without instrumenting _dopack() for testing.
-        #
         # Do a mark and sweep for garbage collection.  Calculate the set of
         # objects reachable from the root.  Anything else is a candidate for
         # having all their revisions packed away.  The set of reachable
@@ -407,7 +406,6 @@
         # we'll save the mark data in the packmark table.  The oidqueue is a
         # BerkeleyDB Queue that holds the list of object ids to look at next,
         # and by using this we don't need to keep an in-memory dictionary.
-        assert len(self._packmark) == 0
         assert len(self._oidqueue) == 0
         # Quick exit for empty storages
         if not self._serials:
@@ -462,10 +460,6 @@
             if self._stop:
                 raise PackStop, 'stopped in _collect_objs()'
             oid = orec[1]
-            # Never pack away the root object.
-            if oid == ZERO:
-                orec = self._oidqueue.consume(txn)
-                continue
             # Delete the object from the serials table
             c = self._serials.cursor(txn)
             try: