[Zodb-checkins] CVS: ZODB4/src/zodb/storage - bdbfull.py:1.8

Barry Warsaw <barry@wooz.org>
Wed, 22 Jan 2003 14:26:45 -0500


Update of /cvs-repository/ZODB4/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv9962

Modified Files:
	bdbfull.py 
Log Message:
Forward port from ZODB 3.2

- Get db (bsddb) and ZERO from the zodb.storage.base module

- Generalize storage-specific metadata (i.e. packtime -> info)

- Add _packing flag and implement fix for pack race condition (see the
  sketch below)

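For readers outside the tree, the race fix boils down to the pattern
below.  This is a minimal, illustrative sketch only: the class and helper
bodies are placeholders, not the actual BDBFullStorage code, and plain
dictionaries stand in for the Berkeley tables.

    import threading

    class StorageSketch:
        def __init__(self):
            self._packing = False              # True only while a pack runs
            self._packlock = threading.Lock()
            self._packmark = {}                # oid -> PRESENT marks
            self._oids = {}                    # oids touched this transaction

        def pack(self, t):
            self._packlock.acquire()           # one pack at a time
            self._packing = True
            try:
                self._dopack(t)
            finally:
                self._packing = False
                self._packlock.release()

        def _dopack(self, t):
            pass                               # mark/sweep elided

        def _docommit(self):
            # Objects committed while a pack is in flight must be marked
            # PRESENT, otherwise the sweep could delete them as unreachable.
            if self._packing:
                for oid in self._oids.keys():
                    self._packmark[oid] = 'X'
            self._oids.clear()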

=== ZODB4/src/zodb/storage/bdbfull.py 1.7 => 1.8 ===
--- ZODB4/src/zodb/storage/bdbfull.py:1.7	Wed Jan 15 18:28:02 2003
+++ ZODB4/src/zodb/storage/bdbfull.py	Wed Jan 22 14:26:42 2003
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# Copyright (c) 2001 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
@@ -13,38 +13,26 @@
 ##############################################################################
 
 """Berkeley storage with full undo and versioning support.
-"""
 
-__version__ = '$Revision$'.split()[-2:][0]
+$Revision$
+"""
 
 import time
 import cPickle as pickle
 from struct import pack, unpack
 
-# In Python 2.3, we can simply use the bsddb module, but for Python 2.2, we
-# need to use pybsddb3, a.k.a. bsddb3.
-try:
-    from bsddb import _db as db
-except ImportError:
-    from bsddb3 import db
-
 from zodb import interfaces
 from zodb.utils import p64, u64
 from zodb.serialize import findrefs
 from zodb.timestamp import TimeStamp
 from zodb.conflict import ConflictResolvingStorage, ResolvedSerial
 from zodb.interfaces import ITransactionAttrs
-
-# BerkeleyBase class provides some common functionality for both the
-# BDBFullStorage and BDBMinimalStorage implementations.  It in turn inherits
-# from BaseStorage which itself provides some common storage functionality.
-from zodb.storage.base import BerkeleyBase, PackStop, _WorkThread
+from zodb.storage.base import db, ZERO, BerkeleyBase, PackStop, _WorkThread
 from zodb.storage._helper import incr
 
 ABORT = 'A'
 COMMIT = 'C'
 PRESENT = 'X'
-ZERO = '\0'*8
 
 # Special flag for uncreated objects (i.e. Does Not Exist)
 DNE = '\377'*8
@@ -183,9 +171,15 @@
         #     pending table is empty, the oids, pvids, and prevrevids tables
         #     must also be empty.
         #
-        # packtime -- tid
-        #     The time of the last pack.  It is illegal to undo to before the
-        #     last pack time.
+        # info -- {key -> value}
+        #     This table contains storage metadata information.  The keys and
+        #     values are simple strings of variable length.  Here are the
+        #     valid keys:
+        #
+        #         packtime - time of the last pack.  It is illegal to undo to
+        #         before the last pack time.
+        #
+        #         version - the version of the database (reserved for ZODB4)
         #
         # objrevs -- {newserial+oid -> oldserial}
         #     This table collects object revision information for packing
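To make the new layout concrete, the info table is read and written like
any other bsddb table in this module.  A sketch, assuming only the
DB.get()/DB.put() calls visible later in this diff (the _record_packtime
name is invented for illustration):

    def _last_packtime(self):
        # A missing key means the storage has never been packed.
        return self._info.get('packtime', ZERO)

    def _record_packtime(self, packtid, txn):
        # One put inside the pack transaction replaces the old
        # truncate-then-put dance on a dedicated packtime table.
        self._info.put('packtime', packtid, txn=txn)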
@@ -209,6 +203,8 @@
         #     This table is a Queue, not a BTree.  It is used during the mark
         #     phase of pack() and contains a list of oids for work to be done.
         #
+        self._packing = False
+        self._info = self._setupDB('info')
         self._serials = self._setupDB('serials', db.DB_DUP)
         self._pickles = self._setupDB('pickles')
         self._refcounts = self._setupDB('refcounts')
@@ -228,7 +224,6 @@
         # Tables to support packing.
         self._objrevs = self._setupDB('objrevs', db.DB_DUP)
         self._packmark = self._setupDB('packmark')
-        self._packtime = self._setupDB('packtime')
         self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
         self._delqueue = self._setupDB('delqueue', 0, db.DB_QUEUE, 8)
         # Do recovery and consistency checks
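The _setupDB() helper comes from zodb.storage.base and its definition is
not part of this diff; judging purely from the call sites above, it
plausibly reduces to something like this (an assumption, not the real
helper):

    from bsddb3 import db   # plain bsddb on Python 2.3 and later

    def setupDB(env, name, flags=0, dbtype=db.DB_BTREE, reclen=None):
        d = db.DB(env)
        if flags:
            d.set_flags(flags)       # e.g. db.DB_DUP to allow duplicate keys
        if dbtype == db.DB_QUEUE and reclen:
            d.set_re_len(reclen)     # queue records are fixed length (8 here)
        d.open(name, None, dbtype, db.DB_CREATE)
        return d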
@@ -434,10 +429,18 @@
             refcount = self._refcounts.get(oid, ZERO, txn=txn)
             self._refcounts.put(oid, incr(refcount, delta), txn=txn)
         # Now clean up the temporary log tables
-        self._oids.truncate(txn)
         self._pvids.truncate(txn)
         self._prevrevids.truncate(txn)
         self._pending.truncate(txn)
+        # If we're in the middle of a pack, we need to add to the packmark
+        # table any objects that were modified in this transaction.
+        # Otherwise, there's a race condition where mark might have happened,
+        # then the object is added, then sweep runs, deleting the object
+        # created in the interim.
+        if self._packing:
+            for oid in self._oids.keys():
+                self._packmark.put(oid, PRESENT, txn=txn)
+        self._oids.truncate(txn)
 
     def _dobegin(self, txn, tid, u, d, e):
         # When a transaction begins, we set the pending flag to ABORT,
@@ -1034,13 +1037,7 @@
             self._lock_release()
 
     def _last_packtime(self):
-        packtimes = self._packtime.keys()
-        if len(packtimes) == 1:
-            return packtimes[0]
-        elif len(packtimes) == 0:
-            return ZERO
-        else:
-            assert False, 'too many packtimes'
+        return self._info.get('packtime', ZERO)
 
     def lastTransaction(self):
         """Return transaction id for last committed transaction"""
@@ -1307,11 +1304,10 @@
         finally:
             self._lock_release()
 
-    #
     # Packing
     #
     # There are two types of pack operations, the classic pack and the
-    # autopack.  Autopack's sole job is to periodically delete non-current
+    # autopack.  Autopack's primary job is to periodically delete non-current
     # object revisions.  It runs in a thread and has an `autopack time' which
     # is essentially just a time in the past to autopack to.  For
     # example, you might set up autopack to run once per hour, packing away
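The scheduling half of autopack lives in the _WorkThread helper imported
from zodb.storage.base, whose interface this diff does not show.  Below is
a freestanding approximation of the loop described above, with the
autopack() entry point and the constructor arguments assumed:

    import time
    import threading

    class AutopackLoop(threading.Thread):
        def __init__(self, storage, frequency, offset):
            threading.Thread.__init__(self)
            self.setDaemon(True)
            self._storage = storage
            self._frequency = frequency   # seconds between autopack runs
            self._offset = offset         # how far in the past to pack to
            self._stop = threading.Event()

        def run(self):
            while not self._stop.isSet():
                self._storage.autopack(time.time() - self._offset)
                self._stop.wait(self._frequency)

        def stop(self):
            self._stop.set()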
@@ -1334,7 +1330,6 @@
     # acquisition as granularly as possible so that packing doesn't block
     # other operations for too long.  But remember we don't use Berkeley locks
     # so we have to be careful about our application level locks.
-    #
 
     # First, the public API for classic pack
     def pack(self, t):
@@ -1342,6 +1337,7 @@
         # A simple wrapper around the bulk of packing, but which acquires a
         # lock that prevents multiple packs from running at the same time.
         self._packlock.acquire()
+        self._packing = True
         try:
             # We don't wrap this in _withtxn() because we're going to do the
             # operation across several Berkeley transactions, which allows
@@ -1349,13 +1345,11 @@
             # done.
             self._dopack(t)
         finally:
+            self._packing = False
             self._packlock.release()
         self.log('classic pack finished')
 
     def _dopack(self, t, gc=True):
-        # XXX race condition warning.  See _dopack() in bdbminimal.py for
-        # details.
-        #
     # t is a TimeTime, or time float; convert this to a TimeStamp object,
         # using an algorithm similar to what's used in FileStorage.  We know
         # that our transaction ids, a.k.a. revision ids, are timestamps.
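In sketch form, the FileStorage-style conversion alluded to here splits
the float into UTC calendar fields and keeps the fractional seconds
(assuming zodb.timestamp.TimeStamp takes a (year, month, day, hour,
minute, second) constructor like its ZODB 3 counterpart):

    import time
    from zodb.timestamp import TimeStamp

    def to_timestamp(t):
        # (year, month, day, hour, minute) in UTC, plus seconds with
        # the fraction preserved.
        return TimeStamp(*time.gmtime(t)[:5] + (t % 60,))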
@@ -1416,6 +1410,7 @@
         # A simple wrapper around the bulk of packing, but which acquires a
         # lock that prevents multiple packs from running at the same time.
         self._packlock.acquire()
+        self._packing = True
         try:
             # We don't wrap this in _withtxn() because we're going to do the
             # operation across several Berkeley transactions, which allows
@@ -1423,6 +1418,7 @@
             # done.
             self._dopack(t, gc)
         finally:
+            self._packing = False
             self._packlock.release()
         self.log('autopack finished')
 
@@ -1470,10 +1466,9 @@
             if co: co.close()
             if ct: ct.close()
         # Note that before we commit this Berkeley transaction, we also need
-        # to update the packtime table, so we can't have the possibility of a
-        # race condition with undoLog().
-        self._packtime.truncate(txn)
-        self._packtime.put(packtid, PRESENT, txn=txn)
+        # to update the last packtime entry, so we can't have the possibility
+        # of a race condition with undoLog().
+        self._info.put('packtime', packtid, txn=txn)
 
     def _decrefPickle(self, oid, lrevid, txn):
         if lrevid == DNE:
@@ -1600,7 +1595,6 @@
         # oidqueue is a BerkeleyDB Queue that holds the list of object ids to
         # look at next, and by using this we don't need to keep an in-memory
         # dictionary.
-        assert len(self._packmark) == 0
         assert len(self._oidqueue) == 0
         # Quick exit for empty storages
         if not self._serials:
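For context, the assertion on _packmark was removed because _docommit()
may now legitimately pre-populate that table while a pack is running.  The
mark phase it guarded is, in outline, a breadth-first walk from the root;
a sketch, with _references() standing in for the actual pickle-scanning
helper:

    def _mark(self, txn):
        oid = ZERO                    # start from the root object
        while oid is not None:
            if not self._packmark.has_key(oid):
                self._packmark.put(oid, PRESENT, txn=txn)
                for ref in self._references(oid, txn):
                    self._oidqueue.append(ref, txn)
            # consume() pops (recno, data), or returns None when empty
            rec = self._oidqueue.consume(txn)
            oid = rec and rec[1]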