[Zodb-checkins] CVS: StandaloneZODB/bsddb3Storage/bsddb3Storage - BerkeleyBase.py:1.18
Barry Warsaw
barry@wooz.org
Fri, 23 Aug 2002 13:10:31 -0400
Update of /cvs-repository/StandaloneZODB/bsddb3Storage/bsddb3Storage
In directory cvs.zope.org:/tmp/cvs-serv1090/bsddb3Storage
Modified Files:
BerkeleyBase.py
Log Message:
Merging in changes from the bsddb3Storage-picklelog-branch. Briefly:
- Added BerkeleyConfig class for easier configuration of checkpointing
policy and lock sizing.
- Added _lockstats() and _docheckpoint() helpermethods.
=== StandaloneZODB/bsddb3Storage/bsddb3Storage/BerkeleyBase.py 1.17 => 1.18 ===
--- StandaloneZODB/bsddb3Storage/bsddb3Storage/BerkeleyBase.py:1.17 Fri Jul 19 12:42:37 2002
+++ StandaloneZODB/bsddb3Storage/bsddb3Storage/BerkeleyBase.py Fri Aug 23 13:10:30 2002
@@ -41,12 +41,69 @@
__version__ = '$Revision$'.split()[-2:][0]
+# Lock usage is inherently unbounded because there may be an unlimited number
+# of objects actually touched in any single transaction, and worst case could
+# be that each object is on a different page in the database. Berkeley BTrees
+# implement a lock per leaf page, plus a lock per level. We try to limit the
+# negative effects of this by writing as much data optimistically as we can.
+# But there's no way to completely avoid this. So this value is used to size
+# the lock subsystem before the environment is opened.
+DEFAULT_MAX_LOCKS = 20000
+
+
+class BerkeleyConfig:
+ """Bag of bits for describing various underlying configuration options.
+
+ Berkeley databases are wildly configurable, and this class exposes some of
+ that. Two important configuration options are the size of the lock table
+ and the checkpointing policy. To customize these options, instantiate one
+ of these classes and set the attributes below to the desired value. Then
+ pass this instance to the Berkeley storage constructor, using the `config'
+ keyword argument.
+
+ Locks in Berkeley are a limited and static resource; they can only be
+ changed before the environment is opened. It is possible for Berkeley
+ based storages to exhaust the available locks because worst case is to
+ consume one lock per object being modified, and transactions are unbounded
+ in the number of objects they modify. See
+
+ http://www.sleepycat.com/docs/ref/lock/max.html
+
+ for a discussion on lock sizing. These attributes control the lock
+ sizing:
+
+ - numlocks is passed directly to set_lk_max_locks() when the environment
+ is opened.
+
+ You will need to find the right balance between the number of locks
+ allocated and the system resources that consumes. If the locks are
+ exhausted a TransactionTooLargeError can get raised during commit.
+
+ To improve recovery times in case of failures, you should set up a
+ checkpointing policy when you create the database. Note that the database
+ is automatically, and forcefully, checkpointed twice when it is closed.
+ But an exception during processing (e.g.
+
+ The following checkpointing attributes are supported:
+
+ - interval indicates the maximum number of calls to tpc_finish() after
+ which a checkpoint is performed.
+
+ - kbytes is passed directly to txn_checkpoint()
+
+ - min is passed directly to txn_checkpoint()
+ """
+ numlocks = DEFAULT_MAX_LOCKS
+ interval = 100
+ kbyte = 0
+ min = 0
+
class BerkeleyBase(BaseStorage):
"""Base storage for Minimal and Full Berkeley implementations."""
- def __init__(self, name, env=None, prefix='zodb_'):
+ def __init__(self, name, env=None, prefix='zodb_', config=None):
"""Create a new storage.
name is an arbitrary name for this storage. It is returned by the
@@ -72,9 +129,17 @@
DB.open() as the dbname parameter. IOW, prefix+name is passed to the
BerkeleyDb function DB->open() as the database parameter. It defaults
to "zodb_".
+
+ Optional config must be a BerkeleyConfig instance, or None, which
+ means to use the default configuration options.
"""
# sanity check arguments
+ if config is None:
+ config = BerkeleyConfig()
+ self._config = config
+ self._config._counter = 0
+
if name == '':
raise TypeError, 'database name is empty'
@@ -84,7 +149,7 @@
if env == '':
raise TypeError, 'environment name is empty'
elif isinstance(env, StringType):
- self._env = env_from_string(env)
+ self._env = env_from_string(env, self._config)
else:
self._env = env
@@ -231,9 +296,22 @@
self._env.close()
self._closelog()
+ # Useful for debugging
+
+ def _lockstats(self):
+ d = self._env.lock_stat()
+ return 'locks = [%(nlocks)d/%(maxnlocks)d]' % d
+
+ def _docheckpoint(self):
+ config = self._config
+ config._counter += 1
+ if config._counter > config.interval:
+ self._env.txn_checkpoint(config.kbyte, config.min)
+ config._counter = 0
+
-def env_from_string(envname):
+def env_from_string(envname, config):
# BSDDB requires that the directory already exists. BAW: do we need to
# adjust umask to ensure filesystem permissions?
try:
@@ -242,6 +320,7 @@
if e.errno <> errno.EEXIST: raise
# already exists
env = db.DBEnv()
+ env.set_lk_max_locks(config.numlocks)
env.open(envname,
db.DB_CREATE # create underlying files as necessary
| db.DB_RECOVER # run normal recovery before opening