[Zodb-checkins] SVN: ZODB/trunk/src/ZODB/interfaces.py Fleshed out
the storage interfaces.
Jim Fulton
jim at zope.com
Thu Apr 26 19:20:23 EDT 2007
Log message for revision 74841:
Fleshed out the storage interfaces.
Changed:
U ZODB/trunk/src/ZODB/interfaces.py
-=-
Modified: ZODB/trunk/src/ZODB/interfaces.py
===================================================================
--- ZODB/trunk/src/ZODB/interfaces.py 2007-04-26 23:20:20 UTC (rev 74840)
+++ ZODB/trunk/src/ZODB/interfaces.py 2007-04-26 23:20:21 UTC (rev 74841)
@@ -18,6 +18,7 @@
from zope.interface import Interface, Attribute
+
class IConnection(Interface):
"""Connection to ZODB for loading and storing objects.
@@ -289,7 +290,6 @@
This invalidates *all* objects in the cache. If the connection
is open, subsequent reads will fail until a new transaction
begins or until the connection is reopened.
-
"""
class IStorageDB(Interface):
@@ -345,37 +345,6 @@
TODO: This interface is incomplete.
"""
-## __init__ methods don't belong in interfaces:
-##
-## def __init__(storage,
-## pool_size=7,
-## cache_size=400,
-## version_pool_size=3,
-## version_cache_size=100,
-## database_name='unnamed',
-## databases=None,
-## ):
-## """Create an object database.
-
-## storage: the storage used by the database, e.g. FileStorage
-## pool_size: expected maximum number of open connections
-## cache_size: target size of Connection object cache, in number of
-## objects
-## version_pool_size: expected maximum number of connections (per
-## version)
-## version_cache_size: target size of Connection object cache for
-## version connections, in number of objects
-## database_name: when using a multi-database, the name of this DB
-## within the database group. It's a (detected) error if databases
-## is specified too and database_name is already a key in it.
-## This becomes the value of the DB's database_name attribute.
-## databases: when using a multi-database, a mapping to use as the
-## binding of this DB's .databases attribute. It's intended
-## that the second and following DB's added to a multi-database
-## pass the .databases attribute set on the first DB added to the
-## collection.
-## """
-
databases = Attribute("""\
A mapping from database name to DB (database) object.
@@ -386,119 +355,456 @@
entry.
""")
- def invalidateCache():
- """Invalidate all objects in the database object caches
+ def open(version='',
+ mvcc=True,
+ transaction_manager=None,
+ synch=True
+ ):
+ """Return an IConnection object for use by application code.
- invalidateCache will be called on each of the database's connections.
+ version: the "version" that all changes will be made
+ in, defaults to no version.
+ mvcc: boolean indicating whether MVCC is enabled
+ transaction_manager: transaction manager to use. None means
+ use the default transaction manager.
+ synch: boolean indicating whether Connection should
+ register for afterCompletion() calls.
+
+ Note that the connection pool is managed as a stack, to
+ increase the likelihood that the connection's stack will
+ include useful objects.
"""
+ # TODO: Should this method be moved into some subinterface?
+ def pack(t=None, days=0):
+ """Pack the storage, deleting unused object revisions.
+
+ A pack is always performed relative to a particular time, by
+ default the current time. All object revisions that are not
+ reachable as of the pack time are deleted from the storage.
+
+ The cost of this operation varies by storage, but it is
+ usually an expensive operation.
+
+ There are two optional arguments that can be used to set the
+ pack time: t, pack time in seconds since the epoch, and days,
+ the number of days to subtract from t or from the current
+ time if t is not specified.
+ """
+
+ # TODO: Should this method be moved into some subinterface?
+ def undo(id, txn=None):
+ """Undo a transaction identified by id.
+
+ A transaction can be undone if all of the objects involved in
+ the transaction were not modified subsequently, if any
+ modifications can be resolved by conflict resolution, or if
+ subsequent changes resulted in the same object state.
+
+ The value of id should be generated by calling undoLog()
+ or undoInfo(). The value of id is not the same as a
+ transaction id used by other methods; it is unique to undo().
+
+ id: a storage-specific transaction identifier
+ txn: transaction context to use for undo().
+ By default, uses the current transaction.
+ """
+
+ def close():
+ """Close the database and its underlying storage.
+
+ It is important to close the database, because the storage may
+ flush in-memory data structures to disk when it is closed.
+ Leaving the storage open when the process exits can cause the
+ next open to be slow.
+
+ What effect does closing the database have on existing
+ connections? Technically, they remain open, but their storage
+ is closed, so they stop behaving usefully. Perhaps close()
+ should also close all the Connections.
+ """
+
class IStorage(Interface):
"""A storage is responsible for storing and retrieving data of objects.
"""
-## What follows is the union of methods found across various storage
-## implementations. Exactly what "the storage API" is and means has
-## become fuzzy over time. Methods should be uncommented here, or
-## even deleted, as the storage API regains a crisp definition.
+ def close():
+ """Close the storage.
+ """
-## def load(oid, version):
-## """TODO"""
-##
-## def close():
-## """TODO"""
-##
-## def cleanup():
-## """TODO"""
-##
-## def lastSerial():
-## """TODO"""
-##
-## def lastTransaction():
-## """TODO"""
-##
-## def lastTid(oid):
-## """Return last serialno committed for object oid."""
-##
-## def loadSerial(oid, serial):
-## """TODO"""
-##
-## def loadBefore(oid, tid):
-## """TODO"""
-##
-## def iterator(start=None, stop=None):
-## """TODO"""
-##
-## def sortKey():
-## """TODO"""
-##
-## def getName():
-## """TODO"""
-##
-## def getSize():
-## """TODO"""
-##
-## def history(oid, version, length=1, filter=None):
-## """TODO"""
-##
-## def new_oid():
-## """TODO"""
-##
-## def set_max_oid(possible_new_max_oid):
-## """TODO"""
-##
-## def registerDB(db):
-## """TODO"""
-##
-## def isReadOnly():
-## """TODO"""
-##
-## def supportsUndo():
-## """TODO"""
-##
-## def supportsVersions():
-## """TODO"""
-##
-## def tpc_abort(transaction):
-## """TODO"""
-##
-## def tpc_begin(transaction):
-## """TODO"""
-##
-## def tpc_vote(transaction):
-## """TODO"""
-##
-## def tpc_finish(transaction, f=None):
-## """TODO"""
-##
-## def getSerial(oid):
-## """TODO"""
-##
-## def loadSerial(oid, serial):
-## """TODO"""
-##
-## def loadBefore(oid, tid):
-## """TODO"""
-##
-## def getExtensionMethods():
-## """TODO"""
-##
-## def copyTransactionsFrom():
-## """TODO"""
-##
-## def store(oid, oldserial, data, version, transaction):
-## """
-##
-## may return the new serial or not
-## """
+ def getName():
+ """The name of the storage
+ The format and interpretation of this name is storage
+ dependent. It could be a file name, a database name, etc.
+
+ This is used solely for informational purposes.
+ """
+
+ def getSize():
+ """An approximate size of the database, in bytes.
+
+ This is used solely for informational purposes.
+ """
+
+ def history(oid, version, size=1):
+ """Return a sequence of history information dictionaries.
+
+ Up to size objects (including no objects) may be returned.
+
+ The information provides a log of the changes made to the
+ object. Data are reported in reverse chronological order.
+
+ Each dictionary has the following keys:
+
+ time
+ UTC seconds since the epoch (as in time.time) that the
+ object revision was committed.
+ tid
+ The transaction identifier of the transaction that
+ committed the version.
+ version
+ The version that the revision is in. If the storage
+ doesn't support versions, then this must be an empty
+ string.
+ user_name
+ The user identifier, if any (or an empty string) of the
+ user on whose behalf the revision was committed.
+ description
+ The transaction description for the transaction that
+ committed the revision.
+ size
+ The size of the revision data record.
+
+ If the transaction had extension items, then these items are
+ also included if they don't conflict with the keys above.
+ """
+
+ def isReadOnly():
+ """Test whether a storage allows committing new transactions
+
+ For a given storage instance, this method always returns the
+ same value. Read-only-ness is a static property of a storage.
+ """
+
+ def lastTransaction():
+ """Return the id of the last committed transaction
+ """
+
+ def __len__():
+ """The approximate number of objects in the storage
+
+ This is used solely for informational purposes.
+ """
+
+ def load(oid, version):
+ """Load data for an object id and version
+
+ A data record and serial are returned. The serial is a
+ transaction identifier of the transaction that wrote the data
+ record.
+
+ A POSKeyError is raised if there is no record for the object
+ id and version.
+
+ Storages that don't support versions must ignore the version
+ argument.
+ """
+
+ def loadBefore(oid, tid):
+ """Load the object data written before a transaction id
+
+ If there isn't data for the object before the given
+ transaction, then None is returned, otherwise three values are
+ returned:
+
+ - The data record
+
+ - The transaction id of the data record
+
+ - The transaction id of the following revision, if any, or None.
+ """
+
+ def loadSerial(oid, serial):
+ """Load the object record for the given transaction id
+
+ If a matching data record can be found, it is returned,
+ otherwise, POSKeyError is raised.
+ """
+
+ def new_oid():
+ """Allocate a new object id.
+
+ The object id returned is reserved at least as long as the
+ storage is opened.
+
+ The return value is a string.
+ """
+
+ def pack(pack_time, referencesf):
+ """Pack the storage
+
+ It is up to the storage to interpret this call, however, the
+ general idea is that the storage frees space by:
+
+ - discarding object revisions that were old and not current as of the
+ given pack time.
+
+ - garbage collecting objects that aren't reachable from the
+ root object via revisions remaining after discarding
+ revisions that were not current as of the pack time.
+
+ The pack time is given as a UTC time in seconds since the
+ epoch.
+
+ The second argument is a function that should be used to
+ extract object references from database records. This is
+ needed to determine which objects are referenced from object
+ revisions.
+ """
+
+ def registerDB(db):
+ """Register an IStorageDB.
+
+ Note that, for historical reasons, an implementation may
+ require a second argument, however, if required, None will
+ be passed as the second argument.
+ """
+
+ def sortKey():
+ """Sort key used to order distributed transactions
+
+ When a transaction involved multiple storages, 2-phase commit
+ operations are applied in sort-key order. This must be unique
+ among storages used in a transaction. Obviously, the storage
+ can't assure this, but it should construct the sort key so it
+ has a reasonable chance of being unique.
+ """
+
+ def store(oid, serial, data, version, transaction):
+ """Store data for the object id, oid.
+
+ Arguments:
+
+ oid
+ The object identifier. This is either a string
+ consisting of 8 nulls or a string previously returned by
+ new_oid.
+
+ serial
+ The serial of the data that was read when the object was
+ loaded from the database. If the object was created in
+ the current transaction this will be a string consisting
+ of 8 nulls.
+
+ data
+ The data record. This is opaque to the storage.
+
+ version
+ The version to store the data in. If the storage doesn't
+ support versions, this should be an empty string and the
+ storage is allowed to ignore it.
+
+ transaction
+ A transaction object. This should match the current
+ transaction for the storage, set by tpc_begin.
+
+ The new serial for the object is returned, but not necessarily
+ immediately. It may be returned directly, or in a subsequent
+ store or tpc_vote call.
+
+ The return value may be:
+
+ - None
+
+ - A new serial (string) for the object, or
+
+ - An iterable of object-id and serial pairs giving new serials
+ for objects.
+ """
+
+ def tpc_abort(transaction):
+ """Abort the transaction.
+
+ Any changes made by the transaction are discarded.
+
+ This call is ignored if the storage is not participating in
+ two-phase commit or if the given transaction is not the same
+ as the transaction the storage is committing.
+ """
+
+ def tpc_begin(transaction):
+ """Begin the two-phase commit process.
+
+ If storage is already participating in a two-phase commit
+ using the same transaction, the call is ignored.
+
+ If the storage is already participating in a two-phase commit
+ using a different transaction, the call blocks until the
+ current transaction ends (commits or aborts).
+ """
+
+ def tpc_finish(transaction, func = lambda: None):
+ """Finish the transaction, making any transaction changes permanent.
+
+ Changes must be made permanent at this point.
+
+ This call is ignored if the storage isn't participating in
+ two-phase commit or if it is committing a different
+ transaction. Failure of this method is extremely serious.
+ """
+
+ def tpc_vote(transaction):
+ """Provide a storage with an opportunity to veto a transaction
+
+ This call is ignored if the storage isn't participating in
+ two-phase commit or if it is committing a different
+ transaction. Failure of this method is extremely serious.
+
+ If a transaction can be committed by a storage, then the
+ method should return. If a transaction cannot be committed,
+ then an exception should be raised. If this method returns
+ without an error, then there must not be an error if
+ tpc_finish or tpc_abort is called subsequently.
+
+ The return value can be either None or a sequence of object-id
+ and serial pairs giving new serials for objects whose ids were
+ passed to previous store calls in the same transaction.
+ After the tpc_vote call, new serials must have been returned,
+ either from tpc_vote or store for objects passed to store.
+ """
+
+class IStorageRestoreable(IStorage):
+
+ def tpc_begin(transaction, tid=None):
+ """Begin the two-phase commit process.
+
+ If storage is already participating in a two-phase commit
+ using the same transaction, the call is ignored.
+
+ If the storage is already participating in a two-phase commit
+ using a different transaction, the call blocks until the
+ current transaction ends (commits or aborts).
+
+ If a transaction id is given, then the transaction will use
+ the given id rather than generating a new id. This is used
+ when copying already committed transactions from another
+ storage.
+ """
+
+ # Note that the current implementation also accepts a status.
+ # This is an artifact of:
+ # - Earlier use of an undo status to undo revisions in place,
+ # and,
+ # - Incorrect pack garbage-collection algorithms (possibly
+ # including the existing FileStorage implementation), that
+ # failed to take into account records after the pack time.
+
+
+ def restore(oid, serial, data, version, prev_txn, transaction):
+ """Write data already committed in a separate database
+
+ The restore method is used when copying data from one database
+ to a replica of the database. It differs from store in that
+ the data have already been committed, so there is no check for
+ conflicts and no new transaction is used for the data.
+
+ Arguments:
+
+ oid
+ The object id for the record
+
+ serial
+ The transaction identifier that originally committed this object.
+
+ data
+ The record data. This will be None if the transaction
+ undid the creation of the object.
+
+ version
+ The version identifier for the record
+
+ prev_txn
+ The identifier of a previous transaction that held the
+ object data. The target storage can sometimes use this
+ as a hint to save space.
+
+ transaction
+ The current transaction.
+
+ Nothing is returned.
+ """
+
+class IStorageRecordInformation(Interface):
+ """Provide information about a single storage record
+ """
+
+ oid = Attribute("The object id")
+ version = Attribute("The version")
+ data = Attribute("The data record")
+
+class IStorageTransactionInformation(Interface):
+ """Provide information about a storage transaction
+ """
+
+ tid = Attribute("Transaction id")
+ status = Attribute("Transaction Status") # XXX what are valid values?
+ user = Attribute("Transaction user")
+ description = Attribute("Transaction Description")
+ extension = Attribute("Transaction extension data")
+
+ def __iter__():
+ """Return an iterable of IStorageRecordInformation
+ """
+
+class IStorageIteration(Interface):
+ """API for iterating over the contents of a storage
+
+ Note that this is a future API. Some storages now provide an
+ approximation of this.
+
+ """
+
+ def iterator(start=None, stop=None):
+ """Return an IStorageTransactionInformation iterator.
+
+ An IStorageTransactionInformation iterator is returned for
+ iterating over the transactions in the storage.
+
+ If the start argument is not None, then iteration will start
+ with the first transaction whose identifier is greater than or
+ equal to start.
+
+ If the stop argument is not None, then iteration will end with
+ the last transaction whose identifier is less than or equal to
+ stop.
+
+ """
+
class IStorageUndoable(IStorage):
"""A storage supporting transactional undo.
"""
- def undo(transaction_id, txn):
- """TODO"""
+ def supportsUndo():
+ """Return True, indicating that the storage supports undo.
+ """
- def undoLog(first, last, filter=(lambda desc: True)):
+ def undo(transaction_id, transaction):
+ """Undo the transaction corresponding to the given transaction id.
+
+ The transaction id is a value returned from undoInfo or
+ undoLog, which may not be a stored transaction identifier as
+ used elsewhere in the storage APIs.
+
+ This method must only be called in the first phase of
+ two-phase commit (after tpc_begin but before tpc_vote). It
+ returns a serial (transaction id) and a sequence of object ids
+ for objects affected by the transaction.
+
+ """
+ # Used by DB (Actually, by TransactionalUndo)
+
+ def undoLog(first, last, filter=None):
"""Return a sequence of descriptions for undoable transactions.
Application code should call undoLog() on a DB instance instead of on
@@ -551,8 +857,9 @@
could be gotten by passing the positive first-last for
`last` instead.
"""
+ # DB pass through
- def undoInfo(first, last, specification=None):
+ def undoInfo(first=0, last=-20, specification=None):
"""Return a sequence of descriptions for undoable transactions.
This is like `undoLog()`, except for the `specification` argument.
@@ -567,30 +874,39 @@
ZEO client to its ZEO server (while a ZEO client ignores any `filter`
argument passed to `undoLog()`).
"""
+ # DB pass-through
+
+class IStoragePackable(Interface):
+
def pack(t, referencesf):
- """TODO"""
+ """Pack the storage
-class IStorageVersioning(IStorage):
- """A storage supporting versions.
- """
+ Pack and/or garbage-collect the storage. If the storage does
+ not support undo, then t is ignored. All records for objects
+ that are not reachable from the system root object as of time
+ t, or as of the current time, if undo is not supported, are
+ removed from the storage.
-## What follows is the union of methods found across various version storage
-## implementations. Exactly what "the storage API" is and means has
-## become fuzzy over time. Methods should be uncommented here, or
-## even deleted, as the storage API regains a crisp definition.
+ A storage implementation may treat this method as a no-op. A
+ storage implementation may also delay packing and return
+ immediately. Storage documentation should define the behavior
+ of this method.
+ """
+ # Called by DB
-## def abortVersion(src, transaction):
-## """TODO"""
-##
-## def commitVersion(src, dest, transaction):
-## """TODO"""
-##
-## def modifiedInVersion(oid):
-## """TODO"""
-##
-## def versionEmpty(version):
-## """TODO"""
-##
-## def versions(max=None):
-## """TODO"""
+class IStorageCurrentRecordIteration(IStorage):
+
+ def record_iternext(next=None):
+ """Iterate over the records in a storage
+
+ Use like this:
+
+ >>> next = None
+ >>> while 1:
+ ... oid, tid, data, next = storage.record_iternext(next)
+ ... # do things with oid, tid, and data
+ ... if next is None:
+ ... break
+
+ """
More information about the Zodb-checkins
mailing list