[Zodb-checkins] SVN: ZODB/trunk/src/ZODB/interfaces.py Fleshed out the storage interfaces.

Jim Fulton jim at zope.com
Thu Apr 26 19:20:23 EDT 2007


Log message for revision 74841:
  Fleshed out the storage interfaces.
  

Changed:
  U   ZODB/trunk/src/ZODB/interfaces.py

-=-
Modified: ZODB/trunk/src/ZODB/interfaces.py
===================================================================
--- ZODB/trunk/src/ZODB/interfaces.py	2007-04-26 23:20:20 UTC (rev 74840)
+++ ZODB/trunk/src/ZODB/interfaces.py	2007-04-26 23:20:21 UTC (rev 74841)
@@ -18,6 +18,7 @@
 
 from zope.interface import Interface, Attribute
 
+
 class IConnection(Interface):
     """Connection to ZODB for loading and storing objects.
 
@@ -289,7 +290,6 @@
         This invalidates *all* objects in the cache. If the connection
         is open, subsequent reads will fail until a new transaction
         begins or until the connection os reopned.
-        
         """
 
 class IStorageDB(Interface):
@@ -345,37 +345,6 @@
     TODO: This interface is incomplete.
     """
 
-## __init__ methods don't belong in interfaces:
-##
-##     def __init__(storage,
-##                  pool_size=7,
-##                  cache_size=400,
-##                  version_pool_size=3,
-##                  version_cache_size=100,
-##                  database_name='unnamed',
-##                  databases=None,
-##                  ):
-##         """Create an object database.
-
-##         storage: the storage used by the database, e.g. FileStorage
-##         pool_size: expected maximum number of open connections
-##         cache_size: target size of Connection object cache, in number of
-##             objects
-##         version_pool_size: expected maximum number of connections (per
-##             version)
-##         version_cache_size: target size of Connection object cache for
-##              version connections, in number of objects
-##         database_name: when using a multi-database, the name of this DB
-##             within the database group.  It's a (detected) error if databases
-##             is specified too and database_name is already a key in it.
-##             This becomes the value of the DB's database_name attribute.
-##         databases: when using a multi-database, a mapping to use as the
-##             binding of this DB's .databases attribute.  It's intended
-##             that the second and following DB's added to a multi-database
-##             pass the .databases attribute set on the first DB added to the
-##             collection.
-##         """
-
     databases = Attribute("""\
         A mapping from database name to DB (database) object.
 
@@ -386,119 +355,456 @@
         entry.
         """)
 
-    def invalidateCache():
-        """Invalidate all objects in the database object caches
+    def open(version='',
+             mvcc=True,
+             transaction_manager=None,
+             synch=True
+             ):
+        """Return an IConnection object for use by application code.
 
-        invalidateCache will be called on each of the database's connections.
+        version: the "version" that all changes will be made
+            in, defaults to no version.
+        mvcc: boolean indicating whether MVCC is enabled
+        transaction_manager: transaction manager to use.  None means
+            use the default transaction manager.
+        synch: boolean indicating whether Connection should
+            register for afterCompletion() calls.
+
+        Note that the connection pool is managed as a stack, to
+        increase the likelihood that the connection's cache will
+        include useful objects.
         """
 
+    # TODO: Should this method be moved into some subinterface?
+    def pack(t=None, days=0):
+        """Pack the storage, deleting unused object revisions.
+
+        A pack is always performed relative to a particular time, by
+        default the current time.  All object revisions that are not
+        reachable as of the pack time are deleted from the storage.
+
+        The cost of this operation varies by storage, but it is
+        usually an expensive operation.
+
+        There are two optional arguments that can be used to set the
+        pack time: t, pack time in seconds since the epoch, and days,
+        the number of days to subtract from t or from the current
+        time if t is not specified.
+        """
+
+    # TODO: Should this method be moved into some subinterface?
+    def undo(id, txn=None):
+        """Undo a transaction identified by id.
+
+        A transaction can be undone if all of the objects involved in
+        the transaction were not modified subsequently, if any
+        modifications can be resolved by conflict resolution, or if
+        subsequent changes resulted in the same object state.
+
+        The value of id should be generated by calling undoLog()
+        or undoInfo().  The value of id is not the same as a
+        transaction id used by other methods; it is unique to undo().
+
+        id: a storage-specific transaction identifier
+        txn: transaction context to use for undo().
+            By default, uses the current transaction.
+        """
+
+    def close():
+        """Close the database and its underlying storage.
+
+        It is important to close the database, because the storage may
+        flush in-memory data structures to disk when it is closed.
+        Leaving the storage open when the process exits can cause the
+        next open to be slow.
+
+        What effect does closing the database have on existing
+        connections?  Technically, they remain open, but their storage
+        is closed, so they stop behaving usefully.  Perhaps close()
+        should also close all the Connections.
+        """
+
 class IStorage(Interface):
     """A storage is responsible for storing and retrieving data of objects.
     """
 
-## What follows is the union of methods found across various storage
-## implementations.  Exactly what "the storage API" is and means has
-## become fuzzy over time.  Methods should be uncommented here, or
-## even deleted, as the storage API regains a crisp definition.
+    def close():
+        """Close the storage.
+        """
 
-##    def load(oid, version):
-##        """TODO"""
-##
-##    def close():
-##        """TODO"""
-##
-##    def cleanup():
-##        """TODO"""
-##
-##    def lastSerial():
-##        """TODO"""
-##
-##    def lastTransaction():
-##        """TODO"""
-##
-##    def lastTid(oid):
-##        """Return last serialno committed for object oid."""
-##
-##    def loadSerial(oid, serial):
-##        """TODO"""
-##
-##    def loadBefore(oid, tid):
-##        """TODO"""
-##
-##    def iterator(start=None, stop=None):
-##        """TODO"""
-##
-##    def sortKey():
-##        """TODO"""
-##
-##    def getName():
-##        """TODO"""
-##
-##    def getSize():
-##        """TODO"""
-##
-##    def history(oid, version, length=1, filter=None):
-##        """TODO"""
-##
-##    def new_oid():
-##        """TODO"""
-##
-##    def set_max_oid(possible_new_max_oid):
-##        """TODO"""
-##
-##    def registerDB(db):
-##        """TODO"""
-##
-##    def isReadOnly():
-##        """TODO"""
-##
-##    def supportsUndo():
-##        """TODO"""
-##
-##    def supportsVersions():
-##        """TODO"""
-##
-##    def tpc_abort(transaction):
-##        """TODO"""
-##
-##    def tpc_begin(transaction):
-##        """TODO"""
-##
-##    def tpc_vote(transaction):
-##        """TODO"""
-##
-##    def tpc_finish(transaction, f=None):
-##        """TODO"""
-##
-##    def getSerial(oid):
-##        """TODO"""
-##
-##    def loadSerial(oid, serial):
-##        """TODO"""
-##
-##    def loadBefore(oid, tid):
-##        """TODO"""
-##
-##    def getExtensionMethods():
-##        """TODO"""
-##
-##    def copyTransactionsFrom():
-##        """TODO"""
-##
-##    def store(oid, oldserial, data, version, transaction):
-##        """
-##
-##        may return the new serial or not
-##        """
+    def getName():
+        """The name of the storage
 
+        The format and interpretation of this name is storage
+        dependent. It could be a file name, a database name, etc.
+
+        This is used solely for informational purposes.
+        """
+
+    def getSize():
+        """An approximate size of the database, in bytes.
+        
+        This is used solely for informational purposes.
+        """
+
+    def history(oid, version, size=1):
+        """Return a sequence of history information dictionaries.
+
+        Up to size objects (including no objects) may be returned.
+        
+        The information provides a log of the changes made to the
+        object. Data are reported in reverse chronological order.
+
+        Each dictionary has the following keys:
+
+        time
+            UTC seconds since the epoch (as in time.time) that the
+            object revision was committed.
+        tid
+            The transaction identifier of the transaction that
+            committed the version.
+        version
+            The version that the revision is in.  If the storage
+            doesn't support versions, then this must be an empty
+            string.
+        user_name
+            The user identifier, if any (or an empty string) of the
+            user on whose behalf the revision was committed.
+        description
+            The transaction description for the transaction that
+            committed the revision.
+        size
+            The size of the revision data record.
+
+        If the transaction had extension items, then these items are
+        also included if they don't conflict with the keys above.
+        """
+
+    def isReadOnly():
+        """Test whether a storage allows committing new transactions
+
+        For a given storage instance, this method always returns the
+        same value.  Read-only-ness is a static property of a storage.
+        """
+
+    def lastTransaction():
+        """Return the id of the last committed transaction
+        """
+
+    def __len__():
+        """The approximate number of objects in the storage
+        
+        This is used solely for informational purposes.
+        """
+
+    def load(oid, version):
+        """Load data for an object id and version
+
+        A data record and serial are returned.  The serial is a
+        transaction identifier of the transaction that wrote the data
+        record.
+
+        A POSKeyError is raised if there is no record for the object
+        id and version.
+
+        Storages that don't support versions must ignore the version
+        argument.
+        """
+
+    def loadBefore(oid, tid):
+        """Load the object data written before a transaction id
+
+        If there isn't data for the object before the given
+        transaction, then None is returned, otherwise three values are
+        returned:
+
+        - The data record
+
+        - The transaction id of the data record
+
+        - The transaction id of the following revision, if any, or None.
+        """
+
+    def loadSerial(oid, serial):
+        """Load the object record for the given transaction id
+
+        If a matching data record can be found, it is returned,
+        otherwise, POSKeyError is raised.
+        """
+
+    def new_oid():
+        """Allocate a new object id.
+
+        The object id returned is reserved at least as long as the
+        storage is opened.
+
+        The return value is a string.
+        """
+
+    def pack(pack_time, referencesf):
+        """Pack the storage
+
+        It is up to the storage to interpret this call, however, the
+        general idea is that the storage frees space by:
+
+        - discarding object revisions that were old and not current as of the
+          given pack time.
+
+        - garbage collecting objects that aren't reachable from the
+          root object via revisions remaining after discarding
+          revisions that were not current as of the pack time.
+
+        The pack time is given as a UTC time in seconds since the
+        epoch.
+
+        The second argument is a function that should be used to
+        extract object references from database records.  This is
+        needed to determine which objects are referenced from object
+        revisions.
+        """
+
+    def registerDB(db):
+        """Register an IStorageDB.
+
+        Note that, for historical reasons, an implementation may
+        require a second argument, however, if required, None will
+        be passed as the second argument.
+        """
+
+    def sortKey():
+        """Sort key used to order distributed transactions
+
+        When a transaction involves multiple storages, 2-phase commit
+        operations are applied in sort-key order.  This must be unique
+        among storages used in a transaction. Obviously, the storage
+        can't assure this, but it should construct the sort key so it
+        has a reasonable chance of being unique.
+        """
+
+    def store(oid, serial, data, version, transaction):
+        """Store data for the object id, oid.
+
+        Arguments:
+
+        oid
+            The object identifier.  This is either a string
+            consisting of 8 nulls or a string previously returned by
+            new_oid. 
+
+        serial
+            The serial of the data that was read when the object was
+            loaded from the database.  If the object was created in
+            the current transaction this will be a string consisting
+            of 8 nulls.
+
+        data
+            The data record. This is opaque to the storage.
+
+        version
+            The version to store the data in.  If the storage doesn't
+            support versions, this should be an empty string and the
+            storage is allowed to ignore it.
+
+        transaction
+            A transaction object.  This should match the current
+            transaction for the storage, set by tpc_begin.
+
+        The new serial for the object is returned, but not necessarily
+        immediately.  It may be returned directly, or in a subsequent
+        store or tpc_vote call.
+
+        The return value may be:
+
+        - None
+
+        - A new serial (string) for the object, or
+
+        - An iterable of object-id and serial pairs giving new serials
+          for objects.
+        """
+
+    def tpc_abort(transaction):
+        """Abort the transaction.
+
+        Any changes made by the transaction are discarded.
+
+        This call is ignored if the storage is not participating in
+        two-phase commit or if the given transaction is not the same
+        as the transaction the storage is committing.
+        """
+
+    def tpc_begin(transaction):
+        """Begin the two-phase commit process.
+
+        If storage is already participating in a two-phase commit
+        using the same transaction, the call is ignored.
+
+        If the storage is already participating in a two-phase commit
+        using a different transaction, the call blocks until the
+        current transaction ends (commits or aborts).
+        """
+
+    def tpc_finish(transaction, func = lambda: None):
+        """Finish the transaction, making any transaction changes permanent.
+
+        Changes must be made permanent at this point.
+
+        This call is ignored if the storage isn't participating in
+        two-phase commit or if it is committing a different
+        transaction.  Failure of this method is extremely serious.
+        """
+
+    def tpc_vote(transaction):
+        """Provide a storage with an opportunity to veto a transaction
+
+        This call is ignored if the storage isn't participating in
+        two-phase commit or if it is committing a different
+        transaction.  Failure of this method is extremely serious.
+
+        If a transaction can be committed by a storage, then the
+        method should return.  If a transaction cannot be committed,
+        then an exception should be raised.  If this method returns
+        without an error, then there must not be an error if
+        tpc_finish or tpc_abort is called subsequently.
+
+        The return value can be either None or a sequence of object-id
+        and serial pairs giving new serials for objects whose ids were
+        passed to previous store calls in the same transaction.
+        After the tpc_vote call, new serials must have been returned,
+        either from tpc_vote or store for objects passed to store.
+        """
+
+class IStorageRestoreable(IStorage):
+
+    def tpc_begin(transaction, tid=None):
+        """Begin the two-phase commit process.
+
+        If storage is already participating in a two-phase commit
+        using the same transaction, the call is ignored.
+
+        If the storage is already participating in a two-phase commit
+        using a different transaction, the call blocks until the
+        current transaction ends (commits or aborts).
+
+        If a transaction id is given, then the transaction will use
+        the given id rather than generating a new id.  This is used
+        when copying already committed transactions from another
+        storage.
+        """
+
+        # Note that the current implementation also accepts a status.
+        # This is an artifact of:
+        # - Earlier use of an undo status to undo revisions in place,
+        #   and,
+        # - Incorrect pack garbage-collection algorithms (possibly
+        #   including the existing FileStorage implementation), that
+        #   failed to take into account records after the pack time.
+        
+
+    def restore(oid, serial, data, version, prev_txn, transaction):
+        """Write data already committed in a separate database
+
+        The restore method is used when copying data from one database
+        to a replica of the database.  It differs from store in that
+        the data have already been committed, so there is no check for
+        conflicts and no new transaction is used for the data.
+
+        Arguments:
+
+        oid
+             The object id for the record
+        
+        serial
+             The transaction identifier that originally committed this object.
+
+        data
+             The record data.  This will be None if the transaction
+             undid the creation of the object.
+
+        version
+             The version identifier for the record
+
+        prev_txn
+             The identifier of a previous transaction that held the
+             object data.  The target storage can sometimes use this
+             as a hint to save space.
+
+        transaction
+             The current transaction.
+
+        Nothing is returned.
+        """
+
+class IStorageRecordInformation(Interface):
+    """Provide information about a single storage record
+    """
+
+    oid = Attribute("The object id")
+    version = Attribute("The version")
+    data = Attribute("The data record")
+
+class IStorageTransactionInformation(Interface):
+    """Provide information about a storage transaction
+    """
+
+    tid = Attribute("Transaction id")
+    status = Attribute("Transaction Status") # XXX what are valid values?
+    user = Attribute("Transaction user")
+    description = Attribute("Transaction Description")
+    extension = Attribute("Transaction extension data")
+
+    def __iter__():
+        """Return an iterable of IStorageRecordInformation
+        """
+
+class IStorageIteration(Interface):
+    """API for iterating over the contents of a storage
+
+    Note that this is a future API.  Some storages now provide an
+    approximation of this.
+
+    """
+
+    def iterator(start=None, stop=None):
+        """Return an IStorageTransactionInformation iterator.
+
+        An IStorageTransactionInformation iterator is returned for
+        iterating over the transactions in the storage.
+
+        If the start argument is not None, then iteration will start
+        with the first transaction whose identifier is greater than or
+        equal to start.
+
+        If the stop argument is not None, then iteration will end with
+        the last transaction whose identifier is less than or equal to
+        stop.
+
+        """
+
 class IStorageUndoable(IStorage):
     """A storage supporting transactional undo.
     """
 
-    def undo(transaction_id, txn):
-        """TODO"""
+    def supportsUndo():
+        """Return True, indicating that the storage supports undo.
+        """
 
-    def undoLog(first, last, filter=(lambda desc: True)):
+    def undo(transaction_id, transaction):
+        """Undo the transaction corresponding to the given transaction id.
+
+        The transaction id is a value returned from undoInfo or
+        undoLog, which may not be a stored transaction identifier as
+        used elsewhere in the storage APIs.
+
+        This method must only be called in the first phase of
+        two-phase commit (after tpc_begin but before tpc_vote). It
+        returns a serial (transaction id) and a sequence of object ids
+        for objects affected by the transaction.
+
+        """
+        # Used by DB (Actually, by TransactionalUndo)
+
+    def undoLog(first, last, filter=None):
         """Return a sequence of descriptions for undoable transactions.
 
         Application code should call undoLog() on a DB instance instead of on
@@ -551,8 +857,9 @@
                      could be gotten by passing the positive first-last for
                      `last` instead.
         """
+        # DB pass through
 
-    def undoInfo(first, last, specification=None):
+    def undoInfo(first=0, last=-20, specification=None):
         """Return a sequence of descriptions for undoable transactions.
 
         This is like `undoLog()`, except for the `specification` argument.
@@ -567,30 +874,39 @@
         ZEO client to its ZEO server (while a ZEO client ignores any `filter`
         argument passed to `undoLog()`).
         """
+        # DB pass-through
 
+
+class IStoragePackable(Interface):
+
     def pack(t, referencesf):
-        """TODO"""
+        """Pack the storage
 
-class IStorageVersioning(IStorage):
-    """A storage supporting versions.
-    """
+        Pack and/or garbage-collect the storage. If the storage does
+        not support undo, then t is ignored. All records for objects
+        that are not reachable from the system root object as of time
+        t, or as of the current time, if undo is not supported, are
+        removed from the storage.
 
-## What follows is the union of methods found across various version storage
-## implementations.  Exactly what "the storage API" is and means has
-## become fuzzy over time.  Methods should be uncommented here, or
-## even deleted, as the storage API regains a crisp definition.
+        A storage implementation may treat this method as a no-op. A
+        storage implementation may also delay packing and return
+        immediately. Storage documentation should define the behavior
+        of this method.
+        """
+        # Called by DB
 
-##    def abortVersion(src, transaction):
-##        """TODO"""
-##
-##    def commitVersion(src, dest, transaction):
-##        """TODO"""
-##
-##    def modifiedInVersion(oid):
-##        """TODO"""
-##
-##    def versionEmpty(version):
-##        """TODO"""
-##
-##    def versions(max=None):
-##        """TODO"""
+class IStorageCurrentRecordIteration(IStorage):
+
+    def record_iternext(next=None):
+        """Iterate over the records in a storage
+
+        Use like this:
+
+            >>> next = None
+            >>> while 1:
+            ...     oid, tid, data, next = storage.record_iternext(next)
+            ...     # do things with oid, tid, and data
+            ...     if next is None:
+            ...         break
+        
+        """



More information about the Zodb-checkins mailing list