[Zodb-checkins] SVN: ZODB/trunk/src/ - FileStorage now provides optional garbage collection. A 'gc'

Jim Fulton jim at zope.com
Sat Nov 1 14:23:23 EDT 2008


Log message for revision 92743:
  - FileStorage now provides optional garbage collection.  A 'gc'
    keyword option can be passed to the pack method.  A false value
    prevents garbage collection.
  
  - The FileStorage constructor now provides a boolean pack_gc option,
    which defaults to True, to control whether garbage collection is
    performed when packing by default. This can be overridden with the
    gc option to the pack method.
  
  The ZConfig configuration for FileStorage now includes a pack-gc
    option, corresponding to the pack_gc constructor argument.
  
  - The FileStorage constructor now has a packer keyword argument that
    allows an alternative packer to be supplied.
  
  The ZConfig configuration for FileStorage now includes a packer
    option, corresponding to the packer constructor argument.
  

Changed:
  U   ZODB/trunk/src/CHANGES.txt
  U   ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
  U   ZODB/trunk/src/ZODB/FileStorage/fspack.py
  A   ZODB/trunk/src/ZODB/FileStorage/interfaces.py
  A   ZODB/trunk/src/ZODB/FileStorage/tests.py
  A   ZODB/trunk/src/ZODB/FileStorage/zconfig.txt
  U   ZODB/trunk/src/ZODB/component.xml
  U   ZODB/trunk/src/ZODB/config.py
  U   ZODB/trunk/src/ZODB/tests/testFileStorage.py

-=-
Modified: ZODB/trunk/src/CHANGES.txt
===================================================================
--- ZODB/trunk/src/CHANGES.txt	2008-11-01 18:23:20 UTC (rev 92742)
+++ ZODB/trunk/src/CHANGES.txt	2008-11-01 18:23:23 UTC (rev 92743)
@@ -22,6 +22,27 @@
   XXX There are known issues with this implementation that need to be
   sorted out before it is "released".
 
+New Features
+------------
+
+- FileStorage now provides optional garbage collection.  A 'gc'
+  keyword option can be passed to the pack method.  A false value
+  prevents garbage collection.
+
+- The FileStorage constructor now provides a boolean pack_gc option,
+  which defaults to True, to control whether garbage collection is
+  performed when packing by default. This can be overridden with the
+  gc option to the pack method.
+
+  The ZConfig configuration for FileStorage now includes a pack-gc
+  option, corresponding to the pack_gc constructor argument.
+
+- The FileStorage constructor now has a packer keyword argument that
+  allows an alternative packer to be supplied.
+
+  The ZConfig configuration for FileStorage now includes a packer
+  option, corresponding to the packer constructor argument.
+
 3.9.0a2 (2008-10-31)
 ====================
 

Modified: ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2008-11-01 18:23:20 UTC (rev 92742)
+++ ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2008-11-01 18:23:23 UTC (rev 92743)
@@ -102,7 +102,7 @@
     _pack_is_in_progress = False
 
     def __init__(self, file_name, create=False, read_only=False, stop=None,
-                 quota=None):
+                 quota=None, pack_gc=True, packer=None):
 
         if read_only:
             self._is_read_only = True
@@ -125,6 +125,10 @@
 
         self._file_name = file_name
 
+        self._pack_gc = pack_gc
+        if packer is not None:
+            self.packer = packer
+
         BaseStorage.BaseStorage.__init__(self, file_name)
 
         index, tindex = self._newIndexes()
@@ -979,7 +983,26 @@
         file.seek(pos - p + 8)
         return file.read(1) not in ' u'
 
-    def pack(self, t, referencesf):
+    @staticmethod
+    def packer(storage, referencesf, stop, gc):
+        # Our default packer is built around the original packer.  We
+        # simply adapt the old interface to the new.  We don't really
+        # want to invest much in the old packer, at least for now.
+        p = FileStoragePacker(
+            storage._file.name,
+            stop,
+            storage._lock_acquire,
+            storage._lock_release,
+            storage._commit_lock_acquire,
+            storage._commit_lock_release,
+            storage.getSize(),
+            gc)
+        opos = p.pack()
+        if opos is None:
+            return None
+        return opos, p.index
+
+    def pack(self, t, referencesf, gc=None):
         """Copy data from the current database file to a packed file
 
         Non-current records from transactions with time-stamp strings less
@@ -1003,23 +1026,23 @@
             if self._pack_is_in_progress:
                 raise FileStorageError('Already packing')
             self._pack_is_in_progress = True
-            current_size = self.getSize()
         finally:
             self._lock_release()
 
-        p = FileStoragePacker(self._file_name, stop,
-                              self._lock_acquire, self._lock_release,
-                              self._commit_lock_acquire,
-                              self._commit_lock_release,
-                              current_size)
+        if gc is None:
+            gc = self._pack_gc
+
+        have_commit_lock = False
         try:
-            opos = None
+            pack_result = None
             try:
-                opos = p.pack()
+                pack_result = self.packer(self, referencesf, stop, gc)
             except RedundantPackWarning, detail:
                 logger.info(str(detail))
-            if opos is None:
+            if pack_result is None:
                 return
+            have_commit_lock = True
+            opos, index = pack_result
             oldpath = self._file_name + ".old"
             self._lock_acquire()
             try:
@@ -1035,13 +1058,13 @@
                 # OK, we're beyond the point of no return
                 os.rename(self._file_name + '.pack', self._file_name)
                 self._file = open(self._file_name, 'r+b')
-                self._initIndex(p.index, p.tindex)
+                self._initIndex(index, self._tindex)
                 self._pos = opos
                 self._save_index()
             finally:
                 self._lock_release()
         finally:
-            if p.locked:
+            if have_commit_lock:
                 self._commit_lock_release()
             self._lock_acquire()
             self._pack_is_in_progress = False

Modified: ZODB/trunk/src/ZODB/FileStorage/fspack.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/fspack.py	2008-11-01 18:23:20 UTC (rev 92742)
+++ ZODB/trunk/src/ZODB/FileStorage/fspack.py	2008-11-01 18:23:23 UTC (rev 92743)
@@ -139,11 +139,12 @@
 
 class GC(FileStorageFormatter):
 
-    def __init__(self, file, eof, packtime):
+    def __init__(self, file, eof, packtime, gc):
         self._file = file
         self._name = file.name
         self.eof = eof
         self.packtime = packtime
+        self.gc = gc
         # packpos: position of first txn header after pack time
         self.packpos = None
         self.oid2curpos = fsIndex() # maps oid to current data record position
@@ -157,7 +158,6 @@
         # second is a dictionary mapping objects to lists of
         # positions; it is used to handle the same number of objects
         # for which we must keep multiple revisions.
-
         self.reachable = fsIndex()
         self.reach_ex = {}
 
@@ -176,11 +176,14 @@
 
     def findReachable(self):
         self.buildPackIndex()
-        self.findReachableAtPacktime([z64])
-        self.findReachableFromFuture()
-        # These mappings are no longer needed and may consume a lot
-        # of space.
-        del self.oid2curpos
+        if self.gc:
+            self.findReachableAtPacktime([z64])
+            self.findReachableFromFuture()
+            # These mappings are no longer needed and may consume a lot of
+            # space.
+            del self.oid2curpos
+        else:
+            self.reachable = self.oid2curpos
 
     def buildPackIndex(self):
         pos = 4L
@@ -320,7 +323,7 @@
     # current_size is the storage's _pos.  All valid data at the start
     # lives before that offset (there may be a checkpoint transaction in
     # progress after it).
-    def __init__(self, path, stop, la, lr, cla, clr, current_size):
+    def __init__(self, path, stop, la, lr, cla, clr, current_size, gc=True):
         self._name = path
         # We open our own handle on the storage so that much of pack can
         # proceed in parallel.  It's important to close this file at every
@@ -329,10 +332,10 @@
         self._file = open(path, "rb")
         self._path = path
         self._stop = stop
-        self.locked = 0
+        self.locked = False
         self.file_end = current_size
 
-        self.gc = GC(self._file, self.file_end, self._stop)
+        self.gc = GC(self._file, self.file_end, self._stop, gc)
 
         # The packer needs to acquire the parent's commit lock
         # during the copying stage, so the two sets of lock acquire
@@ -386,37 +389,44 @@
             os.remove(self._name + ".pack")
             return None
         self._commit_lock_acquire()
-        self.locked = 1
-        self._lock_acquire()
+        self.locked = True
         try:
-            # Re-open the file in unbuffered mode.
+            self._lock_acquire()
+            try:
+                # Re-open the file in unbuffered mode.
 
-            # The main thread may write new transactions to the file,
-            # which creates the possibility that we will read a status
-            # 'c' transaction into the pack thread's stdio buffer even
-            # though we're acquiring the commit lock.  Transactions
-            # can still be in progress throughout much of packing, and
-            # are written to the same physical file but via a distinct
-            # Python file object.  The code used to leave off the
-            # trailing 0 argument, and then on every platform except
-            # native Windows it was observed that we could read stale
-            # data from the tail end of the file.
-            self._file.close()  # else self.gc keeps the original alive & open
-            self._file = open(self._path, "rb", 0)
-            self._file.seek(0, 2)
-            self.file_end = self._file.tell()
-        finally:
-            self._lock_release()
-        if ipos < self.file_end:
-            self.copyRest(ipos)
+                # The main thread may write new transactions to the
+                # file, which creates the possibility that we will
+                # read a status 'c' transaction into the pack thread's
+                # stdio buffer even though we're acquiring the commit
+                # lock.  Transactions can still be in progress
+                # throughout much of packing, and are written to the
+                # same physical file but via a distinct Python file
+                # object.  The code used to leave off the trailing 0
+                # argument, and then on every platform except native
+                # Windows it was observed that we could read stale
+                # data from the tail end of the file.
+                self._file.close() # else self.gc keeps the original
+                                   # alive & open
+                self._file = open(self._path, "rb", 0)
+                self._file.seek(0, 2)
+                self.file_end = self._file.tell()
+            finally:
+                self._lock_release()
+            if ipos < self.file_end:
+                self.copyRest(ipos)
 
-        # OK, we've copied everything. Now we need to wrap things up.
-        pos = self._tfile.tell()
-        self._tfile.flush()
-        self._tfile.close()
-        self._file.close()
+            # OK, we've copied everything. Now we need to wrap things up.
+            pos = self._tfile.tell()
+            self._tfile.flush()
+            self._tfile.close()
+            self._file.close()
 
-        return pos
+            return pos
+        except:
+            if self.locked:
+                self._commit_lock_release()
+            raise
 
     def copyToPacktime(self):
         offset = 0L  # the amount of space freed by packing
@@ -524,9 +534,6 @@
         # After the pack time, all data records are copied.
         # Copy one txn at a time, using copy() for data.
 
-        # Release the commit lock every 20 copies
-        self._lock_counter = 0
-
         try:
             while 1:
                 ipos = self.copyOne(ipos)
@@ -543,9 +550,9 @@
     def copyOne(self, ipos):
         # The call below will raise CorruptedDataError at EOF.
         th = self._read_txn_header(ipos)
-        self._lock_counter += 1
-        if self._lock_counter % 20 == 0:
-            self._commit_lock_release()
+        # Release commit lock while writing to pack file
+        self._commit_lock_release()
+        self.locked = False
         pos = self._tfile.tell()
         self._copier.setTxnPos(pos)
         self._tfile.write(th.asString())
@@ -573,6 +580,6 @@
 
         self.index.update(self.tindex)
         self.tindex.clear()
-        if self._lock_counter % 20 == 0:
-            self._commit_lock_acquire()
+        self._commit_lock_acquire()
+        self.locked = True
         return ipos

Added: ZODB/trunk/src/ZODB/FileStorage/interfaces.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/interfaces.py	                        (rev 0)
+++ ZODB/trunk/src/ZODB/FileStorage/interfaces.py	2008-11-01 18:23:23 UTC (rev 92743)
@@ -0,0 +1,56 @@
+##############################################################################
+#
+# Copyright (c) Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+import zope.interface
+
+class IFileStoragePacker(zope.interface.Interface):
+
+    def __call__(storage, referencesf, stop, gc):
+        """Pack the file storage into a new file
+
+        The new file will have the same name as the old file with
+        '.pack' appended. (The packer can get the old file name via
+        storage._file.name.)
+
+        If packing is unnecessary, or would not change the file, then
+        None is returned, otherwise a tule is returned with:
+
+        - the size of the packed file, and
+
+        - the packed index
+
+        If and only if packing was necessary (non-None) and there was
+        no error, then the commit lock must be acquired.
+        """
+
+class IFileStorage(zope.interface.Interface):
+
+    packer = zope.interface.Attribute(
+        "The IFileStoragePacker to be used for packing."
+        )
+
+    _file = zope.interface.Attribute(
+        "The file object used to access the underlying data."
+        )
+
+    def _lock_acquire():
+        "Acquire the storage lock"
+
+    def _lock_release():
+        "Release the storage lock"
+
+    def _commit_lock_acquire():
+        "Acquire the storage commit lock"
+
+    def _commit_lock_release():
+        "Release the storage commit lock"


Property changes on: ZODB/trunk/src/ZODB/FileStorage/interfaces.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: ZODB/trunk/src/ZODB/FileStorage/tests.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/tests.py	                        (rev 0)
+++ ZODB/trunk/src/ZODB/FileStorage/tests.py	2008-11-01 18:23:23 UTC (rev 92743)
@@ -0,0 +1,21 @@
+##############################################################################
+#
+# Copyright (c) Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+import unittest
+from zope.testing import doctest
+
+def test_suite():
+    return unittest.TestSuite((
+        doctest.DocFileSuite('zconfig.txt'),
+        ))
+


Property changes on: ZODB/trunk/src/ZODB/FileStorage/tests.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: ZODB/trunk/src/ZODB/FileStorage/zconfig.txt
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/zconfig.txt	                        (rev 0)
+++ ZODB/trunk/src/ZODB/FileStorage/zconfig.txt	2008-11-01 18:23:23 UTC (rev 92743)
@@ -0,0 +1,135 @@
+Defining FileStorages using ZConfig
+===================================
+
+ZODB provides support for defining many storages, including
+FileStorages, using ZConfig.  To do this, you use a filestorage
+section, and define a path:
+
+    >>> import ZODB.config
+    >>> fs = ZODB.config.storageFromString("""
+    ... <filestorage>
+    ...     path my.fs
+    ... </filestorage>
+    ... """)
+
+    >>> fs._file.name
+    'my.fs'
+
+
+There are a number of options we can provide:
+
+create
+    Flag that indicates whether the storage should be truncated if
+    it already exists.
+
+    To demonstrate this, we'll first write some dataL
+
+    >>> db = ZODB.DB(fs) # writes object 0
+    >>> db.close()
+
+    Then reopen with the create option:
+
+    >>> fs = ZODB.config.storageFromString("""
+    ... <filestorage>
+    ...     path my.fs
+    ...     create true
+    ... </filestorage>
+    ... """)
+
+    Because the file was truncated, we no-longer have object 0:
+
+    >>> fs.load('\0'*8)
+    Traceback (most recent call last):
+    ...
+    POSKeyError: 0x00
+
+    >>> fs.close()
+
+read-only
+    If true, only reads may be executed against the storage.  Note
+    that the "pack" operation is not considered a write operation
+    and is still allowed on a read-only filestorage.
+
+    >>> fs = ZODB.config.storageFromString("""
+    ... <filestorage>
+    ...     path my.fs
+    ...     read-only true
+    ... </filestorage>
+    ... """)
+    >>> fs.isReadOnly()
+    True
+    >>> fs.close()
+
+quota
+    Maximum allowed size of the storage file.  Operations which
+    would cause the size of the storage to exceed the quota will
+    result in a ZODB.FileStorage.FileStorageQuotaError being
+    raised.
+
+    >>> fs = ZODB.config.storageFromString("""
+    ... <filestorage>
+    ...     path my.fs
+    ...     quota 10
+    ... </filestorage>
+    ... """)
+    >>> db = ZODB.DB(fs) # writes object 0
+    Traceback (most recent call last):
+    ...
+    FileStorageQuotaError: The storage quota has been exceeded.
+
+    >>> fs.close()
+
+packer
+    The dotten name (dotten module name and object name) of a
+    packer object.  This is used to provide an alternative pack
+    implementation.
+
+    To demonstrate this, we'll create a null packer that just prints
+    some information about it's arguments:
+
+    >>> def packer(storage, referencesf, stop, gc):
+    ...     print referencesf, storage is fs, gc
+    >>> ZODB.FileStorage.config_demo_printing_packer = packer
+
+    >>> fs = ZODB.config.storageFromString("""
+    ... <filestorage>
+    ...     path my.fs
+    ...     packer ZODB.FileStorage.config_demo_printing_packer
+    ... </filestorage>
+    ... """)
+
+    >>> import time
+    >>> db = ZODB.DB(fs) # writes object 0
+    >>> fs.pack(time.time(), 42)
+    42 True True
+
+    >>> fs.close()
+
+pack-gc
+    If false, then no garbage collection will be performed when
+    packing.  This can make packing go much faster and can avoid
+    problems when objects are referenced only from other
+    databases.
+
+    >>> fs = ZODB.config.storageFromString("""
+    ... <filestorage>
+    ...     path my.fs
+    ...     packer ZODB.FileStorage.config_demo_printing_packer
+    ...     pack-gc false
+    ... </filestorage>
+    ... """)
+
+    >>> fs.pack(time.time(), 42)
+    42 True False
+
+    Note that if we pass the gc option to pack, then this will
+    override the value set in the configuration:
+
+    >>> fs.pack(time.time(), 42, gc=True)
+    42 True True
+    
+
+
+
+
+    


Property changes on: ZODB/trunk/src/ZODB/FileStorage/zconfig.txt
___________________________________________________________________
Name: svn:eol-style
   + native

Modified: ZODB/trunk/src/ZODB/component.xml
===================================================================
--- ZODB/trunk/src/ZODB/component.xml	2008-11-01 18:23:20 UTC (rev 92742)
+++ ZODB/trunk/src/ZODB/component.xml	2008-11-01 18:23:23 UTC (rev 92743)
@@ -35,6 +35,21 @@
         raised.
       </description>
     </key>
+    <key name="packer" datatype="dotted-name">
+      <description>
+        The dotten name (dotten module name and object name) of a
+        packer object.  This is used to provide an alternative pack
+        implementation.
+      </description>
+    </key>
+    <key name="pack-gc" datatype="boolean" default="true">
+      <description>
+         If false, then no garbage collection will be performed when
+         packing.  This can make packing go much faster and can avoid
+         problems when objects are referenced only from other
+         databases.
+      </description>
+    </key>
   </sectiontype>
 
   <sectiontype name="mappingstorage" datatype=".MappingStorage"

Modified: ZODB/trunk/src/ZODB/config.py
===================================================================
--- ZODB/trunk/src/ZODB/config.py	2008-11-01 18:23:20 UTC (rev 92742)
+++ ZODB/trunk/src/ZODB/config.py	2008-11-01 18:23:23 UTC (rev 92743)
@@ -137,10 +137,17 @@
 
     def open(self):
         from ZODB.FileStorage import FileStorage
+        options = {}
+        if self.config.packer:
+            m, name = self.config.packer.rsplit('.', 1)
+            options['packer'] = getattr(__import__(m, {}, {}, ['*']), name)
+            
         return FileStorage(self.config.path,
                            create=self.config.create,
                            read_only=self.config.read_only,
-                           quota=self.config.quota)
+                           quota=self.config.quota,
+                           pack_gc=self.config.pack_gc,
+                           **options)
 
 class BlobStorage(BaseConfig):
 

Modified: ZODB/trunk/src/ZODB/tests/testFileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testFileStorage.py	2008-11-01 18:23:20 UTC (rev 92742)
+++ ZODB/trunk/src/ZODB/tests/testFileStorage.py	2008-11-01 18:23:23 UTC (rev 92743)
@@ -41,7 +41,7 @@
     BasicStorage.BasicStorage,
     TransactionalUndoStorage.TransactionalUndoStorage,
     RevisionStorage.RevisionStorage,
-    PackableStorage.PackableStorage,
+    PackableStorage.PackableStorageWithOptionalGC,
     PackableStorage.PackableUndoStorage,
     Synchronization.SynchronizedStorage,
     ConflictResolution.ConflictResolvingStorage,



More information about the Zodb-checkins mailing list