[Zodb-checkins] SVN: ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/ New pack implementation and more tests.

Chris McDonough chrism at plope.com
Sun Jun 12 17:48:18 EDT 2005


Log message for revision 30775:
  New pack implementation and more tests.
  

Changed:
  U   ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py
  U   ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt
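
In outline, the new pack() packs the proxied base storage first, then
prunes blob files using one of two strategies, chosen by whether the
base storage supports undo.  A condensed sketch of the control flow
(locking and error handling elided; the full change is in the diff
below):

    def pack(self, packtime, referencesf):
        unproxied = getProxiedObject(self)
        # pack the underlying storage first, so that the loads the
        # helpers perform reflect the post-pack state
        result = unproxied.pack(packtime, referencesf)
        if unproxied.supportsUndo():
            # undoing: drop any blob revision whose serial can no
            # longer be loaded from the base storage
            self._packUndoing(packtime, referencesf)
        else:
            # non-undoing: keep only the newest revision of each
            # still-loadable oid; remove everything else
            self._packNonUndoing(packtime, referencesf)
        return result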

-=-
Modified: ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py
===================================================================
--- ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py	2005-06-12 16:35:56 UTC (rev 30774)
+++ ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py	2005-06-12 21:48:17 UTC (rev 30775)
@@ -108,57 +108,79 @@
             raise POSKeyError, "Not an existing blob."
         return filename
 
-    def _getNewestBlobSerial(self, oid):
-        blob_path = self._getBlobPath(oid)
-        serials = os.listdir(blob_path)
-        serials = [ os.path.join(blob_path, serial) for serial in serials ]
-        serials.sort(lambda x,y: cmp(os.stat(x).st_mtime, 
-                                     os.stat(y).st_mtime)
-                     )
+    def _packUndoing(self, packtime, referencesf):
 
-        # XXX the above sort is inadequate for files written within
-        # the same second at least under UNIX (st_mtime has a 1-second
-        # resolution).  We should really try to make it an invariant
-        # that the filenames be sortable instead.  This is the case
-        # right now due to ever-increasing tid values, but that's
-        # presumably an implementation detail, and also relies on the
-        # clock never going backwards.
+        # Walk over all existing revisions of all blob files and check
+        # whether they are still needed by attempting to load that
+        # revision of the object from the database.  This is perhaps
+        # the slowest possible way to do this, but it is safe.
 
-        return self._splitBlobFilename(serials[-1])[1]
+        # XXX we should be tolerant of "garbage" directories/files in
+        # the base_directory here.
 
+        for oid_repr in os.listdir(self.base_directory):
+            oid = utils.repr_to_oid(oid_repr)
+            oid_path = os.path.join(self.base_directory, oid_repr)
+            files = os.listdir(oid_path)
+            files.sort()
+
+            for filename in files:
+                filepath = os.path.join(oid_path, filename)
+                whatever, serial = self._splitBlobFilename(filepath)
+                try:
+                    fn = self._getCleanFilename(oid, serial)
+                    self.loadSerial(oid, serial)
+                except POSKeyError:
+                    os.unlink(filepath)
+
+            if not os.listdir(oid_path):
+                shutil.rmtree(oid_path)
+
+    def _packNonUndoing(self, packtime, referencesf):
+        for oid_repr in os.listdir(self.base_directory):
+            oid = utils.repr_to_oid(oid_repr)
+            oid_path = os.path.join(self.base_directory, oid_repr)
+            exists = True
+
+            try:
+                self.load(oid, None) # no version support
+            except (POSKeyError, KeyError):
+                exists = False
+
+            if exists:
+                files = os.listdir(oid_path)
+                files.sort()
+                latest = files[-1] # depends on ever-increasing tids
+                files.remove(latest)
+                for file in files:
+                    os.unlink(os.path.join(oid_path, file))
+            else:
+                shutil.rmtree(oid_path)
+                continue
+
+            if not os.listdir(oid_path):
+                shutil.rmtree(oid_path)
+
     def pack(self, packtime, referencesf):
         """Remove all unused oid/tid combinations."""
-        getProxiedObject(self).pack(packtime, referencesf)
+        unproxied = getProxiedObject(self)
 
+        # pack the underlying storage, which will allow us to determine
+        # which serials are current.
+        result = unproxied.pack(packtime, referencesf)
+
+        # perform a pack on blob data
         self._lock_acquire()
         try:
-            # Walk over all existing files and check if they are still needed
-            for filename in os.listdir(self.base_directory):
-                oid = utils.repr_to_oid(filename)
-                serial = self._getNewestBlobSerial(oid)
-                file_path = os.path.join(self.base_directory, filename)
-        
-                try:
-                    self.loadSerial(oid, serial)   # XXX Is that expensive?
-                except POSKeyError:
-                    # The object doesn't exist anymore at all. We can remove
-                    # everything belonging to that oid
-                    shutil.rmtree(file_path)
-                else:
-                    # The object still exists. We can remove everything but the
-                    # last recent object before pack time.
-                    serials = os.listdir(file_path)
-                    recent_candidate = \
-                            os.path.split(self._getCleanFilename(oid, serial))[1]
-                    serials.remove(recent_candidate)
-                    for serial_candidate in serials:
-                        cfname = os.path.join(file_path, serial_candidate)
-                        mtime = os.stat(cfname).st_mtime
-                        if mtime < packtime:
-                            os.unlink(cfname)
+            if unproxied.supportsUndo():
+                self._packUndoing(packtime, referencesf)
+            else:
+                self._packNonUndoing(packtime, referencesf)
         finally:
             self._lock_release()
-         
+
+        return result
+    
     def getSize(self):
         """Return the size of the database in bytes."""
         orig_size = getProxiedObject(self).getSize()
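
A note on ordering: both new helpers rely on a plain files.sort()
rather than the mtime comparison the removed _getNewestBlobSerial
used.  This is sound only if blob filenames sort in tid order, which
holds for fixed-width names built from ever-increasing tids (the
invariant the removed XXX comment worried about).  A minimal
illustration with hypothetical filenames (the real scheme is whatever
_getCleanFilename produces):

    # hypothetical fixed-width, hex-encoded tid filenames
    files = ['0000000000000001.blob',
             '000000000000000a.blob',
             '0000000000000002.blob']
    files.sort()        # lexicographic order matches tid order here
    latest = files[-1]  # '000000000000000a.blob', the newest revision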

Modified: ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt
===================================================================
--- ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt	2005-06-12 16:35:56 UTC (rev 30774)
+++ ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt	2005-06-12 21:48:17 UTC (rev 30775)
@@ -15,13 +15,18 @@
 Packing support for blob data
 =============================
 
-We need a database with a blob supporting storage:
+UNDOING
+=======
 
+We need a database with an undoing, blob-supporting storage:
+
     >>> from ZODB.FileStorage import FileStorage
+    >>> from ZODB.serialize import referencesf
     >>> from ZODB.Blobs.BlobStorage import BlobStorage
     >>> from ZODB.Blobs.Blob import Blob
     >>> from ZODB import utils
     >>> from ZODB.DB import DB
+    >>> import shutil
     >>> import transaction
     >>> from tempfile import mkdtemp, mktemp
     >>> storagefile = '/home/chrism/blobtest.fs'
@@ -41,6 +46,7 @@
     >>> tids = []
     >>> times = []
     >>> nothing = transaction.begin()
+    >>> times.append(time.time())
     >>> blob = Blob()
     >>> blob.open('w').write('this is blob data 0')
     >>> root['blob'] = blob
@@ -78,16 +84,171 @@
 
     >>> oid = root['blob']._p_oid
     >>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
-    >>> fns
     >>> [ os.path.exists(x) for x in fns ]
     [True, True, True, True, True]
 
-Do a pack to the slightly before the last revision was written:
+Get our blob filenames for this oid:
 
+    >>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
+
+Do a pack to slightly before the first revision was written:
+
+    >>> packtime = times[0]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [True, True, True, True, True]
+    
+Do a pack to slightly before the second revision was written:
+
+    >>> packtime = times[1]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [True, True, True, True, True]
+
+Do a pack to slightly before the third revision was written:
+
+    >>> packtime = times[2]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, True, True, True, True]
+
+Do a pack to slightly before the fourth revision was written:
+
+    >>> packtime = times[3]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, True, True, True]
+
+Do a pack to slightly before the fifth revision was written:
+
+    >>> packtime = times[4]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, True, True]
+
+Do a pack to now:
+
+    >>> packtime = time.time()
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, True]
+
+Delete the object and do a pack; it should get rid of the most current
+revision as well as the entire directory:
+
+    >>> nothing = transaction.begin()
+    >>> del root['blob']
+    >>> transaction.commit()
+    >>> packtime = time.time()
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, False]
+    >>> os.path.exists(os.path.split(fns[0])[0])
+    False
+
+Clean up our blob directory and database:
+
+    >>> shutil.rmtree(blob_dir)
+    >>> os.unlink(storagefile)
+
+NON-UNDOING
+===========
+
+We need a database with a NON-undoing, blob-supporting storage:
+
+    >>> from ZODB.MappingStorage import MappingStorage
     >>> from ZODB.serialize import referencesf
-    >>> packtime = times[-1]
-    >>> blob_storage.pack(packtime, referencesf)
+    >>> from ZODB.Blobs.BlobStorage import BlobStorage
+    >>> from ZODB.Blobs.Blob import Blob
+    >>> from ZODB import utils
+    >>> from ZODB.DB import DB
+    >>> import transaction
+    >>> from tempfile import mkdtemp, mktemp
+    >>> base_storage = MappingStorage('storage')
+    >>> blob_dir = '/home/chrism/test_blobs'
+    >>> blob_storage = BlobStorage(blob_dir, base_storage)
+    >>> database = DB(blob_storage)
+    
+Create our root object:
+
+    >>> connection1 = database.open()
+    >>> root = connection1.root()
+
+Put some revisions of a blob object in our database and on the filesystem:
+
+    >>> import time, os
+    >>> tids = []
+    >>> times = []
+    >>> nothing = transaction.begin()
+    >>> times.append(time.time())
+    >>> blob = Blob()
+    >>> blob.open('w').write('this is blob data 0')
+    >>> root['blob'] = blob
+    >>> transaction.commit()
+    >>> tids.append(blob_storage._tid)
+    >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(time.time())
+    >>> root['blob'].open('w').write('this is blob data 1')
+    >>> transaction.commit()
+    >>> tids.append(blob_storage._tid)
+    >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(time.time())
+    >>> root['blob'].open('w').write('this is blob data 2')
+    >>> transaction.commit()
+    >>> tids.append(blob_storage._tid)
+    >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(time.time())
+    >>> root['blob'].open('w').write('this is blob data 3')
+    >>> transaction.commit()
+    >>> tids.append(blob_storage._tid)
+    >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+    >>> nothing = transaction.begin()
+    >>> times.append(time.time())
+    >>> root['blob'].open('w').write('this is blob data 4')
+    >>> transaction.commit()
+    >>> tids.append(blob_storage._tid)
+    >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+    >>> oid = root['blob']._p_oid
     >>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
     >>> [ os.path.exists(x) for x in fns ]
-    [True, True, True, True, False]
+    [True, True, True, True, True]
+
+Get our blob filenames for this oid:
+
+    >>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
+
+Do a pack to slightly before the first revision was written:
+
+    >>> packtime = times[0]
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, True]
     
+Do a pack to now:
+
+    >>> packtime = time.time()
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, True]
+
+Delete the object and do a pack; it should get rid of the most current
+revision as well as the entire directory:
+
+    >>> nothing = transaction.begin()
+    >>> del root['blob']
+    >>> transaction.commit()
+    >>> packtime = time.time()
+    >>> blob_storage.pack(packtime, referencesf)
+    >>> [ os.path.exists(x) for x in fns ]
+    [False, False, False, False, False]
+    >>> os.path.exists(os.path.split(fns[0])[0])
+    False
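
One asymmetry worth noting: the undoing section above removes its blob
directory and storage file when done, while this non-undoing section
leaves blob_dir behind.  A cleanup mirroring the earlier one might
look like this (a sketch, not part of the committed test; the
MappingStorage base has no file to unlink):

    >>> shutil.rmtree(blob_dir)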
+


