[Zodb-checkins] SVN: ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/ New pack implementation and more tests.
Chris McDonough
chrism at plope.com
Sun Jun 12 17:48:18 EDT 2005
Log message for revision 30775:
New pack implementation and more tests.
Changed:
U ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py
U ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt
-=-
Modified: ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py
===================================================================
--- ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py 2005-06-12 16:35:56 UTC (rev 30774)
+++ ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/BlobStorage.py 2005-06-12 21:48:17 UTC (rev 30775)
@@ -108,57 +108,79 @@
raise POSKeyError, "Not an existing blob."
return filename
- def _getNewestBlobSerial(self, oid):
- blob_path = self._getBlobPath(oid)
- serials = os.listdir(blob_path)
- serials = [ os.path.join(blob_path, serial) for serial in serials ]
- serials.sort(lambda x,y: cmp(os.stat(x).st_mtime,
- os.stat(y).st_mtime)
- )
+ def _packUndoing(self, packtime, referencesf):
- # XXX the above sort is inadequate for files written within
- # the same second at least under UNIX (st_mtime has a 1-second
- # resolution). We should really try to make it an invariant
- # that the filenames be sortable instead. This is the case
- # right now due to ever-increasing tid values, but that's
- # presumably an implementation detail, and also relies on the
- # clock never going backwards.
+ # Walk over all existing revisions of all blob files and check
+ # if they are still needed by attempting to load the revision
+ # of that object from the database. This may be the slowest
+ # possible way to do it, but it's safe.
- return self._splitBlobFilename(serials[-1])[1]
+ # XXX we should be tolerant of "garbage" directories/files in
+ # the base_directory here.
+ for oid_repr in os.listdir(self.base_directory):
+ oid = utils.repr_to_oid(oid_repr)
+ oid_path = os.path.join(self.base_directory, oid_repr)
+ files = os.listdir(oid_path)
+ files.sort()
+
+ for filename in files:
+ filepath = os.path.join(oid_path, filename)
+ whatever, serial = self._splitBlobFilename(filepath)
+ try:
+ fn = self._getCleanFilename(oid, serial)
+ self.loadSerial(oid, serial)
+ except POSKeyError:
+ os.unlink(filepath)
+
+ if not os.listdir(oid_path):
+ shutil.rmtree(oid_path)
+
+ def _packNonUndoing(self, packtime, referencesf):
+ for oid_repr in os.listdir(self.base_directory):
+ oid = utils.repr_to_oid(oid_repr)
+ oid_path = os.path.join(self.base_directory, oid_repr)
+ exists = True
+
+ try:
+ self.load(oid, None) # no version support
+ except (POSKeyError, KeyError):
+ exists = False
+
+ if exists:
+ files = os.listdir(oid_path)
+ files.sort()
+ latest = files[-1] # depends on ever-increasing tids
+ files.remove(latest)
+ for file in files:
+ os.unlink(os.path.join(oid_path, file))
+ else:
+ shutil.rmtree(oid_path)
+ continue
+
+ if not os.listdir(oid_path):
+ shutil.rmtree(oid_path)
+
def pack(self, packtime, referencesf):
"""Remove all unused oid/tid combinations."""
- getProxiedObject(self).pack(packtime, referencesf)
+ unproxied = getProxiedObject(self)
+ # pack the underlying storage, which will allow us to determine
+ # which serials are current.
+ result = unproxied.pack(packtime, referencesf)
+
+ # perform a pack on blob data
self._lock_acquire()
try:
- # Walk over all existing files and check if they are still needed
- for filename in os.listdir(self.base_directory):
- oid = utils.repr_to_oid(filename)
- serial = self._getNewestBlobSerial(oid)
- file_path = os.path.join(self.base_directory, filename)
-
- try:
- self.loadSerial(oid, serial) # XXX Is that expensive?
- except POSKeyError:
- # The object doesn't exist anymore at all. We can remove
- # everything belonging to that oid
- shutil.rmtree(file_path)
- else:
- # The object still exists. We can remove everything but the
- # last recent object before pack time.
- serials = os.listdir(file_path)
- recent_candidate = \
- os.path.split(self._getCleanFilename(oid, serial))[1]
- serials.remove(recent_candidate)
- for serial_candidate in serials:
- cfname = os.path.join(file_path, serial_candidate)
- mtime = os.stat(cfname).st_mtime
- if mtime < packtime:
- os.unlink(cfname)
+ if unproxied.supportsUndo():
+ self._packUndoing(packtime, referencesf)
+ else:
+ self._packNonUndoing(packtime, referencesf)
finally:
self._lock_release()
-
+
+ return result
+
def getSize(self):
"""Return the size of the database in bytes."""
orig_size = getProxiedObject(self).getSize()
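
For readers skimming the diff, here is a minimal standalone sketch of the
two pack strategies introduced above. It is illustration only, not part of
the checkin: pack_blobs, split_blob_filename and repr_to_oid are
hypothetical stand-ins for the branch's methods and utils.repr_to_oid
helper, and the on-disk layout (one directory per oid repr under
base_directory, one file per revision) is assumed from the code above.

    import os
    import shutil

    from ZODB.POSException import POSKeyError

    def pack_blobs(storage, base_directory, split_blob_filename, repr_to_oid):
        """Remove blob revision files that are no longer reachable."""
        for oid_repr in os.listdir(base_directory):
            oid = repr_to_oid(oid_repr)
            oid_path = os.path.join(base_directory, oid_repr)

            if storage.supportsUndo():
                # Undoing storage: keep every revision that still loads,
                # unlink the ones the underlying pack has discarded.
                for filename in sorted(os.listdir(oid_path)):
                    filepath = os.path.join(oid_path, filename)
                    serial = split_blob_filename(filepath)[1]
                    try:
                        storage.loadSerial(oid, serial)
                    except POSKeyError:
                        os.unlink(filepath)
            else:
                # Non-undoing storage: if the object is gone, drop its
                # whole directory; otherwise keep only the newest revision.
                # Filenames sort in commit order (see the note at the end).
                try:
                    storage.load(oid, None)  # no version support, as above
                except (POSKeyError, KeyError):
                    shutil.rmtree(oid_path)
                    continue
                files = sorted(os.listdir(oid_path))
                for filename in files[:-1]:
                    os.unlink(os.path.join(oid_path, filename))

            # Drop the per-oid directory once it is empty.
            if not os.listdir(oid_path):
                shutil.rmtree(oid_path)
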
Modified: ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt
===================================================================
--- ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt 2005-06-12 16:35:56 UTC (rev 30774)
+++ ZODB/branches/ctheune-blobsupport/src/ZODB/Blobs/tests/packing.txt 2005-06-12 21:48:17 UTC (rev 30775)
@@ -15,13 +15,18 @@
Packing support for blob data
=============================
-We need a database with a blob supporting storage:
+UNDOING
+=======
+
+We need a database with an undoing, blob-supporting storage:
+
>>> from ZODB.FileStorage import FileStorage
+ >>> from ZODB.serialize import referencesf
>>> from ZODB.Blobs.BlobStorage import BlobStorage
>>> from ZODB.Blobs.Blob import Blob
>>> from ZODB import utils
>>> from ZODB.DB import DB
+ >>> import shutil
>>> import transaction
>>> from tempfile import mkdtemp, mktemp
>>> storagefile = '/home/chrism/blobtest.fs'
@@ -41,6 +46,7 @@
>>> tids = []
>>> times = []
>>> nothing = transaction.begin()
+ >>> times.append(time.time())
>>> blob = Blob()
>>> blob.open('w').write('this is blob data 0')
>>> root['blob'] = blob
@@ -78,16 +84,171 @@
>>> oid = root['blob']._p_oid
>>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
- >>> fns
>>> [ os.path.exists(x) for x in fns ]
[True, True, True, True, True]
-Do a pack to the slightly before the last revision was written:
+Get our blob filenames for this oid:
+
+ >>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
+
+Do a pack to slightly before the first revision was written:
+
+ >>> packtime = times[0]
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [True, True, True, True, True]
+
+Do a pack to slightly before the second revision was written:
+
+ >>> packtime = times[1]
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [True, True, True, True, True]
+
+Do a pack to slightly before the third revision was written:
+
+ >>> packtime = times[2]
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, True, True, True, True]
+
+Do a pack to slightly before the fourth revision was written:
+
+ >>> packtime = times[3]
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, True, True, True]
+
+Do a pack to slightly before the fifth revision was written:
+
+ >>> packtime = times[4]
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, False, True, True]
+
+Do a pack to now:
+
+ >>> packtime = time.time()
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, False, False, True]
+
+Delete the object and do a pack; it should get rid of the most current
+revision as well as the entire directory:
+
+ >>> nothing = transaction.begin()
+ >>> del root['blob']
+ >>> transaction.commit()
+ >>> packtime = time.time()
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, False, False, False]
+ >>> os.path.exists(os.path.split(fns[0])[0])
+ False
+
+Clean up our blob directory and database:
+
+ >>> shutil.rmtree(blob_dir)
+ >>> os.unlink(storagefile)
+
+NON-UNDOING
+===========
+
+We need a database with a non-undoing, blob-supporting storage:
+
+ >>> from ZODB.MappingStorage import MappingStorage
>>> from ZODB.serialize import referencesf
- >>> packtime = times[-1]
- >>> blob_storage.pack(packtime, referencesf)
+ >>> from ZODB.Blobs.BlobStorage import BlobStorage
+ >>> from ZODB.Blobs.Blob import Blob
+ >>> from ZODB import utils
+ >>> from ZODB.DB import DB
+ >>> import transaction
+ >>> from tempfile import mkdtemp, mktemp
+ >>> base_storage = MappingStorage('storage')
+ >>> blob_dir = mkdtemp()
+ >>> blob_storage = BlobStorage(blob_dir, base_storage)
+ >>> database = DB(blob_storage)
+
+Create our root object:
+
+ >>> connection1 = database.open()
+ >>> root = connection1.root()
+
+Put some revisions of a blob object in our database and on the filesystem:
+
+ >>> import time, os
+ >>> tids = []
+ >>> times = []
+ >>> nothing = transaction.begin()
+ >>> times.append(time.time())
+ >>> blob = Blob()
+ >>> blob.open('w').write('this is blob data 0')
+ >>> root['blob'] = blob
+ >>> transaction.commit()
+ >>> tids.append(blob_storage._tid)
+ >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+ >>> nothing = transaction.begin()
+ >>> times.append(time.time())
+ >>> root['blob'].open('w').write('this is blob data 1')
+ >>> transaction.commit()
+ >>> tids.append(blob_storage._tid)
+ >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+ >>> nothing = transaction.begin()
+ >>> times.append(time.time())
+ >>> root['blob'].open('w').write('this is blob data 2')
+ >>> transaction.commit()
+ >>> tids.append(blob_storage._tid)
+ >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+ >>> nothing = transaction.begin()
+ >>> times.append(time.time())
+ >>> root['blob'].open('w').write('this is blob data 3')
+ >>> transaction.commit()
+ >>> tids.append(blob_storage._tid)
+ >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+ >>> nothing = transaction.begin()
+ >>> times.append(time.time())
+ >>> root['blob'].open('w').write('this is blob data 4')
+ >>> transaction.commit()
+ >>> tids.append(blob_storage._tid)
+ >>> time.sleep(1.1) # let mtime catch up (temporary)
+
+ >>> oid = root['blob']._p_oid
>>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
>>> [ os.path.exists(x) for x in fns ]
- [True, True, True, True, False]
+ [True, True, True, True, True]
+
+Get our blob filenames for this oid:
+
+ >>> fns = [ blob_storage._getCleanFilename(oid, x) for x in tids ]
+
+Do a pack to slightly before the first revision was written:
+
+ >>> packtime = times[0]
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, False, False, True]
+
+Do a pack to now:
+
+ >>> packtime = time.time()
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, False, False, True]
+
+Delete the object and do a pack; it should get rid of the most current
+revision as well as the entire directory:
+
+ >>> nothing = transaction.begin()
+ >>> del root['blob']
+ >>> transaction.commit()
+ >>> packtime = time.time()
+ >>> blob_storage.pack(packtime, referencesf)
+ >>> [ os.path.exists(x) for x in fns ]
+ [False, False, False, False, False]
+ >>> os.path.exists(os.path.split(fns[0])[0])
+ False
+
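
A side note on why both pack variants can rely on sorted filenames rather
than on st_mtime (the reason for the removed XXX comment in BlobStorage.py
and the time.sleep(1.1) calls above): ZODB tids are 8-byte big-endian
timestamps that strictly increase, so fixed-width hex names derived from
them sort in commit order even for revisions written within the same
second, while st_mtime often has only 1-second resolution. The fixed-width
hex naming below is an assumption for illustration, not necessarily the
branch's actual filename scheme.

    from binascii import hexlify

    from ZODB.utils import p64

    # Three pretend tids committed within the same wall-clock second.
    tids = [p64(t) for t in (1000, 1001, 1002)]

    # Fixed-width hex names: lexicographic order == commit order.
    names = [hexlify(t) for t in tids]
    assert names == sorted(names)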