[Zodb-checkins] SVN: ZODB/branches/jim-dev/src/ZODB/FileStorage/ switched to keeping track of blobs deleted rather than blobs kept.
Jim Fulton
jim at zope.com
Tue Dec 16 15:59:01 EST 2008
Log message for revision 94132:
switched to keeping track of blobs deleted rather than blobs kept.
Changed:
U ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py
U ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py
U ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py
-=-
Modified: ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py 2008-12-16 20:31:19 UTC (rev 94131)
+++ ZODB/branches/jim-dev/src/ZODB/FileStorage/FileStorage.py 2008-12-16 20:59:00 UTC (rev 94132)
@@ -1105,24 +1105,63 @@
# Move any blobs linked or copied while packing to the
# pack dir, which will become the old dir
lblob_dir = len(self.blob_dir)
- for path, dir_names, file_names in os.walk(self.blob_dir, False):
- n = 0
+ fshelper = self.fshelper
+ old = self.blob_dir+'.old'
+ os.mkdir(old, 0777)
+
+ # Helper to clean up dirs left empty after moving things to old
+ def maybe_remove_empty_dir_containing(path):
+ path = os.path.dirname(path)
+ if len(path) <= lblob_dir:
+ return
+ if not os.listdir(path):
+ os.rmdir(path)
+ maybe_remove_empty_dir_containing(path)
+
+ # Helper that moves a oid dir or revision file to the old dir.
+ def move(path):
+ dest = os.path.dirname(old+path[lblob_dir:])
+ if not os.path.exists(dest):
+ os.makedirs(dest, 0700)
+ os.rename(path, old+path[lblob_dir:])
+ maybe_remove_empty_dir_containing(path)
+
+ # First step: "remove" oids or revisions by moving them to .old
+ # (Later, when we add an option to not keep old files, we'll
+ # be able to simply remove.)
+ for line in open(os.path.join(self.blob_dir, '.removed')):
+ line = line.strip().decode('hex')
+
+ if len(line) == 8:
+ # oid is garbage, re/move dir
+ path = fshelper.getPathForOID(line)
+ if not os.path.exists(path):
+ # Hm, already gone. Odd.
+ continue
+ move(path)
+ continue
+
+ if len(line) != 16:
+ raise ValueError("Bad record in ", self.blob_dir, '.removed')
+
+ oid, tid = line[:8], line[8:]
+ path = fshelper.getBlobFilename(oid, tid)
+ if not os.path.exists(path):
+ # Hm, already gone. Odd.
+ continue
+ move(path)
+
+ # Second step, copy remaining files.
+ link_or_copy = ZODB.blob.link_or_copy
+ for path, dir_names, file_names in os.walk(self.blob_dir):
for file_name in file_names:
if not file_name.endswith('.blob'):
continue
- file_packed = os.path.join(
- path[:lblob_dir]+'.pack'+path[lblob_dir:],
- file_name)
- if not os.path.exists(file_packed):
- if not os.path.exists(os.path.dirname(file_packed)):
- os.makedirs(os.path.dirname(file_packed), 0700)
- ZODB.blob.rename_or_copy_blob(
- os.path.join(path, file_name),
- file_packed)
- n += 1
- if (n == len(file_names)) and not os.listdir(path):
- os.rmdir(path)
- os.rename(self.blob_dir+'.pack', self.blob_dir+'.old')
+ file_path = os.path.join(path, file_name)
+ dest = os.path.dirname(old+file_path[lblob_dir:])
+ if not os.path.exists(dest):
+ os.makedirs(dest, 0700)
+ link_or_copy(file_path, old+file_path[lblob_dir:])
def iterator(self, start=None, stop=None):
return FileIterator(self._file_name, start, stop)
Modified: ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py
===================================================================
--- ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py 2008-12-16 20:31:19 UTC (rev 94131)
+++ ZODB/branches/jim-dev/src/ZODB/FileStorage/fspack.py 2008-12-16 20:59:00 UTC (rev 94132)
@@ -337,12 +337,11 @@
def __init__(self, storage, referencesf, stop, gc=True):
self._storage = storage
if storage.blob_dir:
- self.blob_dir = storage.blob_dir+'.pack'
- self.fshelper = ZODB.blob.FilesystemHelper(
- self.blob_dir, storage.fshelper.layout_name)
- self.fshelper.create()
+ self.pack_blobs = True
+ self.blob_removed = open(
+ os.path.join(storage.blob_dir, '.removed'), 'w')
else:
- self.blob_dir = None
+ self.pack_blobs = False
path = storage._file.name
self._name = path
@@ -492,8 +491,24 @@
while pos < tend:
h = self._read_data_header(pos)
if not self.gc.isReachable(h.oid, pos):
+ if self.pack_blobs:
+ # We need to find out if this is a blob, so get the data:
+ if h.plen:
+ data = self._file.read(h.plen)
+ else:
+ data = self.fetchDataViaBackpointer(h.oid, h.back)
+ if data and ZODB.blob.is_blob_record(data):
+ # We need to remove the blob record. Maybe we
+ # need to remove oid:
+ if h.oid not in self.gc.reachable:
+ self.blob_removed.write(h.oid.encode('hex')+'\n')
+ else:
+ self.blob_removed.write(
+ (h.oid+h.tid).encode('hex')+'\n')
+
pos += h.recordlen()
continue
+
pos += h.recordlen()
# If we are going to copy any data, we need to copy
@@ -510,24 +525,18 @@
if h.plen:
data = self._file.read(h.plen)
else:
- # If a current record has a backpointer, fetch
- # refs and data from the backpointer. We need
- # to write the data in the new record.
- data = self.fetchBackpointer(h.oid, h.back)
+ data = self.fetchDataViaBackpointer(h.oid, h.back)
self.writePackedDataRecord(h, data, new_tpos)
new_pos = self._tfile.tell()
- if ZODB.blob.is_blob_record(data):
- self.copyBlob(h.oid, h.tid)
-
return new_tpos, pos
- def fetchBackpointer(self, oid, back):
- """Return data and refs backpointer `back` to object `oid.
+ def fetchDataViaBackpointer(self, oid, back):
+ """Return the data for oid via backpointer back
- If `back` is 0 or ultimately resolves to 0, return None
- and None. In this case, the transaction undoes the object
+ If `back` is 0 or ultimately resolves to 0, return None.
+ In this case, the transaction undoes the object
creation.
"""
if back == 0:
@@ -535,15 +544,6 @@
data, tid = self._loadBackTxn(oid, back, 0)
return data
- def copyBlob(self, oid, tid):
- if not self.blob_dir:
- return
- self.fshelper.createPathForOID(oid)
- ZODB.blob.link_or_copy(
- self._storage.fshelper.getBlobFilename(oid, tid),
- self.fshelper.getBlobFilename(oid, tid),
- )
-
def writePackedDataRecord(self, h, data, new_tpos):
# Update the header to reflect current information, then write
# it to the output file.
@@ -599,16 +599,13 @@
if h.plen:
data = self._file.read(h.plen)
else:
- data = self.fetchBackpointer(h.oid, h.back)
+ data = self.fetchDataViaBackpointer(h.oid, h.back)
if h.back:
prev_txn = self.getTxnFromData(h.oid, h.back)
self._copier.copy(h.oid, h.tid, data, prev_txn,
pos, self._tfile.tell())
- if ZODB.blob.is_blob_record(data):
- self.copyBlob(h.oid, h.tid)
-
tlen = self._tfile.tell() - pos
assert tlen == th.tlen
self._tfile.write(p64(tlen))
Modified: ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py
===================================================================
--- ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py 2008-12-16 20:31:19 UTC (rev 94131)
+++ ZODB/branches/jim-dev/src/ZODB/FileStorage/interfaces.py 2008-12-16 20:59:00 UTC (rev 94132)
@@ -20,17 +20,34 @@
The new file will have the same name as the old file with
'.pack' appended. (The packer can get the old file name via
- storage._file.name.)
+ storage._file.name.) If blobs are supported, if the storage's
+ blob_dir attribute is not None or empty, then a .removed file
+ must be created in the blob directory. This file contains records of
+ the form:
+ (oid+serial).encode('hex')+'\n'
+
+ or, of the form:
+
+ oid.encode('hex')+'\n'
+
+
If packing is unnecessary, or would not change the file, then
- None is returned, otherwise a tule is returned with:
+ no pack or removed files are created and None is returned,
+ otherwise a tuple is returned with:
- the size of the packed file, and
- the packed index
If and only if packing was necessary (non-None) and there was
- no error, then the commit lock must be acquired.
+ no error, then the commit lock must be acquired. In addition,
+ it is up to FileStorage to:
+
+ - Rename the .pack file, and
+
+ - process the blob_dir/.removed file by removing the blobs
+ corresponding to the file records.
"""
class IFileStorage(zope.interface.Interface):
More information about the Zodb-checkins
mailing list