[Zodb-checkins] SVN: ZODB/branches/ctheune-bushy-directory-3.8/
provide backport of bushy directory for ZODB 3.8
Christian Theune
ct at gocept.com
Mon Jun 23 03:48:30 EDT 2008
Log message for revision 87666:
provide backport of bushy directory for ZODB 3.8
Changed:
U ZODB/branches/ctheune-bushy-directory-3.8/NEWS.txt
U ZODB/branches/ctheune-bushy-directory-3.8/setup.py
U ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/ClientStorage.py
U ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/tests/testZEO.py
U ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py
A ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/scripts/migrateblobs.py
A ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt
U ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_tempdir.txt
U ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_transaction.txt
U ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py
-=-
Modified: ZODB/branches/ctheune-bushy-directory-3.8/NEWS.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/NEWS.txt 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/NEWS.txt 2008-06-23 07:48:29 UTC (rev 87666)
@@ -1,5 +1,4 @@
-
Whats new in ZODB 3.8.1
=======================
Modified: ZODB/branches/ctheune-bushy-directory-3.8/setup.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/setup.py 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/setup.py 2008-06-23 07:48:29 UTC (rev 87666)
@@ -20,7 +20,7 @@
interface, rich transaction support, and undo.
"""
-VERSION = "3.8.1dev"
+VERSION = "3.8.1dev.bushy"
# The (non-obvious!) choices for the Trove Development Status line:
# Development Status :: 5 - Production/Stable
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/ClientStorage.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/ClientStorage.py 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/ClientStorage.py 2008-06-23 07:48:29 UTC (rev 87666)
@@ -906,9 +906,7 @@
def _storeBlob_shared(self, oid, serial, data, filename, version, txn):
# First, move the blob into the blob directory
- dir = self.fshelper.getPathForOID(oid)
- if not os.path.exists(dir):
- os.mkdir(dir)
+ self.fshelper.getPathForOID(oid, create=True)
fd, target = self.fshelper.blob_mkstemp(oid, serial)
os.close(fd)
@@ -976,14 +974,7 @@
raise POSException.POSKeyError("No blob file", oid, serial)
# First, we'll create the directory for this oid, if it doesn't exist.
- targetpath = self.fshelper.getPathForOID(oid)
- if not os.path.exists(targetpath):
- try:
- os.makedirs(targetpath, 0700)
- except OSError:
- # We might have lost a race. If so, the directory
- # must exist now
- assert os.path.exists(targetpath)
+ targetpath = self.fshelper.getPathForOID(oid, create=True)
# OK, it's not here and we (or someone) needs to get it. We
# want to avoid getting it multiple times. We want to avoid
@@ -1169,19 +1160,15 @@
assert s == tid, (s, tid)
self._cache.store(oid, version, s, None, data)
-
if self.fshelper is not None:
blobs = self._tbuf.blobs
while blobs:
oid, blobfilename = blobs.pop()
- targetpath = self.fshelper.getPathForOID(oid)
- if not os.path.exists(targetpath):
- os.makedirs(targetpath, 0700)
+ targetpath = self.fshelper.getPathForOID(oid, create=True)
rename_or_copy_blob(blobfilename,
self.fshelper.getBlobFilename(oid, tid),
)
-
self._tbuf.clear()
def undo(self, trans_id, txn):
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/tests/testZEO.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/tests/testZEO.py 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZEO/tests/testZEO.py 2008-06-23 07:48:29 UTC (rev 87666)
@@ -516,8 +516,7 @@
self._storage.tpc_abort(t)
raise
self.assert_(not os.path.exists(tfname))
- filename = os.path.join(self.blobdir, oid_repr(oid),
- tid_repr(revid) + BLOB_SUFFIX)
+ filename = self._storage.fshelper.getBlobFilename(oid, revid)
self.assert_(os.path.exists(filename))
self.assertEqual(somedata, open(filename).read())
@@ -632,18 +631,16 @@
d1 = f.read(8096)
d2 = somedata.read(8096)
self.assertEqual(d1, d2)
-
-
- # The file should have been copied to the server:
- filename = os.path.join(self.blobdir, oid_repr(oid),
- tid_repr(revid) + BLOB_SUFFIX)
- check_data(filename)
- # It should also be in the cache:
- filename = os.path.join(self.blob_cache_dir, oid_repr(oid),
- tid_repr(revid) + BLOB_SUFFIX)
+ # The file should be in the cache ...
+ filename = self._storage.fshelper.getBlobFilename(oid, revid)
check_data(filename)
+ # ... and on the server
+ server_filename = filename.replace(self.blob_cache_dir, self.blobdir)
+ self.assert_(server_filename.startswith(self.blobdir))
+ check_data(server_filename)
+
# If we remove it from the cache and call loadBlob, it should
# come back. We can do this in many threads. We'll instrument
# the method that is used to request data from the server to
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py 2008-06-23 07:48:29 UTC (rev 87666)
@@ -15,8 +15,10 @@
"""
import base64
+import binascii
import logging
import os
+import re
import shutil
import stat
import sys
@@ -43,6 +45,9 @@
BLOB_SUFFIX = ".blob"
SAVEPOINT_SUFFIX = ".spb"
+LAYOUT_MARKER = '.layout'
+LAYOUTS = {}
+
valid_modes = 'r', 'w', 'r+', 'a'
# Threading issues:
@@ -292,22 +297,43 @@
# with blobs and storages needn't indirect through this if they
# want to perform blob storage differently.
- def __init__(self, base_dir):
- self.base_dir = base_dir
+ def __init__(self, base_dir, layout_name='automatic'):
+ self.base_dir = os.path.normpath(base_dir) + '/'
self.temp_dir = os.path.join(base_dir, 'tmp')
+ if layout_name == 'automatic':
+ layout_name = auto_layout_select(base_dir)
+ if layout_name == 'lawn':
+ log('The `lawn` blob directory layout is deprecated due to '
+ 'scalability issues on some file systems, please consider '
+ 'migrating to the `bushy` layout.', level=logging.WARN)
+ self.layout_name = layout_name
+ self.layout = LAYOUTS[layout_name]
+
def create(self):
if not os.path.exists(self.base_dir):
os.makedirs(self.base_dir, 0700)
- log("Blob cache directory '%s' does not exist. "
- "Created new directory." % self.base_dir,
- level=logging.INFO)
+ log("Blob directory '%s' does not exist. "
+ "Created new directory." % self.base_dir)
if not os.path.exists(self.temp_dir):
os.makedirs(self.temp_dir, 0700)
log("Blob temporary directory '%s' does not exist. "
- "Created new directory." % self.temp_dir,
- level=logging.INFO)
+ "Created new directory." % self.temp_dir)
+ if not os.path.exists(os.path.join(self.base_dir, LAYOUT_MARKER)):
+ layout_marker = open(
+ os.path.join(self.base_dir, LAYOUT_MARKER), 'wb')
+ layout_marker.write(self.layout_name)
+ else:
+ layout_marker = open(
+ os.path.join(self.base_dir, LAYOUT_MARKER), 'rb')
+ layout = layout_marker.read().strip()
+ if layout != self.layout_name:
+ raise ValueError(
+ "Directory layout `%s` selected for blob directory %s, but "
+ "marker found for layout `%s`" %
+ (self.layout_name, self.base_dir, layout))
+
def isSecure(self, path):
"""Ensure that (POSIX) path mode bits are 0700."""
return (os.stat(path).st_mode & 077) == 0
@@ -317,19 +343,51 @@
log('Blob dir %s has insecure mode setting' % self.base_dir,
level=logging.WARNING)
- def getPathForOID(self, oid):
+ def getPathForOID(self, oid, create=False):
"""Given an OID, return the path on the filesystem where
the blob data relating to that OID is stored.
+ If the create flag is given, the path is also created if it didn't
+ exist already.
+
"""
- return os.path.join(self.base_dir, utils.oid_repr(oid))
+ # OIDs are numbers and sometimes passed around as integers. For our
+ # computations we rely on the 64-bit packed string representation.
+ if isinstance(oid, int):
+ oid = utils.p64(oid)
+ path = self.layout.oid_to_path(oid)
+ path = os.path.join(self.base_dir, path)
+
+ if create and not os.path.exists(path):
+ try:
+ os.makedirs(path, 0700)
+ except OSError:
+ # We might have lost a race. If so, the directory
+ # must exist now
+ assert os.path.exists(targetpath)
+ return path
+
+ def getOIDForPath(self, path):
+ """Given a path, return an OID, if the path is a valid path for an
+ OID. The inverse function to `getPathForOID`.
+
+ Raises ValueError if the path is not valid for an OID.
+
+ """
+ path = path[len(self.base_dir):]
+ return self.layout.path_to_oid(path)
+
def getBlobFilename(self, oid, tid):
"""Given an oid and a tid, return the full filename of the
'committed' blob file related to that oid and tid.
"""
oid_path = self.getPathForOID(oid)
+ # TIDs are numbers and sometimes passed around as integers. For our
+ # computations we rely on the 64-bit packed string representation
+ if isinstance(tid, int):
+ tid = utils.p64(tid)
filename = "%s%s" % (utils.tid_repr(tid), BLOB_SUFFIX)
return os.path.join(oid_path, filename)
@@ -359,10 +417,9 @@
if not filename.endswith(BLOB_SUFFIX):
return None, None
path, filename = os.path.split(filename)
- oid = os.path.split(path)[1]
+ oid = self.getOIDForPath(path)
serial = filename[:-len(BLOB_SUFFIX)]
- oid = utils.repr_to_oid(oid)
serial = utils.repr_to_oid(serial)
return oid, serial
@@ -372,26 +429,107 @@
"""
oids = []
- base_dir = self.base_dir
- for oidpath in os.listdir(base_dir):
- for filename in os.listdir(os.path.join(base_dir, oidpath)):
- blob_path = os.path.join(base_dir, oidpath, filename)
+ for oid, oidpath in self.listOIDs():
+ for filename in os.listdir(oidpath):
+ blob_path = os.path.join(oidpath, filename)
oid, serial = self.splitBlobFilename(blob_path)
if search_serial == serial:
oids.append(oid)
return oids
def listOIDs(self):
- """Lists all OIDs and their paths.
-
+ """Iterates over all paths under the base directory that contain blob
+ files.
"""
- for candidate in os.listdir(self.base_dir):
- if candidate == 'tmp':
+ for path, dirs, files in os.walk(self.base_dir):
+ try:
+ oid = self.getOIDForPath(path)
+ except ValueError:
continue
- oid = utils.repr_to_oid(candidate)
- yield oid, self.getPathForOID(oid)
+ yield oid, path
+def auto_layout_select(path):
+ # A heuristic to look at a path and determine which directory layout to
+ # use.
+ layout_marker = os.path.join(path, LAYOUT_MARKER)
+ if not os.path.exists(path):
+ log('Blob directory %s does not exist. '
+ 'Selected `bushy` layout. ' % path)
+ layout = 'bushy'
+ elif len(os.listdir(path)) == 0:
+ log('Blob directory `%s` is unused and has no layout marker set. '
+ 'Selected `bushy` layout. ' % path)
+ layout = 'bushy'
+ elif LAYOUT_MARKER not in os.listdir(path):
+ log('Blob directory `%s` is used but has no layout marker set. '
+ 'Selected `lawn` layout. ' % path)
+ layout = 'lawn'
+ else:
+ layout = open(layout_marker, 'rb').read()
+ layout = layout.strip()
+ log('Blob directory `%s` has layout marker set. '
+ 'Selected `%s` layout. ' % (path, layout))
+ return layout
+
+
+class BushyLayout(object):
+ """A bushy directory layout for blob directories.
+
+ Creates an 8-level directory structure (one level per byte) in
+ little-endian order from the OID of an object.
+
+ """
+
+ blob_path_pattern = r'^' + (r'0x[0-9a-f]{1,2}/*'*8) + r'$'
+ blob_path_pattern = re.compile(blob_path_pattern)
+
+ def oid_to_path(self, oid):
+ directories = []
+ # Create the bushy directory structure with the least significant byte
+ # first
+ for byte in reversed(str(oid)):
+ directories.append('0x%s' % binascii.hexlify(byte))
+ return '/'.join(directories)
+
+ def path_to_oid(self, path):
+ if self.blob_path_pattern.match(path) is None:
+ raise ValueError("Not a valid OID path: `%s`" % path)
+ path = path.split('/')
+ # The path contains the OID in little endian form but the OID itself
+ # is big endian.
+ path.reverse()
+ # Each path segment stores one byte in hex representation. Decode every
+ # segment back into its raw byte and join them into the OID byte string.
+ oid = ''.join(binascii.unhexlify(byte[2:]) for byte in path)
+ return oid
+
+LAYOUTS['bushy'] = BushyLayout()
+
+
+class LawnLayout(object):
+ """A shallow directory layout for blob directories.
+
+ Creates a single level of directories (one for each oid).
+
+ """
+
+ def oid_to_path(self, oid):
+ return utils.oid_repr(oid)
+
+ def path_to_oid(self, path):
+ try:
+ if path == '':
+ # This is a special case where repr_to_oid converts '' to the
+ # OID z64.
+ raise TypeError()
+ return utils.repr_to_oid(path)
+ except TypeError:
+ raise ValueError('Not a valid OID path: `%s`' % path)
+
+LAYOUTS['lawn'] = LawnLayout()
+
+
class BlobStorage(SpecificationDecoratorBase):
"""A storage to support blobs."""
@@ -401,13 +539,13 @@
# us to have instance attributes explicitly on the proxy.
__slots__ = ('fshelper', 'dirty_oids', '_BlobStorage__supportsUndo')
- def __new__(self, base_directory, storage):
+ def __new__(self, base_directory, storage, layout='automatic'):
return SpecificationDecoratorBase.__new__(self, storage)
- def __init__(self, base_directory, storage):
+ def __init__(self, base_directory, storage, layout='automatic'):
# XXX Log warning if storage is ClientStorage
SpecificationDecoratorBase.__init__(self, storage)
- self.fshelper = FilesystemHelper(base_directory)
+ self.fshelper = FilesystemHelper(base_directory, layout)
self.fshelper.create()
self.fshelper.checkSecure()
self.dirty_oids = []
@@ -438,10 +576,7 @@
self._lock_acquire()
try:
- targetpath = self.fshelper.getPathForOID(oid)
- if not os.path.exists(targetpath):
- os.makedirs(targetpath, 0700)
-
+ self.fshelper.getPathForOID(oid, create=True)
targetname = self.fshelper.getBlobFilename(oid, serial)
rename_or_copy_blob(blobfilename, targetname)
@@ -487,7 +622,6 @@
# if they are still needed by attempting to load the revision
# of that object from the database. This is maybe the slowest
# possible way to do this, but it's safe.
- base_dir = self.fshelper.base_dir
for oid, oid_path in self.fshelper.listOIDs():
files = os.listdir(oid_path)
@@ -495,7 +629,6 @@
filepath = os.path.join(oid_path, filename)
whatever, serial = self.fshelper.splitBlobFilename(filepath)
try:
- fn = self.fshelper.getBlobFilename(oid, serial)
self.loadSerial(oid, serial)
except POSKeyError:
remove_committed(filepath)
@@ -505,7 +638,6 @@
@non_overridable
def _packNonUndoing(self, packtime, referencesf):
- base_dir = self.fshelper.base_dir
for oid, oid_path in self.fshelper.listOIDs():
exists = True
try:
@@ -553,15 +685,11 @@
"""Return the size of the database in bytes."""
orig_size = getProxiedObject(self).getSize()
blob_size = 0
- base_dir = self.fshelper.base_dir
- for oid in os.listdir(base_dir):
- sub_dir = os.path.join(base_dir, oid)
- if not os.path.isdir(sub_dir):
- continue
- for serial in os.listdir(sub_dir):
+ for oid, path in self.fshelper.listOIDs():
+ for serial in os.listdir(path):
if not serial.endswith(BLOB_SUFFIX):
continue
- file_path = os.path.join(base_dir, oid, serial)
+ file_path = os.path.join(path, serial)
blob_size += os.stat(file_path).st_size
return orig_size + blob_size
@@ -584,7 +712,6 @@
# we get all the blob oids on the filesystem related to the
# transaction we want to undo.
for oid in self.fshelper.getOIDsForSerial(serial_id):
-
# we want to find the serial id of the previous revision
# of this blob object.
load_result = self.loadBefore(oid, serial_id)
Copied: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/scripts/migrateblobs.py (from rev 87622, ZODB/trunk/src/ZODB/scripts/migrateblobs.py)
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/scripts/migrateblobs.py (rev 0)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/scripts/migrateblobs.py 2008-06-23 07:48:29 UTC (rev 87666)
@@ -0,0 +1,74 @@
+##############################################################################
+#
+# Copyright (c) 2008 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""A script to migrate a blob directory into a different layout.
+"""
+
+import logging
+import optparse
+import os
+
+from ZODB.blob import FilesystemHelper, rename_or_copy_blob
+from ZODB.utils import cp, oid_repr
+
+
+def link_or_copy(f1, f2):
+ try:
+ os.link(f1, f2)
+ except OSError:
+ shutil.copy(f1, f2)
+
+
+def migrate(source, dest, layout):
+ source_fsh = FilesystemHelper(source)
+ source_fsh.create()
+ dest_fsh = FilesystemHelper(dest, layout)
+ dest_fsh.create()
+ print "Migrating blob data from `%s` (%s) to `%s` (%s)" % (
+ source, source_fsh.layout_name, dest, dest_fsh.layout_name)
+ for oid, path in source_fsh.listOIDs():
+ dest_path = dest_fsh.getPathForOID(oid, create=True)
+ files = os.listdir(path)
+ for file in files:
+ source_file = os.path.join(path, file)
+ dest_file = os.path.join(dest_path, file)
+ link_or_copy(source_file, dest_file)
+ print "\tOID: %s - %s files " % (oid_repr(oid), len(files))
+
+
+def main(source=None, dest=None, layout="bushy"):
+ usage = "usage: %prog [options] <source> <dest> <layout>"
+ description = ("Create the new directory <dest> and migrate all blob "
+ "data <source> to <dest> while using the new <layout> for "
+ "<dest>")
+
+ parser = optparse.OptionParser(usage=usage, description=description)
+ parser.add_option("-l", "--layout",
+ default=layout, type='choice',
+ choices=['bushy', 'lawn'],
+ help="Define the layout to use for the new directory "
+ "(bushy or lawn). Default: %default")
+ options, args = parser.parse_args()
+
+ if not len(args) == 2:
+ parser.error("source and destination must be given")
+
+ logging.getLogger().addHandler(logging.StreamHandler())
+ logging.getLogger().setLevel(0)
+
+ source, dest = args
+ migrate(source, dest, options.layout)
+
+
+if __name__ == '__main__':
+ main()
Copied: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt (from rev 87602, ZODB/trunk/src/ZODB/tests/blob_layout.txt)
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt (rev 0)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt 2008-06-23 07:48:29 UTC (rev 87666)
@@ -0,0 +1,283 @@
+======================
+Blob directory layouts
+======================
+
+The internal structure of the blob directories is governed by so called
+`layouts`. The current default layout is called `bushy`.
+
+The original blob implementation used a layout that we now call `lawn` and
+which is still available for backwards compatibility.
+
+Layouts implement two methods: one for computing a relative path for an
+OID and one for turning a relative path back into an OID.
+
+Our terminology is roughly the same as used in `DirectoryStorage`.
+
+The `bushy` layout
+==================
+
+The bushy layout splits the OID into its 8 bytes, reverses their order and
+creates one directory level for each byte, named by the hexlified
+representation of the byte value. This results in 8 levels of directories with
+at most 256 entries per directory level; the leaf directories hold the
+revisions of the blobs:
+
+>>> from ZODB.blob import BushyLayout
+>>> bushy = BushyLayout()
+>>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
+'0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
+>>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
+'0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
+
+>>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
+'\x00\x00\x00\x00\x00\x00\x00\x01'
+>>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
+'\x00\x00\x00\x00\x00\x00\x00\xff'
+
+Paths that do not represent an OID will cause a ValueError:
+
+>>> bushy.path_to_oid('tmp')
+Traceback (most recent call last):
+ValueError: Not a valid OID path: `tmp`
+
+
+The `lawn` layout
+=================
+
+The lawn layout creates one directory for each blob, named by the hex
+representation of the blob's OID. This has some limitations on various file
+systems, like performance penalties or the inability to store more than a given
+number of blobs at the same time (e.g. 32k on ext3).
+
+>>> from ZODB.blob import LawnLayout
+>>> lawn = LawnLayout()
+>>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
+'0x00'
+>>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
+'0x01'
+
+>>> lawn.path_to_oid('0x01')
+'\x00\x00\x00\x00\x00\x00\x00\x01'
+
+Paths that do not represent an OID will cause a ValueError:
+
+>>> lawn.path_to_oid('tmp')
+Traceback (most recent call last):
+ValueError: Not a valid OID path: `tmp`
+>>> lawn.path_to_oid('')
+Traceback (most recent call last):
+ValueError: Not a valid OID path: ``
+
+
+Auto-detecting the layout of a directory
+========================================
+
+To allow easier migration, we provide an auto-detection feature that analyses a
+blob directory and decides which layout to use. In general it prefers the
+`bushy` layout, unless it determines that the directory has already been used
+with a `lawn` structure.
+
+>>> from ZODB.blob import auto_layout_select
+
+1. Non-existing directories will trigger a bushy layout:
+
+>>> import tempfile
+>>> import shutil
+>>> d = tempfile.mkdtemp()
+>>> shutil.rmtree(d)
+>>> auto_layout_select(d)
+'bushy'
+
+2. Empty directories will trigger a bushy layout too:
+
+>>> d = tempfile.mkdtemp()
+>>> auto_layout_select(d)
+'bushy'
+
+3. If the directory contains a marker for the strategy it will be used:
+
+>>> from ZODB.blob import LAYOUT_MARKER
+>>> import os.path
+>>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('bushy')
+>>> auto_layout_select(d)
+'bushy'
+>>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('lawn')
+>>> auto_layout_select(d)
+'lawn'
+>>> shutil.rmtree(d)
+
+4. If the directory does not contain a marker but does contain other files, we
+assume that it was created with an earlier version of the blob implementation
+and use the `lawn` layout:
+
+>>> d = tempfile.mkdtemp()
+>>> open(os.path.join(d, '0x0101'), 'wb').write('foo')
+>>> auto_layout_select(d)
+'lawn'
+>>> shutil.rmtree(d)
+
+
+Directory layout markers
+========================
+
+When the file system helper (FSH) is asked to create the directory structure,
+it will leave a marker with the chosen layout if no marker exists yet:
+
+>>> from ZODB.blob import FilesystemHelper
+>>> d = tempfile.mkdtemp()
+>>> blobs = os.path.join(d, 'blobs')
+>>> fsh = FilesystemHelper(blobs)
+>>> fsh.layout_name
+'bushy'
+>>> fsh.create()
+>>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
+'bushy'
+
+If the FSH finds a marker, then it verifies whether its content matches the
+strategy that was chosen. It will raise an exception if we try to work with a
+directory that has a different marker than the chosen strategy:
+
+>>> fsh = FilesystemHelper(blobs, 'lawn')
+>>> fsh.layout_name
+'lawn'
+>>> fsh.create() # doctest: +ELLIPSIS
+Traceback (most recent call last):
+ValueError: Directory layout `lawn` selected for blob directory /.../blobs/, but marker found for layout `bushy`
+>>> shutil.rmtree(blobs)
+
+This feature interacts with the automatic detection in such a way that an
+unmarked directory is marked the first time its layout is auto-guessed, and
+the marker is used from then on:
+
+>>> import ZODB.FileStorage
+>>> from ZODB.blob import BlobStorage
+>>> datafs = os.path.join(d, 'data.fs')
+>>> base_storage = ZODB.FileStorage.FileStorage(datafs)
+
+>>> os.mkdir(blobs)
+>>> open(os.path.join(blobs, 'foo'), 'wb').write('foo')
+>>> blob_storage = BlobStorage(blobs, base_storage)
+>>> blob_storage.fshelper.layout_name
+'lawn'
+>>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
+'lawn'
+>>> blob_storage = BlobStorage(blobs, base_storage, layout='bushy') # doctest: +ELLIPSIS
+Traceback (most recent call last):
+ValueError: Directory layout `bushy` selected for blob directory /.../blobs/, but marker found for layout `lawn`
+
+
+>>> shutil.rmtree(d)
+
+
+Migrating between directory layouts
+===================================
+
+A script called `migrateblobs.py` is distributed with the ZODB for offline
+migration between different directory layouts. It can migrate any
+blob directory layout to any other layout. It leaves the original blob
+directory untouched (except for possibly creating a temporary directory and
+the storage layout marker).
+
+The migration is accessible as a library function:
+
+>>> from ZODB.scripts.migrateblobs import migrate
+
+Create a `lawn` directory structure and migrate it to the new `bushy` one:
+
+>>> from ZODB.blob import FilesystemHelper
+>>> d = tempfile.mkdtemp()
+>>> old = os.path.join(d, 'old')
+>>> old_fsh = FilesystemHelper(old, 'lawn')
+>>> old_fsh.create()
+>>> blob1 = old_fsh.getPathForOID(7039, create=True)
+>>> blob2 = old_fsh.getPathForOID(10, create=True)
+>>> blob3 = old_fsh.getPathForOID(7034, create=True)
+>>> open(os.path.join(blob1, 'foo'), 'wb').write('foo')
+>>> open(os.path.join(blob1, 'foo2'), 'wb').write('bar')
+>>> open(os.path.join(blob2, 'foo3'), 'wb').write('baz')
+>>> open(os.path.join(blob2, 'foo4'), 'wb').write('qux')
+>>> open(os.path.join(blob3, 'foo5'), 'wb').write('quux')
+>>> open(os.path.join(blob3, 'foo6'), 'wb').write('corge')
+
+Committed blobs have their permissions set to 000
+
+The migration function is called with the old and the new path and the layout
+that shall be used for the new directory:
+
+>>> bushy = os.path.join(d, 'bushy')
+>>> migrate(old, bushy, 'bushy') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
+ OID: 0x1b7f - 2 files
+ OID: 0x0a - 2 files
+ OID: 0x1b7a - 2 files
+
+The new directory now contains the same files in different directories, but
+with the same sizes and permissions:
+
+>>> import string
+>>> def stat(path):
+... s = os.stat(path)
+... print "%s\t%s\t%s" % (string.rjust(oct(s.st_mode), 10), s.st_size, path)
+>>> def ls(path):
+... for p, dirs, files in os.walk(path):
+... stat(p)
+... for file in files:
+... stat(os.path.join(p, file))
+>>> ls(bushy)
+ 040700 4096 /.../bushy
+ 0100644 5 /.../bushy/.layout
+ 040700 4096 /.../bushy/0x7a
+ 040700 4096 /.../bushy/0x7a/0x1b
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
+ 0100644 5 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
+ 0100644 4 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
+ 040700 4096 /.../bushy/tmp
+ 040700 4096 /.../bushy/0x0a
+ 040700 4096 /.../bushy/0x0a/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
+ 0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
+ 0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
+ 040700 4096 /.../bushy/0x7f
+ 040700 4096 /.../bushy/0x7f/0x1b
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
+ 0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
+ 0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
+
+We can also migrate the bushy layout back to the lawn layout:
+
+>>> lawn = os.path.join(d, 'lawn')
+>>> migrate(bushy, lawn, 'lawn')
+Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
+ OID: 0x1b7a - 2 files
+ OID: 0x0a - 2 files
+ OID: 0x1b7f - 2 files
+>>> ls(lawn)
+ 040700 4096 /.../lawn
+ 0100644 4 /.../lawn/.layout
+ 040700 4096 /.../lawn/0x1b7f
+ 0100644 3 /.../lawn/0x1b7f/foo
+ 0100644 3 /.../lawn/0x1b7f/foo2
+ 040700 4096 /.../lawn/tmp
+ 040700 4096 /.../lawn/0x0a
+ 0100644 3 /.../lawn/0x0a/foo4
+ 0100644 3 /.../lawn/0x0a/foo3
+ 040700 4096 /.../lawn/0x1b7a
+ 0100644 5 /.../lawn/0x1b7a/foo6
+ 0100644 4 /.../lawn/0x1b7a/foo5
+
+>>> shutil.rmtree(d)
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_tempdir.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_tempdir.txt 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_tempdir.txt 2008-06-23 07:48:29 UTC (rev 87666)
@@ -32,7 +32,7 @@
>>> from ZODB.DB import DB
>>> from tempfile import mkdtemp
>>> import os.path
- >>> base_storage = MappingStorage("test")
+ >>> base_storage = MappingStorage('test')
>>> blob_dir = mkdtemp()
>>> blob_storage = BlobStorage(blob_dir, base_storage)
>>> database = DB(blob_storage)
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_transaction.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_transaction.txt 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_transaction.txt 2008-06-23 07:48:29 UTC (rev 87666)
@@ -322,9 +322,9 @@
>>> base_storage = DummyBaseStorage()
>>> blob_dir2 = mkdtemp()
>>> blob_storage2 = BlobStorage(blob_dir2, base_storage)
- >>> committed_blob_dir = os.path.join(blob_dir2, '0')
- >>> committed_blob_file = os.path.join(committed_blob_dir, '0.blob')
- >>> os.mkdir(committed_blob_dir)
+ >>> committed_blob_dir = blob_storage2.fshelper.getPathForOID(0)
+ >>> os.makedirs(committed_blob_dir)
+ >>> committed_blob_file = blob_storage2.fshelper.getBlobFilename(0, 0)
>>> open(os.path.join(committed_blob_file), 'w').write('foo')
>>> os.path.exists(committed_blob_file)
True
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py 2008-06-23 06:46:05 UTC (rev 87665)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py 2008-06-23 07:48:29 UTC (rev 87666)
@@ -105,7 +105,6 @@
self.here = os.getcwd()
os.chdir(self.test_dir)
self.storagefile = 'Data.fs'
- os.mkdir('blobs')
self.blob_dir = 'blobs'
def tearDown(self):
@@ -483,7 +482,7 @@
We can access the blob correctly:
>>> tmpstore.loadBlob(blob_oid, tid) # doctest: +ELLIPSIS
- '.../0x01/0x...blob'
+ '.../0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x...blob'
Clean up:
@@ -507,6 +506,12 @@
setUp=ZODB.tests.util.setUp,
tearDown=ZODB.tests.util.tearDown,
))
+ suite.addTest(doctest.DocFileSuite(
+ "blob_layout.txt",
+ optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE|doctest.REPORT_NDIFF,
+ setUp=ZODB.tests.util.setUp,
+ tearDown=ZODB.tests.util.tearDown,
+ ))
suite.addTest(doctest.DocTestSuite(
setUp=ZODB.tests.util.setUp,
tearDown=ZODB.tests.util.tearDown,
More information about the Zodb-checkins
mailing list