[Zodb-checkins] SVN: ZODB/branches/ctheune-bushy-directory/src/ merge from current trunk

Christian Theune ct at gocept.com
Mon Aug 4 14:39:18 EDT 2008


Log message for revision 89351:
  merge from current trunk
  

Changed:
  U   ZODB/branches/ctheune-bushy-directory/src/BTrees/Interfaces.py
  U   ZODB/branches/ctheune-bushy-directory/src/CHANGES.txt
  U   ZODB/branches/ctheune-bushy-directory/src/ZEO/ClientStorage.py
  U   ZODB/branches/ctheune-bushy-directory/src/ZEO/tests/testZEO.py
  U   ZODB/branches/ctheune-bushy-directory/src/ZODB/blob.py
  D   ZODB/branches/ctheune-bushy-directory/src/ZODB/scripts/migrateblobs.py
  D   ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_layout.txt
  U   ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_packing.txt
  U   ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_tempdir.txt
  U   ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_transaction.txt
  U   ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/testblob.py

-=-
Modified: ZODB/branches/ctheune-bushy-directory/src/BTrees/Interfaces.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/BTrees/Interfaces.py	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/BTrees/Interfaces.py	2008-08-04 18:39:18 UTC (rev 89351)
@@ -140,13 +140,19 @@
         Return the default if has_key(key) is false.
         """
 
+    def __getitem__(key):
+        """Get the value associated with the given key.
+
+        Raise KeyError if has_key(key) is false.
+        """
+
     def __setitem__(key, value):
         """Set the value associated with the given key."""
 
     def __delitem__(key):
         """Delete the value associated with the given key.
 
-        Raise KeyError if the key if has_key(key) is false.
+        Raise KeyError if has_key(key) is false.
         """
 
     def values(min=None, max=None, excludemin=False, excludemax=False):

Modified: ZODB/branches/ctheune-bushy-directory/src/CHANGES.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/CHANGES.txt	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/CHANGES.txt	2008-08-04 18:39:18 UTC (rev 89351)
@@ -8,12 +8,6 @@
 New Features
 ------------
 
-- Changed layout strategy for the blob directory to a bushy approach (8 levels
-  deep, at most ~256 entries per directory level, one directory for each
-  blob). Old directories are automatically detected and will be handled with
-  the old strategy. A migration script (`migrateblobs.py`) is provided to
-  convert the different layouts.
-
 - Versions are no-longer supported.
 
 - ZEO cache files can be larger than 4G. Note that older ZEO cache
@@ -43,8 +37,10 @@
 Bugs Fixed
 ----------
 
-- Fix for bug# 220856: Completed implementation of ZEO authentication.
+- Fix for bug #251037: Make packing of blob storages non-blocking.
 
+- Fix for bug #220856: Completed implementation of ZEO authentication.
+
 - Fix for bug #184057: Make initialisation of small ZEO client file cache
   sizes not fail.
 

Modified: ZODB/branches/ctheune-bushy-directory/src/ZEO/ClientStorage.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZEO/ClientStorage.py	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZEO/ClientStorage.py	2008-08-04 18:39:18 UTC (rev 89351)
@@ -855,7 +855,9 @@
 
     def _storeBlob_shared(self, oid, serial, data, filename, txn):
         # First, move the blob into the blob directory
-        self.fshelper.getPathForOID(oid, create=True)
+        dir = self.fshelper.getPathForOID(oid)
+        if not os.path.exists(dir):
+            os.mkdir(dir)
         fd, target = self.fshelper.blob_mkstemp(oid, serial)
         os.close(fd)
 
@@ -922,7 +924,14 @@
             raise POSException.POSKeyError("No blob file", oid, serial)
 
         # First, we'll create the directory for this oid, if it doesn't exist. 
-        targetpath = self.fshelper.getPathForOID(oid, create=True)
+        targetpath = self.fshelper.getPathForOID(oid)
+        if not os.path.exists(targetpath):
+            try:
+                os.makedirs(targetpath, 0700)
+            except OSError:
+                # We might have lost a race.  If so, the directory
+                # must exist now
+                assert os.path.exists(targetpath)
 
         # OK, it's not here and we (or someone) needs to get it.  We
         # want to avoid getting it multiple times.  We want to avoid
@@ -1109,15 +1118,19 @@
                     assert s == tid, (s, tid)
                     self._cache.store(oid, s, None, data)
 
+        
         if self.fshelper is not None:
             blobs = self._tbuf.blobs
             while blobs:
                 oid, blobfilename = blobs.pop()
-                targetpath = self.fshelper.getPathForOID(oid, create=True)
+                targetpath = self.fshelper.getPathForOID(oid)
+                if not os.path.exists(targetpath):
+                    os.makedirs(targetpath, 0700)
                 rename_or_copy_blob(blobfilename,
                           self.fshelper.getBlobFilename(oid, tid),
                           )
 
+                    
         self._tbuf.clear()
 
     def undo(self, trans_id, txn):

Modified: ZODB/branches/ctheune-bushy-directory/src/ZEO/tests/testZEO.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZEO/tests/testZEO.py	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZEO/tests/testZEO.py	2008-08-04 18:39:18 UTC (rev 89351)
@@ -515,7 +515,8 @@
             self._storage.tpc_abort(t)
             raise
         self.assert_(not os.path.exists(tfname))
-        filename = self._storage.fshelper.getBlobFilename(oid, revid)
+        filename = os.path.join(self.blobdir, oid_repr(oid),
+                                tid_repr(revid) + BLOB_SUFFIX)
         self.assert_(os.path.exists(filename))
         self.assertEqual(somedata, open(filename).read())
 
@@ -629,16 +630,18 @@
                 d1 = f.read(8096)
                 d2 = somedata.read(8096)
                 self.assertEqual(d1, d2)
+                
+        
+        # The file should have been copied to the server:
+        filename = os.path.join(self.blobdir, oid_repr(oid),
+                                tid_repr(revid) + BLOB_SUFFIX)
+        check_data(filename)
 
-        # The file should be in the cache ...
-        filename = self._storage.fshelper.getBlobFilename(oid, revid)
+        # It should also be in the cache:
+        filename = os.path.join(self.blob_cache_dir, oid_repr(oid),
+                                tid_repr(revid) + BLOB_SUFFIX)
         check_data(filename)
 
-        # ... and on the server
-        server_filename = filename.replace(self.blob_cache_dir, self.blobdir)
-        self.assert_(server_filename.startswith(self.blobdir))
-        check_data(server_filename)
-
         # If we remove it from the cache and call loadBlob, it should
         # come back. We can do this in many threads.  We'll instrument
         # the method that is used to request data from teh server to

Modified: ZODB/branches/ctheune-bushy-directory/src/ZODB/blob.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/blob.py	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/blob.py	2008-08-04 18:39:18 UTC (rev 89351)
@@ -15,10 +15,8 @@
 """
 
 import base64
-import binascii
 import logging
 import os
-import re
 import shutil
 import stat
 import sys
@@ -45,9 +43,6 @@
 BLOB_SUFFIX = ".blob"
 SAVEPOINT_SUFFIX = ".spb"
 
-LAYOUT_MARKER = '.layout'
-LAYOUTS = {}
-
 valid_modes = 'r', 'w', 'r+', 'a'
 
 # Threading issues:
@@ -297,43 +292,22 @@
     # with blobs and storages needn't indirect through this if they
     # want to perform blob storage differently.
 
-    def __init__(self, base_dir, layout_name='automatic'):
-        self.base_dir = os.path.normpath(base_dir) + '/'
+    def __init__(self, base_dir):
+        self.base_dir = base_dir
         self.temp_dir = os.path.join(base_dir, 'tmp')
 
-        if layout_name == 'automatic':
-            layout_name = auto_layout_select(base_dir)
-        if layout_name == 'lawn':
-            log('The `lawn` blob directory layout is deprecated due to '
-                'scalability issues on some file systems, please consider '
-                'migrating to the `bushy` layout.', level=logging.WARN)
-        self.layout_name = layout_name 
-        self.layout = LAYOUTS[layout_name]
-
     def create(self):
         if not os.path.exists(self.base_dir):
             os.makedirs(self.base_dir, 0700)
-            log("Blob directory '%s' does not exist. "
-                "Created new directory." % self.base_dir)
+            log("Blob cache directory '%s' does not exist. "
+                "Created new directory." % self.base_dir,
+                level=logging.INFO)
         if not os.path.exists(self.temp_dir):
             os.makedirs(self.temp_dir, 0700)
             log("Blob temporary directory '%s' does not exist. "
-                "Created new directory." % self.temp_dir)
+                "Created new directory." % self.temp_dir,
+                level=logging.INFO)
 
-        if not os.path.exists(os.path.join(self.base_dir, LAYOUT_MARKER)):
-            layout_marker = open(
-                os.path.join(self.base_dir, LAYOUT_MARKER), 'wb')
-            layout_marker.write(self.layout_name)
-        else:
-            layout_marker = open(
-                os.path.join(self.base_dir, LAYOUT_MARKER), 'rb')
-            layout = layout_marker.read().strip()
-            if layout != self.layout_name:
-                raise ValueError(
-                    "Directory layout `%s` selected for blob directory %s, but "
-                    "marker found for layout `%s`" %
-                    (self.layout_name, self.base_dir, layout))
-
     def isSecure(self, path):
         """Ensure that (POSIX) path mode bits are 0700."""
         return (os.stat(path).st_mode & 077) == 0
@@ -343,51 +317,19 @@
             log('Blob dir %s has insecure mode setting' % self.base_dir,
                 level=logging.WARNING)
 
-    def getPathForOID(self, oid, create=False):
+    def getPathForOID(self, oid):
         """Given an OID, return the path on the filesystem where
         the blob data relating to that OID is stored.
 
-        If the create flag is given, the path is also created if it didn't
-        exist already.
-
         """
-        # OIDs are numbers and sometimes passed around as integers. For our
-        # computations we rely on the 64-bit packed string representation.
-        if isinstance(oid, int):
-            oid = utils.p64(oid)
+        return os.path.join(self.base_dir, utils.oid_repr(oid))
 
-        path = self.layout.oid_to_path(oid)
-        path = os.path.join(self.base_dir, path)
-
-        if create and not os.path.exists(path):
-            try:
-                os.makedirs(path, 0700)
-            except OSError:
-                # We might have lost a race.  If so, the directory
-                # must exist now
-                assert os.path.exists(targetpath)
-        return path
-
-    def getOIDForPath(self, path):
-        """Given a path, return an OID, if the path is a valid path for an
-        OID. The inverse function to `getPathForOID`.
-
-        Raises ValueError if the path is not valid for an OID.
-
-        """
-        path = path[len(self.base_dir):]
-        return self.layout.path_to_oid(path)
-
     def getBlobFilename(self, oid, tid):
         """Given an oid and a tid, return the full filename of the
         'committed' blob file related to that oid and tid.
 
         """
         oid_path = self.getPathForOID(oid)
-        # TIDs are numbers and sometimes passed around as integers. For our
-        # computations we rely on the 64-bit packed string representation
-        if isinstance(tid, int):
-            tid = utils.p64(tid)
         filename = "%s%s" % (utils.tid_repr(tid), BLOB_SUFFIX)
         return os.path.join(oid_path, filename)
 
@@ -417,9 +359,10 @@
         if not filename.endswith(BLOB_SUFFIX):
             return None, None
         path, filename = os.path.split(filename)
-        oid = self.getOIDForPath(path)
+        oid = os.path.split(path)[1]
 
         serial = filename[:-len(BLOB_SUFFIX)]
+        oid = utils.repr_to_oid(oid)
         serial = utils.repr_to_oid(serial)
         return oid, serial 
 
@@ -429,107 +372,30 @@
 
         """
         oids = []
-        for oid, oidpath in self.listOIDs():
-            for filename in os.listdir(oidpath):
-                blob_path = os.path.join(oidpath, filename)
+        base_dir = self.base_dir
+        for oidpath in os.listdir(base_dir):
+            for filename in os.listdir(os.path.join(base_dir, oidpath)):
+                blob_path = os.path.join(base_dir, oidpath, filename)
                 oid, serial = self.splitBlobFilename(blob_path)
                 if search_serial == serial:
                     oids.append(oid)
         return oids
 
     def listOIDs(self):
-        """Iterates over all paths under the base directory that contain blob
-        files.
+        """Lists all OIDs and their paths.
+
         """
-        for path, dirs, files in os.walk(self.base_dir):
-            try:
-                oid = self.getOIDForPath(path)
-            except ValueError:
+        for candidate in os.listdir(self.base_dir):
+            if candidate == 'tmp':
                 continue
-            yield oid, path
+            oid = utils.repr_to_oid(candidate)
+            yield oid, self.getPathForOID(oid)
 
 
-def auto_layout_select(path):
-    # A heuristic to look at a path and determine which directory layout to
-    # use.
-    layout_marker = os.path.join(path, LAYOUT_MARKER)
-    if not os.path.exists(path):
-        log('Blob directory %s does not exist. '
-            'Selected `bushy` layout. ' % path)
-        layout = 'bushy'
-    elif len(os.listdir(path)) == 0:
-        log('Blob directory `%s` is unused and has no layout marker set. '
-            'Selected `bushy` layout. ' % path)
-        layout = 'bushy'
-    elif LAYOUT_MARKER not in os.listdir(path):
-        log('Blob directory `%s` is used but has no layout marker set. '
-            'Selected `lawn` layout. ' % path)
-        layout = 'lawn'
-    else:
-        layout = open(layout_marker, 'rb').read()
-        layout = layout.strip()
-        log('Blob directory `%s` has layout marker set. '
-            'Selected `%s` layout. ' % (path, layout))
-    return layout
+class BlobStorageError(Exception):
+    """The blob storage encountered an invalid state."""
 
 
-class BushyLayout(object):
-    """A bushy directory layout for blob directories.
-
-    Creates an 8-level directory structure (one level per byte) in
-    little-endian order from the OID of an object.
-
-    """
-
-    blob_path_pattern = r'^' + (r'0x[0-9a-f]{1,2}/*'*8) + r'$'
-    blob_path_pattern = re.compile(blob_path_pattern)
-
-    def oid_to_path(self, oid):
-        directories = []
-        # Create the bushy directory structure with the least significant byte
-        # first
-        for byte in reversed(str(oid)):
-            directories.append('0x%s' % binascii.hexlify(byte))
-        return '/'.join(directories)
-
-    def path_to_oid(self, path):
-        if self.blob_path_pattern.match(path) is None:
-            raise ValueError("Not a valid OID path: `%s`" % path)
-        path = path.split('/')
-        # The path contains the OID in little endian form but the OID itself
-        # is big endian.
-        path.reverse()
-        # Each path segment stores a byte in hex representation. Turn it into
-        # an int and then get the character for our byte string.
-        oid = ''.join(binascii.unhexlify(byte[2:]) for byte in path)
-        return oid
-
-LAYOUTS['bushy'] = BushyLayout()
-
-
-class LawnLayout(object):
-    """A shallow directory layout for blob directories.
-
-    Creates a single level of directories (one for each oid).
-
-    """
-
-    def oid_to_path(self, oid):
-        return utils.oid_repr(oid)
-
-    def path_to_oid(self, path):
-        try:
-            if path == '':
-                # This is a special case where repr_to_oid converts '' to the
-                # OID z64.
-                raise TypeError()
-            return utils.repr_to_oid(path)
-        except TypeError:
-            raise ValueError('Not a valid OID path: `%s`' % path)
-
-LAYOUTS['lawn'] = LawnLayout()
-
-
 class BlobStorage(SpecificationDecoratorBase):
     """A storage to support blobs."""
 
@@ -537,15 +403,16 @@
 
     # Proxies can't have a __dict__ so specifying __slots__ here allows
     # us to have instance attributes explicitly on the proxy.
-    __slots__ = ('fshelper', 'dirty_oids', '_BlobStorage__supportsUndo')
+    __slots__ = ('fshelper', 'dirty_oids', '_BlobStorage__supportsUndo',
+                 '_blobs_pack_is_in_progress', )
 
-    def __new__(self, base_directory, storage, layout='automatic'):
+    def __new__(self, base_directory, storage):
         return SpecificationDecoratorBase.__new__(self, storage)
 
-    def __init__(self, base_directory, storage, layout='automatic'):
+    def __init__(self, base_directory, storage):
         # XXX Log warning if storage is ClientStorage
         SpecificationDecoratorBase.__init__(self, storage)
-        self.fshelper = FilesystemHelper(base_directory, layout)
+        self.fshelper = FilesystemHelper(base_directory)
         self.fshelper.create()
         self.fshelper.checkSecure()
         self.dirty_oids = []
@@ -556,6 +423,7 @@
         else:
             supportsUndo = supportsUndo()
         self.__supportsUndo = supportsUndo
+        self._blobs_pack_is_in_progress = False
 
     @non_overridable
     def temporaryDirectory(self):
@@ -576,7 +444,10 @@
 
         self._lock_acquire()
         try:
-            self.fshelper.getPathForOID(oid, create=True)
+            targetpath = self.fshelper.getPathForOID(oid)
+            if not os.path.exists(targetpath):
+                os.makedirs(targetpath, 0700)
+
             targetname = self.fshelper.getBlobFilename(oid, serial)
             rename_or_copy_blob(blobfilename, targetname)
 
@@ -622,12 +493,14 @@
         # if they are still needed by attempting to load the revision
         # of that object from the database.  This is maybe the slowest
         # possible way to do this, but it's safe.
+        base_dir = self.fshelper.base_dir
         for oid, oid_path in self.fshelper.listOIDs():
             files = os.listdir(oid_path)
             for filename in files:
                 filepath = os.path.join(oid_path, filename)
                 whatever, serial = self.fshelper.splitBlobFilename(filepath)
                 try:
+                    fn = self.fshelper.getBlobFilename(oid, serial)
                     self.loadSerial(oid, serial)
                 except POSKeyError:
                     remove_committed(filepath)
@@ -637,6 +510,7 @@
 
     @non_overridable
     def _packNonUndoing(self, packtime, referencesf):
+        base_dir = self.fshelper.base_dir
         for oid, oid_path in self.fshelper.listOIDs():
             exists = True
             try:
@@ -660,21 +534,29 @@
 
     @non_overridable
     def pack(self, packtime, referencesf):
-        """Remove all unused oid/tid combinations."""
-        unproxied = getProxiedObject(self)
-
-        # pack the underlying storage, which will allow us to determine
-        # which serials are current.
-        result = unproxied.pack(packtime, referencesf)
-
-        # perform a pack on blob data
+        """Remove all unused OID/TID combinations."""
         self._lock_acquire()
         try:
+            if self._blobs_pack_is_in_progress:
+                raise BlobStorageError('Already packing')
+            self._blobs_pack_is_in_progress = True
+        finally:
+            self._lock_release()
+
+        try:
+            # Pack the underlying storage, which will allow us to determine
+            # which serials are current.
+            unproxied = getProxiedObject(self)
+            result = unproxied.pack(packtime, referencesf)
+
+            # Perform a pack on the blob data.
             if self.__supportsUndo:
                 self._packUndoing(packtime, referencesf)
             else:
                 self._packNonUndoing(packtime, referencesf)
         finally:
+            self._lock_acquire()
+            self._blobs_pack_is_in_progress = False
             self._lock_release()
 
         return result
@@ -704,6 +586,7 @@
             # we get all the blob oids on the filesystem related to the
             # transaction we want to undo.
             for oid in self.fshelper.getOIDsForSerial(serial_id):
+
                 # we want to find the serial id of the previous revision
                 # of this blob object.
                 load_result = self.loadBefore(oid, serial_id)

Deleted: ZODB/branches/ctheune-bushy-directory/src/ZODB/scripts/migrateblobs.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/scripts/migrateblobs.py	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/scripts/migrateblobs.py	2008-08-04 18:39:18 UTC (rev 89351)
@@ -1,74 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2008 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""A script to migrate a blob directory into a different layout.
-"""
-
-import logging
-import optparse
-import os
-
-from ZODB.blob import FilesystemHelper, rename_or_copy_blob
-from ZODB.utils import cp, oid_repr
-
-
-def link_or_copy(f1, f2):
-    try:
-        os.link(f1, f2)
-    except OSError:
-        shutil.copy(f1, f2)
-
-
-def migrate(source, dest, layout):
-    source_fsh = FilesystemHelper(source)
-    source_fsh.create()
-    dest_fsh = FilesystemHelper(dest, layout)
-    dest_fsh.create()
-    print "Migrating blob data from `%s` (%s) to `%s` (%s)" % (
-        source, source_fsh.layout_name, dest, dest_fsh.layout_name)
-    for oid, path in source_fsh.listOIDs():
-        dest_path = dest_fsh.getPathForOID(oid, create=True)
-        files = os.listdir(path)
-        for file in files:
-            source_file = os.path.join(path, file)
-            dest_file = os.path.join(dest_path, file)
-            link_or_copy(source_file, dest_file)
-        print "\tOID: %s - %s files " % (oid_repr(oid), len(files))
-
-
-def main(source=None, dest=None, layout="bushy"):
-    usage = "usage: %prog [options] <source> <dest> <layout>"
-    description = ("Create the new directory <dest> and migrate all blob "
-                   "data <source> to <dest> while using the new <layout> for "
-                   "<dest>")
-
-    parser = optparse.OptionParser(usage=usage, description=description)
-    parser.add_option("-l", "--layout",
-                      default=layout, type='choice',
-                      choices=['bushy', 'lawn'],
-                      help="Define the layout to use for the new directory "
-                      "(bushy or lawn). Default: %default")
-    options, args = parser.parse_args()
-
-    if not len(args) == 2:
-        parser.error("source and destination must be given")
-
-    logging.getLogger().addHandler(logging.StreamHandler())
-    logging.getLogger().setLevel(0)
-
-    source, dest = args
-    migrate(source, dest, options.layout)
-
-
-if __name__ == '__main__':
-    main()

Deleted: ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_layout.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_layout.txt	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_layout.txt	2008-08-04 18:39:18 UTC (rev 89351)
@@ -1,283 +0,0 @@
-======================
-Blob directory layouts
-======================
-
-The internal structure of the blob directories is governed by so called
-`layouts`. The current default layout is called `bushy`.
-
-The original blob implementation used a layout that we now call `lawn` and
-which is still available for backwards compatibility.
-
-Layouts implement two methods: one for computing a relative path for an
-OID and one for turning a relative path back into an OID.
-
-Our terminology is roughly the same as used in `DirectoryStorage`.
-
-The `bushy` layout
-==================
-
-The bushy layout splits the OID into the 8 byte parts, reverses them and
-creates one directory level for each part, named by the hexlified
-representation of the byte value. This results in 8 levels of directories, the
-leaf directories being used for the revisions of the blobs and at most 256
-entries per directory level:
-
->>> from ZODB.blob import BushyLayout
->>> bushy = BushyLayout()
->>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
-'0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
->>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
-'0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
-
->>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
-'\x00\x00\x00\x00\x00\x00\x00\x01'
->>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
-'\x00\x00\x00\x00\x00\x00\x00\xff'
-
-Paths that do not represent an OID will cause a ValueError:
-
->>> bushy.path_to_oid('tmp')
-Traceback (most recent call last):
-ValueError: Not a valid OID path: `tmp`
-
-
-The `lawn` layout
-=================
-
-The lawn layout creates on directory for each blob named by the blob's hex
-representation of its OID. This has some limitations on various file systems
-like performance penalties or the inability to store more than a given number
-of blobs at the same time (e.g. 32k on ext3).
-
->>> from ZODB.blob import LawnLayout
->>> lawn = LawnLayout()
->>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
-'0x00'
->>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
-'0x01'
-
->>> lawn.path_to_oid('0x01')
-'\x00\x00\x00\x00\x00\x00\x00\x01'
-
-Paths that do not represent an OID will cause a ValueError:
-
->>> lawn.path_to_oid('tmp')
-Traceback (most recent call last):
-ValueError: Not a valid OID path: `tmp`
->>> lawn.path_to_oid('')
-Traceback (most recent call last):
-ValueError: Not a valid OID path: ``
-
-
-Auto-detecting the layout of a directory
-========================================
-
-To allow easier migration, we provide an auto-detection feature that analyses a
-blob directory and decides for a strategy to use. In general it prefers to
-choose the `bushy` layout, except if it determines that the directory has
-already been used to create a lawn structure.
-
->>> from ZODB.blob import auto_layout_select
-
-1. Non-existing directories will trigger a bushy layout:
-
->>> import tempfile
->>> import shutil
->>> d = tempfile.mkdtemp()
->>> shutil.rmtree(d)
->>> auto_layout_select(d)
-'bushy'
-
-2. Empty directories will trigger a bushy layout too:
-
->>> d = tempfile.mkdtemp()
->>> auto_layout_select(d)
-'bushy'
-
-3. If the directory contains a marker for the strategy it will be used:
-
->>> from ZODB.blob import LAYOUT_MARKER
->>> import os.path
->>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('bushy')
->>> auto_layout_select(d)
-'bushy'
->>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('lawn')
->>> auto_layout_select(d)
-'lawn'
->>> shutil.rmtree(d)
-
-4. If the directory does not contain a marker but other files, we assume that
-it was created with an earlier version of the blob implementation and uses our
-`lawn` layout:
-
->>> d = tempfile.mkdtemp()
->>> open(os.path.join(d, '0x0101'), 'wb').write('foo')
->>> auto_layout_select(d)
-'lawn'
->>> shutil.rmtree(d)
-
-
-Directory layout markers
-========================
-
-When the file system helper (FSH) is asked to create the directory structure,
-it will leave a marker with the choosen layout if no marker exists yet:
-
->>> from ZODB.blob import FilesystemHelper
->>> d = tempfile.mkdtemp()
->>> blobs = os.path.join(d, 'blobs')
->>> fsh = FilesystemHelper(blobs)
->>> fsh.layout_name
-'bushy'
->>> fsh.create()
->>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
-'bushy'
-
-If the FSH finds a marker, then it verifies whether its content matches the
-strategy that was chosen. It will raise an exception if we try to work with a
-directory that has a different marker than the chosen strategy:
-
->>> fsh = FilesystemHelper(blobs, 'lawn')
->>> fsh.layout_name
-'lawn'
->>> fsh.create() # doctest: +ELLIPSIS
-Traceback (most recent call last):
-ValueError: Directory layout `lawn` selected for blob directory /.../blobs/, but marker found for layout `bushy`
->>> shutil.rmtree(blobs)
-
-This function interacts with the automatic detection in the way, that an
-unmarked directory will be marked the first time when it is auto-guessed and
-the marker will be used in the future:
-
->>> import ZODB.FileStorage
->>> from ZODB.blob import BlobStorage
->>> datafs = os.path.join(d, 'data.fs')
->>> base_storage = ZODB.FileStorage.FileStorage(datafs)
-
->>> os.mkdir(blobs)
->>> open(os.path.join(blobs, 'foo'), 'wb').write('foo')
->>> blob_storage = BlobStorage(blobs, base_storage)
->>> blob_storage.fshelper.layout_name
-'lawn'
->>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
-'lawn'
->>> blob_storage = BlobStorage(blobs, base_storage, layout='bushy') # doctest: +ELLIPSIS
-Traceback (most recent call last):
-ValueError: Directory layout `bushy` selected for blob directory /.../blobs/, but marker found for layout `lawn`
-
-
->>> shutil.rmtree(d)
-
-
-Migrating between directory layouts
-===================================
-
-A script called `migrateblobs.py` is distributed with the ZODB for offline
-migration capabilities between different directory layouts. It can migrate any
-blob directory layout to any other layout. It leaves the original blob
-directory untouched (except from eventually creating a temporary directory and
-the storage layout marker).
-
-The migration is accessible as a library function:
-
->>> from ZODB.scripts.migrateblobs import migrate
-
-Create a `lawn` directory structure and migrate it to the new `bushy` one:
-
->>> from ZODB.blob import FilesystemHelper
->>> d = tempfile.mkdtemp()
->>> old = os.path.join(d, 'old')
->>> old_fsh = FilesystemHelper(old, 'lawn')
->>> old_fsh.create()
->>> blob1 = old_fsh.getPathForOID(7039, create=True)
->>> blob2 = old_fsh.getPathForOID(10, create=True)
->>> blob3 = old_fsh.getPathForOID(7034, create=True)
->>> open(os.path.join(blob1, 'foo'), 'wb').write('foo')
->>> open(os.path.join(blob1, 'foo2'), 'wb').write('bar')
->>> open(os.path.join(blob2, 'foo3'), 'wb').write('baz')
->>> open(os.path.join(blob2, 'foo4'), 'wb').write('qux')
->>> open(os.path.join(blob3, 'foo5'), 'wb').write('quux')
->>> open(os.path.join(blob3, 'foo6'), 'wb').write('corge')
-
-Committed blobs have their permissions set to 000
-
-The migration function is called with the old and the new path and the layout
-that shall be used for the new directory:
-
->>> bushy = os.path.join(d, 'bushy')
->>> migrate(old, bushy, 'bushy')  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
-    OID: 0x1b7f - 2 files 
-    OID: 0x0a - 2 files 
-    OID: 0x1b7a - 2 files 
-
-The new directory now contains the same files in different directories, but
-with the same sizes and permissions:
-
->>> import string
->>> def stat(path):
-...     s = os.stat(path)
-...     print "%s\t%s\t%s" % (string.rjust(oct(s.st_mode), 10), s.st_size, path)
->>> def ls(path):
-...     for p, dirs, files in os.walk(path):
-...         stat(p)
-...         for file in files:
-...             stat(os.path.join(p, file))
->>> ls(bushy)
-     040700  4096  /.../bushy
-    0100644  5     /.../bushy/.layout
-     040700  4096  /.../bushy/0x7a
-     040700  4096  /.../bushy/0x7a/0x1b
-     040700  4096  /.../bushy/0x7a/0x1b/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
-    0100644  5     /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
-    0100644  4     /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
-     040700  4096  /.../bushy/tmp
-     040700  4096  /.../bushy/0x0a
-     040700  4096  /.../bushy/0x0a/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
-    0100644  3     /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
-    0100644  3     /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
-     040700  4096  /.../bushy/0x7f
-     040700  4096  /.../bushy/0x7f/0x1b
-     040700  4096  /.../bushy/0x7f/0x1b/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
-    0100644  3     /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
-    0100644  3     /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
-
-We can also migrate the bushy layout back to the lawn layout:
-
->>> lawn = os.path.join(d, 'lawn')
->>> migrate(bushy, lawn, 'lawn')
-Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
-    OID: 0x1b7a - 2 files 
-    OID: 0x0a - 2 files 
-    OID: 0x1b7f - 2 files 
->>> ls(lawn)
-    040700  4096    /.../lawn
-   0100644  4       /.../lawn/.layout
-    040700  4096    /.../lawn/0x1b7f
-   0100644  3       /.../lawn/0x1b7f/foo
-   0100644  3       /.../lawn/0x1b7f/foo2
-    040700  4096    /.../lawn/tmp
-    040700  4096    /.../lawn/0x0a
-   0100644  3       /.../lawn/0x0a/foo4
-   0100644  3       /.../lawn/0x0a/foo3
-    040700  4096    /.../lawn/0x1b7a
-   0100644  5       /.../lawn/0x1b7a/foo6
-   0100644  4       /.../lawn/0x1b7a/foo5
-
->>> shutil.rmtree(d)

Modified: ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_packing.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_packing.txt	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_packing.txt	2008-08-04 18:39:18 UTC (rev 89351)
@@ -240,6 +240,37 @@
     >>> os.path.exists(os.path.split(fns[0])[0])
     False
 
+Avoiding parallel packs
+=======================
+
+Blob packing (like FileStorage packing) can only run one pack at a time. To
+enforce this, a flag (_blobs_pack_is_in_progress) is set while a pack runs. If
+the pack method is called while this flag is set, it refuses to start another
+pack until the flag is reset:
+
+    >>> blob_storage._blobs_pack_is_in_progress
+    False
+    >>> blob_storage._blobs_pack_is_in_progress = True
+    >>> blob_storage.pack(packtime, referencesf)
+    Traceback (most recent call last):
+    BlobStorageError: Already packing
+    >>> blob_storage._blobs_pack_is_in_progress = False
+    >>> blob_storage.pack(packtime, referencesf)
+
+We can also see that the flag is set during the pack by relying on the fact
+that the underlying storage's pack method is called as part of it:
+
+    >>> def dummy_pack(time, ref):
+    ...     print "_blobs_pack_is_in_progress =", blob_storage._blobs_pack_is_in_progress
+    ...     return base_pack(time, ref)
+    >>> base_pack = base_storage.pack
+    >>> base_storage.pack = dummy_pack
+    >>> blob_storage.pack(packtime, referencesf)
+    _blobs_pack_is_in_progress = True
+    >>> blob_storage._blobs_pack_is_in_progress
+    False
+    >>> base_storage.pack = base_pack
+
 Clean up our blob directory:
 
     >>> shutil.rmtree(blob_dir)

Modified: ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_tempdir.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_tempdir.txt	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_tempdir.txt	2008-08-04 18:39:18 UTC (rev 89351)
@@ -32,7 +32,7 @@
   >>> from ZODB.DB import DB
   >>> from tempfile import mkdtemp
   >>> import os.path
-  >>> base_storage = MappingStorage('test')
+  >>> base_storage = MappingStorage("test")
   >>> blob_dir = mkdtemp()
   >>> blob_storage = BlobStorage(blob_dir, base_storage)
   >>> database = DB(blob_storage)

Modified: ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_transaction.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_transaction.txt	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/blob_transaction.txt	2008-08-04 18:39:18 UTC (rev 89351)
@@ -353,9 +353,9 @@
     >>> base_storage = DummyBaseStorage()
     >>> blob_dir2 = mkdtemp()
     >>> blob_storage2 = BlobStorage(blob_dir2, base_storage)
-    >>> committed_blob_dir = blob_storage2.fshelper.getPathForOID(0)
-    >>> os.makedirs(committed_blob_dir)
-    >>> committed_blob_file = blob_storage2.fshelper.getBlobFilename(0, 0)
+    >>> committed_blob_dir = os.path.join(blob_dir2, '0')
+    >>> committed_blob_file = os.path.join(committed_blob_dir, '0.blob')
+    >>> os.mkdir(committed_blob_dir)
     >>> open(os.path.join(committed_blob_file), 'w').write('foo')
     >>> os.path.exists(committed_blob_file)
     True

Modified: ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/testblob.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/testblob.py	2008-08-04 17:47:44 UTC (rev 89350)
+++ ZODB/branches/ctheune-bushy-directory/src/ZODB/tests/testblob.py	2008-08-04 18:39:18 UTC (rev 89351)
@@ -105,6 +105,7 @@
         self.here = os.getcwd()
         os.chdir(self.test_dir)
         self.storagefile = 'Data.fs'
+        os.mkdir('blobs')
         self.blob_dir = 'blobs'
 
     def tearDown(self):
@@ -482,7 +483,7 @@
     We can access the blob correctly:
 
     >>> tmpstore.loadBlob(blob_oid, tid) # doctest: +ELLIPSIS
-    '.../0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x...blob'
+    '.../0x01/0x...blob'
 
     Clean up:
 
@@ -507,12 +508,6 @@
         setUp=ZODB.tests.util.setUp,
         tearDown=ZODB.tests.util.tearDown,
         ))
-    suite.addTest(doctest.DocFileSuite(
-        "blob_layout.txt",
-        optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE|doctest.REPORT_NDIFF,
-        setUp=ZODB.tests.util.setUp,
-        tearDown=ZODB.tests.util.tearDown,
-        ))
     suite.addTest(doctest.DocTestSuite(
         setUp=ZODB.tests.util.setUp,
         tearDown=ZODB.tests.util.tearDown,



More information about the Zodb-checkins mailing list