[Zodb-checkins] SVN: ZODB/trunk/src/ - Added migration script
Christian Theune
ct at gocept.com
Sat Jun 21 08:17:53 EDT 2008
Log message for revision 87622:
- Added migration script
- Fixed bug in bushy layout: the OID recognition pattern did not accept the
hex digits a-f, so paths containing them were rejected
- Fixed bug in lawn layout: the empty string (the base directory) was
recognized as the OID 0.
Changed:
U ZODB/trunk/src/CHANGES.txt
U ZODB/trunk/src/ZODB/blob.py
A ZODB/trunk/src/ZODB/scripts/migrateblobs.py
U ZODB/trunk/src/ZODB/tests/blob_layout.txt
U ZODB/trunk/src/ZODB/tests/testblob.py
-=-
Modified: ZODB/trunk/src/CHANGES.txt
===================================================================
--- ZODB/trunk/src/CHANGES.txt 2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/CHANGES.txt 2008-06-21 12:17:51 UTC (rev 87622)
@@ -11,7 +11,8 @@
- Changed layout strategy for the blob directory to a bushy approach (8 levels
deep, at most ~256 entries per directory level, one directory for each
blob). Old directories are automatically detected and will be handled with
- the old strategy.
+ the old strategy. A migration script (`migrateblobs.py`) is provided to
+ convert between the different layouts.
- Versions are no-longer supported.
Modified: ZODB/trunk/src/ZODB/blob.py
===================================================================
--- ZODB/trunk/src/ZODB/blob.py 2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/ZODB/blob.py 2008-06-21 12:17:51 UTC (rev 87622)
@@ -451,26 +451,24 @@
def auto_layout_select(path):
# A heuristic to look at a path and determine which directory layout to
- # use. Basically we try to figure out if the directory is either already
- # used and contains an explicit marker, is unused or used without a
- # marker.
+ # use.
layout_marker = os.path.join(path, LAYOUT_MARKER)
if not os.path.exists(path):
log('Blob directory %s does not exist. '
'Selected `bushy` layout. ' % path)
layout = 'bushy'
elif len(os.listdir(path)) == 0:
- log('Blob directory %s is unused and has no layout marker set.'
+ log('Blob directory `%s` is unused and has no layout marker set. '
'Selected `bushy` layout. ' % path)
layout = 'bushy'
elif LAYOUT_MARKER not in os.listdir(path):
- log('Blob directory %s is used but has no layout marker set.'
+ log('Blob directory `%s` is used but has no layout marker set. '
'Selected `lawn` layout. ' % path)
layout = 'lawn'
else:
layout = open(layout_marker, 'rb').read()
layout = layout.strip()
- log('Blob directory %s has layout marker set.'
+ log('Blob directory `%s` has layout marker set. '
'Selected `%s` layout. ' % (path, layout))
return layout
@@ -483,7 +481,7 @@
"""
- blob_path_pattern = r'^' + (r'0x[0-9]{1,2}/*'*8) + r'$'
+ blob_path_pattern = r'^' + (r'0x[0-9a-f]{1,2}/*'*8) + r'$'
blob_path_pattern = re.compile(blob_path_pattern)
def oid_to_path(self, oid):
@@ -496,8 +494,7 @@
def path_to_oid(self, path):
if self.blob_path_pattern.match(path) is None:
- raise ValueError("Not a valid OID path: %s" % path)
- # The path always has a leading slash that we need to ignore.
+ raise ValueError("Not a valid OID path: `%s`" % path)
path = path.split('/')
# The path contains the OID in little endian form but the OID itself
# is big endian.
@@ -522,9 +519,13 @@
def path_to_oid(self, path):
try:
+ if path == '':
+ # This is a special case where repr_to_oid converts '' to the
+ # OID z64.
+ raise TypeError()
return utils.repr_to_oid(path)
except TypeError:
- raise ValueError('Not a valid OID path: %s' % path)
+ raise ValueError('Not a valid OID path: `%s`' % path)
LAYOUTS['lawn'] = LawnLayout()
Added: ZODB/trunk/src/ZODB/scripts/migrateblobs.py
===================================================================
--- ZODB/trunk/src/ZODB/scripts/migrateblobs.py (rev 0)
+++ ZODB/trunk/src/ZODB/scripts/migrateblobs.py 2008-06-21 12:17:51 UTC (rev 87622)
@@ -0,0 +1,74 @@
+##############################################################################
+#
+# Copyright (c) 2008 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""A script to migrate a blob directory into a different layout.
+"""
+
+import logging
+import optparse
+import os
+
+from ZODB.blob import FilesystemHelper, rename_or_copy_blob
+from ZODB.utils import cp, oid_repr
+
+
+def link_or_copy(f1, f2):
+ try:
+ os.link(f1, f2)
+ except OSError:
+ shutil.copy(f1, f2)
+
+
+def migrate(source, dest, layout):
+ source_fsh = FilesystemHelper(source)
+ source_fsh.create()
+ dest_fsh = FilesystemHelper(dest, layout)
+ dest_fsh.create()
+ print "Migrating blob data from `%s` (%s) to `%s` (%s)" % (
+ source, source_fsh.layout_name, dest, dest_fsh.layout_name)
+ for oid, path in source_fsh.listOIDs():
+ dest_path = dest_fsh.getPathForOID(oid, create=True)
+ files = os.listdir(path)
+ for file in files:
+ source_file = os.path.join(path, file)
+ dest_file = os.path.join(dest_path, file)
+ link_or_copy(source_file, dest_file)
+ print "\tOID: %s - %s files " % (oid_repr(oid), len(files))
+
+
+def main(source=None, dest=None, layout="bushy"):
+ usage = "usage: %prog [options] <source> <dest> <layout>"
+ description = ("Create the new directory <dest> and migrate all blob "
+ "data <source> to <dest> while using the new <layout> for "
+ "<dest>")
+
+ parser = optparse.OptionParser(usage=usage, description=description)
+ parser.add_option("-l", "--layout",
+ default=layout, type='choice',
+ choices=['bushy', 'lawn'],
+ help="Define the layout to use for the new directory "
+ "(bushy or lawn). Default: %default")
+ options, args = parser.parse_args()
+
+ if not len(args) == 2:
+ parser.error("source and destination must be given")
+
+ logging.getLogger().addHandler(logging.StreamHandler())
+ logging.getLogger().setLevel(0)
+
+ source, dest = args
+ migrate(source, dest, options.layout)
+
+
+if __name__ == '__main__':
+ main()
Property changes on: ZODB/trunk/src/ZODB/scripts/migrateblobs.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: ZODB/trunk/src/ZODB/tests/blob_layout.txt
===================================================================
--- ZODB/trunk/src/ZODB/tests/blob_layout.txt 2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/ZODB/tests/blob_layout.txt 2008-06-21 12:17:51 UTC (rev 87622)
@@ -31,12 +31,14 @@
>>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
'\x00\x00\x00\x00\x00\x00\x00\x01'
+>>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
+'\x00\x00\x00\x00\x00\x00\x00\xff'
Paths that do not represent an OID will cause a ValueError:
>>> bushy.path_to_oid('tmp')
Traceback (most recent call last):
-ValueError: Not a valid OID path: tmp
+ValueError: Not a valid OID path: `tmp`
The `lawn` layout
@@ -61,7 +63,10 @@
>>> lawn.path_to_oid('tmp')
Traceback (most recent call last):
-ValueError: Not a valid OID path: tmp
+ValueError: Not a valid OID path: `tmp`
+>>> lawn.path_to_oid('')
+Traceback (most recent call last):
+ValueError: Not a valid OID path: ``
Auto-detecting the layout of a directory
@@ -162,3 +167,117 @@
>>> shutil.rmtree(d)
+
+
+Migrating between directory layouts
+===================================
+
+A script called `migrateblobs.py` is distributed with the ZODB for offline
+migration between different directory layouts. It can migrate any blob
+directory layout to any other layout. It leaves the original blob directory
+untouched (except for possibly creating a temporary directory and the storage
+layout marker).
+
+The migration is accessible as a library function:
+
+>>> from ZODB.scripts.migrateblobs import migrate
+
+Create a `lawn` directory structure and migrate it to the new `bushy` one:
+
+>>> from ZODB.blob import FilesystemHelper
+>>> d = tempfile.mkdtemp()
+>>> old = os.path.join(d, 'old')
+>>> old_fsh = FilesystemHelper(old, 'lawn')
+>>> old_fsh.create()
+>>> blob1 = old_fsh.getPathForOID(7039, create=True)
+>>> blob2 = old_fsh.getPathForOID(10, create=True)
+>>> blob3 = old_fsh.getPathForOID(7034, create=True)
+>>> open(os.path.join(blob1, 'foo'), 'wb').write('foo')
+>>> open(os.path.join(blob1, 'foo2'), 'wb').write('bar')
+>>> open(os.path.join(blob2, 'foo3'), 'wb').write('baz')
+>>> open(os.path.join(blob2, 'foo4'), 'wb').write('qux')
+>>> open(os.path.join(blob3, 'foo5'), 'wb').write('quux')
+>>> open(os.path.join(blob3, 'foo6'), 'wb').write('corge')
+
+Committed blobs have their permissions set to 000
+
+The migration function is called with the old and the new path and the layout
+that shall be used for the new directory:
+
+>>> bushy = os.path.join(d, 'bushy')
+>>> migrate(old, bushy, 'bushy') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
+ OID: 0x1b7f - 2 files
+ OID: 0x0a - 2 files
+ OID: 0x1b7a - 2 files
+
+The new directory now contains the same files in different directories, but
+with the same sizes and permissions:
+
+>>> import string
+>>> def stat(path):
+... s = os.stat(path)
+... print "%s\t%s\t%s" % (string.rjust(oct(s.st_mode), 10), s.st_size, path)
+>>> def ls(path):
+... for p, dirs, files in os.walk(path):
+... stat(p)
+... for file in files:
+... stat(os.path.join(p, file))
+>>> ls(bushy)
+ 040700 4096 /.../bushy
+ 0100644 5 /.../bushy/.layout
+ 040700 4096 /.../bushy/0x7a
+ 040700 4096 /.../bushy/0x7a/0x1b
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
+ 0100644 5 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
+ 0100644 4 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
+ 040700 4096 /.../bushy/tmp
+ 040700 4096 /.../bushy/0x0a
+ 040700 4096 /.../bushy/0x0a/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
+ 0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
+ 0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
+ 040700 4096 /.../bushy/0x7f
+ 040700 4096 /.../bushy/0x7f/0x1b
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
+ 040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
+ 0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
+ 0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
+
+We can also migrate the bushy layout back to the lawn layout:
+
+>>> lawn = os.path.join(d, 'lawn')
+>>> migrate(bushy, lawn, 'lawn')
+Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
+ OID: 0x1b7a - 2 files
+ OID: 0x0a - 2 files
+ OID: 0x1b7f - 2 files
+>>> ls(lawn)
+ 040700 4096 /.../lawn
+ 0100644 4 /.../lawn/.layout
+ 040700 4096 /.../lawn/0x1b7f
+ 0100644 3 /.../lawn/0x1b7f/foo
+ 0100644 3 /.../lawn/0x1b7f/foo2
+ 040700 4096 /.../lawn/tmp
+ 040700 4096 /.../lawn/0x0a
+ 0100644 3 /.../lawn/0x0a/foo4
+ 0100644 3 /.../lawn/0x0a/foo3
+ 040700 4096 /.../lawn/0x1b7a
+ 0100644 5 /.../lawn/0x1b7a/foo6
+ 0100644 4 /.../lawn/0x1b7a/foo5
+
+>>> shutil.rmtree(d)
Modified: ZODB/trunk/src/ZODB/tests/testblob.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testblob.py 2008-06-21 09:27:56 UTC (rev 87621)
+++ ZODB/trunk/src/ZODB/tests/testblob.py 2008-06-21 12:17:51 UTC (rev 87622)
@@ -502,10 +502,16 @@
suite.addTest(doctest.DocFileSuite(
"blob_basic.txt", "blob_connection.txt", "blob_transaction.txt",
"blob_packing.txt", "blob_importexport.txt", "blob_consume.txt",
- "blob_tempdir.txt", "blob_layout.txt",
+ "blob_tempdir.txt",
setUp=ZODB.tests.util.setUp,
tearDown=ZODB.tests.util.tearDown,
))
+ suite.addTest(doctest.DocFileSuite(
+ "blob_layout.txt",
+ optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE|doctest.REPORT_NDIFF,
+ setUp=ZODB.tests.util.setUp,
+ tearDown=ZODB.tests.util.tearDown,
+ ))
suite.addTest(doctest.DocTestSuite(
setUp=ZODB.tests.util.setUp,
tearDown=ZODB.tests.util.tearDown,
More information about the Zodb-checkins
mailing list