[Zodb-checkins] SVN: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/ Switch to 'big endian' byte order for creating the bushy directory structure.

Christian Theune ct at gocept.com
Wed Aug 6 04:34:01 EDT 2008


Log message for revision 89426:
  Switch to 'big endian' byte order for creating the bushy directory structure.
  In practice, the 'little endian' order turned out to create too many
  unnecessary top-level directories without actually using up the space
  available in the deeper levels.
  

Changed:
  U   ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py
  U   ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt
  U   ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py

-=-
Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py	2008-08-06 08:25:24 UTC (rev 89425)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/blob.py	2008-08-06 08:34:00 UTC (rev 89426)
@@ -442,6 +442,10 @@
         files.
         """
         for path, dirs, files in os.walk(self.base_dir):
+            # Make sure we traverse in a stable order. This is mainly to make
+            # testing predictable.
+            dirs.sort()
+            files.sort()
             try:
                 oid = self.getOIDForPath(path)
             except ValueError:
@@ -477,7 +481,7 @@
     """A bushy directory layout for blob directories.
 
     Creates an 8-level directory structure (one level per byte) in
-    little-endian order from the OID of an object.
+    big-endian order from the OID of an object.
 
     """
 
@@ -488,7 +492,7 @@
         directories = []
-        # Create the bushy directory structure with the least significant byte
-        # first
-        for byte in reversed(str(oid)):
+        # Create the bushy directory structure with the most significant byte
+        # first
+        for byte in str(oid):
             directories.append('0x%s' % binascii.hexlify(byte))
         return '/'.join(directories)
 
@@ -496,9 +500,6 @@
         if self.blob_path_pattern.match(path) is None:
             raise ValueError("Not a valid OID path: `%s`" % path)
         path = path.split('/')
-        # The path contains the OID in little endian form but the OID itself
-        # is big endian.
-        path.reverse()
         # Each path segment stores a byte in hex representation. Turn it into
         # an int and then get the character for our byte string.
         oid = ''.join(binascii.unhexlify(byte[2:]) for byte in path)

Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt	2008-08-06 08:25:24 UTC (rev 89425)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/blob_layout.txt	2008-08-06 08:34:00 UTC (rev 89426)
@@ -27,12 +27,12 @@
 >>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
 '0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
 >>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
-'0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
+'0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x01'
 
 >>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
-'\x00\x00\x00\x00\x00\x00\x00\x01'
+'\x01\x00\x00\x00\x00\x00\x00\x00'
 >>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
-'\x00\x00\x00\x00\x00\x00\x00\xff'
+'\xff\x00\x00\x00\x00\x00\x00\x00'
 
 Paths that do not represent an OID will cause a ValueError:
 
@@ -207,9 +207,9 @@
 >>> bushy = os.path.join(d, 'bushy')
 >>> migrate(old, bushy, 'bushy')  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
 Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
-    OID: 0x1b7a - 2 files 
-    OID: 0x0a - 2 files 
-    OID: 0x1b7f - 2 files 
+    OID: 0x0a - 2 files
+    OID: 0x1b7a - 2 files
+    OID: 0x1b7f - 2 files
 
 The new directory now contains the same files in different directories, but
 with the same sizes and permissions:
@@ -224,48 +224,35 @@
 ...         for file in files:
 ...             stat(os.path.join(p, file))
 >>> ls(bushy)
-     040700  4096  /.../bushy
-    0100644  5     /.../bushy/.layout
-     040700  4096  /.../bushy/0x7f
-     040700  4096  /.../bushy/0x7f/0x1b
-     040700  4096  /.../bushy/0x7f/0x1b/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
-    0100644  3     /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
-    0100644  3     /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
-     040700  4096  /.../bushy/0x0a
-     040700  4096  /.../bushy/0x0a/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
-    0100644  3     /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
-    0100644  3     /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
-     040700  4096  /.../bushy/0x7a
-     040700  4096  /.../bushy/0x7a/0x1b
-     040700  4096  /.../bushy/0x7a/0x1b/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
-     040700  4096  /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
-    0100644  4     /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
-    0100644  5     /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
-     040700  4096  /.../bushy/tmp
+    040700  4096  /.../bushy
+   0100644  5     /.../bushy/.layout
+    040700  4096  /.../bushy/tmp
+    040700  4096  /.../bushy/0x00
+    040700  4096  /.../bushy/0x00/0x00
+    040700  4096  /.../bushy/0x00/0x00/0x00
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b/0x7f
+   0100644  3     /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b/0x7f/foo2
+   0100644  3     /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b/0x7f/foo
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b/0x7a
+   0100644  4     /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b/0x7a/foo5
+   0100644  5     /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x1b/0x7a/foo6
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x00
+    040700  4096  /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x0a
+   0100644  3     /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x0a/foo4
+   0100644  3     /.../bushy/0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x0a/foo3
 
 We can also migrate the bushy layout back to the lawn layout:
 
 >>> lawn = os.path.join(d, 'lawn')
 >>> migrate(bushy, lawn, 'lawn')
 Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
-    OID: 0x1b7f - 2 files 
-    OID: 0x0a - 2 files 
-    OID: 0x1b7a - 2 files 
+   OID: 0x0a - 2 files
+   OID: 0x1b7a - 2 files
+   OID: 0x1b7f - 2 files
 >>> ls(lawn)
      040700  4096  /.../lawn
     0100644  4     /.../lawn/.layout

Modified: ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py
===================================================================
--- ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py	2008-08-06 08:25:24 UTC (rev 89425)
+++ ZODB/branches/ctheune-bushy-directory-3.8/src/ZODB/tests/testblob.py	2008-08-06 08:34:00 UTC (rev 89426)
@@ -482,7 +482,7 @@
     We can access the blob correctly:
 
     >>> tmpstore.loadBlob(blob_oid, tid) # doctest: +ELLIPSIS
-    '.../0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x...blob'
+    '.../0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x01/0x...blob'
 
     Clean up:
 



More information about the Zodb-checkins mailing list