[Zodb-checkins] SVN: ZODB/branches/3.3/src/ZODB/ Move get_pickle_metadata() into utils.py.

Tim Peters tim.one at comcast.net
Wed Nov 10 21:12:38 EST 2004


Log message for revision 28428:
  Move get_pickle_metadata() into utils.py.
  
  Try to make more sense of the ZODB pickle format "docs".
  

Changed:
  U   ZODB/branches/3.3/src/ZODB/FileStorage/fsdump.py
  U   ZODB/branches/3.3/src/ZODB/serialize.py
  U   ZODB/branches/3.3/src/ZODB/utils.py

-=-
Modified: ZODB/branches/3.3/src/ZODB/FileStorage/fsdump.py
===================================================================
--- ZODB/branches/3.3/src/ZODB/FileStorage/fsdump.py	2004-11-11 00:12:06 UTC (rev 28427)
+++ ZODB/branches/3.3/src/ZODB/FileStorage/fsdump.py	2004-11-11 02:12:38 UTC (rev 28428)
@@ -1,5 +1,16 @@
-from cPickle import Unpickler
-from cStringIO import StringIO
+##############################################################################
+#
+# Copyright (c) 2003 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
 import md5
 import struct
 
@@ -7,48 +18,9 @@
 from ZODB.FileStorage.format \
      import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
 from ZODB.TimeStamp import TimeStamp
-from ZODB.utils import u64
+from ZODB.utils import u64, get_pickle_metadata
 from ZODB.tests.StorageTestBase import zodb_unpickle
 
-def get_pickle_metadata(data):
-    # ZODB's data records contain two pickles.  The first is the class
-    # of the object, the second is the object.  We're only trying to
-    # pick apart the first here, to extract the module and class names.
-    if data.startswith('(c'):   # pickle MARK GLOBAL opcode sequence
-        global_prefix = 2
-    elif data.startswith('c'):  # pickle GLOBAL opcode
-        global_prefix = 1
-    else:
-        global_prefix = 0
-
-    if global_prefix:
-        # Don't actually unpickle a class, because it will attempt to
-        # load the class.  Just break open the pickle and get the
-        # module and class from it.  The module and the class names are
-        # given by newline-terminated strings following the GLOBAL opcode.
-        modname, classname, rest = data.split('\n', 2)
-        modname = modname[global_prefix:]   # strip GLOBAL opcode
-        return modname, classname
-
-    # Else there are a bunch of other possible formats.
-    f = StringIO(data)
-    u = Unpickler(f)
-    try:
-        class_info = u.load()
-    except Exception, err:
-        print "Error", err
-        return '', ''
-    if isinstance(class_info, tuple):
-        if isinstance(class_info[0], tuple):
-            modname, classname = class_info[0]
-        else:
-            modname, classname = class_info
-    else:
-        # XXX not sure what to do here
-        modname = repr(class_info)
-        classname = ''
-    return modname, classname
-
 def fsdump(path, file=None, with_offset=1):
     i = 0
     iter = FileIterator(path)

Modified: ZODB/branches/3.3/src/ZODB/serialize.py
===================================================================
--- ZODB/branches/3.3/src/ZODB/serialize.py	2004-11-11 00:12:06 UTC (rev 28427)
+++ ZODB/branches/3.3/src/ZODB/serialize.py	2004-11-11 02:12:38 UTC (rev 28428)
@@ -34,36 +34,44 @@
 provide backwards compatibility with earlier versions of Zope.  The
 two current formats for class description are:
 
-    - type(obj)
-    - type(obj), obj.__getnewargs__()
+    1. type(obj)
+    2. type(obj), obj.__getnewargs__()
 
-The second of these options is used if the object has a
-__getnewargs__() method.  It is intended to support objects like
-persistent classes that have custom C layouts that are determined by
-arguments to __new__().
+The second of these options is used if the object has a __getnewargs__()
+method.  It is intended to support objects like persistent classes that have
+custom C layouts that are determined by arguments to __new__().
 
-The type object is usually stored using the standard pickle mechanism,
-which uses a string containing the class's module and name.  The type
-may itself be a persistent object, in which case a persistent
-reference (see below) is used.
+The type object is usually stored using the standard pickle mechanism, which
+involves the pickle GLOBAL opcode (giving the type's module and name as
+strings).  The type may itself be a persistent object, in which case a
+persistent reference (see below) is used.
 
+It's unclear what "usually" means in the preceding paragraph.  There are two
+useful places to concentrate confusion about exactly which formats exist:
+
+- BaseObjectReader.getClassName() below returns a dotted "module.class"
+  string, via actually loading a pickle.  This requires that the
+  implementation of application objects be available.
+
+- ZODB/utils.py's get_pickle_metadata() tries to return the module and
+  class names (as strings) without importing any application modules or
+  classes, via analyzing the pickle.
+
 Earlier versions of Zope supported several other kinds of class
-descriptions.  The current serialization code reads these
-descriptions, but does not write them.
+descriptions.  The current serialization code reads these descriptions, but
+does not write them.  The four earlier formats are:
 
-The four formats are:
+    3. (module name, class name), None
+    4. (module name, class name), __getinitargs__()
+    5. class, None
+    6. class, __getinitargs__()
 
-    1. (module name, class name), None
-    2. (module name, class name), __getinitargs__()
-    3. class, None
-    4. class, __getinitargs__()
+Formats 4 and 6 are used only if the class defines a __getinitargs__()
+method.  Formats 5 and 6 are used if the class does not have a __module__
+attribute (I'm not sure when this applies, but I think it occurs for some
+but not all ZClasses).
 
-Formats 2 and 4 are used only if the class defines an
-__getinitargs__() method.  Formats 3 and 4 are used if the class does
-not have an __module__ attribute.  (I'm not sure when this applies,
-but I think it occurs for some but not all ZClasses.)
 
-
 Persistent references
 ---------------------
 
@@ -79,7 +87,6 @@
 changed the class of an object, a new record with new class metadata
 would be written but all the old references would still include the
 old class.
-
 """
 
 import cPickle

Modified: ZODB/branches/3.3/src/ZODB/utils.py
===================================================================
--- ZODB/branches/3.3/src/ZODB/utils.py	2004-11-11 00:12:06 UTC (rev 28427)
+++ ZODB/branches/3.3/src/ZODB/utils.py	2004-11-11 02:12:38 UTC (rev 28428)
@@ -16,6 +16,8 @@
 import time
 from struct import pack, unpack
 from binascii import hexlify
+import cPickle as pickle
+from cStringIO import StringIO
 
 from persistent.TimeStamp import TimeStamp
 
@@ -109,3 +111,52 @@
             result += 1L << 64
             assert result >= 0 # else addresses are fatter than 64 bits
     return result
+
+# Given a ZODB pickle, return a pair of strings (module_name, class_name).
+# Do this without importing the module or class object.
+# See ZODB/serialize.py's module docstring for the only docs that exist about
+# ZODB pickle format.  If the code here gets smarter, please update those
+# docs to be at least as smart.  The code here doesn't appear to make sense
+# for what serialize.py calls formats 5 and 6.
+
+def get_pickle_metadata(data):
+    # ZODB's data records contain two pickles.  The first is the class
+    # of the object, the second is the object.  We're only trying to
+    # pick apart the first here, to extract the module and class names.
+    if data.startswith('(c'):   # pickle MARK GLOBAL opcode sequence
+        global_prefix = 2
+    elif data.startswith('c'):  # pickle GLOBAL opcode
+        global_prefix = 1
+    else:
+        global_prefix = 0
+
+    if global_prefix:
+        # Formats 1 and 2.
+        # Don't actually unpickle a class, because it will attempt to
+        # load the class.  Just break open the pickle and get the
+        # module and class from it.  The module and class names are given by
+        # newline-terminated strings following the GLOBAL opcode.
+        modname, classname, rest = data.split('\n', 2)
+        modname = modname[global_prefix:]   # strip GLOBAL opcode
+        return modname, classname
+
+    # Else there are a bunch of other possible formats.
+    f = StringIO(data)
+    u = pickle.Unpickler(f)
+    try:
+        class_info = u.load()
+    except Exception, err:
+        print "Error", err
+        return '', ''
+    if isinstance(class_info, tuple):
+        if isinstance(class_info[0], tuple):
+            # Formats 3 and 4.
+            modname, classname = class_info[0]
+        else:
+            # Formats 5 and 6 (probably) end up here.
+            modname, classname = class_info
+    else:
+        # This isn't a known format.
+        modname = repr(class_info)
+        classname = ''
+    return modname, classname



More information about the Zodb-checkins mailing list