[ZODB-Dev] ConflictError and POSKeyError with 2.6 CVS :-(

Jeremy Hylton jeremy@zope.com
Wed, 16 Oct 2002 09:58:13 -0400


Chris,

I just checked in a little script that should report on dangling
references in a storage.  It scans the entire database and prints a
little report like this for a corrupted storage:

oid 0x2 __main__.P
last updated: 2002-10-16 12:52:43.445419, tid=0x34833C4B95DFBA2L
refers to unknown object:
        oid 0x3: __main__.P

I hope this will help track down the source of key errors.  If you
find any, you'll know the object that has the dangling reference and
(with luck) the class of the object it once referred to.  The update
time and transaction id will let you search through fsdump.py output
and find the last transaction to modify the object; fsdump.py will
show transaction metadata which presumably has path / user info.

Jeremy

#! /usr/bin/env python

##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################

"""Check FileStorage for dangling references.

usage: fsrefs.py data.fs

This script ignores versions, which might produce incorrect results
for storages that use versions.
"""

from ZODB.FileStorage import FileStorage
from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64
from ZODB.fsdump import get_pickle_metadata

import cPickle
import cStringIO
import types

def get_refs(pickle):
    """Return the persistent references recorded in a data record.

    pickle is the raw record string for one object.  The result is a
    list with one entry per persistent id met while scanning the record,
    in the order the ids appear.
    """
    found = []
    unpickler = cPickle.Unpickler(cStringIO.StringIO(pickle))
    # cPickle accepts a list for persistent_load: each persistent id it
    # encounters is appended to the list instead of being resolved.
    unpickler.persistent_load = found
    # The record is read as two consecutive pickles, so noload() runs
    # twice.  NOTE(review): presumably class metadata followed by object
    # state -- confirm against the storage record format.
    for _pass in (1, 2):
        unpickler.noload()
    return found

def report(oid, data, serial, fs, missing):
    from_mod, from_class = get_pickle_metadata(data)
    if len(missing) > 1:
        plural = "s"
    else:
        plural = ""
    ts = TimeStamp(serial)
    print "oid %s %s.%s" % (hex(u64(oid)), from_mod, from_class)
    print "last updated: %s, tid=%s" % (ts, hex(u64(serial)))
    print "refers to unknown object%s:" % plural
    for oid, info in missing:
        if isinstance(info, types.TupleType):
            description = "%s.%s" % info
        else:
            description = str(info)
        print "\toid %s: %s" % (hex(u64(oid)), description)
    print

def main(path):
    """Scan the FileStorage at path and report dangling references.

    Every oid in the storage index is loaded (non-version data only)
    and the persistent references in its record are collected with
    get_refs().  Each reference whose oid is absent from the index is
    recorded, and objects with at least one such reference are passed
    to report().  The storage is opened read-only and closed again
    before returning, even if the scan raises.
    """
    fs = FileStorage(path, read_only=1)
    try:
        for oid in fs._index.keys():
            data, serial = fs.load(oid, "")
            missing = []
            for info in get_refs(data):
                # A reference is normally an (oid, class-info) pair, but
                # it can also be a bare oid string carrying no class
                # information.  Unpacking such a string into two names
                # would raise ValueError and abort the whole scan, so
                # substitute a placeholder description instead.
                if isinstance(info, types.TupleType):
                    ref, klass = info
                else:
                    ref, klass = info, "<unknown>"
                if not fs._index.has_key(ref):
                    missing.append((ref, klass))
            if missing:
                report(oid, data, serial, fs, missing)
    finally:
        # Always close the storage that was opened above.
        fs.close()

if __name__ == "__main__":
    import sys
    # The storage path is required.  Without it, show the usage line and
    # exit with an error status rather than failing with an IndexError
    # traceback.  Additional arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: fsrefs.py data.fs\n")
        sys.exit(2)
    main(sys.argv[1])