[Zope3-checkins] CVS: ZODB4/src/zodb/storage/file - recover.py:1.2 main.py:1.7 __init__.py:1.4

Jeremy Hylton jeremy@zope.com
Thu, 19 Jun 2003 17:41:40 -0400


Update of /cvs-repository/ZODB4/src/zodb/storage/file
In directory cvs.zope.org:/tmp/cvs-serv15960/src/zodb/storage/file

Modified Files:
	main.py __init__.py 
Added Files:
	recover.py 
Log Message:
Merge ZODB3-2-merge branch to the head.

This completes the porting of bug fixes and random improvements from
ZODB 3.2 to ZODB 4.


=== ZODB4/src/zodb/storage/file/recover.py 1.1 => 1.2 ===
--- /dev/null	Thu Jun 19 17:41:40 2003
+++ ZODB4/src/zodb/storage/file/recover.py	Thu Jun 19 17:41:09 2003
@@ -0,0 +1,370 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""Simple script for repairing damaged FileStorage files.
+
+Usage: %s [-f] [-v level] [-p] [-P t] input output
+
+Recover data from a FileStorage data file, skipping over damaged
+data. Any damaged data will be lost. This could lead to useless output
+if critical data were lost.
+
+Options:
+
+    -f
+       Force output to output file even if it exists
+
+    -v level
+
+       Set the verbosity level:
+
+         0 -- Show progress indicator (default)
+
+         1 -- Show transaction times and sizes
+
+         2 -- Show transaction times and sizes, and
+              show object (record) ids, versions, and sizes.
+
+    -p
+
+       Copy partial transactions. If a data record in the middle of a
+       transaction is bad, the data up to the bad data are packed. The
+       output record is marked as packed. If this option is not used,
+       transactions with any bad data are skipped.
+
+    -P t
+
+       Pack data to t seconds in the past. Note that if the "-p"
+       option is used, then t should be 0.
+
+
+Important note: The ZODB package must be importable.  You may need
+                to adjust PYTHONPATH accordingly.
+
+"""
+
+# Algorithm:
+#
+#     position to start of input
+#     while 1:
+#         if end of file: break
+#         try: copy_transaction
+#         except:
+#             scan for transaction
+#             continue
+
+import sys, os
+import time
+import struct
+import getopt
+
+from struct import unpack
+from cPickle import loads
+
+if __name__ == '__main__' and len(sys.argv) < 3:
+    print __doc__ % sys.argv[0]  # too few arguments: show usage (no exit here)
+
+def die(mess=''):
+    """Print mess (default: current exception type and value), then exit 1."""
+    print (mess or "%s: %s" % sys.exc_info()[:2]) + '\n'
+    sys.exit(1)
+
+from zodb.storage.file.main import FileStorage, RecordIterator
+from zodb.storage.file.format import FileStorageFormatter
+from zodb.timestamp import TimeStamp
+from zodb.utils import p64, u64
+
+magic = "FS" + FileStorageFormatter._format_version   # 4-byte file header that recover() requires
+
+class ErrorFound(Exception): pass  # raised by error() when a check fails
+
+def error(mess, *args):
+    raise ErrorFound(mess % args)  # mess is a %-format string filled from args
+
+def read_txn_header(f, pos, file_size, outp, ltid):
+    """Read and validate the transaction header at pos in f.
+
+    Returns (next_pos, txn, tid): txn is a RecordIterator over the data
+    records, or None for an undone ("u") transaction, which is skipped.
+    Raises EOFError at end of input or on a "c" status transaction (its
+    tail is first saved with truncate()), and ErrorFound, via error(),
+    when a check fails.  ltid is the previous tid or None.
+    """
+    f.seek(pos)
+    h = f.read(23)
+    if len(h) < 23:
+        raise EOFError
+
+    # tid and the length stay raw 8-byte strings: tid goes to u64() and
+    # RecordIterator, and stl is compared with the trailing length copy.
+    tid, stl, status, ul, dl, el = unpack(">8s8scHHH", h)
+    tl = u64(stl)
+
+    if pos + (tl + 8) > file_size:
+        error("bad transaction length at %s", pos)
+    if tl < (23 + ul + dl + el):
+        error("invalid transaction length, %s, at %s", tl, pos)
+    if ltid and tid < ltid:
+        error("time-stamp reducation %s < %s, at %s", u64(tid), u64(ltid), pos)
+    if status == "c":
+        truncate(f, pos, file_size, outp)
+        raise EOFError
+    if status not in " up":
+        error("invalid status, %r, at %s", status, pos)
+
+    tend = pos + tl
+    if status == "u":
+        # Undone transaction, skip it
+        f.seek(tend)
+        if f.read(8) != stl:
+            error("inconsistent transaction length at %s", pos)
+        return tend + 8, None, tid
+
+    user = f.read(ul)
+    description = f.read(dl)
+    e = {}
+    if el:
+        # NOTE: this unpickles bytes from a possibly damaged file; any
+        # failure is deliberately ignored and the extension stays {}.
+        try: e = loads(f.read(el))
+        except: e = {}
+
+    # RecordIterator is handed the span from the end of the header to tend.
+    result = RecordIterator(tid, status, user, description, e,
+                            pos + (23 + ul + dl + el), tend, f, pos)
+
+    # Read the (intentionally redundant) transaction length
+    f.seek(tend)
+    if f.read(8) != stl:
+        error("redundant transaction length check failed at %s", tend)
+    return tend + 8, result, tid
+
+def truncate(f, pos, file_size, outp):
+    """Copy data from pos to end of f to the first unused outp.trNNN file."""
+    i = 0
+    while os.path.exists(outp + ".tr%d" % i):
+        i += 1
+    tr = open(outp + ".tr%d" % i, "wb")
+    try:
+        f.seek(pos)  # copy() reads from wherever f currently is
+        copy(f, tr, file_size - pos)
+    finally:
+        tr.close()
+    f.seek(pos)  # leave f positioned at pos for the caller
+
+def copy(src, dst, n):
+    """Copy up to n bytes from src to dst, stopping early at end of src."""
+    while n:
+        # Never ask for more than n bytes, so src is not read past the range.
+        buf = src.read(min(n, 8096))
+        if not buf:
+            break
+        dst.write(buf)
+        n -= len(buf)
+
+def scan(f, pos):
+    """Return a potential transaction location following pos in f.
+
+    This routine scans forward from pos looking for the last data
+    record in a transaction.  A period '.' always occurs at the end of
+    a pickle, and an 8-byte transaction length follows the last
+    pickle.  If a period is followed by a plausible 8-byte transaction
+    length, assume that we have found the end of a transaction.
+
+    Returns 0 when the end of f is reached without finding a candidate.
+    The caller should try to verify that the returned location is
+    actually a transaction header.
+    """
+    while 1:
+        f.seek(pos)
+        data = f.read(8096)
+        if not data:
+            return 0
+        s = 0
+        while 1:
+            l = data.find(".", s)
+            if l < 0:
+                pos += len(data)
+                break
+            s = l + 1
+            if s > len(data) - 8:
+                # Too close to the end of data: re-read starting at the
+                # period, or step past it if it already starts the buffer.
+                pos += l or 1
+                break
+            tl = u64(data[s:s+8])
+            if tl < pos:
+                return pos + s + 8
+
+def iprogress(i):
+    """Print "." for odd i, else the digit (i/2) % 10; then flush stdout."""
+    mark = "."
+    if i % 2 == 0: mark = (i/2) % 10
+    print mark,
+    sys.stdout.flush()
+
+def progress(p):
+    """Re-print the progress marks for ticks 0 .. p-1."""
+    for tick in range(p): iprogress(tick)
+
+def main():
+    """Parse the command line (see the module docstring) and run recover()."""
+    try:
+        opts, (inp, outp) = getopt.getopt(sys.argv[1:], "fv:pP:")
+    except getopt.error:
+        # Print the usage text first: die() reports the error and exits.
+        print __doc__ % sys.argv[0]
+        die()
+    force = partial = verbose = 0
+    pack = None
+    for opt, v in opts:
+        if opt == "-v":
+            verbose = int(v)
+        elif opt == "-p":
+            partial = 1
+        elif opt == "-f":
+            force = 1
+        elif opt == "-P":
+            pack = time.time() - float(v)
+    recover(inp, outp, verbose, partial, force, pack)
+
+def recover(inp, outp, verbose=0, partial=0, force=0, pack=0):
+    """Copy the readable transactions of inp into a new FileStorage outp.
+
+    Regions that fail to parse are skipped with scan(); undone transactions
+    are skipped too.  With partial set, a transaction that fails after at
+    least one record was restored is committed with status "p" instead of
+    being aborted.  The output is packed to time pack unless pack is None
+    (NOTE: the default, 0, is not None, so a default call does pack).
+    """
+    print "Recovering", inp, "into", outp
+    if os.path.exists(outp) and not force:
+        die("%s exists" % outp)
+
+    f = open(inp, "rb")
+    if f.read(4) != magic:
+        die("input is not a file storage")
+    f.seek(0,2)  # whence 2: seek to the end to measure the file
+    file_size = f.tell()
+
+    ofs = FileStorage(outp, create=1)
+    _ts = None
+    ok = 1
+    prog1 = 0
+    undone = 0
+    pos = 4L  # the first transaction header follows the 4-byte magic
+    ltid = None
+    while pos:
+        try:
+            npos, txn, tid = read_txn_header(f, pos, file_size, outp, ltid)
+        except EOFError:
+            break
+        except (KeyboardInterrupt, SystemExit):
+            raise
+        except Exception, err:
+            print "error reading txn header:", err
+            if not verbose:
+                progress(prog1)  # re-print the marks after the error text
+            pos = scan(f, pos)
+            if verbose > 1:
+                print "looking for valid txn header at", pos
+            continue
+        ltid = tid
+        if txn is None:
+            undone = undone + npos - pos  # bytes of the skipped transaction
+            pos = npos
+            continue
+        else:
+            pos = npos
+
+        tid = txn.tid
+        if _ts is None:
+            _ts = TimeStamp(tid)
+        else:
+            t = TimeStamp(tid)
+            if t <= _ts:
+                if ok:
+                    print ("Time stamps out of order %s, %s" % (_ts, t))
+                ok = 0
+                _ts = t.laterThan(_ts)
+                tid = `_ts`  # write the substitute stamp instead of the stored tid
+            else:
+                _ts = t
+                if not ok:
+                    print ("Time stamps back in order %s" % (t))
+                    ok = 1
+
+        ofs.tpc_begin(txn, tid, txn.status)
+        if verbose:
+            print "begin", pos, _ts,
+            if verbose > 1:
+                print
+            sys.stdout.flush()
+
+        nrec = 0
+        try:
+            for r in txn:
+                if verbose > 1:
+                    if r.data is None:
+                        l = "bp"
+                    else:
+                        l = len(r.data)
+                    print "%7d %s %s" % (u64(r.oid), l, r.version)
+                ofs.restore(r.oid, r.serial, r.data, r.version,
+                            r.data_txn, txn)
+                nrec += 1
+        except (KeyboardInterrupt, SystemExit):
+            raise
+        except Exception, err:
+            if partial and nrec:
+                ofs._status = "p"
+                ofs.tpc_vote(txn)
+                ofs.tpc_finish(txn)
+                if verbose:
+                    print "partial"
+            else:
+                ofs.tpc_abort(txn)
+            print "error copying transaction:", err
+            if not verbose:
+                progress(prog1)
+            pos = scan(f, pos)
+            if verbose > 1:
+                print "looking for valid txn header at", pos
+        else:
+            ofs.tpc_vote(txn)
+            ofs.tpc_finish(txn)
+            if verbose:
+                print "finish"
+                sys.stdout.flush()
+
+        if not verbose:
+            prog = pos * 20l / file_size  # progress on a scale of 0..20
+            while prog > prog1:
+                prog1 = prog1 + 1
+                iprogress(prog1)
+
+    # ofs._pos is presumably the end position of the output file.
+    bad = file_size - undone - ofs._pos
+    print "\n%s bytes removed during recovery" % bad
+    if undone:
+        print "%s bytes of undone transaction data were skipped" % undone
+    if pack is not None:
+        print "Packing ..."
+        ofs.pack(pack)
+
+    f.close()  # release the input file as well as the output storage
+    ofs.close()
+
+if __name__ == "__main__":
+    main()  # run the command-line tool when executed as a script
+


=== ZODB4/src/zodb/storage/file/main.py 1.6 => 1.7 ===
--- ZODB4/src/zodb/storage/file/main.py:1.6	Fri Jun  6 11:24:21 2003
+++ ZODB4/src/zodb/storage/file/main.py	Thu Jun 19 17:41:09 2003
@@ -387,7 +387,7 @@
             self._file.read(h.nrefs * 8)
             if h.plen:
                 return self._file.read(h.plen)
-            return self._loadBack(oid, h.pnv)[0]
+            return self._loadBack(oid, h.back)[0]
         finally:
             self._lock_release()
 


=== ZODB4/src/zodb/storage/file/__init__.py 1.3 => 1.4 ===
--- ZODB4/src/zodb/storage/file/__init__.py:1.3	Fri Apr 25 15:28:46 2003
+++ ZODB4/src/zodb/storage/file/__init__.py	Thu Jun 19 17:41:09 2003
@@ -11,4 +11,4 @@
 # FOR A PARTICULAR PURPOSE.
 #
 ##############################################################################
-from zodb.storage.file.main import FileStorage
+from zodb.storage.file.main import FileStorage, FileIterator