[Zope-Checkins] CVS: Zope/utilities/ZODBTools - netspace.py:1.1.4.1 space.py:1.3.4.1 zeoreplay.py:1.2.2.1

Jeremy Hylton jeremy@zope.com
Fri, 10 May 2002 16:26:49 -0400


Update of /cvs-repository/Zope/utilities/ZODBTools
In directory cvs.zope.org:/tmp/cvs-serv14309/utilities/ZODBTools

Added Files:
      Tag: TestIndexDS9-branch
	netspace.py space.py zeoreplay.py 
Log Message:
Commit recent changes from the Zope trunk.

Of particular interest: setup.py! (works with Python 2.3)



=== Added File Zope/utilities/ZODBTools/netspace.py ===
"""Report on the net size of objects counting subobjects.

usage: netspace.py [-P] [-v] data.fs

-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found
"""


from __future__ import nested_scopes

import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
from ZODB.referencesf import referencesf

def find_paths(root, maxdist):
    """Find Python attribute traversal paths for objects to maxdist distance.

    Starting at a root object, traverse attributes up to maxdist levels
    from the root, looking for persistent objects.  Return a dict
    mapping oids to traversal paths.

    XXX Assumes that the keys of the root are not themselves
    persistent objects.

    XXX Doesn't traverse containers.
    """
    paths = {}

    # Handle the root as a special case because it's a dict
    objs = []
    for k, v in root.items():
        oid = getattr(v, '_p_oid', None)
        objs.append((k, v, oid, 0))

    for path, obj, oid, dist in objs:
        if oid is not None:
            paths[oid] = path
        if dist < maxdist:
            getattr(obj, 'foo', None) # unghostify: force the object's state to load
            try:
                items = obj.__dict__.items()
            except AttributeError:
                continue
            for k, v in items:
                oid = getattr(v, '_p_oid', None)
                objs.append(("%s.%s" % (path, k), v, oid, dist + 1))

    return paths

def main(path):
    fs = FileStorage(path, read_only=1)
    if PACK:
        fs.pack()

    db = ZODB.DB(fs)
    rt = db.open().root()
    paths = find_paths(rt, 3)

    def total_size(oid):
        cache = {}
        cache_size = 1000
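        # Memoize sizes computed during this traversal; the popitem() call
        # below evicts an arbitrary entry once the cache fills, keeping it
        # bounded at cache_size entries.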
        def _total_size(oid, seen):
            v = cache.get(oid)
            if v is not None:
                return v
            data, serialno = fs.load(oid, '')
            size = len(data)
            for suboid in referencesf(data):
                if seen.has_key(suboid):
                    continue
                seen[suboid] = 1
                size += _total_size(suboid, seen)
            cache[oid] = size
            if len(cache) == cache_size:
                cache.popitem()
            return size
        return _total_size(oid, {})

    keys = fs._index.keys()
    keys.sort()
    keys.reverse()

    if not VERBOSE:
        # If not running verbosely, don't print an entry for an object
        # unless it has an entry in paths.
        keys = filter(paths.has_key, keys)

    fmt = "%8s %5d %8d %s %s.%s"
    
    for oid in keys:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        refs = referencesf(data)
        path = paths.get(oid, '-')
        print fmt % (U64(oid), len(data), total_size(oid), path, mod, klass)

if __name__ == "__main__":
    import sys
    import getopt

    PACK = 0
    VERBOSE = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'Pv')
        path, = args
    except getopt.error, err:
        print err
        print __doc__
        sys.exit(2)
    except ValueError:
        print "expected one argument, got", len(args)
        print __doc__
        sys.exit(2)
    for o, v in opts:
        if o == '-P':
            PACK = 1
        if o == '-v':
            VERBOSE += 1
    main(path)


=== Added File Zope/utilities/ZODBTools/space.py ===
#! /usr/bin/env python

"""Report on the space used by objects in a storage.

usage: space.py [-v] data.fs

-v: print a line for each object as it is scanned

The current implementation only supports FileStorage.

Current limitations / simplifications: Ignores revisions and versions.
"""

import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata

def run(path, v=0):
    fs = FileStorage(path, read_only=1)
    # Reach into the FileStorage implementation to get at the oid index.
    if hasattr(fs._index, 'iterkeys'):
        oids = fs._index.iterkeys()
    else:
        oids = fs._index.keys()
    totals = {}
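    # totals maps "module.class" -> (total pickle bytes, object count)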
    for oid in oids:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        key = "%s.%s" % (mod, klass)
        bytes, count = totals.get(key, (0, 0))
        bytes += len(data)
        count += 1
        totals[key] = bytes, count
        if v:
            print "%8s %5d %s" % (U64(oid), len(data), key)
    L = totals.items()
    L.sort(lambda a, b: cmp(a[1], b[1]))
    L.reverse()
    print "Totals per object class:"
    for key, (bytes, count) in L:
        print "%8d %8d %s" % (count, bytes, key)

def main():
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "v")
    except getopt.error, msg:
        print msg
        print "usage: space.py [-v] Data.fs"
        sys.exit(2)
    if len(args) != 1:
        print "usage: space.py [-v] Data.fs"
        sys.exit(2)
    v = 0
    for o, a in opts:
        if o == "-v":
            v += 1
    path = args[0]
    run(path, v)

if __name__ == "__main__":
    main()


=== Added File Zope/utilities/ZODBTools/zeoreplay.py ===
"""Parse the BLATHER logging generated by ZEO, and optionally replay it.

Usage: zeoreplay.py [options]

Options:

    --help / -h
        Print this message and exit.

    --replay=storage
    -r storage
        Replay the parsed transactions into a new storage created at the
        given path.

    --maxtxns=count
    -m count
        Parse no more than count transactions.

    --report / -p
        Print a report as we're parsing.

Unlike parsezeolog.py, this script generates timestamps for each transaction
and for each sub-command within the transaction.  We can use these to compare
timings with synthesized data.
"""

import re
import sys
import time
import getopt
import operator
# ZEO logs measure wall-clock time so for consistency we need to do the same
#from time import clock as now
from time import time as now

from ZODB.FileStorage import FileStorage
#from bsddb3Storage.Full import Full
#from Standby.primary import PrimaryStorage
#from Standby.config import RS_PORT
from ZODB.Transaction import Transaction
from ZODB.utils import p64

datecre = re.compile(r'(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')
methcre = re.compile(r"ZEO Server (\w+)\((.*)\) \('(.*)', (\d+)")
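
# An illustrative (hypothetical) BLATHER line of the rough shape these regexes
# are written for:
#
#   2002-05-10T16:26:49 BLATHER(-100) ZEO Server storea(2301, [482], 8734218) ('10.0.0.5', 34217)
#
# datecre picks the timestamp off the front of the line; methcre extracts the
# method name, its argument list, and the client (host, port) pair.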

class StopParsing(Exception):
    pass



def usage(code, msg=''):
    print __doc__
    if msg:
        print msg
    sys.exit(code)



def parse_time(line):
    """Return the time portion of a zLOG line in seconds or None."""
    mo = datecre.match(line)
    if mo is None:
        return None
    date, time_ = mo.group(1, 2)
    date_l = [int(elt) for elt in date.split('-')]
    time_l = [int(elt) for elt in time_.split(':')]
    return int(time.mktime(date_l + time_l + [0, 0, 0]))


def parse_line(line):
    """Parse a log entry and return time, method info, and client."""
    t = parse_time(line)
    if t is None:
        return None, None, None
    mo = methcre.search(line)
    if mo is None:
        return None, None, None
    meth_name = mo.group(1)
    meth_args = mo.group(2)
    meth_args = [s.strip() for s in meth_args.split(',')]
    m = meth_name, tuple(meth_args)
    c = mo.group(3), mo.group(4)
    return t, m, c



class StoreStat:
    def __init__(self, when, oid, size):
        self.when = when
        self.oid = oid
        self.size = size

    # Crufty: support the sequence protocol so that report() can unpack
    # (oid, size) pairs from txn._objects.
    def __getitem__(self, i):
        if i == 0: return self.oid
        if i == 1: return self.size
        raise IndexError


class TxnStat:
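    # Method names mirror the ZEO server calls found in the log (tpc_begin,
    # storea, tpc_abort, tpc_finish) so ZEOParser can dispatch to them by name.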
    def __init__(self):
        self._begintime = None
        self._finishtime = None
        self._aborttime = None
        self._url = None
        self._objects = []

    def tpc_begin(self, when, args, client):
        self._begintime = when
        # args are txnid, user, description (looks like it's always a url)
        self._url = args[2]

    def storea(self, when, args, client):
        oid = int(args[0])
        # args[1] is "[numbytes]"
        size = int(args[1][1:-1])
        s = StoreStat(when, oid, size)
        self._objects.append(s)

    def tpc_abort(self, when):
        self._aborttime = when

    def tpc_finish(self, when):
        self._finishtime = when



# Map oid -> most recently stored revid (serial), shared across all replayed
# transactions so each store passes the correct previous revision.
_revids = {}

class ReplayTxn(TxnStat):
    def __init__(self, storage):
        self._storage = storage
        self._replaydelta = 0
        TxnStat.__init__(self)

    def replay(self):
        ZERO = '\0'*8
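        # ZERO is the serial used for an object's first store, i.e. "no
        # previous revision".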
        t0 = now()
        t = Transaction()
        self._storage.tpc_begin(t)
        for obj in self._objects:
            oid = obj.oid
            revid = _revids.get(oid, ZERO)
            # BAW: simulate a pickle of the given size
            data = 'x' * obj.size
            # BAW: ignore versions for now
            newrevid  = self._storage.store(p64(oid), revid, data, '', t)
            _revids[oid] = newrevid
        if self._aborttime:
            self._storage.tpc_abort(t)
            origdelta = self._aborttime - self._begintime
        else:
            self._storage.tpc_vote(t)
            self._storage.tpc_finish(t)
            origdelta = self._finishtime - self._begintime
        t1 = now()
        # How many seconds slower (positive) or faster (negative) the local
        # replay was compared to the original transaction's duration
        self._replaydelta = t1 - t0 - origdelta



class ZEOParser:
    def __init__(self, maxtxns=-1, report=1, storage=None):
        self.__txns = []
        self.__curtxn = {}
        self.__skipped = 0
        self.__maxtxns = maxtxns
        self.__finishedtxns = 0
        self.__report = report
        self.__storage = storage

    def parse(self, line):
        t, m, c = parse_line(line)
        if t is None:
            # Skip this line
            return
        name = m[0]
        meth = getattr(self, name, None)
        if meth is not None:
            meth(t, m[1], c)

    def tpc_begin(self, when, args, client):
        txn = ReplayTxn(self.__storage)
        self.__curtxn[client] = txn
        meth = getattr(txn, 'tpc_begin', None)
        if meth is not None:
            meth(when, args, client)
        
    def storea(self, when, args, client):
        txn = self.__curtxn.get(client)
        if txn is None:
            self.__skipped += 1
            return
        meth = getattr(txn, 'storea', None)
        if meth is not None:
            meth(when, args, client)

    def tpc_finish(self, when, args, client):
        txn = self.__curtxn.get(client)
        if txn is None:
            self.__skipped += 1
            return
        meth = getattr(txn, 'tpc_finish', None)
        if meth is not None:
            meth(when)
        if self.__report:
            self.report(txn)
        self.__txns.append(txn)
        self.__curtxn[client] = None
        self.__finishedtxns += 1
        if self.__maxtxns > 0 and self.__finishedtxns >= self.__maxtxns:
            raise StopParsing

    def report(self, txn):
        """Print a report about the transaction"""
        if txn._objects:
            bytes = reduce(operator.add, [size for oid, size in txn._objects])
        else:
            bytes = 0
        print '%s %s %4d %10d %s %s' % (
            txn._begintime, txn._finishtime - txn._begintime,
            len(txn._objects),
            bytes, 
            time.ctime(txn._begintime),
            txn._url)

    def replay(self):
        for txn in self.__txns:
            txn.replay()
        # How many fell behind?
        slower = []
        faster = []
        for txn in self.__txns:
            if txn._replaydelta > 0:
                slower.append(txn)
            else:
                faster.append(txn)
        print len(slower), 'laggards,', len(faster), 'on-time or faster'
        # Find some averages
        if slower:
            sum = reduce(operator.add,
                         [txn._replaydelta for txn in slower], 0)
            print 'average slower txn was:', float(sum) / len(slower)
        if faster:
            sum = reduce(operator.add,
                         [txn._replaydelta for txn in faster], 0)
            print 'average faster txn was:', float(sum) / len(faster)



def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'hr:pm:',
            ['help', 'replay=', 'report', 'maxtxns='])
    except getopt.error, e:
        usage(1, e)

    if args:
        usage(1)

    replay = 0
    maxtxns = -1
    report = 0
    storagefile = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-r', '--replay'):
            replay = 1
            storagefile = arg
        elif opt in ('-p', '--report'):
            report = 1
        elif opt in ('-m', '--maxtxns'):
            try:
                maxtxns = int(arg)
            except ValueError:
                usage(1, 'Bad -m argument: %s' % arg)

    storage = None
    if replay:
        storage = FileStorage(storagefile)
        #storage = Full(storagefile)
        #storage = PrimaryStorage('yyz', storage, RS_PORT)
    t0 = now()
    p = ZEOParser(maxtxns, report, storage)
    i = 0
    while 1:
        line = sys.stdin.readline()
        if not line:
            break
        i += 1
        try:
            p.parse(line)
        except StopParsing:
            break
        except:
            print 'input file line:', i
            raise
    t1 = now()
    print 'total parse time:', t1-t0
    t2 = now()
    if replay:
        p.replay()
    t3 = now()
    print 'total replay time:', t3-t2
    print 'total time:', t3-t0



if __name__ == '__main__':
    main()