[Zodb-checkins] CVS: Zope/utilities/ZODBTools - netspace.py:1.1.4.1 space.py:1.3.4.1 zeoreplay.py:1.2.2.1
   
    Jeremy Hylton
     
    jeremy@zope.com
       
    Fri, 10 May 2002 16:26:49 -0400
    
    
  
Update of /cvs-repository/Zope/utilities/ZODBTools
In directory cvs.zope.org:/tmp/cvs-serv14309/utilities/ZODBTools
Added Files:
      Tag: TestIndexDS9-branch
	netspace.py space.py zeoreplay.py 
Log Message:
Commit recent changes from the Zope trunk.
Of particular interest: setup.py! (works with Python 2.3)
=== Added File Zope/utilities/ZODBTools/netspace.py ===
"""Report on the net size of objects counting subobjects.
usage: netspace.py [-P] [-v] data.fs
-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found
"""
from __future__ import nested_scopes
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
from ZODB.referencesf import referencesf
def find_paths(root, maxdist):
    """Find Python attribute traversal paths for objects to maxdist distance.

    Starting at a root object, traverse attributes up to maxdist levels
    from the root, looking for persistent objects (objects that have a
    non-None _p_oid attribute).  Return a dict mapping oids to traversal
    paths.

    The traversal is breadth-first and only the first path found for an
    oid is recorded, so each oid maps to one of its shortest paths.  A
    persistent object that was already reached is not expanded again;
    anything below it has already been queued through the shorter path.

    root -- a mapping whose values are the starting objects; its keys
            become the first component of each path
    maxdist -- objects at this distance from the root are recorded but
               their attributes are not followed

    XXX Assumes that the keys of the root are not themselves
    persistent objects.

    XXX Doesn't traverse containers.
    """
    paths = {}

    # Handle the root as a special case because it's a dict
    queue = []
    for k, v in root.items():
        queue.append((k, v, getattr(v, '_p_oid', None), 0))

    # The queue grows while it is being scanned, so walk it by index.
    pos = 0
    while pos < len(queue):
        path, obj, oid, dist = queue[pos]
        pos += 1
        if oid is not None:
            if oid in paths:
                # Already reached by a path that is at least as short;
                # don't let a back-reference overwrite it.
                continue
            paths[oid] = path
        if dist >= maxdist:
            continue
        getattr(obj, 'foo', None) # unghostify
        try:
            items = obj.__dict__.items()
        except AttributeError:
            # No instance dict (e.g. a string or number): nothing to follow
            continue
        for k, v in items:
            queue.append(("%s.%s" % (path, k), v,
                          getattr(v, '_p_oid', None), dist + 1))
    return paths
def main(path):
    fs = FileStorage(path, read_only=1)
    if PACK:
        fs.pack()
    db = ZODB.DB(fs)
    rt = db.open().root()
    paths = find_paths(rt, 3)
    def total_size(oid):
        cache = {}
        cache_size = 1000
        def _total_size(oid, seen):
            v = cache.get(oid)
            if v is not None:
                return v
            data, serialno = fs.load(oid, '')
            size = len(data)
            for suboid in referencesf(data):
                if seen.has_key(suboid):
                    continue
                seen[suboid] = 1
                size += _total_size(suboid, seen)
            cache[oid] = size
            if len(cache) == cache_size:
                cache.popitem()
            return size
        return _total_size(oid, {})
    keys = fs._index.keys()
    keys.sort()
    keys.reverse()
    if not VERBOSE:
        # If not running verbosely, don't print an entry for an object
        # unless it has an entry in paths.
        keys = filter(paths.has_key, keys)
    fmt = "%8s %5d %8d %s %s.%s"
    
    for oid in keys:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        refs = referencesf(data)
        path = paths.get(oid, '-')
        print fmt % (U64(oid), len(data), total_size(oid), path, mod, klass)
if __name__ == "__main__":
    import getopt
    import sys

    # Module-level switches consulted by main()
    PACK = 0
    VERBOSE = 0

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'Pv')
    except getopt.error, err:
        print err
        print __doc__
        sys.exit(2)

    # Exactly one positional argument is accepted: the storage file
    if len(args) != 1:
        print "expected one argument, got", len(args)
        print __doc__
        sys.exit(2)
    path = args[0]

    for flag, _unused in opts:
        if flag == '-P':
            PACK = 1
        elif flag == '-v':
            VERBOSE += 1

    main(path)
=== Added File Zope/utilities/ZODBTools/space.py ===
#! /usr/bin/env python
"""Report on the space used by objects in a storage.
usage: space.py [-v] data.fs
-v: also print a line for each object, not just the per-class totals
The current implementation only supports FileStorage.
Current limitations / simplifications: Ignores revisions and versions.
"""
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
def run(path, v=0):
    fs = FileStorage(path, read_only=1)
    # break into the file implementation
    if hasattr(fs._index, 'iterkeys'):
        iter = fs._index.iterkeys()
    else:
        iter = fs._index.keys()
    totals = {}
    for oid in iter:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        key = "%s.%s" % (mod, klass)
        bytes, count = totals.get(key, (0, 0))
        bytes += len(data)
        count += 1
        totals[key] = bytes, count
        if v:
            print "%8s %5d %s" % (U64(oid), len(data), key)
    L = totals.items()
    L.sort(lambda a, b: cmp(a[1], b[1]))
    L.reverse()
    print "Totals per object class:"
    for key, (bytes, count) in L:
        print "%8d %8d %s" % (count, bytes, key)
def main():
    """Parse the command line and run the space report.

    Accepts any number of -v flags and exactly one storage file name;
    prints a usage line and exits with status 2 otherwise.
    """
    import getopt
    import sys

    try:
        opts, args = getopt.getopt(sys.argv[1:], "v")
    except getopt.error, msg:
        print msg
        print "usage: space.py [-v] Data.fs"
        sys.exit(2)

    if len(args) != 1:
        print "usage: space.py [-v] Data.fs"
        sys.exit(2)

    # Each -v on the command line bumps the verbosity by one
    verbosity = len([flag for flag, value in opts if flag == "-v"])
    run(args[0], verbosity)
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
=== Added File Zope/utilities/ZODBTools/zeoreplay.py ===
"""Parse the BLATHER logging generated by ZEO, and optionally replay it.
Usage: zeoreplay.py [options]
Options:
    --help / -h
        Print this message and exit.
    --replay=storage
    -r storage
        Replay the parsed transactions through the new storage
    --maxtxns=count
    -m count
        Parse no more than count transactions.
    --report / -p
        Print a report as we're parsing.
Unlike parsezeolog.py, this script generates timestamps for each transaction,
and sub-command in the transaction.  We can use this to compare timings with
synthesized data.
"""
import re
import sys
import time
import getopt
import operator
# ZEO logs measure wall-clock time so for consistency we need to do the same
#from time import clock as now
from time import time as now
from ZODB.FileStorage import FileStorage
#from bsddb3Storage.Full import Full
#from Standby.primary import PrimaryStorage
#from Standby.config import RS_PORT
from ZODB.Transaction import Transaction
from ZODB.utils import p64
# Raw strings keep the regex backslashes (\d, \w, \() out of the hands
# of the string-literal parser.

# Timestamp at the start of a log line: group 1 is the YYYY-MM-DD date,
# group 2 the HH:MM:SS time.
datecre = re.compile(r'(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')
# A logged server call: group 1 is the method name, group 2 the raw
# argument text, groups 3 and 4 the quoted string and the number from
# the parenthesised pair that follows the call.
methcre = re.compile(r"ZEO Server (\w+)\((.*)\) \('(.*)', (\d+)")
class StopParsing(Exception):
    """Raised to stop reading input once enough transactions were parsed."""
def usage(code, msg=''):
    print __doc__
    if msg:
        print msg
    sys.exit(code)
def parse_time(line):
    """Return the time portion of a zLOG line in seconds or None.

    The line must start with a YYYY-MM-DDTHH:MM:SS timestamp; None is
    returned when it does not.  The timestamp is converted with
    time.mktime(), so it is interpreted as local time.
    """
    mo = datecre.match(line)
    if mo is None:
        return None
    date, time_ = mo.group(1, 2)
    fields = [int(elt) for elt in date.split('-') + time_.split(':')]
    # time.mktime() is documented to take a struct_time or a full
    # 9-tuple, and newer Pythons reject a list, so hand it a real tuple.
    # The three trailing zeros fill the weekday, yearday and DST slots.
    return int(time.mktime(tuple(fields + [0, 0, 0])))
def parse_line(line):
    """Parse a log entry and return time, method info, and client.

    Returns a triple (t, m, c): t is the value from parse_time(), m is
    (method name, tuple of stripped argument strings) and c is the pair
    of strings captured after the call.  All three are None when the
    line has no timestamp or no logged server call.
    """
    nothing = None, None, None

    when = parse_time(line)
    if when is None:
        return nothing

    mo = methcre.search(line)
    if mo is None:
        return nothing

    name, rawargs, host, port = mo.group(1, 2, 3, 4)
    # The arguments are split naively on commas and left as strings
    args = tuple([piece.strip() for piece in rawargs.split(',')])
    return when, (name, args), (host, port)
class StoreStat:
    """Record of a single store call: its time, the oid and the size."""

    def __init__(self, when, oid, size):
        self.when, self.oid, self.size = when, oid, size

    # Crufty: lets an instance be unpacked as an (oid, size) pair
    def __getitem__(self, i):
        if i == 0:
            return self.oid
        elif i == 1:
            return self.size
        else:
            raise IndexError
class TxnStat:
    """Timing and store information gathered for one logged transaction."""

    def __init__(self):
        self._begintime = self._finishtime = self._aborttime = None
        self._url = None
        # One StoreStat per storea call seen for this transaction
        self._objects = []

    def tpc_begin(self, when, args, client):
        """Remember when the transaction began and its description."""
        self._begintime = when
        # args are txnid, user, description (looks like it's always a url)
        self._url = args[2]

    def storea(self, when, args, client):
        """Record a store of args[0] (the oid) of size args[1]."""
        oid = int(args[0])
        # args[1] is "[numbytes]"; drop the surrounding brackets
        size = int(args[1][1:-1])
        self._objects.append(StoreStat(when, oid, size))

    def tpc_abort(self, when):
        """Remember when the transaction was aborted."""
        self._aborttime = when

    def tpc_finish(self, when):
        """Remember when the transaction finished."""
        self._finishtime = when
# Mapping oid -> revid.  Shared by every ReplayTxn: a replayed store looks
# up the revid recorded by the previous store of the same oid and then
# records the new one the storage returns.
_revids = {}
class ReplayTxn(TxnStat):
    """A TxnStat that can re-run its recorded stores against a storage."""

    def __init__(self, storage):
        TxnStat.__init__(self)
        self._storage = storage
        # Set by replay(): replay duration minus the logged duration
        self._replaydelta = 0

    def replay(self):
        """Replay the recorded stores and compare timing with the log.

        Each recorded object is stored as a dummy payload of the logged
        size, then the transaction is aborted if an abort time was
        recorded and otherwise voted and finished.  The difference
        between the time this took and the logged duration is left in
        self._replaydelta.
        """
        storage = self._storage
        started = now()
        txn = Transaction()
        storage.tpc_begin(txn)
        for stat in self._objects:
            # Objects never stored before start from the all-zero revid
            prev = _revids.get(stat.oid, '\0'*8)
            # BAW: simulate a pickle of the given size
            # BAW: ignore versions for now
            _revids[stat.oid] = storage.store(
                p64(stat.oid), prev, 'x' * stat.size, '', txn)
        if not self._aborttime:
            storage.tpc_vote(txn)
            storage.tpc_finish(txn)
            origdelta = self._finishtime - self._begintime
        else:
            storage.tpc_abort(txn)
            origdelta = self._aborttime - self._begintime
        finished = now()
        # Shows how many seconds behind (positive) or ahead (negative) of
        # the originally logged transaction our local replay took
        self._replaydelta = finished - started - origdelta
class ZEOParser:
    def __init__(self, maxtxns=-1, report=1, storage=None):
        self.__txns = []
        self.__curtxn = {}
        self.__skipped = 0
        self.__maxtxns = maxtxns
        self.__finishedtxns = 0
        self.__report = report
        self.__storage = storage
    def parse(self, line):
        t, m, c = parse_line(line)
        if t is None:
            # Skip this line
            return
        name = m[0]
        meth = getattr(self, name, None)
        if meth is not None:
            meth(t, m[1], c)
    def tpc_begin(self, when, args, client):
        txn = ReplayTxn(self.__storage)
        self.__curtxn[client] = txn
        meth = getattr(txn, 'tpc_begin', None)
        if meth is not None:
            meth(when, args, client)
        
    def storea(self, when, args, client):
        txn = self.__curtxn.get(client)
        if txn is None:
            self.__skipped += 1
            return
        meth = getattr(txn, 'storea', None)
        if meth is not None:
            meth(when, args, client)
    def tpc_finish(self, when, args, client):
        txn = self.__curtxn.get(client)
        if txn is None:
            self.__skipped += 1
            return
        meth = getattr(txn, 'tpc_finish', None)
        if meth is not None:
            meth(when)
        if self.__report:
            self.report(txn)
        self.__txns.append(txn)
        self.__curtxn[client] = None
        self.__finishedtxns += 1
        if self.__maxtxns > 0 and self.__finishedtxns >= self.__maxtxns:
            raise StopParsing
    def report(self, txn):
        """Print a report about the transaction"""
        if txn._objects:
            bytes = reduce(operator.add, [size for oid, size in txn._objects])
        else:
            bytes = 0
        print '%s %s %4d %10d %s %s' % (
            txn._begintime, txn._finishtime - txn._begintime,
            len(txn._objects),
            bytes, 
            time.ctime(txn._begintime),
            txn._url)
    def replay(self):
        for txn in self.__txns:
            txn.replay()
        # How many fell behind?
        slower = []
        faster = []
        for txn in self.__txns:
            if txn._replaydelta > 0:
                slower.append(txn)
            else:
                faster.append(txn)
        print len(slower), 'laggards,', len(faster), 'on-time or faster'
        # Find some averages
        if slower:
            sum = reduce(operator.add,
                         [txn._replaydelta for txn in slower], 0)
            print 'average slower txn was:', float(sum) / len(slower)
        if faster:
            sum = reduce(operator.add,
                         [txn._replaydelta for txn in faster], 0)
            print 'average faster txn was:', float(sum) / len(faster)
def main():
    """Parse a ZEO log from standard input and optionally replay it.

    Command-line options are described in the module docstring.  Every
    line of stdin is fed to a ZEOParser until end of input or until the
    parser raises StopParsing; if a replay storage was given, the parsed
    transactions are then replayed through it.  Timing totals are
    printed at the end.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'hr:pm:',
            ['help', 'replay=', 'report', 'maxtxns='])
    except getopt.error, e:
        usage(1, e)

    if args:
        usage(1)

    replay = 0
    maxtxns = -1
    report = 0
    storagefile = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-r', '--replay'):
            replay = 1
            storagefile = arg
        elif opt in ('-p', '--report'):
            report = 1
        elif opt in ('-m', '--maxtxns'):
            try:
                maxtxns = int(arg)
            except ValueError:
                usage(1, 'Bad -m argument: %s' % arg)

    # storage must be bound even when no replay was requested, because
    # it is always handed to the parser below.
    storage = None
    if replay:
        storage = FileStorage(storagefile)
        #storage = Full(storagefile)
        #storage = PrimaryStorage('yyz', storage, RS_PORT)

    t0 = now()
    p = ZEOParser(maxtxns, report, storage)
    i = 0
    while 1:
        line = sys.stdin.readline()
        if not line:
            break
        i += 1
        try:
            p.parse(line)
        except StopParsing:
            break
        except:
            # Say which input line failed, then let the error propagate
            print 'input file line:', i
            raise
    t1 = now()
    print 'total parse time:', t1-t0
    t2 = now()
    if replay:
        p.replay()
    t3 = now()
    print 'total replay time:', t3-t2
    print 'total time:', t3-t0
# Parse (and optionally replay) only when executed as a script.
if __name__ == '__main__':
    main()