[Zodb-checkins] CVS: Zope/utilities/ZODBTools - netspace.py:1.1.4.1 space.py:1.3.4.1 zeoreplay.py:1.2.2.1
Jeremy Hylton
jeremy@zope.com
Fri, 10 May 2002 16:26:49 -0400
Update of /cvs-repository/Zope/utilities/ZODBTools
In directory cvs.zope.org:/tmp/cvs-serv14309/utilities/ZODBTools
Added Files:
Tag: TestIndexDS9-branch
netspace.py space.py zeoreplay.py
Log Message:
Commit recent changes from the Zope trunk.
Of particular interest: setup.py! (works with Python 2.3)
=== Added File Zope/utilities/ZODBTools/netspace.py ===
"""Report on the net size of objects counting subobjects.
usage: netspace.py [-P | -v] data.fs
-P: do a pack first
-v: print info for all objects, even if a traversal path isn't found
"""
from __future__ import nested_scopes
import time

import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
from ZODB.referencesf import referencesf
def find_paths(root, maxdist):
"""Find Python attribute traversal paths for objects to maxdist distance.
Starting at a root object, traverse attributes up to distance levels
from the root, looking for persistent objects. Return a dict
mapping oids to traversal paths.
XXX Assumes that the keys of the root are not themselves
persistent objects.
XXX Doesn't traverse containers.
"""
paths = {}
# Handle the root as a special case because it's a dict
objs = []
for k, v in root.items():
oid = getattr(v, '_p_oid', None)
objs.append((k, v, oid, 0))
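    # Breadth-first walk: appending to objs while iterating over it below
    # extends the walk one attribute level at a time.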
for path, obj, oid, dist in objs:
if oid is not None:
paths[oid] = path
if dist < maxdist:
            getattr(obj, 'foo', None)  # attribute access unghostifies (loads state)
try:
items = obj.__dict__.items()
except AttributeError:
continue
for k, v in items:
oid = getattr(v, '_p_oid', None)
objs.append(("%s.%s" % (path, k), v, oid, dist + 1))
return paths
def main(path):
    if PACK:
        # Packing needs a writable storage, so open one briefly and close
        # it again before the read-only pass below.
        pack_fs = FileStorage(path)
        pack_fs.pack(time.time(), referencesf)
        pack_fs.close()
    fs = FileStorage(path, read_only=1)
    db = ZODB.DB(fs)
rt = db.open().root()
paths = find_paths(rt, 3)
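    # total_size(oid) returns the object's pickle size plus the pickle sizes
    # of everything reachable from it, counting each reachable oid once per
    # call; results are memoized in a small bounded cache.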
def total_size(oid):
cache = {}
cache_size = 1000
def _total_size(oid, seen):
v = cache.get(oid)
if v is not None:
return v
data, serialno = fs.load(oid, '')
size = len(data)
for suboid in referencesf(data):
if seen.has_key(suboid):
continue
seen[suboid] = 1
size += _total_size(suboid, seen)
cache[oid] = size
if len(cache) == cache_size:
cache.popitem()
return size
return _total_size(oid, {})
keys = fs._index.keys()
keys.sort()
keys.reverse()
if not VERBOSE:
# If not running verbosely, don't print an entry for an object
# unless it has an entry in paths.
keys = filter(paths.has_key, keys)
fmt = "%8s %5d %8d %s %s.%s"
for oid in keys:
data, serialno = fs.load(oid, '')
mod, klass = get_pickle_metadata(data)
refs = referencesf(data)
path = paths.get(oid, '-')
print fmt % (U64(oid), len(data), total_size(oid), path, mod, klass)
if __name__ == "__main__":
import sys
import getopt
PACK = 0
VERBOSE = 0
try:
opts, args = getopt.getopt(sys.argv[1:], 'Pv')
path, = args
except getopt.error, err:
print err
print __doc__
sys.exit(2)
except ValueError:
print "expected one argument, got", len(args)
print __doc__
sys.exit(2)
for o, v in opts:
if o == '-P':
PACK = 1
if o == '-v':
VERBOSE += 1
main(path)
=== Added File Zope/utilities/ZODBTools/space.py ===
#! /usr/bin/env python
"""Report on the space used by objects in a storage.
usage: space.py data.fs
The current implementation only supports FileStorage.
Current limitations / simplifications: Ignores revisions and versions.
"""
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
def run(path, v=0):
fs = FileStorage(path, read_only=1)
    # Reach into the FileStorage implementation to walk the oids in its
    # index, whatever mapping type that happens to be.
    if hasattr(fs._index, 'iterkeys'):
        keys = fs._index.iterkeys()
    else:
        keys = fs._index.keys()
    totals = {}
    for oid in keys:
data, serialno = fs.load(oid, '')
mod, klass = get_pickle_metadata(data)
key = "%s.%s" % (mod, klass)
bytes, count = totals.get(key, (0, 0))
bytes += len(data)
count += 1
totals[key] = bytes, count
if v:
print "%8s %5d %s" % (U64(oid), len(data), key)
L = totals.items()
L.sort(lambda a, b: cmp(a[1], b[1]))
L.reverse()
print "Totals per object class:"
for key, (bytes, count) in L:
print "%8d %8d %s" % (count, bytes, key)
def main():
import sys
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], "v")
except getopt.error, msg:
print msg
print "usage: space.py [-v] Data.fs"
sys.exit(2)
if len(args) != 1:
print "usage: space.py [-v] Data.fs"
sys.exit(2)
v = 0
for o, a in opts:
if o == "-v":
v += 1
path = args[0]
run(path, v)
if __name__ == "__main__":
main()
=== Added File Zope/utilities/ZODBTools/zeoreplay.py ===
"""Parse the BLATHER logging generated by ZEO, and optionally replay it.
Usage: zeointervals.py [options]
Options:
--help / -h
Print this message and exit.
--replay=storage
-r storage
Replay the parsed transactions through the new storage
--maxtxn=count
-m count
Parse no more than count transactions.
--report / -p
Print a report as we're parsing.
Unlike parsezeolog.py, this script generates timestamps for each transaction,
and sub-command in the transaction. We can use this to compare timings with
synthesized data.
"""
import re
import sys
import time
import getopt
import operator
# ZEO logs measure wall-clock time so for consistency we need to do the same
#from time import clock as now
from time import time as now
from ZODB.FileStorage import FileStorage
#from bsddb3Storage.Full import Full
#from Standby.primary import PrimaryStorage
#from Standby.config import RS_PORT
from ZODB.Transaction import Transaction
from ZODB.utils import p64
datecre = re.compile('(\d\d\d\d-\d\d-\d\d)T(\d\d:\d\d:\d\d)')
methcre = re.compile("ZEO Server (\w+)\((.*)\) \('(.*)', (\d+)")
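# The regexps above expect zLOG BLATHER lines roughly like the following
# (schematic, not verbatim server output):
#
#   2002-05-10T16:26:49 BLATHER(-100) ZEO Server storea(119, [42], 8723) ('10.0.0.1', 12345)
#
# datecre pulls out the timestamp; methcre pulls out the method name, its
# argument list, and the calling client's (host, port) address.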
class StopParsing(Exception):
pass
def usage(code, msg=''):
print __doc__
if msg:
print msg
sys.exit(code)
def parse_time(line):
"""Return the time portion of a zLOG line in seconds or None."""
mo = datecre.match(line)
if mo is None:
return None
date, time_ = mo.group(1, 2)
date_l = [int(elt) for elt in date.split('-')]
time_l = [int(elt) for elt in time_.split(':')]
return int(time.mktime(date_l + time_l + [0, 0, 0]))
def parse_line(line):
"""Parse a log entry and return time, method info, and client."""
t = parse_time(line)
if t is None:
return None, None, None
mo = methcre.search(line)
if mo is None:
return None, None, None
meth_name = mo.group(1)
meth_args = mo.group(2)
meth_args = [s.strip() for s in meth_args.split(',')]
m = meth_name, tuple(meth_args)
c = mo.group(3), mo.group(4)
return t, m, c
class StoreStat:
def __init__(self, when, oid, size):
self.when = when
self.oid = oid
self.size = size
    # Crufty: allow (oid, size) tuple-style unpacking; see ZEOParser.report().
def __getitem__(self, i):
if i == 0: return self.oid
if i == 1: return self.size
raise IndexError
class TxnStat:
def __init__(self):
self._begintime = None
self._finishtime = None
self._aborttime = None
self._url = None
self._objects = []
def tpc_begin(self, when, args, client):
self._begintime = when
# args are txnid, user, description (looks like it's always a url)
self._url = args[2]
def storea(self, when, args, client):
oid = int(args[0])
# args[1] is "[numbytes]"
size = int(args[1][1:-1])
s = StoreStat(when, oid, size)
self._objects.append(s)
def tpc_abort(self, when):
self._aborttime = when
def tpc_finish(self, when):
self._finishtime = when
# Mapping oid -> revid
_revids = {}
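# store() expects the serial of the revision being overwritten (ZERO for a
# new object), so the replay records the most recent revid for every oid.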
class ReplayTxn(TxnStat):
def __init__(self, storage):
self._storage = storage
self._replaydelta = 0
TxnStat.__init__(self)
def replay(self):
ZERO = '\0'*8
t0 = now()
t = Transaction()
self._storage.tpc_begin(t)
for obj in self._objects:
oid = obj.oid
revid = _revids.get(oid, ZERO)
# BAW: simulate a pickle of the given size
data = 'x' * obj.size
# BAW: ignore versions for now
newrevid = self._storage.store(p64(oid), revid, data, '', t)
_revids[oid] = newrevid
if self._aborttime:
self._storage.tpc_abort(t)
origdelta = self._aborttime - self._begintime
else:
self._storage.tpc_vote(t)
self._storage.tpc_finish(t)
origdelta = self._finishtime - self._begintime
t1 = now()
        # Record how many seconds slower (positive) or faster (negative) our
        # replay of this transaction was than the original run in the log.
self._replaydelta = t1 - t0 - origdelta
class ZEOParser:
def __init__(self, maxtxns=-1, report=1, storage=None):
self.__txns = []
self.__curtxn = {}
self.__skipped = 0
self.__maxtxns = maxtxns
self.__finishedtxns = 0
self.__report = report
self.__storage = storage
def parse(self, line):
t, m, c = parse_line(line)
if t is None:
# Skip this line
return
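        # Dispatch on the logged method name; calls this parser doesn't
        # model (anything without a matching handler) are silently ignored.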
name = m[0]
meth = getattr(self, name, None)
if meth is not None:
meth(t, m[1], c)
def tpc_begin(self, when, args, client):
txn = ReplayTxn(self.__storage)
self.__curtxn[client] = txn
meth = getattr(txn, 'tpc_begin', None)
if meth is not None:
meth(when, args, client)
def storea(self, when, args, client):
txn = self.__curtxn.get(client)
if txn is None:
self.__skipped += 1
return
meth = getattr(txn, 'storea', None)
if meth is not None:
meth(when, args, client)
def tpc_finish(self, when, args, client):
txn = self.__curtxn.get(client)
if txn is None:
self.__skipped += 1
return
meth = getattr(txn, 'tpc_finish', None)
if meth is not None:
meth(when)
if self.__report:
self.report(txn)
self.__txns.append(txn)
self.__curtxn[client] = None
self.__finishedtxns += 1
if self.__maxtxns > 0 and self.__finishedtxns >= self.__maxtxns:
raise StopParsing
def report(self, txn):
"""Print a report about the transaction"""
if txn._objects:
bytes = reduce(operator.add, [size for oid, size in txn._objects])
else:
bytes = 0
print '%s %s %4d %10d %s %s' % (
txn._begintime, txn._finishtime - txn._begintime,
len(txn._objects),
bytes,
time.ctime(txn._begintime),
txn._url)
def replay(self):
for txn in self.__txns:
txn.replay()
# How many fell behind?
slower = []
faster = []
for txn in self.__txns:
if txn._replaydelta > 0:
slower.append(txn)
else:
faster.append(txn)
print len(slower), 'laggards,', len(faster), 'on-time or faster'
# Find some averages
if slower:
sum = reduce(operator.add,
[txn._replaydelta for txn in slower], 0)
print 'average slower txn was:', float(sum) / len(slower)
if faster:
sum = reduce(operator.add,
[txn._replaydelta for txn in faster], 0)
print 'average faster txn was:', float(sum) / len(faster)
def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
'hr:pm:',
['help', 'replay=', 'report', 'maxtxns='])
except getopt.error, e:
usage(1, e)
if args:
usage(1)
replay = 0
maxtxns = -1
report = 0
storagefile = None
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-r', '--replay'):
replay = 1
storagefile = arg
elif opt in ('-p', '--report'):
report = 1
elif opt in ('-m', '--maxtxns'):
try:
maxtxns = int(arg)
except ValueError:
usage(1, 'Bad -m argument: %s' % arg)
if replay:
storage = FileStorage(storagefile)
#storage = Full(storagefile)
#storage = PrimaryStorage('yyz', storage, RS_PORT)
t0 = now()
p = ZEOParser(maxtxns, report, storage)
i = 0
while 1:
line = sys.stdin.readline()
if not line:
break
i += 1
try:
p.parse(line)
except StopParsing:
break
except:
print 'input file line:', i
raise
t1 = now()
print 'total parse time:', t1-t0
t2 = now()
if replay:
p.replay()
t3 = now()
print 'total replay time:', t3-t2
print 'total time:', t3-t0
if __name__ == '__main__':
main()