[Zodb-checkins] CVS: ZODB3/Tools - space.py:1.5.8.1
fsstats.py:1.1.10.1 zodbload.py:1.3.2.1 zeoup.py:1.14.4.1
zeoserverlog.py:1.3.2.1 repozo.py:1.6.4.1 netspace.py:1.2.4.1
Jeremy Hylton
jeremy at zope.com
Tue Dec 23 14:06:41 EST 2003
Update of /cvs-repository/ZODB3/Tools
In directory cvs.zope.org:/tmp/cvs-serv26665/Tools
Modified Files:
Tag: ZODB3-mvcc-2-branch
zodbload.py zeoup.py zeoserverlog.py repozo.py netspace.py
Added Files:
Tag: ZODB3-mvcc-2-branch
space.py fsstats.py
Log Message:
Merge the head to the mvcc branch.
This merge should be the final preparation for merging the branch to
the trunk.
=== Added File ZODB3/Tools/space.py ===
#! /usr/bin/env python
"""Report on the space used by objects in a storage.
usage: space.py data.fs
The current implementation only supports FileStorage.
Current limitations / simplifications: Ignores revisions and versions.
"""
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata
def run(path, v=0):
    """Print the space used by each object class in a FileStorage.

    The current record of every oid in the storage's index is loaded and
    grouped by the "module.class" name taken from its pickle metadata.
    A table of record count and total pickle bytes per class is printed,
    ordered from the largest (bytes, count) total to the smallest.

    path -- file name of the FileStorage; it is opened read-only
    v    -- when true, also print one line per object: oid, size, class
    """
    fs = FileStorage(path, read_only=1)
    try:
        # break into the file implementation
        if hasattr(fs._index, 'iterkeys'):
            oids = fs._index.iterkeys()
        else:
            oids = fs._index.keys()
        totals = {}
        for oid in oids:
            data, serialno = fs.load(oid, '')
            mod, klass = get_pickle_metadata(data)
            key = "%s.%s" % (mod, klass)
            nbytes, count = totals.get(key, (0, 0))
            totals[key] = nbytes + len(data), count + 1
            if v:
                print("%8s %5d %s" % (U64(oid), len(data), key))
    finally:
        # Release the storage even if loading a record fails.
        fs.close()

    # Decorate-sort-undecorate: order by (bytes, count), largest first.
    # Including the class name in the sort key makes ties deterministic.
    ranked = [(usage, key) for key, usage in totals.items()]
    ranked.sort()
    ranked.reverse()
    print("Totals per object class:")
    for (nbytes, count), key in ranked:
        print("%8d %8d %s" % (count, nbytes, key))
def main():
    """Command-line entry point: parse ``[-v] Data.fs`` and call run().

    A usage message is printed and the process exits with status 2 when
    the options cannot be parsed or when the number of positional
    arguments is not exactly one.  Every -v given on the command line
    adds one to the verbosity value handed to run().
    """
    import getopt
    import sys

    usage = "usage: space.py [-v] Data.fs"
    try:
        opts, args = getopt.getopt(sys.argv[1:], "v")
    except getopt.error:
        # Fetch the exception through sys.exc_info() so this clause
        # parses under both the Python 2 and Python 3 grammars.
        print(sys.exc_info()[1])
        print(usage)
        sys.exit(2)
    if len(args) != 1:
        print(usage)
        sys.exit(2)
    verbosity = len([opt for opt, arg in opts if opt == "-v"])
    run(args[0], verbosity)


if __name__ == "__main__":
    main()
=== Added File ZODB3/Tools/fsstats.py ===
#!python
"""Print detailed statistics from fsdump output."""
import re
import sys
# Patterns for the two kinds of lines this script parses.  Raw strings
# keep the backslash escapes (\d, \S) intact for the regex engine.
# rx_txn captures (tid, size) from a transaction line.
rx_txn = re.compile(r"tid=([0-9a-f]+).*size=(\d+)")
# rx_data captures (oid, class, size) from a data-record line.
rx_data = re.compile(r"oid=([0-9a-f]+) class=(\S+) size=(\d+)")
def sort_byhsize(seq, reverse=False):
    """Return the (key, histogram) pairs of seq ordered by histogram size.

    seq     -- iterable of (key, value) pairs whose values have a size()
               method
    reverse -- when true, the largest size comes first instead of last

    Pairs with equal size() are ordered by key.
    """
    decorated = []
    for key, hist in seq:
        decorated.append((hist.size(), key, hist))
    decorated.sort()
    if reverse:
        decorated.reverse()
    # Drop the leading size that was only there to drive the sort.
    return [entry[1:] for entry in decorated]
class Histogram(dict):
    """Mapping of value -> number of times that value was added.

    add() and size() work with any hashable key.  The statistics
    (mean, median), make_bins() and report() do arithmetic on the keys
    and therefore assume non-negative integer keys.

    All divisions use ``//`` so results stay integers regardless of
    whether true division is in effect.
    """

    def add(self, size):
        """Record one more occurrence of size."""
        self[size] = self.get(size, 0) + 1

    def size(self):
        """Return the total number of occurrences recorded."""
        return sum(self.values())

    def mean(self):
        """Return the floored integer mean of the keys (0 if empty)."""
        total = self.size()
        if not total:
            return 0
        product = sum([k * v for k, v in self.items()])
        return product // total

    def median(self):
        """Return an approximate median key (0 if empty).

        Keys are walked in ascending order; the first key whose
        cumulative count exceeds half of the total is returned.
        """
        total = self.size()
        if not total:
            return 0
        # close enough?
        n = total // 2
        keys = list(self.keys())
        keys.sort()
        for k in keys:
            count = self[k]
            if count > n:
                return k
            n -= count
        # Not reached for a non-empty histogram: the cumulative count
        # ends at total, which is always greater than total // 2.
        return keys[-1]

    def mode(self):
        """Return the key with the highest count (0 if empty)."""
        mode = 0
        value = 0
        for k, v in self.items():
            if v > value:
                value = v
                mode = k
        return mode

    def make_bins(self, binsize):
        """Group the counts into bins of width binsize.

        Sets self.binsize and self.bins, where self.bins[i] is the sum
        of the counts of all keys k with k // binsize == i.  An empty
        histogram gets an empty bin list.
        """
        self.binsize = binsize
        if not self:
            self.bins = []
            return
        maxkey = max(self.keys())
        self.bins = [0] * (1 + maxkey // binsize)
        for k, v in self.items():
            self.bins[k // binsize] += v

    def report(self, name, binsize=50, usebins=False, gaps=True, skip=True):
        """Print summary statistics and a star chart of the bins.

        name    -- title printed on the first line
        binsize -- bin width used when new bins are made
        usebins -- when true, reuse the bins from an earlier make_bins()
                   call (and its bin size) instead of making new ones
        gaps    -- when true, runs of omitted bins are shown as one
                   " ..." line; when false every bin is printed
        skip    -- with gaps, also omit bins too small to earn a star,
                   not just bins that are empty
        """
        if not self:
            # Nothing to chart; max() below would fail on empty input.
            print(name)
            print("Total 0")
            print("")
            return
        if usebins:
            # Use existing bins with whatever size they have
            binsize = self.binsize
        else:
            # Make new bins
            self.make_bins(binsize)
        maxval = max(self.bins)
        # Print up to 40 dots for a value
        dot = max(maxval // 40, 1)
        tot = sum(self.bins)
        print(name)
        print("Total %s Median %s Mean %s Mode %s Max %s" % (
            tot, self.median(), self.mean(), self.mode(), max(self)))
        print("One * represents %s" % dot)
        gap = False
        cum = 0
        for i, n in enumerate(self.bins):
            if gaps and (not n or (skip and not n // dot)):
                if not gap:
                    print(" ...")
                gap = True
                continue
            gap = False
            p = 100 * n // tot
            # NOTE(review): omitted bins never reach this line, so the
            # cumulative column excludes them -- confirm this is intended.
            cum += n
            pc = 100 * cum // tot
            print("%6d %6d %3d%% %3d%% %s" % (
                i * binsize, n, p, pc, "*" * (n // dot)))
        print("")
def class_detail(class_size):
    """Print a per-class summary table, then per-class size charts.

    class_size -- mapping of class name -> histogram of object sizes

    The summary lists classes from fewest to most recorded objects.
    The charts follow in the opposite order, using 50-byte bins; a class
    whose sizes all land in exactly one bin gets no chart.
    """
    # summary of classes
    labels = ("num", "median", "mean", "mode", "class")
    fmt = "%5s %6s %6s %6s %-50.50s"
    print(fmt % labels)
    print(fmt % tuple(["-" * len(label) for label in labels]))
    for klass, hist in sort_byhsize(class_size.items()):
        row = (hist.size(), hist.median(), hist.mean(), hist.mode(), klass)
        print(fmt % row)
    print("")

    # per class details
    for klass, hist in sort_byhsize(class_size.items(), reverse=True):
        hist.make_bins(50)
        occupied = [count for count in hist.bins if count]
        if len(occupied) != 1:
            hist.report("Object size for %s" % klass, usebins=True)
def revision_detail(lifetimes, classes):
    """Print a revisions-per-object chart for each class that needs one.

    lifetimes -- mapping of oid -> list with one entry per revision
    classes   -- mapping of class name -> list of oids (may repeat)

    A class is reported only if at least one of its objects has more
    than one revision.
    """
    # Report per-class details for any object modified more than once
    for name, oids in classes.items():
        hist = Histogram()
        modified = False
        # dict.fromkeys() collapses repeated oids to a single entry.
        for oid in dict.fromkeys(oids):
            revisions = len(lifetimes.get(oid))
            hist.add(revisions)
            modified = modified or revisions > 1
        if modified:
            hist.report("Number of revisions for %s" % name, binsize=10)
def main(path):
    """Read fsdump output from path and print statistics about it.

    Two kinds of lines are used; everything else is ignored:

    - lines starting with "Trans" that match rx_txn (tid and size)
      begin a new transaction;
    - lines starting with " data" that match rx_data (oid, class and
      size) describe one object record of the current transaction.

    Printed: a one-line summary, histograms of transaction size in
    bytes and in objects, of object size, and of revisions per object,
    followed by the per-class revision and size details.
    """
    txn_objects = Histogram()  # histogram of txn size in objects
    txn_bytes = Histogram()    # histogram of txn size in bytes
    obj_size = Histogram()     # histogram of object size
    n_updates = Histogram()    # oid -> num updates
    lifetimes = {}             # oid -> list of tids
    class_size = {}            # class -> histogram of object size
    classes = {}               # class -> list of oids

    MAX = 0      # debugging aid: stop after this many lines (0 = no limit)
    tid = None
    # Start at 0 so a data line seen before any transaction line cannot
    # hit an unbound local.
    objects = 0

    # The dump is parsed as text, so it is opened in text mode.
    f = open(path, "r")
    try:
        for i, line in enumerate(f):
            if MAX and i > MAX:
                break
            if line.startswith(" data"):
                m = rx_data.search(line)
                if not m:
                    continue
                oid, klass, size = m.groups()
                size = int(size)

                obj_size.add(size)
                n_updates.add(oid)
                h = class_size.get(klass)
                if h is None:
                    h = class_size[klass] = Histogram()
                h.add(size)

                lifetimes.setdefault(oid, []).append(tid)
                classes.setdefault(klass, []).append(oid)
                objects += 1

            elif line.startswith("Trans"):
                m = rx_txn.search(line)
                if not m:
                    # Unparseable header: ignore it entirely so the
                    # running count is neither recorded twice nor lost.
                    continue
                if tid is not None:
                    # Close out the previous transaction.
                    txn_objects.add(objects)
                tid, size = m.groups()
                objects = 0
                txn_bytes.add(int(size))
    finally:
        f.close()

    # The final transaction has no following "Trans" line to close it.
    if tid is not None:
        txn_objects.add(objects)

    print("Summary: %d txns, %d objects, %d revisions" % (
        txn_objects.size(), len(n_updates), n_updates.size()))
    print("")

    txn_bytes.report("Transaction size (bytes)", binsize=1024)
    txn_objects.report("Transaction size (objects)", binsize=10)
    obj_size.report("Object size", binsize=128)

    # object lifetime info
    h = Histogram()
    for tids in lifetimes.values():
        h.add(len(tids))
    h.report("Number of revisions", binsize=10, skip=False)

    # details about revisions
    revision_detail(lifetimes, classes)

    class_detail(class_size)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("usage: fsstats.py fsdump-output-file")
        sys.exit(2)
    main(sys.argv[1])
=== ZODB3/Tools/zodbload.py 1.3 => 1.3.2.1 ===
--- ZODB3/Tools/zodbload.py:1.3 Thu Oct 2 14:17:26 2003
+++ ZODB3/Tools/zodbload.py Tue Dec 23 14:06:09 2003
@@ -424,7 +424,11 @@
results = cat(PrincipiaSearchSource=term)
n += len(results)
for result in results:
- did = result.getObject().getId()
+ obj = result.getObject()
+ # Apparently, there is a bug in Zope that leads obj to be None
+ # on occasion.
+ if obj is not None:
+ obj.getId()
return n
=== ZODB3/Tools/zeoup.py 1.14 => 1.14.4.1 ===
--- ZODB3/Tools/zeoup.py:1.14 Mon Sep 15 12:29:19 2003
+++ ZODB3/Tools/zeoup.py Tue Dec 23 14:06:09 2003
@@ -34,7 +34,7 @@
from ZODB.POSException import ConflictError
from ZODB.tests.MinPO import MinPO
from ZEO.ClientStorage import ClientStorage
-from ZODB.PersistentMapping import PersistentMapping
+from persistent.mapping import PersistentMapping
ZEO_VERSION = 2
=== ZODB3/Tools/zeoserverlog.py 1.3 => 1.3.2.1 ===
--- ZODB3/Tools/zeoserverlog.py:1.3 Thu Oct 2 14:17:26 2003
+++ ZODB3/Tools/zeoserverlog.py Tue Dec 23 14:06:09 2003
@@ -362,6 +362,11 @@
else:
f = xopen(f)
+ cols = ["time", "reads", "stores", "commits", "aborts", "txns"]
+ fmt = "%18s %6s %6s %7s %6s %6s"
+ print fmt % cols
+ print fmt % ["-"*len(col) for col in cols]
+
mlast = r = s = c = a = cl = None
rs = []
ss = []
@@ -381,7 +386,7 @@
if m != mlast:
if mlast:
if detail:
- print mlast, len(cl), r, s, c, a, a+c
+ print fmt % (mlast, len(cl), r, s, c, a, a+c)
cls.append(len(cl))
rs.append(r)
ss.append(s)
@@ -406,7 +411,7 @@
if mlast:
if detail:
- print mlast, len(cl), r, s, c, a, a+c
+ print fmt % (mlast, len(cl), r, s, c, a, a+c)
cls.append(len(cl))
rs.append(r)
ss.append(s)
=== ZODB3/Tools/repozo.py 1.6 => 1.6.4.1 ===
--- ZODB3/Tools/repozo.py:1.6 Mon Sep 15 12:29:19 2003
+++ ZODB3/Tools/repozo.py Tue Dec 23 14:06:09 2003
@@ -62,8 +62,6 @@
written to stdout.
"""
-from __future__ import nested_scopes
-
import os
import sys
import md5
=== ZODB3/Tools/netspace.py 1.2 => 1.2.4.1 ===
--- ZODB3/Tools/netspace.py:1.2 Mon Sep 15 12:29:19 2003
+++ ZODB3/Tools/netspace.py Tue Dec 23 14:06:09 2003
@@ -7,9 +7,6 @@
-v: print info for all objects, even if a traversal path isn't found
"""
-
-from __future__ import nested_scopes
-
import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
More information about the Zodb-checkins
mailing list