[Zodb-checkins] CVS: ZODB3/Tools - space.py:1.5.8.1 fsstats.py:1.1.10.1 zodbload.py:1.3.2.1 zeoup.py:1.14.4.1 zeoserverlog.py:1.3.2.1 repozo.py:1.6.4.1 netspace.py:1.2.4.1

Jeremy Hylton jeremy at zope.com
Tue Dec 23 14:06:41 EST 2003


Update of /cvs-repository/ZODB3/Tools
In directory cvs.zope.org:/tmp/cvs-serv26665/Tools

Modified Files:
      Tag: ZODB3-mvcc-2-branch
	zodbload.py zeoup.py zeoserverlog.py repozo.py netspace.py 
Added Files:
      Tag: ZODB3-mvcc-2-branch
	space.py fsstats.py 
Log Message:
Merge the head to the mvcc branch.

This merge should be the final preparation for merging the branch to
the trunk.


=== Added File ZODB3/Tools/space.py ===
#! /usr/bin/env python

"""Report on the space used by objects in a storage.

usage: space.py [-v] Data.fs

-v: also print the oid, data size, and class of every object

The current implementation only supports FileStorage.

Current limitations / simplifications: Ignores revisions and versions.
"""

import ZODB
from ZODB.FileStorage import FileStorage
from ZODB.utils import U64
from ZODB.fsdump import get_pickle_metadata

def run(path, v=0):
    fs = FileStorage(path, read_only=1)
    # break into the file implementation
    if hasattr(fs._index, 'iterkeys'):
        iter = fs._index.iterkeys()
    else:
        iter = fs._index.keys()
    totals = {}
    for oid in iter:
        data, serialno = fs.load(oid, '')
        mod, klass = get_pickle_metadata(data)
        key = "%s.%s" % (mod, klass)
        bytes, count = totals.get(key, (0, 0))
        bytes += len(data)
        count += 1
        totals[key] = bytes, count
        if v:
            print "%8s %5d %s" % (U64(oid), len(data), key)
    L = totals.items()
    L.sort(lambda a, b: cmp(a[1], b[1]))
    L.reverse()
    print "Totals per object class:"
    for key, (bytes, count) in L:
        print "%8d %8d %s" % (count, bytes, key)

def main():
    """Parse the command line and run the space report.

    Accepts any number of -v flags followed by exactly one storage path.
    On a bad option or a wrong number of arguments the usage message is
    printed and the process exits with status 2.
    """
    import getopt
    import sys

    usage = "usage: space.py [-v] Data.fs"
    try:
        opts, args = getopt.getopt(sys.argv[1:], "v")
    except getopt.error, msg:
        print msg
        print usage
        sys.exit(2)
    if len(args) != 1:
        print usage
        sys.exit(2)
    # Each -v on the command line raises the verbosity by one.
    verbosity = len([flag for flag, value in opts if flag == "-v"])
    run(args[0], verbosity)

# Run the report only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()


=== Added File ZODB3/Tools/fsstats.py ===
#!python
"""Print detailed statistics from fsdump output."""

import re
import sys

# Raw strings keep the regex escapes (\d, \S) away from Python's own
# string-escape processing; the compiled patterns are unchanged.

# Transaction line: captures the hex tid and the decimal size.
rx_txn = re.compile(r"tid=([0-9a-f]+).*size=(\d+)")
# Data-record line: captures the hex oid, the class name, and the decimal
# size.
rx_data = re.compile(r"oid=([0-9a-f]+) class=(\S+) size=(\d+)")

def sort_byhsize(seq, reverse=False):
    """Return the (key, value) pairs of seq ordered by value.size().

    seq yields (key, value) pairs whose values have a size() method.  The
    result is a new list, ascending by size (ties broken by key), or
    descending when reverse is true.
    """
    # Decorate each pair with its size so that a plain sort orders by it.
    decorated = []
    for key, value in seq:
        decorated.append((value.size(), key, value))
    decorated.sort()
    if reverse:
        decorated.reverse()
    # Strip the size again.
    ordered = []
    for entry in decorated:
        ordered.append((entry[1], entry[2]))
    return ordered

class Histogram(dict):

    def add(self, size):
        self[size] = self.get(size, 0) + 1

    def size(self):
        return sum(self.itervalues())

    def mean(self):
        product = sum([k * v for k, v in self.iteritems()])
        return product / self.size()

    def median(self):
        # close enough?
        n = self.size() / 2
        L = self.keys()
        L.sort()
        L.reverse()
        while 1:
            k = L.pop()
            if self[k] > n:
                return k
            n -= self[k]

    def mode(self):
        mode = 0
        value = 0
        for k, v in self.iteritems():
            if v > value:
                value = v
                mode = k
        return mode

    def make_bins(self, binsize):
        maxkey = max(self.iterkeys())
        self.binsize = binsize
        self.bins = [0] * (1 + maxkey / binsize)
        for k, v in self.iteritems():
            b = k / binsize
            self.bins[b] += v

    def report(self, name, binsize=50, usebins=False, gaps=True, skip=True):
        if usebins:
            # Use existing bins with whatever size they have
            binsize = self.binsize
        else:
            # Make new bins
            self.make_bins(binsize)
        maxval = max(self.bins)
        # Print up to 40 dots for a value
        dot = max(maxval / 40, 1)
        tot = sum(self.bins)
        print name
        print "Total", tot,
        print "Median", self.median(),
        print "Mean", self.mean(),
        print "Mode", self.mode(),
        print "Max", max(self)
        print "One * represents", dot
        gap = False
        cum = 0
        for i, n in enumerate(self.bins):
            if gaps and (not n or (skip and not n / dot)):
                if not gap:
                    print "   ..."
                gap = True
                continue
            gap = False
            p = 100 * n / tot
            cum += n
            pc = 100 * cum / tot
            print "%6d %6d %3d%% %3d%% %s" % (
                i * binsize, n, p, pc, "*" * (n / dot))
        print

def class_detail(class_size):
    # summary of classes
    fmt = "%5s %6s %6s %6s   %-50.50s"
    labels = ["num", "median", "mean", "mode", "class"]
    print fmt % tuple(labels)
    print fmt % tuple(["-" * len(s) for s in labels])
    for klass, h in sort_byhsize(class_size.iteritems()):
        print fmt % (h.size(), h.median(), h.mean(), h.mode(), klass)
    print

    # per class details
    for klass, h in sort_byhsize(class_size.iteritems(), reverse=True):
        h.make_bins(50)
        if len(filter(None, h.bins)) == 1:
            continue
        h.report("Object size for %s" % klass, usebins=True)

def revision_detail(lifetimes, classes):
    """Print a revisions-per-object histogram for each class that needs one.

    lifetimes maps an oid to the list of tids recorded for it; classes
    maps a class name to a list of oids (repeats are collapsed).  A class
    is reported only when at least one of its objects has more than one
    entry in lifetimes.
    """
    # Report per-class details for any object modified more than once
    for name, oids in classes.iteritems():
        revisions = Histogram()
        unique_oids = dict.fromkeys(oids, 1)
        for oid in unique_oids:
            revisions.add(len(lifetimes.get(oid)))
        # The histogram's keys are revision counts, so a largest key
        # above 1 means some object was written more than once.
        if revisions and max(revisions) > 1:
            revisions.report("Number of revisions for %s" % name, binsize=10)

def main(path):
    txn_objects = Histogram() # histogram of txn size in objects
    txn_bytes = Histogram() # histogram of txn size in bytes
    obj_size = Histogram() # histogram of object size
    n_updates = Histogram() # oid -> num updates
    n_classes = Histogram() # class -> num objects
    lifetimes = {} # oid -> list of tids
    class_size = {} # class -> histogram of object size
    classes = {} # class -> list of oids

    MAX = 0
    tid = None

    f = open(path, "rb")
    for i, line in enumerate(f):
        if MAX and i > MAX:
            break
        if line.startswith("  data"):
            m = rx_data.search(line)
            if not m:
                continue
            oid, klass, size = m.groups()
            size = int(size)

            obj_size.add(size)
            n_updates.add(oid)
            n_classes.add(klass)

            h = class_size.get(klass)
            if h is None:
                h = class_size[klass] = Histogram()
            h.add(size)

            L = lifetimes.setdefault(oid, [])
            L.append(tid)

            L = classes.setdefault(klass, [])
            L.append(oid)
            objects += 1

        elif line.startswith("Trans"):

            if tid is not None:
                txn_objects.add(objects)
            
            m = rx_txn.search(line)
            if not m:
                continue
            tid, size = m.groups()
            size = int(size)
            objects = 0

            txn_bytes.add(size)
    f.close()

    print "Summary: %d txns, %d objects, %d revisions" % (
        txn_objects.size(), len(n_updates), n_updates.size())
    print

    txn_bytes.report("Transaction size (bytes)", binsize=1024)
    txn_objects.report("Transaction size (objects)", binsize=10)
    obj_size.report("Object size", binsize=128)

    # object lifetime info
    h = Histogram()
    for k, v in lifetimes.items():
        h.add(len(v))
    h.report("Number of revisions", binsize=10, skip=False)

    # details about revisions
    revision_detail(lifetimes, classes)

    class_detail(class_size)

# Script entry point: the first command-line argument is passed to main()
# as the path to read.
if __name__ == "__main__":
    main(sys.argv[1])


=== ZODB3/Tools/zodbload.py 1.3 => 1.3.2.1 ===
--- ZODB3/Tools/zodbload.py:1.3	Thu Oct  2 14:17:26 2003
+++ ZODB3/Tools/zodbload.py	Tue Dec 23 14:06:09 2003
@@ -424,7 +424,11 @@
         results = cat(PrincipiaSearchSource=term)
         n += len(results)
         for result in results:
-            did = result.getObject().getId()
+            obj = result.getObject()
+            # Apparently, there is a bug in Zope that leads obj to be None
+            # on occasion.
+            if obj is not None:
+                obj.getId()
 
     return n
 


=== ZODB3/Tools/zeoup.py 1.14 => 1.14.4.1 ===
--- ZODB3/Tools/zeoup.py:1.14	Mon Sep 15 12:29:19 2003
+++ ZODB3/Tools/zeoup.py	Tue Dec 23 14:06:09 2003
@@ -34,7 +34,7 @@
 from ZODB.POSException import ConflictError
 from ZODB.tests.MinPO import MinPO
 from ZEO.ClientStorage import ClientStorage
-from ZODB.PersistentMapping import PersistentMapping
+from persistent.mapping import PersistentMapping
 
 ZEO_VERSION = 2
 


=== ZODB3/Tools/zeoserverlog.py 1.3 => 1.3.2.1 ===
--- ZODB3/Tools/zeoserverlog.py:1.3	Thu Oct  2 14:17:26 2003
+++ ZODB3/Tools/zeoserverlog.py	Tue Dec 23 14:06:09 2003
@@ -362,6 +362,11 @@
     else:
         f = xopen(f)
 
+    cols = ["time", "reads", "stores", "commits", "aborts", "txns"]
+    fmt = "%18s %6s %6s %7s %6s %6s"
+    print fmt % cols
+    print fmt % ["-"*len(col) for col in cols]
+
     mlast = r = s = c = a = cl = None
     rs = []
     ss = []
@@ -381,7 +386,7 @@
             if m != mlast:
                 if mlast:
                     if detail:
-                        print mlast, len(cl), r, s, c, a, a+c
+                        print fmt % (mlast, len(cl), r, s, c, a, a+c)
                     cls.append(len(cl))
                     rs.append(r)
                     ss.append(s)
@@ -406,7 +411,7 @@
 
     if mlast:
         if detail:
-            print mlast, len(cl), r, s, c, a, a+c
+            print fmt % (mlast, len(cl), r, s, c, a, a+c)
         cls.append(len(cl))
         rs.append(r)
         ss.append(s)


=== ZODB3/Tools/repozo.py 1.6 => 1.6.4.1 ===
--- ZODB3/Tools/repozo.py:1.6	Mon Sep 15 12:29:19 2003
+++ ZODB3/Tools/repozo.py	Tue Dec 23 14:06:09 2003
@@ -62,8 +62,6 @@
         written to stdout.
 """
 
-from __future__ import nested_scopes
-
 import os
 import sys
 import md5


=== ZODB3/Tools/netspace.py 1.2 => 1.2.4.1 ===
--- ZODB3/Tools/netspace.py:1.2	Mon Sep 15 12:29:19 2003
+++ ZODB3/Tools/netspace.py	Tue Dec 23 14:06:09 2003
@@ -7,9 +7,6 @@
 -v: print info for all objects, even if a traversal path isn't found
 """
 
-
-from __future__ import nested_scopes
-
 import ZODB
 from ZODB.FileStorage import FileStorage
 from ZODB.utils import U64




More information about the Zodb-checkins mailing list