[Zodb-checkins] SVN: ZODB/branches/jim-simulation/src/Z checkpoint
Jim Fulton
jim at zope.com
Tue Jun 1 13:51:54 EDT 2010
Log message for revision 112890:
checkpoint
Changed:
A ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py
A ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_stats.py
D ZODB/branches/jim-simulation/src/ZODB/scripts/simul.py
D ZODB/branches/jim-simulation/src/ZODB/scripts/stats.py
-=-
Copied: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py (from rev 112483, ZODB/trunk/src/ZODB/scripts/simul.py)
===================================================================
--- ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py (rev 0)
+++ ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py 2010-06-01 17:51:54 UTC (rev 112890)
@@ -0,0 +1,1757 @@
+#! /usr/bin/env python
+##############################################################################
+#
+# Copyright (c) 2001-2005 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""Cache simulation.
+
+Usage: cache_trace_simulation.py [options] tracefile
+
+Options:
+-s size: cache size in MB (default 20 MB)
+-b: simulate the buddy-allocator cache (BuddyCacheSimulation)
+-f: simulate the simple first-fit allocator cache (SimpleCacheSimulation)
+-l: simulate an LRU cache (LRUCacheSimulation)
+-y: simulate a ZEO cache variant that copies hits to the current file
+    (AltZEOCacheSimulation)
+-z: simulate the ZEO 1.0/2.0 cache (ZEOCacheSimulation)
+-2: simulate the 2Q cache (TwoQSimulation)
+-c: simulate the circular ZEO 3.0 cache (CircularCacheSimulation; default)
+-O: simulate an oracle cache (OracleSimulation)
+-a: simulate the ARC cache (ARCCacheSimulation)
+-T: simulate a Thor-like cache (ThorSimulation)
+-U: simulate an unbounded cache (UnboundedSimulation)
+
+Note: only the circular cache simulation (-c) is known to work with
+current trace files; see the caution comment in the source below.
+"""
+
+import sys
+import time
+import getopt
+import struct
+import math
+import bisect
+from sets import Set
+
+from ZODB.utils import z64
+
+def usage(msg):
+ print >> sys.stderr, msg
+ print >> sys.stderr, __doc__
+
+def main():
+ # Parse options.
+ MB = 1024**2
+ cachelimit = 20*MB
+ simclass = CircularCacheSimulation
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTUs:")
+ except getopt.error, msg:
+ usage(msg)
+ return 2
+ for o, a in opts:
+ if o == '-b':
+ simclass = BuddyCacheSimulation
+ elif o == '-f':
+ simclass = SimpleCacheSimulation
+ elif o == '-l':
+ simclass = LRUCacheSimulation
+ elif o == '-y':
+ simclass = AltZEOCacheSimulation
+ elif o == '-z':
+ simclass = ZEOCacheSimulation
+ elif o == '-s':
+ cachelimit = int(float(a)*MB)
+ elif o == '-2':
+            simclass = TwoQSimulation
+ elif o == '-c':
+ simclass = CircularCacheSimulation
+ elif o == '-O':
+ simclass = OracleSimulation
+ elif o == '-a':
+ simclass = ARCCacheSimulation
+ elif o == '-T':
+ simclass = ThorSimulation
+ elif o == '-U':
+ simclass = UnboundedSimulation
+ else:
+ assert False, (o, a)
+
+ if len(args) != 1:
+ usage("exactly one file argument required")
+ return 2
+ filename = args[0]
+
+ # Open file.
+ if filename.endswith(".gz"):
+ # Open gzipped file.
+ try:
+ import gzip
+ except ImportError:
+ print >> sys.stderr, "can't read gzipped files (no module gzip)"
+ return 1
+ try:
+ f = gzip.open(filename, "rb")
+ except IOError, msg:
+ print >> sys.stderr, "can't open %s: %s" % (filename, msg)
+ return 1
+ elif filename == "-":
+ # Read from stdin.
+ f = sys.stdin
+ else:
+ # Open regular file.
+ try:
+ f = open(filename, "rb")
+ except IOError, msg:
+ print >> sys.stderr, "can't open %s: %s" % (filename, msg)
+ return 1
+
+ # Create simulation object.
+ if simclass is OracleSimulation:
+ sim = simclass(cachelimit, filename)
+ else:
+ sim = simclass(cachelimit)
+
+ # Print output header.
+ sim.printheader()
+
+ # Read trace file, simulating cache behavior.
+ f_read = f.read
+ unpack = struct.unpack
+ FMT = ">iiH8s8s"
+ FMT_SIZE = struct.calcsize(FMT)
+ assert FMT_SIZE == 26
+ while 1:
+ # Read a record and decode it.
+ r = f_read(FMT_SIZE)
+ if len(r) < FMT_SIZE:
+ break
+ ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
+ if ts == 0:
+ # Must be a misaligned record caused by a crash; skip 8 bytes
+ # and try again. Why 8? Lost in the mist of history.
+ f.seek(f.tell() - FMT_SIZE + 8)
+ continue
+ oid = f_read(oidlen)
+ if len(oid) < oidlen:
+ break
+ # Decode the code.
+ dlen, version, code = (code & 0x7fffff00,
+ code & 0x80,
+ code & 0x7e)
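+        # E.g. a raw code word of 0x622 yields dlen=0x600 (1536 bytes),
+        # version flag clear, and trace code 0x22 (a load hit).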
+ # And pass it to the simulation.
+ sim.event(ts, dlen, version, code, oid, start_tid, end_tid)
+
+ f.close()
+ # Finish simulation.
+ sim.finish()
+
+ # Exit code from main().
+ return 0
+
+class Simulation(object):
+ """Base class for simulations.
+
+ The driver program calls: event(), printheader(), finish().
+
+ The standard event() method calls these additional methods:
+ write(), load(), inval(), report(), restart(); the standard
+ finish() method also calls report().
+ """
+
+ def __init__(self, cachelimit):
+ self.cachelimit = cachelimit
+ # Initialize global statistics.
+ self.epoch = None
+ self.total_loads = 0
+ self.total_hits = 0 # subclass must increment
+ self.total_invals = 0 # subclass must increment
+ self.total_writes = 0
+ if not hasattr(self, "extras"):
+ self.extras = (self.extraname,)
+ self.format = self.format + " %7s" * len(self.extras)
+ # Reset per-run statistics and set up simulation data.
+ self.restart()
+
+ def restart(self):
+ # Reset per-run statistics.
+ self.loads = 0
+ self.hits = 0 # subclass must increment
+ self.invals = 0 # subclass must increment
+ self.writes = 0
+ self.ts0 = None
+
+ def event(self, ts, dlen, _version, code, oid,
+ start_tid, end_tid):
+ # Record first and last timestamp seen.
+ if self.ts0 is None:
+ self.ts0 = ts
+ if self.epoch is None:
+ self.epoch = ts
+ self.ts1 = ts
+
+ # Simulate cache behavior. Caution: the codes in the trace file
+ # record whether the actual cache missed or hit on each load, but
+ # that bears no necessary relationship to whether the simulated cache
+ # will hit or miss. Relatedly, if the actual cache needed to store
+ # an object, the simulated cache may not need to (it may already
+ # have the data).
+ action = code & 0x70
+ if action == 0x20:
+ # Load.
+ self.loads += 1
+ self.total_loads += 1
+ # Asserting that dlen is 0 iff it's a load miss.
+ # assert (dlen == 0) == (code in (0x20, 0x24))
+ self.load(oid, dlen, start_tid)
+ elif action == 0x50:
+ # Store.
+ assert dlen
+ self.write(oid, dlen, start_tid, end_tid)
+ elif action == 0x10:
+ # Invalidate.
+ self.inval(oid, start_tid)
+ elif action == 0x00:
+ # Restart.
+ self.report()
+ self.restart()
+ else:
+ raise ValueError("unknown trace code 0x%x" % code)
+
+ def write(self, oid, size, start_tid, end_tid):
+ pass
+
+ def load(self, oid, size, start_tid):
+ # Must increment .hits and .total_hits as appropriate.
+ pass
+
+ def inval(self, oid, start_tid):
+ # Must increment .invals and .total_invals as appropriate.
+ pass
+
+ format = "%12s %9s %8s %8s %6s %6s %7s"
+
+ # Subclass should override extraname to name known instance variables;
+ # if extraname is 'foo', both self.foo and self.total_foo must exist:
+ extraname = "*** please override ***"
+
+ def printheader(self):
+ print "%s, cache size %s bytes" % (self.__class__.__name__,
+ addcommas(self.cachelimit))
+ self.extraheader()
+ extranames = tuple([s.upper() for s in self.extras])
+ args = ("START TIME", "DURATION", "LOADS", "HITS",
+ "INVALS", "WRITES", "HITRATE") + extranames
+ print self.format % args
+
+ def extraheader(self):
+ pass
+
+ nreports = 0
+
+ def report(self, extratext=''):
+ if self.loads:
+ self.nreports += 1
+ args = (time.ctime(self.ts0)[4:-8],
+ duration(self.ts1 - self.ts0),
+ self.loads, self.hits, self.invals, self.writes,
+ hitrate(self.loads, self.hits))
+ args += tuple([getattr(self, name) for name in self.extras])
+ print self.format % args, extratext
+
+ def finish(self):
+ # Make sure that the last line of output ends with "OVERALL". This
+ # makes it much easier for another program parsing the output to
+ # find summary statistics.
+ if self.nreports < 2:
+ self.report('OVERALL')
+ else:
+ self.report()
+ args = (
+ time.ctime(self.epoch)[4:-8],
+ duration(self.ts1 - self.epoch),
+ self.total_loads,
+ self.total_hits,
+ self.total_invals,
+ self.total_writes,
+ hitrate(self.total_loads, self.total_hits))
+ args += tuple([getattr(self, "total_" + name)
+ for name in self.extras])
+ print (self.format + " OVERALL") % args
+
+
+# For use in CircularCacheSimulation.
+class CircularCacheEntry(object):
+ __slots__ = (# object key: an (oid, start_tid) pair, where
+ # start_tid is the tid of the transaction that created
+ # this revision of oid
+ 'key',
+
+ # tid of transaction that created the next revision;
+ # z64 iff this is the current revision
+ 'end_tid',
+
+ # Offset from start of file to the object's data
+ # record; this includes all overhead bytes (status
+ # byte, size bytes, etc).
+ 'offset',
+ )
+
+ def __init__(self, key, end_tid, offset):
+ self.key = key
+ self.end_tid = end_tid
+ self.offset = offset
+
+from ZEO.cache import ZEC_HEADER_SIZE
+
+TOTAL_FIXED_SIZE = 22
+OBJECT_HEADER_SIZE = 21
+class CircularCacheSimulation(Simulation):
+ """Simulate the ZEO 3.0 cache."""
+
+ # The cache is managed as a single file with a pointer that
+ # goes around the file, circularly, forever. New objects
+ # are written at the current pointer, evicting whatever was
+ # there previously.
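+    # When the write pointer would run past the end of the file,
+    # makeroom() wraps it back to ZEC_HEADER_SIZE, so eviction
+    # resumes from the front of the file.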
+
+ extras = "evicts", "inuse"
+
+ def __init__(self, cachelimit):
+ from ZEO import cache
+
+ Simulation.__init__(self, cachelimit)
+ self.total_evicts = 0 # number of cache evictions
+
+ # Current offset in file.
+ self.offset = ZEC_HEADER_SIZE
+
+ # Map offset in file to (size, CircularCacheEntry) pair, or to
+ # (size, None) if the offset starts a free block.
+ self.filemap = {ZEC_HEADER_SIZE: (self.cachelimit - ZEC_HEADER_SIZE,
+ None)}
+ # Map key to CircularCacheEntry. A key is an (oid, tid) pair.
+ self.key2entry = {}
+
+ # Map oid to tid of current revision.
+ self.current = {}
+
+ # Map oid to list of (start_tid, end_tid) pairs in sorted order.
+ # Used to find matching key for load of non-current data.
+ self.noncurrent = {}
+
+ # The number of overhead bytes needed to store an object pickle
+ # on disk (all bytes beyond those needed for the object pickle).
+ self.overhead = cache.allocated_record_overhead
+
+ def restart(self):
+ Simulation.restart(self)
+ self.evicts = 0
+
+ def load(self, oid, size, tid):
+ if tid == z64:
+ # Trying to load current revision.
+ if oid in self.current: # else it's a cache miss
+ self.hits += 1
+ self.total_hits += 1
+ return
+
+ # May or may not be trying to load current revision.
+ cur_tid = self.current.get(oid)
+ if cur_tid == tid:
+ self.hits += 1
+ self.total_hits += 1
+ return
+
+ # It's a load for non-current data. Do we know about this oid?
+ L = self.noncurrent.get(oid)
+ if L is None:
+ return # cache miss
+ i = bisect.bisect_left(L, (tid, None))
+ if i == 0:
+ # This tid is smaller than any we know about -- miss.
+ return
+ lo, hi = L[i-1]
+ assert lo < tid
+ if tid > hi:
+ # No data in the right tid range -- miss.
+ return
+ # Cache hit.
+ self.hits += 1
+ self.total_hits += 1
+
+ # (oid, tid) is in the cache. Remove it: take it out of key2entry,
+ # and in `filemap` mark the space it occupied as being free. The
+ # caller is responsible for removing it from `current` or `noncurrent`.
+ def _remove(self, oid, tid):
+ key = oid, tid
+ e = self.key2entry.pop(key)
+ pos = e.offset
+ size, _e = self.filemap[pos]
+ assert e is _e
+ self.filemap[pos] = size, None
+
+ def _remove_noncurrent_revisions(self, oid):
+ noncurrent_list = self.noncurrent.get(oid)
+ if noncurrent_list:
+ self.invals += len(noncurrent_list)
+ self.total_invals += len(noncurrent_list)
+ for start_tid, end_tid in noncurrent_list:
+ self._remove(oid, start_tid)
+ del self.noncurrent[oid]
+
+ def inval(self, oid, tid):
+ if tid == z64:
+ # This is part of startup cache verification: forget everything
+ # about this oid.
+ self._remove_noncurrent_revisions(oid)
+
+ cur_tid = self.current.get(oid)
+ if cur_tid is None:
+ # We don't have current data, so nothing more to do.
+ return
+
+ # We had current data for oid, but no longer.
+ self.invals += 1
+ self.total_invals += 1
+ del self.current[oid]
+ if tid == z64:
+ # Startup cache verification: forget this oid entirely.
+ self._remove(oid, cur_tid)
+ return
+
+ # Our current data becomes non-current data.
+ # Add the validity range to the list of non-current data for oid.
+ assert cur_tid < tid
+ L = self.noncurrent.setdefault(oid, [])
+ bisect.insort_left(L, (cur_tid, tid))
+ # Update the end of oid's validity range in its CircularCacheEntry.
+ e = self.key2entry[oid, cur_tid]
+ assert e.end_tid == z64
+ e.end_tid = tid
+
+ def write(self, oid, size, start_tid, end_tid):
+ if end_tid == z64:
+ # Storing current revision.
+ if oid in self.current: # we already have it in cache
+ return
+ self.current[oid] = start_tid
+ self.writes += 1
+ self.total_writes += 1
+ self.add(oid, size, start_tid)
+ return
+ # Storing non-current revision.
+ L = self.noncurrent.setdefault(oid, [])
+ p = start_tid, end_tid
+ if p in L:
+ return # we already have it in cache
+ bisect.insort_left(L, p)
+ self.writes += 1
+ self.total_writes += 1
+ self.add(oid, size, start_tid, end_tid)
+
+ # Add `oid` to the cache, evicting objects as needed to make room.
+ # This updates `filemap` and `key2entry`; it's the caller's
+    # responsibility to update `current` or `noncurrent` appropriately.
+ def add(self, oid, size, start_tid, end_tid=z64):
+ key = oid, start_tid
+ assert key not in self.key2entry
+ size += self.overhead
+ avail = self.makeroom(size)
+ e = CircularCacheEntry(key, end_tid, self.offset)
+ self.filemap[self.offset] = size, e
+ self.key2entry[key] = e
+ self.offset += size
+ # All the space made available must be accounted for in filemap.
+ excess = avail - size
+ if excess:
+ self.filemap[self.offset] = excess, None
+
+ # Evict enough objects to make at least `need` contiguous bytes, starting
+ # at `self.offset`, available. Evicted objects are removed from
+ # `filemap`, `key2entry`, `current` and `noncurrent`. The caller is
+ # responsible for adding new entries to `filemap` to account for all
+ # the freed bytes, and for advancing `self.offset`. The number of bytes
+ # freed is the return value, and will be >= need.
+ def makeroom(self, need):
+ if self.offset + need > self.cachelimit:
+ self.offset = ZEC_HEADER_SIZE
+ pos = self.offset
+ while need > 0:
+ assert pos < self.cachelimit
+ size, e = self.filemap.pop(pos)
+ if e: # there is an object here (else it's already free space)
+ self.evicts += 1
+ self.total_evicts += 1
+ assert pos == e.offset
+ _e = self.key2entry.pop(e.key)
+ assert e is _e
+ oid, start_tid = e.key
+ if e.end_tid == z64:
+ del self.current[oid]
+ else:
+ L = self.noncurrent[oid]
+ L.remove((start_tid, e.end_tid))
+ need -= size
+ pos += size
+ return pos - self.offset # total number of bytes freed
+
+ def report(self, extratext=''):
+ self.check()
+ free = used = total = 0
+ for size, e in self.filemap.itervalues():
+ total += size
+ if e:
+ used += size
+ else:
+ free += size
+
+ self.inuse = round(100.0 * used / total, 1)
+ self.total_inuse = self.inuse
+ Simulation.report(self, extratext)
+
+ def check(self):
+ oidcount = 0
+ pos = ZEC_HEADER_SIZE
+ while pos < self.cachelimit:
+ size, e = self.filemap[pos]
+ if e:
+ oidcount += 1
+ assert self.key2entry[e.key].offset == pos
+ pos += size
+ assert oidcount == len(self.key2entry)
+ assert pos == self.cachelimit
+
+ def dump(self):
+ print len(self.filemap)
+ L = list(self.filemap)
+ L.sort()
+ for k in L:
+ v = self.filemap[k]
+ print k, v[0], repr(v[1])
+
+#############################################################################
+# CAUTION: It's most likely that none of the simulators below this
+# point work anymore. A great many changes were needed to teach
+# CircularCacheSimulation (above) about MVCC, including method signature
+# changes and changes in cache file format, and none of the other simulator
+# classes were changed.
+#############################################################################
+
+class ZEOCacheSimulation(Simulation):
+ """Simulate the ZEO 1.0 and 2.0 cache behavior.
+
+ This assumes the cache is not persistent (we don't know how to
+ simulate cache validation.)
+ """
+
+ extraname = "flips"
+
+ def __init__(self, cachelimit):
+ # Initialize base class
+ Simulation.__init__(self, cachelimit)
+ # Initialize additional global statistics
+ self.total_flips = 0
+
+ def restart(self):
+ # Reset base class
+ Simulation.restart(self)
+ # Reset additional per-run statistics
+ self.flips = 0
+ # Set up simulation
+ self.filesize = [4, 4] # account for magic number
+ self.fileoids = [{}, {}]
+ self.current = 0 # index into filesize, fileoids
+
+ def load(self, oid, size):
+ if (self.fileoids[self.current].get(oid) or
+ self.fileoids[1 - self.current].get(oid)):
+ self.hits += 1
+ self.total_hits += 1
+ else:
+ self.write(oid, size)
+
+ def write(self, oid, size):
+ # Fudge because size is rounded up to multiples of 256. (31
+ # is header overhead per cache record; 127 is to compensate
+ # for rounding up to multiples of 256.)
+ size = size + 31 - 127
+ if self.filesize[self.current] + size > self.cachelimit / 2:
+ # Cache flip
+ self.flips += 1
+ self.total_flips += 1
+ self.current = 1 - self.current
+ self.filesize[self.current] = 4
+ self.fileoids[self.current] = {}
+ self.filesize[self.current] += size
+ self.fileoids[self.current][oid] = 1
+
+ def inval(self, oid):
+ if self.fileoids[self.current].get(oid):
+ self.invals += 1
+ self.total_invals += 1
+ del self.fileoids[self.current][oid]
+ elif self.fileoids[1 - self.current].get(oid):
+ self.invals += 1
+ self.total_invals += 1
+ del self.fileoids[1 - self.current][oid]
+
+class AltZEOCacheSimulation(ZEOCacheSimulation):
+ """A variation of the ZEO cache that copies to the current file.
+
+ When a hit is found in the non-current cache file, it is copied to
+ the current cache file. Exception: when the copy would cause a
+ cache flip, we don't copy (this is part laziness, part concern
+ over causing extraneous flips).
+ """
+
+ def load(self, oid, size):
+ if self.fileoids[self.current].get(oid):
+ self.hits += 1
+ self.total_hits += 1
+ elif self.fileoids[1 - self.current].get(oid):
+ self.hits += 1
+ self.total_hits += 1
+ # Simulate a write, unless it would cause a flip
+ size = size + 31 - 127
+ if self.filesize[self.current] + size <= self.cachelimit / 2:
+ self.filesize[self.current] += size
+ self.fileoids[self.current][oid] = 1
+ del self.fileoids[1 - self.current][oid]
+ else:
+ self.write(oid, size)
+
+class LRUCacheSimulation(Simulation):
+
+ extraname = "evicts"
+
+ def __init__(self, cachelimit):
+ # Initialize base class
+ Simulation.__init__(self, cachelimit)
+ # Initialize additional global statistics
+ self.total_evicts = 0
+
+ def restart(self):
+ # Reset base class
+ Simulation.restart(self)
+ # Reset additional per-run statistics
+ self.evicts = 0
+ # Set up simulation
+ self.cache = {}
+ self.size = 0
+ self.head = Node(None, None)
+ self.head.linkbefore(self.head)
+
+ def load(self, oid, size):
+ node = self.cache.get(oid)
+ if node is not None:
+ self.hits += 1
+ self.total_hits += 1
+ node.linkbefore(self.head)
+ else:
+ self.write(oid, size)
+
+ def write(self, oid, size):
+ node = self.cache.get(oid)
+ if node is not None:
+ node.unlink()
+ assert self.head.next is not None
+ self.size -= node.size
+ node = Node(oid, size)
+ self.cache[oid] = node
+ node.linkbefore(self.head)
+ self.size += size
+ # Evict LRU nodes
+ while self.size > self.cachelimit:
+ self.evicts += 1
+ self.total_evicts += 1
+ node = self.head.next
+ assert node is not self.head
+ node.unlink()
+ assert self.head.next is not None
+ del self.cache[node.oid]
+ self.size -= node.size
+
+ def inval(self, oid):
+ node = self.cache.get(oid)
+ if node is not None:
+ assert node.oid == oid
+ self.invals += 1
+ self.total_invals += 1
+ node.unlink()
+ assert self.head.next is not None
+ del self.cache[oid]
+ self.size -= node.size
+ assert self.size >= 0
+
+class Node(object):
+ """Node in a doubly-linked list, storing oid and size as payload.
+
+ A node can be linked or unlinked; in the latter case, next and
+ prev are None. Initially a node is unlinked.
+ """
+
+ __slots__ = ['prev', 'next', 'oid', 'size']
+
+ def __init__(self, oid, size):
+ self.oid = oid
+ self.size = size
+ self.prev = self.next = None
+
+ def unlink(self):
+ prev = self.prev
+ next = self.next
+ if prev is not None:
+ assert next is not None
+ assert prev.next is self
+ assert next.prev is self
+ prev.next = next
+ next.prev = prev
+ self.prev = self.next = None
+ else:
+ assert next is None
+
+ def linkbefore(self, next):
+ self.unlink()
+ prev = next.prev
+ if prev is None:
+ assert next.next is None
+ prev = next
+ self.prev = prev
+ self.next = next
+ prev.next = next.prev = self
+
+am = object()
+a1in = object()
+a1out = object()
+
+class Node2Q(Node):
+
+ __slots__ = ["kind", "hits"]
+
+ def __init__(self, oid, size, kind=None):
+ Node.__init__(self, oid, size)
+ self.kind = kind
+ self.hits = 0
+
+ def linkbefore(self, next):
+ if next.kind != self.kind:
+ self.kind = next.kind
+ Node.linkbefore(self, next)
+
+class TwoQSimulation(Simulation):
+    # The original 2Q algorithm is page based and the authors offer
+    # tuning guidelines based on a page-based cache. Our cache is
+    # object based, so, for example, it's hard to compute the number
+    # of oids to store in a1out based on the size of a1in.
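+    # (With the default 20 MB cache limit, am may hold up to 15 MB and
+    # a1in up to 5 MB; see am_limit and a1in_limit in __init__.)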
+
+ extras = "evicts", "hothit", "am_add"
+
+ NodeClass = Node2Q
+
+ def __init__(self, cachelimit, outlen=10000, threshold=0):
+ Simulation.__init__(self, cachelimit)
+
+ # The promotion threshold: If a hit occurs in a1out, it is
+ # promoted to am if the number of hits on the object while it
+ # was in a1in is at least threshold. The standard 2Q scheme
+ # uses a threshold of 0.
+ self.threshold = threshold
+ self.am_limit = 3 * self.cachelimit / 4
+ self.a1in_limit = self.cachelimit / 4
+
+ self.cache = {}
+ self.am_size = 0
+ self.a1in_size = 0
+ self.a1out_size = 0
+
+ self.total_evicts = 0
+ self.total_hothit = 0
+ self.total_am_add = 0
+ self.a1out_limit = outlen
+
+ # An LRU queue of hot objects
+ self.am = self.NodeClass(None, None, am)
+ self.am.linkbefore(self.am)
+        # A FIFO queue of recently referenced objects. Its purpose
+ # is to absorb references to objects that are accessed a few
+ # times in short order, then forgotten about.
+ self.a1in = self.NodeClass(None, None, a1in)
+ self.a1in.linkbefore(self.a1in)
+        # A FIFO queue of recently referenced oids.
+ # This queue only stores the oids, not any data. If we get a
+ # hit in this queue, promote the object to am.
+ self.a1out = self.NodeClass(None, None, a1out)
+ self.a1out.linkbefore(self.a1out)
+
+ def makespace(self, size):
+ for space in 0, size:
+ if self.enoughspace(size):
+ return
+ self.evict_a1in(space)
+ if self.enoughspace(size):
+ return
+ self.evict_am(space)
+
+ def enoughspace(self, size):
+ totalsize = self.a1in_size + self.am_size
+ return totalsize + size < self.cachelimit
+
+ def evict_a1in(self, extra):
+ while self.a1in_size + extra > self.a1in_limit:
+ if self.a1in.next is self.a1in:
+ return
+ assert self.a1in.next is not None
+ node = self.a1in.next
+ self.evicts += 1
+ self.total_evicts += 1
+ node.linkbefore(self.a1out)
+ self.a1out_size += 1
+ self.a1in_size -= node.size
+ if self.a1out_size > self.a1out_limit:
+ assert self.a1out.next is not None
+ node = self.a1out.next
+ node.unlink()
+ del self.cache[node.oid]
+ self.a1out_size -= 1
+
+ def evict_am(self, extra):
+ while self.am_size + extra > self.am_limit:
+ if self.am.next is self.am:
+ return
+ assert self.am.next is not None
+ node = self.am.next
+ self.evicts += 1
+ self.total_evicts += 1
+ # This node hasn't been accessed in a while, so just
+ # forget about it.
+ node.unlink()
+ del self.cache[node.oid]
+ self.am_size -= node.size
+
+ def write(self, oid, size):
+ # A write always follows a read (ZODB doesn't allow blind writes).
+ # So this write must have followed a recent read of the object.
+        # Don't change its position, but do update the size.
+
+ # XXX For now, don't evict pages if the new version of the object
+ # is big enough to require eviction.
+ node = self.cache.get(oid)
+ if node is None or node.kind is a1out:
+ return
+ if node.kind is am:
+ self.am_size = self.am_size - node.size + size
+ node.size = size
+ else:
+ self.a1in_size = self.a1in_size - node.size + size
+ node.size = size
+
+ def load(self, oid, size):
+ node = self.cache.get(oid)
+ if node is not None:
+ if node.kind is am:
+ self.hits += 1
+ self.total_hits += 1
+ self.hothit += 1
+ self.total_hothit += 1
+ node.hits += 1
+ node.linkbefore(self.am)
+ elif node.kind is a1in:
+ self.hits += 1
+ self.total_hits += 1
+ node.hits += 1
+ elif node.kind is a1out:
+ self.a1out_size -= 1
+ if node.hits >= self.threshold:
+ self.makespace(node.size)
+ self.am_size += node.size
+ node.linkbefore(self.am)
+ self.cache[oid] = node
+ self.am_add += 1
+ self.total_am_add += 1
+ else:
+ node.unlink()
+ self.insert(oid, size)
+ else:
+ self.insert(oid, size)
+
+ def insert(self, oid, size):
+ # New objects enter the cache via a1in. If they
+ # are frequently used over a long enough time, they
+ # will be promoted to am -- but only via a1out.
+ self.makespace(size)
+ node = self.NodeClass(oid, size, a1in)
+ node.linkbefore(self.a1in)
+ self.cache[oid] = node
+ self.a1in_size += node.size
+
+ def inval(self, oid):
+ # The original 2Q algorithm didn't have to deal with
+ # invalidations. My own solution: Move it to the head of
+ # a1out.
+ node = self.cache.get(oid)
+ if node is None:
+ return
+ self.invals += 1
+ self.total_invals += 1
+ # XXX Should an invalidation to a1out count?
+ if node.kind is a1out:
+ return
+ node.linkbefore(self.a1out)
+ if node.kind is am:
+ self.am_size -= node.size
+ else:
+ self.a1in_size -= node.size
+
+ def restart(self):
+ Simulation.restart(self)
+
+ self.evicts = 0
+ self.hothit = 0
+ self.am_add = 0
+
+lruT = object()
+lruB = object()
+fifoT = object()
+fifoB = object()
+
+class ARCCacheSimulation(Simulation):
+
+ # Based on the paper ARC: A Self-Tuning, Low Overhead Replacement
+ # Cache by Nimrod Megiddo and Dharmendra S. Modha, USENIX FAST
+ # 2003. The paper describes a block-based cache. A lot of the
+ # details need to be fiddled to work with an object-based cache.
+ # For size issues, the key insight ended up being conditions
+ # A.1-A.5 rather than the details of the algorithm in Fig. 4.
+
+ extras = "lruThits", "evicts", "p"
+
+ def __init__(self, cachelimit):
+ Simulation.__init__(self, cachelimit)
+ # There are two pairs of linked lists. Each pair has a top and
+ # bottom half. The bottom half contains metadata, but not actual
+ # objects.
+
+ # LRU list of frequently used objects
+ self.lruT = Node2Q(None, None, lruT)
+ self.lruT.linkbefore(self.lruT)
+ self.lruT_len = 0
+ self.lruT_size = 0
+
+ self.lruB = Node2Q(None, None, lruB)
+ self.lruB.linkbefore(self.lruB)
+ self.lruB_len = 0
+ self.lruB_size = 0
+
+ # FIFO list of objects seen once
+ self.fifoT = Node2Q(None, None, fifoT)
+ self.fifoT.linkbefore(self.fifoT)
+ self.fifoT_len = 0
+ self.fifoT_size = 0
+
+ self.fifoB = Node2Q(None, None, fifoB)
+ self.fifoB.linkbefore(self.fifoB)
+ self.fifoB_len = 0
+ self.fifoB_size = 0
+
+ # maps oid to node
+ self.cache = {}
+
+        # The paper says that p should be adjusted by 1 at minimum:
+        # "The compound effect of such small increments and decrements
+        # to p is quite profound as we will demonstrate in the next
+        # section." Not really, as far as I can tell. In my traces
+ # with a very small cache, it was taking far too long to adjust
+ # towards favoring some FIFO component. I would guess that the
+ # chief difference is that our caches are much bigger than the
+ # ones they experimented with. Their biggest cache had 512K
+ # entries, while our smallest cache will have 40 times that many
+ # entries.
+
+ self.p = 0
+ # XXX multiply computed adjustments to p by walk_factor
+ self.walk_factor = 500
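+        # E.g. on a fifoB hit in load() with lruB_size=4000 and
+        # fifoB_size=1000, p grows by max(1, 4000/1000) * 500 == 2000,
+        # shifting the balance toward the FIFO side.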
+
+ # statistics
+ self.total_hits = 0
+ self.total_lruThits = 0
+ self.total_fifoThits = 0
+ self.total_evicts = 0
+
+ def restart(self):
+ Simulation.restart(self)
+ self.hits = 0
+ self.lruThits = 0
+ self.fifoThits = 0
+ self.evicts = 0
+
+ def write(self, oid, size):
+ pass
+
+ def replace(self, lruB=False):
+ self.evicts += 1
+ self.total_evicts += 1
+ if self.fifoT_size > self.p or (lruB and self.fifoT_size == self.p):
+ node = self.fifoT.next
+ if node is self.fifoT:
+ return 0
+ assert node is not self.fifoT, self.stats()
+ node.linkbefore(self.fifoB)
+ self.fifoT_len -= 1
+ self.fifoT_size -= node.size
+ self.fifoB_len += 1
+ self.fifoB_size += node.size
+ else:
+ node = self.lruT.next
+ if node is self.lruT:
+ return 0
+ assert node is not self.lruT, self.stats()
+ node.linkbefore(self.lruB)
+ self.lruT_len -= 1
+ self.lruT_size -= node.size
+ self.lruB_len += 1
+ self.lruB_size += node.size
+ return node.size
+
+ def stats(self):
+ self.totalsize = self.lruT_size + self.fifoT_size
+ self.allsize = self.totalsize + self.lruB_size + self.fifoB_size
+ print "cachelimit = %s totalsize=%s allsize=%s" % (
+ addcommas(self.cachelimit),
+ addcommas(self.totalsize),
+ addcommas(self.allsize))
+
+ fmt = (
+ "p=%(p)d\n"
+ "lruT = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
+ "fifoT = %(fifoT_len)5d / %(fifoT_size)8d / %(fifoThits)d\n"
+ "lruB = %(lruB_len)5d / %(lruB_size)8d\n"
+ "fifoB = %(fifoB_len)5d / %(fifoB_size)8d\n"
+ "loads=%(loads)d hits=%(hits)d evicts=%(evicts)d\n"
+ )
+ print fmt % self.__dict__
+
+ def report(self, extratext=''):
+ self.total_p = self.p
+ Simulation.report(self, extratext)
+
+ def load(self, oid, size):
+ node = self.cache.get(oid)
+ if node is None:
+ # cache miss: We're going to insert a new object in fifoT.
+ # If fifo is full, we'll need to evict something to make
+ # room for it.
+
+ prev = need = size
+ while need > 0:
+ if size + self.fifoT_size + self.fifoB_size >= self.cachelimit:
+ if need + self.fifoT_size >= self.cachelimit:
+ node = self.fifoB.next
+ assert node is not self.fifoB, self.stats()
+ node.unlink()
+ del self.cache[node.oid]
+ self.fifoB_size -= node.size
+ self.fifoB_len -= 1
+ self.evicts += 1
+ self.total_evicts += 1
+ else:
+ node = self.fifoB.next
+ assert node is not self.fifoB, self.stats()
+ node.unlink()
+ del self.cache[node.oid]
+ self.fifoB_size -= node.size
+ self.fifoB_len -= 1
+ if self.fifoT_size + self.lruT_size > self.cachelimit:
+ need -= self.replace()
+ else:
+ incache_size = self.fifoT_size + self.lruT_size + need
+ total_size = (incache_size + self.fifoB_size
+ + self.lruB_size)
+ if total_size >= self.cachelimit * 2:
+ node = self.lruB.next
+ if node is self.lruB:
+ break
+ assert node is not self.lruB
+ node.unlink()
+ del self.cache[node.oid]
+ self.lruB_size -= node.size
+ self.lruB_len -= 1
+ elif incache_size > self.cachelimit:
+ need -= self.replace()
+ else:
+ break
+ if need == prev:
+ # XXX hack, apparently we can't get rid of anything else
+ break
+ prev = need
+
+ node = Node2Q(oid, size)
+ node.linkbefore(self.fifoT)
+ self.fifoT_len += 1
+ self.fifoT_size += size
+ self.cache[oid] = node
+ else:
+ # a cache hit, but possibly in a bottom list that doesn't
+ # actually hold the object
+ if node.kind is lruT:
+ node.linkbefore(self.lruT)
+
+ self.hits += 1
+ self.total_hits += 1
+ self.lruThits += 1
+ self.total_lruThits += 1
+
+ elif node.kind is fifoT:
+ node.linkbefore(self.lruT)
+ self.fifoT_len -= 1
+ self.lruT_len += 1
+ self.fifoT_size -= node.size
+ self.lruT_size += node.size
+
+ self.hits += 1
+ self.total_hits += 1
+ self.fifoThits += 1
+ self.total_fifoThits += 1
+
+ elif node.kind is fifoB:
+ node.linkbefore(self.lruT)
+ self.fifoB_len -= 1
+ self.lruT_len += 1
+ self.fifoB_size -= node.size
+ self.lruT_size += node.size
+
+ # XXX need a better min than 1?
+## print "adapt+", max(1, self.lruB_size // self.fifoB_size)
+ delta = max(1, self.lruB_size / max(1, self.fifoB_size))
+ self.p += delta * self.walk_factor
+ if self.p > self.cachelimit:
+ self.p = self.cachelimit
+
+ need = node.size
+ if self.lruT_size + self.fifoT_size + need > self.cachelimit:
+ while need > 0:
+ r = self.replace()
+ if not r:
+ break
+ need -= r
+
+ elif node.kind is lruB:
+ node.linkbefore(self.lruT)
+ self.lruB_len -= 1
+ self.lruT_len += 1
+ self.lruB_size -= node.size
+ self.lruT_size += node.size
+
+ # XXX need a better min than 1?
+## print "adapt-", max(1, self.fifoB_size // self.lruB_size)
+ delta = max(1, self.fifoB_size / max(1, self.lruB_size))
+ self.p -= delta * self.walk_factor
+ if self.p < 0:
+ self.p = 0
+
+ need = node.size
+ if self.lruT_size + self.fifoT_size + need > self.cachelimit:
+ while need > 0:
+ r = self.replace(lruB=True)
+ if not r:
+ break
+ need -= r
+
+ def inval(self, oid):
+ pass
+
+ def extraheader(self):
+ pass
+
+class OracleSimulation(LRUCacheSimulation):
+ # Not sure how to implement this yet. This is a cache where I
+ # cheat to see how good we could actually do. The cache
+ # replacement problem for multi-size caches is NP-hard, so we're
+ # not going to have an optimal solution.
+
+ # At the moment, the oracle is mostly blind. It knows which
+ # objects will be referenced more than once, so that it can
+ # ignore objects referenced only once. In most traces, these
+ # objects account for about 20% of references.
+
+ def __init__(self, cachelimit, filename):
+ LRUCacheSimulation.__init__(self, cachelimit)
+ self.count = {}
+ self.scan(filename)
+
+ def load(self, oid, size):
+ node = self.cache.get(oid)
+ if node is not None:
+ self.hits += 1
+ self.total_hits += 1
+ node.linkbefore(self.head)
+ else:
+ if oid in self.count:
+ self.write(oid, size)
+
+ def scan(self, filename):
+ # scan the file in advance to figure out which objects will
+ # be referenced more than once.
+ f = open(filename, "rb")
+ struct_unpack = struct.unpack
+ f_read = f.read
+ offset = 0
+ while 1:
+ # Read a record and decode it
+ r = f_read(8)
+ if len(r) < 8:
+ break
+ offset += 8
+ ts, code = struct_unpack(">ii", r)
+ if ts == 0:
+ # Must be a misaligned record caused by a crash
+ ##print "Skipping 8 bytes at offset", offset-8
+ continue
+ r = f_read(16)
+ if len(r) < 16:
+ break
+ offset += 16
+ oid, serial = struct_unpack(">8s8s", r)
+ if code & 0x70 == 0x20:
+ # only look at loads
+ self.count[oid] = self.count.get(oid, 0) + 1
+
+ all = len(self.count)
+
+ # Now remove everything with count == 1
+ once = [oid for oid, count in self.count.iteritems()
+ if count == 1]
+ for oid in once:
+ del self.count[oid]
+
+ print "Scanned file, %d unique oids, %d repeats" % (
+ all, len(self.count))
+
+class BuddyCacheSimulation(LRUCacheSimulation):
+
+ def __init__(self, cachelimit):
+ LRUCacheSimulation.__init__(self, roundup(cachelimit))
+
+ def restart(self):
+ LRUCacheSimulation.restart(self)
+ self.allocator = self.allocatorFactory(self.cachelimit)
+
+ def allocatorFactory(self, size):
+ return BuddyAllocator(size)
+
+ # LRUCacheSimulation.load() is just fine
+
+ def write(self, oid, size):
+ node = self.cache.get(oid)
+ if node is not None:
+ node.unlink()
+ assert self.head.next is not None
+ self.size -= node.size
+ self.allocator.free(node)
+ while 1:
+ node = self.allocator.alloc(size)
+ if node is not None:
+ break
+ # Failure to allocate. Evict something and try again.
+ node = self.head.next
+ assert node is not self.head
+ self.evicts += 1
+ self.total_evicts += 1
+ node.unlink()
+ assert self.head.next is not None
+ del self.cache[node.oid]
+ self.size -= node.size
+ self.allocator.free(node)
+ node.oid = oid
+ self.cache[oid] = node
+ node.linkbefore(self.head)
+ self.size += node.size
+
+ def inval(self, oid):
+ node = self.cache.get(oid)
+ if node is not None:
+ assert node.oid == oid
+ self.invals += 1
+ self.total_invals += 1
+ node.unlink()
+ assert self.head.next is not None
+ del self.cache[oid]
+ self.size -= node.size
+ assert self.size >= 0
+ self.allocator.free(node)
+
+class SimpleCacheSimulation(BuddyCacheSimulation):
+
+ def allocatorFactory(self, size):
+ return SimpleAllocator(size)
+
+ def finish(self):
+ BuddyCacheSimulation.finish(self)
+ self.allocator.report()
+
+MINSIZE = 256
+
+class BuddyAllocator:
+
+ def __init__(self, cachelimit):
+ cachelimit = roundup(cachelimit)
+ self.cachelimit = cachelimit
+ self.avail = {} # Map rounded-up sizes to free list node heads
+ self.nodes = {} # Map address to node
+ k = MINSIZE
+ while k <= cachelimit:
+ self.avail[k] = n = Node(None, None) # Not BlockNode; has no addr
+ n.linkbefore(n)
+ k += k
+ node = BlockNode(None, cachelimit, 0)
+ self.nodes[0] = node
+ node.linkbefore(self.avail[cachelimit])
+
+ def alloc(self, size):
+ size = roundup(size)
+ k = size
+ while k <= self.cachelimit:
+ head = self.avail[k]
+ node = head.next
+ if node is not head:
+ break
+ k += k
+ else:
+ return None # Store is full, or block is too large
+ node.unlink()
+ size2 = node.size
+ while size2 > size:
+ size2 = size2 / 2
+ assert size2 >= size
+ node.size = size2
+ buddy = BlockNode(None, size2, node.addr + size2)
+ self.nodes[buddy.addr] = buddy
+ buddy.linkbefore(self.avail[size2])
+ node.oid = 1 # Flag as in-use
+ return node
+
+ def free(self, node):
+ assert node is self.nodes[node.addr]
+ assert node.prev is node.next is None
+ node.oid = None # Flag as free
+ while node.size < self.cachelimit:
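+            # A block's buddy sits at addr ^ size (block sizes are
+            # powers of two); e.g. a 512-byte block at address 1024
+            # has its buddy at 1024 ^ 512 == 1536.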
+ buddy_addr = node.addr ^ node.size
+ buddy = self.nodes[buddy_addr]
+ assert buddy.addr == buddy_addr
+ if buddy.oid is not None or buddy.size != node.size:
+ break
+ # Merge node with buddy
+ buddy.unlink()
+ if buddy.addr < node.addr: # buddy prevails
+ del self.nodes[node.addr]
+ node = buddy
+ else: # node prevails
+ del self.nodes[buddy.addr]
+ node.size *= 2
+ assert node is self.nodes[node.addr]
+ node.linkbefore(self.avail[node.size])
+
+ def dump(self, msg=""):
+ if msg:
+ print msg,
+ size = MINSIZE
+ blocks = bytes = 0
+ while size <= self.cachelimit:
+ head = self.avail[size]
+ node = head.next
+ count = 0
+ while node is not head:
+ count += 1
+ node = node.next
+ if count:
+ print "%d:%d" % (size, count),
+ blocks += count
+ bytes += count*size
+ size += size
+ print "-- %d, %d" % (bytes, blocks)
+
+def roundup(size):
+ k = MINSIZE
+ while k < size:
+ k += k
+ return k
+
+class SimpleAllocator:
+
+ def __init__(self, arenasize):
+ self.arenasize = arenasize
+ self.avail = BlockNode(None, 0, 0) # Weird: empty block as list head
+ self.rover = self.avail
+ node = BlockNode(None, arenasize, 0)
+ node.linkbefore(self.avail)
+ self.taglo = {0: node}
+ self.taghi = {arenasize: node}
+ # Allocator statistics
+ self.nallocs = 0
+ self.nfrees = 0
+ self.allocloops = 0
+ self.freebytes = arenasize
+ self.freeblocks = 1
+ self.allocbytes = 0
+ self.allocblocks = 0
+
+ def report(self):
+ print ("NA=%d AL=%d NF=%d ABy=%d ABl=%d FBy=%d FBl=%d" %
+ (self.nallocs, self.allocloops,
+ self.nfrees,
+ self.allocbytes, self.allocblocks,
+ self.freebytes, self.freeblocks))
+
+ def alloc(self, size):
+ self.nallocs += 1
+ # First fit algorithm
+ rover = stop = self.rover
+ while 1:
+ self.allocloops += 1
+ if rover.size >= size:
+ break
+ rover = rover.next
+ if rover is stop:
+ return None # We went round the list without finding space
+ if rover.size == size:
+ self.rover = rover.next
+ rover.unlink()
+ del self.taglo[rover.addr]
+ del self.taghi[rover.addr + size]
+ self.freeblocks -= 1
+ self.allocblocks += 1
+ self.freebytes -= size
+ self.allocbytes += size
+ return rover
+ # Take space from the beginning of the roving pointer
+ assert rover.size > size
+ node = BlockNode(None, size, rover.addr)
+ del self.taglo[rover.addr]
+ rover.size -= size
+ rover.addr += size
+ self.taglo[rover.addr] = rover
+ #self.freeblocks += 0 # No change here
+ self.allocblocks += 1
+ self.freebytes -= size
+ self.allocbytes += size
+ return node
+
+ def free(self, node):
+ self.nfrees += 1
+ self.freeblocks += 1
+ self.allocblocks -= 1
+ self.freebytes += node.size
+ self.allocbytes -= node.size
+ node.linkbefore(self.avail)
+ self.taglo[node.addr] = node
+ self.taghi[node.addr + node.size] = node
+ x = self.taghi.get(node.addr)
+ if x is not None:
+ # Merge x into node
+ x.unlink()
+ self.freeblocks -= 1
+ del self.taglo[x.addr]
+ del self.taghi[x.addr + x.size]
+ del self.taglo[node.addr]
+ node.addr = x.addr
+ node.size += x.size
+ self.taglo[node.addr] = node
+ x = self.taglo.get(node.addr + node.size)
+ if x is not None:
+ # Merge x into node
+ x.unlink()
+ self.freeblocks -= 1
+ del self.taglo[x.addr]
+ del self.taghi[x.addr + x.size]
+ del self.taghi[node.addr + node.size]
+ node.size += x.size
+ self.taghi[node.addr + node.size] = node
+ # It's possible that either one of the merges above invalidated
+ # the rover.
+        # It's simplest to reset the rover to the newly freed block.
+ self.rover = node
+
+ def dump(self, msg=""):
+ if msg:
+ print msg,
+ count = 0
+ bytes = 0
+ node = self.avail.next
+ while node is not self.avail:
+ bytes += node.size
+ count += 1
+ node = node.next
+ print count, "free blocks,", bytes, "free bytes"
+ self.report()
+
+class BlockNode(Node):
+
+ __slots__ = ['addr']
+
+ def __init__(self, oid, size, addr):
+ Node.__init__(self, oid, size)
+ self.addr = addr
+
+def testallocator(factory=BuddyAllocator):
+ # Run one of Knuth's experiments as a test
+ import random
+ import heapq # This only runs with Python 2.3, folks :-)
+ reportfreq = 100
+ cachelimit = 2**17
+ cache = factory(cachelimit)
+ queue = []
+ T = 0
+ blocks = 0
+ while T < 5000:
+ while queue and queue[0][0] <= T:
+ time, node = heapq.heappop(queue)
+ assert time == T
+ ##print "free addr=%d, size=%d" % (node.addr, node.size)
+ cache.free(node)
+ blocks -= 1
+ size = random.randint(100, 2000)
+ lifetime = random.randint(1, 100)
+ node = cache.alloc(size)
+ if node is None:
+ print "out of mem"
+ cache.dump("T=%4d: %d blocks;" % (T, blocks))
+ break
+ else:
+ ##print "alloc addr=%d, size=%d" % (node.addr, node.size)
+ blocks += 1
+ heapq.heappush(queue, (T + lifetime, node))
+ T = T+1
+ if T % reportfreq == 0:
+ cache.dump("T=%4d: %d blocks;" % (T, blocks))
+
+def hitrate(loads, hits):
+ return "%5.1f%%" % (100.0 * hits / max(1, loads))
+
+def duration(secs):
+ mm, ss = divmod(secs, 60)
+ hh, mm = divmod(mm, 60)
+ if hh:
+ return "%d:%02d:%02d" % (hh, mm, ss)
+ if mm:
+ return "%d:%02d" % (mm, ss)
+ return "%d" % ss
+
+def addcommas(n):
+ sign, s = '', str(n)
+ if s[0] == '-':
+ sign, s = '-', s[1:]
+ i = len(s) - 3
+ while i > 0:
+ s = s[:i] + ',' + s[i:]
+ i -= 3
+ return sign + s
+
+import random
+
+def maybe(f, p=0.5):
+ if random.random() < p:
+ f()
+
+#############################################################################
+# Thor-like eviction scheme.
+#
+# The cache keeps a list of all objects, and uses a travelling pointer
+# to decay the worth of objects over time.
+
+class ThorNode(Node):
+
+ __slots__ = ['worth']
+
+ def __init__(self, oid, size, worth=None):
+ Node.__init__(self, oid, size)
+ self.worth = worth
+
+class ThorListHead(Node):
+ def __init__(self):
+ Node.__init__(self, 0, 0)
+ self.next = self.prev = self
+
+class ThorSimulation(Simulation):
+
+ extras = "evicts", "trips"
+
+ def __init__(self, cachelimit):
+ Simulation.__init__(self, cachelimit)
+
+ # Maximum total of object sizes we keep in cache.
+ self.maxsize = cachelimit
+ # Current total of object sizes in cache.
+ self.currentsize = 0
+
+ # A worth byte maps to a set of all objects with that worth.
+ # This is cheap to keep updated, and makes finding low-worth
+ # objects for eviction trivial (just march over the worthsets
+ # list, in order).
+ self.worthsets = [Set() for dummy in range(256)]
+
+ # We keep a circular list of all objects in cache. currentobj
+ # walks around it forever. Each time _tick() is called, the
+ # worth of currentobj is decreased, basically by shifting
+ # right 1, and currentobj moves on to the next object. When
+ # an object is first inserted, it enters the list right before
+ # currentobj. When an object is accessed, its worth is
+ # increased by or'ing in 0x80. This scheme comes from the
+ # Thor system, and is an inexpensive way to account for both
+ # recency and frequency of access: recency is reflected in
+ # the leftmost bit set, and frequency by how many bits are
+ # set.
+ #
+ # Note: because evictions are interleaved with ticks,
+ # unlinking an object is tricky, lest we evict currentobj. The
+ # class _unlink method takes care of this properly.
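+        #
+        # E.g. an object's worth gets 0x80 OR'ed in on access; each
+        # pass of the pointer roughly halves it (0x80 -> 0x40 -> 0x20),
+        # and a later access sets the high bit again (0x20 -> 0xA0).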
+ self.listhead = ThorListHead()
+ self.currentobj = self.listhead
+
+ # Map an object.oid to its ThorNode.
+ self.oid2object = {}
+
+ self.total_evicts = self.total_trips = 0
+
+ # Unlink object from the circular list, taking care not to lose
+ # track of the current object. Always call this instead of
+ # invoking obj.unlink() directly.
+ def _unlink(self, obj):
+ assert obj is not self.listhead
+ if obj is self.currentobj:
+ self.currentobj = obj.next
+ obj.unlink()
+
+ # Change obj.worth to newworth, maintaining invariants.
+ def _change_worth(self, obj, newworth):
+ if obj.worth != newworth:
+ self.worthsets[obj.worth].remove(obj)
+ obj.worth = newworth
+ self.worthsets[newworth].add(obj)
+
+ def add(self, object):
+ assert object.oid not in self.oid2object
+ self.oid2object[object.oid] = object
+
+ newsize = self.currentsize + object.size
+ if newsize > self.maxsize:
+ self._evictbytes(newsize - self.maxsize)
+ self.currentsize += object.size
+ object.linkbefore(self.currentobj)
+
+ if object.worth is None:
+ # Give smaller objects higher initial worth. This favors kicking
+ # out unreferenced large objects before kicking out unreferenced
+ # small objects. On real life traces, this is a significant
+ # win for the hit rate.
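+            # (E.g. a 4 KB object starts at worth 32 - 12 == 20; a
+            # 64-byte object starts at 32 - 6 == 26.)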
+ object.worth = 32 - int(round(math.log(object.size, 2)))
+ self.worthsets[object.worth].add(object)
+
+ # Decrease the worth of the current object, and advance the
+ # current object.
+ def _tick(self):
+ c = self.currentobj
+ if c is self.listhead:
+ c = c.next
+ if c is self.listhead: # list is empty
+ return
+ self.total_trips += 1
+ self.trips += 1
+ self._change_worth(c, (c.worth + 1) >> 1)
+ self.currentobj = c.next
+
+ def access(self, oid):
+ self._tick()
+ obj = self.oid2object.get(oid)
+ if obj is None:
+ return None
+ self._change_worth(obj, obj.worth | 0x80)
+ return obj
+
+ # Evict objects of least worth first, until at least nbytes bytes
+ # have been freed.
+ def _evictbytes(self, nbytes):
+ for s in self.worthsets:
+ while s:
+ if nbytes <= 0:
+ return
+ obj = s.pop()
+ nbytes -= obj.size
+ self._evictobj(obj)
+
+ def _evictobj(self, obj):
+ self.currentsize -= obj.size
+ self.worthsets[obj.worth].discard(obj)
+ del self.oid2object[obj.oid]
+ self._unlink(obj)
+
+ self.evicts += 1
+ self.total_evicts += 1
+
+ def _evict_without_bumping_evict_stats(self, obj):
+ self._evictobj(obj)
+ self.evicts -= 1
+ self.total_evicts -= 1
+
+ # Simulator overrides from here on.
+
+ def restart(self):
+ # Reset base class
+ Simulation.restart(self)
+ # Reset additional per-run statistics
+ self.evicts = self.trips = 0
+
+ def write(self, oid, size):
+ obj = self.oid2object.get(oid)
+ worth = None
+ if obj is not None:
+ worth = obj.worth
+ self._evict_without_bumping_evict_stats(obj)
+ self.add(ThorNode(oid, size, worth))
+
+ def load(self, oid, size):
+ if self.access(oid) is not None:
+ self.hits += 1
+ self.total_hits += 1
+ else:
+ self.write(oid, size)
+
+ def inval(self, oid):
+ obj = self.oid2object.get(oid)
+ if obj is not None:
+ self.invals += 1
+ self.total_invals += 1
+ self._evict_without_bumping_evict_stats(obj)
+
+ # Take the "x" off to see additional stats after each restart period.
+ def xreport(self):
+ Simulation.report(self)
+ print 'non-empty worth sets', sum(map(bool, self.worthsets)),
+ print 'objects', len(self.oid2object),
+ print 'size', self.currentsize
+
+#############################################################################
+# Perfection: What if the cache were unbounded, and never forgot anything?
+# This simulator answers that question directly; the cache size parameter
+# isn't used.
+
+class UnboundedSimulation(Simulation):
+
+    extraname = 'evicts' # Simulation.__init__ requires at least one extra stat
+
+ def __init__(self, cachelimit):
+ Simulation.__init__(self, cachelimit)
+ self.oids = Set()
+ self.evicts = self.total_evicts = 0
+
+ def write(self, oid, size):
+ self.oids.add(oid)
+
+ def load(self, oid, size):
+ if oid in self.oids:
+ self.hits += 1
+ self.total_hits += 1
+ else:
+ self.oids.add(oid)
+
+ def inval(self, oid):
+ if oid in self.oids:
+ self.invals += 1
+ self.total_invals += 1
+ self.oids.remove(oid)
+
+if __name__ == "__main__":
+ sys.exit(main())
Property changes on: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py
___________________________________________________________________
Added: svn:executable
+ *
Added: cvs2svn:cvs-rev
+ 1.18
Added: svn:keywords
+ Id
Added: svn:mergeinfo
+
Added: svn:eol-style
+ native
Copied: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_stats.py (from rev 112483, ZODB/trunk/src/ZODB/scripts/stats.py)
===================================================================
--- ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_stats.py (rev 0)
+++ ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_stats.py 2010-06-01 17:51:54 UTC (rev 112890)
@@ -0,0 +1,387 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""Trace file statistics analyzer.
+
+Usage: cache_trace_stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile
+-h: print histogram of object load frequencies
+-i: summarizing interval in minutes (default 15; max 60)
+-q: quiet; don't print summaries
+-s: print histogram of object sizes
+-S: don't print statistics
+-v: verbose; print each record
+-X: enable heuristic checking for misaligned records: oids > 2**32
+ will be rejected; this requires the tracefile to be seekable
+"""
+
+"""File format:
+
+Each record is 26 bytes, plus a variable number of bytes to store an oid,
+with the following layout. Numbers are big-endian integers.
+
+Offset Size Contents
+
+0 4 timestamp (seconds since 1/1/1970)
+4 3 data size, in 256-byte increments, rounded up
+7 1 code (see below)
+8 2 object id length
+10 8 start tid
+18 8 end tid
+26 variable object id
+
+The code at offset 7 packs three fields:
+
+Mask bits Contents
+
+0x80 1 set if there was a non-empty version string
+0x7e 6 function and outcome code
+0x01 1 current cache file (0 or 1)
+
+The "current cache file" bit is no longer used; it refers to a 2-file
+cache scheme used before ZODB 3.3.
+
+The function and outcome codes are documented in detail at the end of
+this file in the 'explain' dictionary. Note that the keys there (and
+also the arguments to _trace() in ClientStorage.py) are 'code & 0x7e',
+i.e. the low bit is always zero.
+"""
+
+import sys
+import time
+import getopt
+import struct
+from types import StringType
+
+def usage(msg):
+ print >> sys.stderr, msg
+ print >> sys.stderr, __doc__
+
+def main():
+ # Parse options
+ verbose = False
+ quiet = False
+ dostats = True
+ print_size_histogram = False
+ print_histogram = False
+ interval = 15*60 # Every 15 minutes
+ heuristic = False
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "hi:qsSvX")
+ except getopt.error, msg:
+ usage(msg)
+ return 2
+ for o, a in opts:
+ if o == '-h':
+ print_histogram = True
+ elif o == "-i":
+ interval = int(60 * float(a))
+ if interval <= 0:
+ interval = 60
+ elif interval > 3600:
+ interval = 3600
+ elif o == "-q":
+ quiet = True
+ verbose = False
+ elif o == "-s":
+ print_size_histogram = True
+ elif o == "-S":
+ dostats = False
+ elif o == "-v":
+ verbose = True
+ elif o == '-X':
+ heuristic = True
+ else:
+ assert False, (o, opts)
+
+ if len(args) != 1:
+ usage("exactly one file argument required")
+ return 2
+ filename = args[0]
+
+ # Open file
+ if filename.endswith(".gz"):
+ # Open gzipped file
+ try:
+ import gzip
+ except ImportError:
+ print >> sys.stderr, "can't read gzipped files (no module gzip)"
+ return 1
+ try:
+ f = gzip.open(filename, "rb")
+ except IOError, msg:
+ print >> sys.stderr, "can't open %s: %s" % (filename, msg)
+ return 1
+ elif filename == '-':
+ # Read from stdin
+ f = sys.stdin
+ else:
+ # Open regular file
+ try:
+ f = open(filename, "rb")
+ except IOError, msg:
+ print >> sys.stderr, "can't open %s: %s" % (filename, msg)
+ return 1
+
+ rt0 = time.time()
+ bycode = {} # map code to count of occurrences
+ byinterval = {} # map code to count in current interval
+ records = 0 # number of trace records read
+ versions = 0 # number of trace records with versions
+ datarecords = 0 # number of records with dlen set
+ datasize = 0L # sum of dlen across records with dlen set
+ oids = {} # map oid to number of times it was loaded
+ bysize = {} # map data size to number of loads
+ bysizew = {} # map data size to number of writes
+ total_loads = 0
+ t0 = None # first timestamp seen
+ te = None # most recent timestamp seen
+ h0 = None # timestamp at start of current interval
+ he = None # timestamp at end of current interval
+ thisinterval = None # generally te//interval
+ f_read = f.read
+ unpack = struct.unpack
+ FMT = ">iiH8s8s"
+ FMT_SIZE = struct.calcsize(FMT)
+ assert FMT_SIZE == 26
+ # Read file, gathering statistics, and printing each record if verbose.
+ try:
+ while 1:
+ r = f_read(FMT_SIZE)
+ if len(r) < FMT_SIZE:
+ break
+ ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
+ if ts == 0:
+ # Must be a misaligned record caused by a crash.
+ if not quiet:
+ print "Skipping 8 bytes at offset", f.tell() - FMT_SIZE
+ f.seek(f.tell() - FMT_SIZE + 8)
+ continue
+ oid = f_read(oidlen)
+ if len(oid) < oidlen:
+ break
+ records += 1
+ if t0 is None:
+ t0 = ts
+ thisinterval = t0 // interval
+ h0 = he = ts
+ te = ts
+ if ts // interval != thisinterval:
+ if not quiet:
+ dumpbyinterval(byinterval, h0, he)
+ byinterval = {}
+ thisinterval = ts // interval
+ h0 = ts
+ he = ts
+ dlen, code = code & 0x7fffff00, code & 0xff
+ if dlen:
+ datarecords += 1
+ datasize += dlen
+ if code & 0x80:
+ version = 'V'
+ versions += 1
+ else:
+ version = '-'
+ code &= 0x7e
+ bycode[code] = bycode.get(code, 0) + 1
+ byinterval[code] = byinterval.get(code, 0) + 1
+ if dlen:
+ if code & 0x70 == 0x20: # All loads
+ bysize[dlen] = d = bysize.get(dlen) or {}
+ d[oid] = d.get(oid, 0) + 1
+ elif code & 0x70 == 0x50: # All stores
+ bysizew[dlen] = d = bysizew.get(dlen) or {}
+ d[oid] = d.get(oid, 0) + 1
+ if verbose:
+ print "%s %02x %s %016x %016x %c %s" % (
+ time.ctime(ts)[4:-5],
+ code,
+ oid_repr(oid),
+ U64(start_tid),
+ U64(end_tid),
+ version,
+ dlen and str(dlen) or "")
+ if code & 0x70 == 0x20:
+ oids[oid] = oids.get(oid, 0) + 1
+ total_loads += 1
+ elif code == 0x00: # restart
+ if not quiet:
+ dumpbyinterval(byinterval, h0, he)
+ byinterval = {}
+ thisinterval = ts // interval
+ h0 = he = ts
+ if not quiet:
+ print time.ctime(ts)[4:-5],
+ print '='*20, "Restart", '='*20
+ except KeyboardInterrupt:
+ print "\nInterrupted. Stats so far:\n"
+
+ end_pos = f.tell()
+ f.close()
+ rte = time.time()
+ if not quiet:
+ dumpbyinterval(byinterval, h0, he)
+
+ # Error if nothing was read
+ if not records:
+ print >> sys.stderr, "No records processed"
+ return 1
+
+ # Print statistics
+ if dostats:
+ print
+ print "Read %s trace records (%s bytes) in %.1f seconds" % (
+ addcommas(records), addcommas(end_pos), rte-rt0)
+ print "Versions: %s records used a version" % addcommas(versions)
+ print "First time: %s" % time.ctime(t0)
+ print "Last time: %s" % time.ctime(te)
+ print "Duration: %s seconds" % addcommas(te-t0)
+ print "Data recs: %s (%.1f%%), average size %.1f KB" % (
+ addcommas(datarecords),
+ 100.0 * datarecords / records,
+ datasize / 1024.0 / datarecords)
+ print "Hit rate: %.1f%% (load hits / loads)" % hitrate(bycode)
+ print
+ codes = bycode.keys()
+ codes.sort()
+ print "%13s %4s %s" % ("Count", "Code", "Function (action)")
+ for code in codes:
+ print "%13s %02x %s" % (
+ addcommas(bycode.get(code, 0)),
+ code,
+ explain.get(code) or "*** unknown code ***")
+
+ # Print histogram.
+ if print_histogram:
+ print
+ print "Histogram of object load frequency"
+ total = len(oids)
+ print "Unique oids: %s" % addcommas(total)
+ print "Total loads: %s" % addcommas(total_loads)
+ s = addcommas(total)
+ width = max(len(s), len("objects"))
+ fmt = "%5d %" + str(width) + "s %5.1f%% %5.1f%% %5.1f%%"
+ hdr = "%5s %" + str(width) + "s %6s %6s %6s"
+ print hdr % ("loads", "objects", "%obj", "%load", "%cum")
+ cum = 0.0
+ for binsize, count in histogram(oids):
+ obj_percent = 100.0 * count / total
+ load_percent = 100.0 * count * binsize / total_loads
+ cum += load_percent
+ print fmt % (binsize, addcommas(count),
+ obj_percent, load_percent, cum)
+
+ # Print size histogram.
+ if print_size_histogram:
+ print
+ print "Histograms of object sizes"
+ print
+ dumpbysize(bysizew, "written", "writes")
+ dumpbysize(bysize, "loaded", "loads")
+
+def dumpbysize(bysize, how, how2):
+ print
+ print "Unique sizes %s: %s" % (how, addcommas(len(bysize)))
+ print "%10s %6s %6s" % ("size", "objs", how2)
+ sizes = bysize.keys()
+ sizes.sort()
+ for size in sizes:
+ loads = 0
+ for n in bysize[size].itervalues():
+ loads += n
+ print "%10s %6d %6d" % (addcommas(size),
+ len(bysize.get(size, "")),
+ loads)
+
+def dumpbyinterval(byinterval, h0, he):
+ loads = hits = 0
+ for code in byinterval:
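+ # Codes 0x20/0x22/0x24/0x26 are loads; 0x22 and 0x26 are the
+ # hit variants (see the `explain` table below).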
+ if code & 0x70 == 0x20:
+ n = byinterval[code]
+ loads += n
+ if code in (0x22, 0x26):
+ hits += n
+ if not loads:
+ return
+ hr = 100.0 * hits / loads
+ print "%s-%s %10s loads, %10s hits,%5.1f%% hit rate" % (
+ time.ctime(h0)[4:-8], time.ctime(he)[14:-8],
+ addcommas(loads), addcommas(hits), hr)
+
+def hitrate(bycode):
+ loads = hits = 0
+ for code in bycode:
+ if code & 0x70 == 0x20:
+ n = bycode[code]
+ loads += n
+ if code in (0x22, 0x26):
+ hits += n
+ if loads:
+ return 100.0 * hits / loads
+ else:
+ return 0.0
+
+def histogram(d):
+ bins = {}
+ for v in d.itervalues():
+ bins[v] = bins.get(v, 0) + 1
+ L = bins.items()
+ L.sort()
+ return L
+
+def U64(s):
+ return struct.unpack(">Q", s)[0]
+
+def oid_repr(oid):
+ if isinstance(oid, StringType) and len(oid) == 8:
+ return '%16x' % U64(oid)
+ else:
+ return repr(oid)
+
+def addcommas(n):
+ sign, s = '', str(n)
+ if s[0] == '-':
+ sign, s = '-', s[1:]
+ i = len(s) - 3
+ while i > 0:
+ s = s[:i] + ',' + s[i:]
+ i -= 3
+ return sign + s
+
+explain = {
+ # The first hex digit shows the operation, the second the outcome.
+ # If the second digit is in "02468" then it is a 'miss'.
+ # If it is in "ACE" then it is a 'hit'.
+
+ 0x00: "_setup_trace (initialization)",
+
+ 0x10: "invalidate (miss)",
+ 0x1A: "invalidate (hit, version)",
+ 0x1C: "invalidate (hit, saving non-current)",
+ # 0x1E can occur during startup verification.
+ 0x1E: "invalidate (hit, discarding current or non-current)",
+
+ 0x20: "load (miss)",
+ 0x22: "load (hit)",
+ 0x24: "load (non-current, miss)",
+ 0x26: "load (non-current, hit)",
+
+ 0x50: "store (version)",
+ 0x52: "store (current, non-version)",
+ 0x54: "store (non-current)",
+ }
+
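+# Minimal sketch (helper name ours, sample byte arbitrary): how one raw
+# trace-code byte decomposes per the field conventions noted above.
+def _decode_code_byte(raw):
+ # high bit: non-empty version string; low bit: legacy 2-file-cache flag
+ return raw & 0x80, raw & 0x7e, raw & 0x01
+
+# 0xa3 -> version flag set, code 0x22 ("load (hit)"), legacy file bit 1
+assert _decode_code_byte(0xa3) == (0x80, 0x22, 0x01)
+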
+if __name__ == "__main__":
+ sys.exit(main())
Property changes on: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_stats.py
___________________________________________________________________
Added: svn:executable
+ *
Added: cvs2svn:cvs-rev
+ 1.22
Added: svn:keywords
+ Id
Added: svn:mergeinfo
+
Added: svn:eol-style
+ native
Deleted: ZODB/branches/jim-simulation/src/ZODB/scripts/simul.py
===================================================================
--- ZODB/branches/jim-simulation/src/ZODB/scripts/simul.py 2010-06-01 17:50:56 UTC (rev 112889)
+++ ZODB/branches/jim-simulation/src/ZODB/scripts/simul.py 2010-06-01 17:51:54 UTC (rev 112890)
@@ -1,1758 +0,0 @@
-#! /usr/bin/env python
-##############################################################################
-#
-# Copyright (c) 2001-2005 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Cache simulation.
-
-Usage: simul.py [-s size] tracefile
-
-Options:
--s size: cache size in MB (default 20 MB)
-"""
-
-import sys
-import time
-import getopt
-import struct
-import math
-import bisect
-from sets import Set
-
-from ZODB.utils import z64
-
-def usage(msg):
- print >> sys.stderr, msg
- print >> sys.stderr, __doc__
-
-def main():
- # Parse options.
- MB = 1024**2
- cachelimit = 20*MB
- simclass = CircularCacheSimulation
- try:
- opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTUs:")
- except getopt.error, msg:
- usage(msg)
- return 2
- for o, a in opts:
- if o == '-b':
- simclass = BuddyCacheSimulation
- elif o == '-f':
- simclass = SimpleCacheSimulation
- elif o == '-l':
- simclass = LRUCacheSimulation
- elif o == '-y':
- simclass = AltZEOCacheSimulation
- elif o == '-z':
- simclass = ZEOCacheSimulation
- elif o == '-s':
- cachelimit = int(float(a)*MB)
- elif o == '-2':
- simclass = TwoQSimulation
- elif o == '-c':
- simclass = CircularCacheSimulation
- elif o == '-O':
- simclass = OracleSimulation
- elif o == '-a':
- simclass = ARCCacheSimulation
- elif o == '-T':
- simclass = ThorSimulation
- elif o == '-U':
- simclass = UnboundedSimulation
- else:
- assert False, (o, a)
-
- if len(args) != 1:
- usage("exactly one file argument required")
- return 2
- filename = args[0]
-
- # Open file.
- if filename.endswith(".gz"):
- # Open gzipped file.
- try:
- import gzip
- except ImportError:
- print >> sys.stderr, "can't read gzipped files (no module gzip)"
- return 1
- try:
- f = gzip.open(filename, "rb")
- except IOError, msg:
- print >> sys.stderr, "can't open %s: %s" % (filename, msg)
- return 1
- elif filename == "-":
- # Read from stdin.
- f = sys.stdin
- else:
- # Open regular file.
- try:
- f = open(filename, "rb")
- except IOError, msg:
- print >> sys.stderr, "can't open %s: %s" % (filename, msg)
- return 1
-
- # Create simulation object.
- if simclass is OracleSimulation:
- sim = simclass(cachelimit, filename)
- else:
- sim = simclass(cachelimit)
-
- # Print output header.
- sim.printheader()
-
- # Read trace file, simulating cache behavior.
- f_read = f.read
- unpack = struct.unpack
- FMT = ">iiH8s8s"
- FMT_SIZE = struct.calcsize(FMT)
- assert FMT_SIZE == 26
- while 1:
- # Read a record and decode it.
- r = f_read(FMT_SIZE)
- if len(r) < FMT_SIZE:
- break
- ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
- if ts == 0:
- # Must be a misaligned record caused by a crash; skip 8 bytes
- # and try again. Why 8? Lost in the mist of history.
- f.seek(f.tell() - FMT_SIZE + 8)
- continue
- oid = f_read(oidlen)
- if len(oid) < oidlen:
- break
- # Decode the code.
- dlen, version, code = (code & 0x7fffff00,
- code & 0x80,
- code & 0x7e)
- # And pass it to the simulation.
- sim.event(ts, dlen, version, code, oid, start_tid, end_tid)
-
- f.close()
- # Finish simulation.
- sim.finish()
-
- # Exit code from main().
- return 0
-
-class Simulation(object):
- """Base class for simulations.
-
- The driver program calls: event(), printheader(), finish().
-
- The standard event() method calls these additional methods:
- write(), load(), inval(), report(), restart(); the standard
- finish() method also calls report().
- """
-
- def __init__(self, cachelimit):
- self.cachelimit = cachelimit
- # Initialize global statistics.
- self.epoch = None
- self.total_loads = 0
- self.total_hits = 0 # subclass must increment
- self.total_invals = 0 # subclass must increment
- self.total_writes = 0
- if not hasattr(self, "extras"):
- self.extras = (self.extraname,)
- self.format = self.format + " %7s" * len(self.extras)
- # Reset per-run statistics and set up simulation data.
- self.restart()
-
- def restart(self):
- # Reset per-run statistics.
- self.loads = 0
- self.hits = 0 # subclass must increment
- self.invals = 0 # subclass must increment
- self.writes = 0
- self.ts0 = None
-
- def event(self, ts, dlen, _version, code, oid,
- start_tid, end_tid):
- # Record first and last timestamp seen.
- if self.ts0 is None:
- self.ts0 = ts
- if self.epoch is None:
- self.epoch = ts
- self.ts1 = ts
-
- # Simulate cache behavior. Caution: the codes in the trace file
- # record whether the actual cache missed or hit on each load, but
- # that bears no necessary relationship to whether the simulated cache
- # will hit or miss. Relatedly, if the actual cache needed to store
- # an object, the simulated cache may not need to (it may already
- # have the data).
- action = code & 0x70
- if action == 0x20:
- # Load.
- self.loads += 1
- self.total_loads += 1
- # Asserting that dlen is 0 iff it's a load miss.
- # assert (dlen == 0) == (code in (0x20, 0x24))
- self.load(oid, dlen, start_tid)
- elif action == 0x50:
- # Store.
- assert dlen
- self.write(oid, dlen, start_tid, end_tid)
- elif action == 0x10:
- # Invalidate.
- self.inval(oid, start_tid)
- elif action == 0x00:
- # Restart.
- self.report()
- self.restart()
- else:
- raise ValueError("unknown trace code 0x%x" % code)
-
- def write(self, oid, size, start_tid, end_tid):
- pass
-
- def load(self, oid, size, start_tid):
- # Must increment .hits and .total_hits as appropriate.
- pass
-
- def inval(self, oid, start_tid):
- # Must increment .invals and .total_invals as appropriate.
- pass
-
- format = "%12s %9s %8s %8s %6s %6s %7s"
-
- # Subclass should override extraname to name known instance variables;
- # if extraname is 'foo', both self.foo and self.total_foo must exist:
- extraname = "*** please override ***"
-
- def printheader(self):
- print "%s, cache size %s bytes" % (self.__class__.__name__,
- addcommas(self.cachelimit))
- self.extraheader()
- extranames = tuple([s.upper() for s in self.extras])
- args = ("START TIME", "DURATION", "LOADS", "HITS",
- "INVALS", "WRITES", "HITRATE") + extranames
- print self.format % args
-
- def extraheader(self):
- pass
-
- nreports = 0
-
- def report(self, extratext=''):
- if self.loads:
- self.nreports += 1
- args = (time.ctime(self.ts0)[4:-8],
- duration(self.ts1 - self.ts0),
- self.loads, self.hits, self.invals, self.writes,
- hitrate(self.loads, self.hits))
- args += tuple([getattr(self, name) for name in self.extras])
- print self.format % args, extratext
-
- def finish(self):
- # Make sure that the last line of output ends with "OVERALL". This
- # makes it much easier for another program parsing the output to
- # find summary statistics.
- if self.nreports < 2:
- self.report('OVERALL')
- else:
- self.report()
- args = (
- time.ctime(self.epoch)[4:-8],
- duration(self.ts1 - self.epoch),
- self.total_loads,
- self.total_hits,
- self.total_invals,
- self.total_writes,
- hitrate(self.total_loads, self.total_hits))
- args += tuple([getattr(self, "total_" + name)
- for name in self.extras])
- print (self.format + " OVERALL") % args
-
-
-# For use in CircularCacheSimulation.
-class CircularCacheEntry(object):
- __slots__ = (# object key: an (oid, start_tid) pair, where
- # start_tid is the tid of the transaction that created
- # this revision of oid
- 'key',
-
- # tid of transaction that created the next revision;
- # z64 iff this is the current revision
- 'end_tid',
-
- # Offset from start of file to the object's data
- # record; this includes all overhead bytes (status
- # byte, size bytes, etc).
- 'offset',
- )
-
- def __init__(self, key, end_tid, offset):
- self.key = key
- self.end_tid = end_tid
- self.offset = offset
-
-from ZEO.cache import ZEC3_HEADER_SIZE
-
-class CircularCacheSimulation(Simulation):
- """Simulate the ZEO 3.0 cache."""
-
- # The cache is managed as a single file with a pointer that
- # goes around the file, circularly, forever. New objects
- # are written at the current pointer, evicting whatever was
- # there previously.
-
- extras = "evicts", "inuse"
-
- def __init__(self, cachelimit):
- from ZEO import cache
-
- Simulation.__init__(self, cachelimit)
- self.total_evicts = 0 # number of cache evictions
-
- # Current offset in file.
- self.offset = ZEC3_HEADER_SIZE
-
- # Map offset in file to (size, CircularCacheEntry) pair, or to
- # (size, None) if the offset starts a free block.
- self.filemap = {ZEC3_HEADER_SIZE: (self.cachelimit - ZEC3_HEADER_SIZE,
- None)}
- # Map key to CircularCacheEntry. A key is an (oid, tid) pair.
- self.key2entry = {}
-
- # Map oid to tid of current revision.
- self.current = {}
-
- # Map oid to list of (start_tid, end_tid) pairs in sorted order.
- # Used to find matching key for load of non-current data.
- self.noncurrent = {}
-
- # The number of overhead bytes needed to store an object pickle
- # on disk (all bytes beyond those needed for the object pickle).
- self.overhead = (cache.Object.TOTAL_FIXED_SIZE +
- cache.OBJECT_HEADER_SIZE)
-
- def restart(self):
- Simulation.restart(self)
- self.evicts = 0
-
- def load(self, oid, size, tid):
- if tid == z64:
- # Trying to load current revision.
- if oid in self.current: # else it's a cache miss
- self.hits += 1
- self.total_hits += 1
- return
-
- # May or may not be trying to load current revision.
- cur_tid = self.current.get(oid)
- if cur_tid == tid:
- self.hits += 1
- self.total_hits += 1
- return
-
- # It's a load for non-current data. Do we know about this oid?
- L = self.noncurrent.get(oid)
- if L is None:
- return # cache miss
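- # Example: if L == [(10, 20), (20, 35)], a load at tid 25 lands on
- # the (20, 35) pair (20 < 25 <= 35) and is a hit, while a load at
- # tid 40 falls past 35 and is a miss.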
- i = bisect.bisect_left(L, (tid, None))
- if i == 0:
- # This tid is smaller than any we know about -- miss.
- return
- lo, hi = L[i-1]
- assert lo < tid
- if tid > hi:
- # No data in the right tid range -- miss.
- return
- # Cache hit.
- self.hits += 1
- self.total_hits += 1
-
- # (oid, tid) is in the cache. Remove it: take it out of key2entry,
- # and in `filemap` mark the space it occupied as being free. The
- # caller is responsible for removing it from `current` or `noncurrent`.
- def _remove(self, oid, tid):
- key = oid, tid
- e = self.key2entry.pop(key)
- pos = e.offset
- size, _e = self.filemap[pos]
- assert e is _e
- self.filemap[pos] = size, None
-
- def _remove_noncurrent_revisions(self, oid):
- noncurrent_list = self.noncurrent.get(oid)
- if noncurrent_list:
- self.invals += len(noncurrent_list)
- self.total_invals += len(noncurrent_list)
- for start_tid, end_tid in noncurrent_list:
- self._remove(oid, start_tid)
- del self.noncurrent[oid]
-
- def inval(self, oid, tid):
- if tid == z64:
- # This is part of startup cache verification: forget everything
- # about this oid.
- self._remove_noncurrent_revisions(oid)
-
- cur_tid = self.current.get(oid)
- if cur_tid is None:
- # We don't have current data, so nothing more to do.
- return
-
- # We had current data for oid, but no longer.
- self.invals += 1
- self.total_invals += 1
- del self.current[oid]
- if tid == z64:
- # Startup cache verification: forget this oid entirely.
- self._remove(oid, cur_tid)
- return
-
- # Our current data becomes non-current data.
- # Add the validity range to the list of non-current data for oid.
- assert cur_tid < tid
- L = self.noncurrent.setdefault(oid, [])
- bisect.insort_left(L, (cur_tid, tid))
- # Update the end of oid's validity range in its CircularCacheEntry.
- e = self.key2entry[oid, cur_tid]
- assert e.end_tid == z64
- e.end_tid = tid
-
- def write(self, oid, size, start_tid, end_tid):
- if end_tid == z64:
- # Storing current revision.
- if oid in self.current: # we already have it in cache
- return
- self.current[oid] = start_tid
- self.writes += 1
- self.total_writes += 1
- self.add(oid, size, start_tid)
- return
- # Storing non-current revision.
- L = self.noncurrent.setdefault(oid, [])
- p = start_tid, end_tid
- if p in L:
- return # we already have it in cache
- bisect.insort_left(L, p)
- self.writes += 1
- self.total_writes += 1
- self.add(oid, size, start_tid, end_tid)
-
- # Add `oid` to the cache, evicting objects as needed to make room.
- # This updates `filemap` and `key2entry`; it's the caller's
- # responsibility to update `current` or `noncurrent` appropriately.
- def add(self, oid, size, start_tid, end_tid=z64):
- key = oid, start_tid
- assert key not in self.key2entry
- size += self.overhead
- avail = self.makeroom(size)
- e = CircularCacheEntry(key, end_tid, self.offset)
- self.filemap[self.offset] = size, e
- self.key2entry[key] = e
- self.offset += size
- # All the space made available must be accounted for in filemap.
- excess = avail - size
- if excess:
- self.filemap[self.offset] = excess, None
-
- # Evict enough objects to make at least `need` contiguous bytes, starting
- # at `self.offset`, available. Evicted objects are removed from
- # `filemap`, `key2entry`, `current` and `noncurrent`. The caller is
- # responsible for adding new entries to `filemap` to account for all
- # the freed bytes, and for advancing `self.offset`. The number of bytes
- # freed is the return value, and will be >= need.
- def makeroom(self, need):
- if self.offset + need > self.cachelimit:
- self.offset = ZEC3_HEADER_SIZE
- pos = self.offset
- while need > 0:
- assert pos < self.cachelimit
- size, e = self.filemap.pop(pos)
- if e: # there is an object here (else it's already free space)
- self.evicts += 1
- self.total_evicts += 1
- assert pos == e.offset
- _e = self.key2entry.pop(e.key)
- assert e is _e
- oid, start_tid = e.key
- if e.end_tid == z64:
- del self.current[oid]
- else:
- L = self.noncurrent[oid]
- L.remove((start_tid, e.end_tid))
- need -= size
- pos += size
- return pos - self.offset # total number of bytes freed
-
- def report(self):
- self.check()
- free = used = total = 0
- for size, e in self.filemap.itervalues():
- total += size
- if e:
- used += size
- else:
- free += size
-
- self.inuse = round(100.0 * used / total, 1)
- self.total_inuse = self.inuse
- Simulation.report(self)
-
- def check(self):
- oidcount = 0
- pos = ZEC3_HEADER_SIZE
- while pos < self.cachelimit:
- size, e = self.filemap[pos]
- if e:
- oidcount += 1
- assert self.key2entry[e.key].offset == pos
- pos += size
- assert oidcount == len(self.key2entry)
- assert pos == self.cachelimit
-
- def dump(self):
- print len(self.filemap)
- L = list(self.filemap)
- L.sort()
- for k in L:
- v = self.filemap[k]
- print k, v[0], repr(v[1])
-
-#############################################################################
-# CAUTION: It's most likely that none of the simulators below this
-# point work anymore. A great many changes were needed to teach
-# CircularCacheSimulation (above) about MVCC, including method signature
-# changes and changes in cache file format, and none of the other simulator
-# classes were changed.
-#############################################################################
-
-class ZEOCacheSimulation(Simulation):
- """Simulate the ZEO 1.0 and 2.0 cache behavior.
-
- This assumes the cache is not persistent (we don't know how to
- simulate cache validation.)
- """
-
- extraname = "flips"
-
- def __init__(self, cachelimit):
- # Initialize base class
- Simulation.__init__(self, cachelimit)
- # Initialize additional global statistics
- self.total_flips = 0
-
- def restart(self):
- # Reset base class
- Simulation.restart(self)
- # Reset additional per-run statistics
- self.flips = 0
- # Set up simulation
- self.filesize = [4, 4] # account for magic number
- self.fileoids = [{}, {}]
- self.current = 0 # index into filesize, fileoids
-
- def load(self, oid, size):
- if (self.fileoids[self.current].get(oid) or
- self.fileoids[1 - self.current].get(oid)):
- self.hits += 1
- self.total_hits += 1
- else:
- self.write(oid, size)
-
- def write(self, oid, size):
- # Fudge because size is rounded up to multiples of 256. (31
- # is header overhead per cache record; 127 is to compensate
- # for rounding up to multiples of 256.)
- size = size + 31 - 127
- if self.filesize[self.current] + size > self.cachelimit / 2:
- # Cache flip
- self.flips += 1
- self.total_flips += 1
- self.current = 1 - self.current
- self.filesize[self.current] = 4
- self.fileoids[self.current] = {}
- self.filesize[self.current] += size
- self.fileoids[self.current][oid] = 1
-
- def inval(self, oid):
- if self.fileoids[self.current].get(oid):
- self.invals += 1
- self.total_invals += 1
- del self.fileoids[self.current][oid]
- elif self.fileoids[1 - self.current].get(oid):
- self.invals += 1
- self.total_invals += 1
- del self.fileoids[1 - self.current][oid]
-
-class AltZEOCacheSimulation(ZEOCacheSimulation):
- """A variation of the ZEO cache that copies to the current file.
-
- When a hit is found in the non-current cache file, it is copied to
- the current cache file. Exception: when the copy would cause a
- cache flip, we don't copy (this is part laziness, part concern
- over causing extraneous flips).
- """
-
- def load(self, oid, size):
- if self.fileoids[self.current].get(oid):
- self.hits += 1
- self.total_hits += 1
- elif self.fileoids[1 - self.current].get(oid):
- self.hits += 1
- self.total_hits += 1
- # Simulate a write, unless it would cause a flip
- size = size + 31 - 127
- if self.filesize[self.current] + size <= self.cachelimit / 2:
- self.filesize[self.current] += size
- self.fileoids[self.current][oid] = 1
- del self.fileoids[1 - self.current][oid]
- else:
- self.write(oid, size)
-
-class LRUCacheSimulation(Simulation):
-
- extraname = "evicts"
-
- def __init__(self, cachelimit):
- # Initialize base class
- Simulation.__init__(self, cachelimit)
- # Initialize additional global statistics
- self.total_evicts = 0
-
- def restart(self):
- # Reset base class
- Simulation.restart(self)
- # Reset additional per-run statistics
- self.evicts = 0
- # Set up simulation
- self.cache = {}
- self.size = 0
- self.head = Node(None, None)
- self.head.linkbefore(self.head)
-
- def load(self, oid, size):
- node = self.cache.get(oid)
- if node is not None:
- self.hits += 1
- self.total_hits += 1
- node.linkbefore(self.head)
- else:
- self.write(oid, size)
-
- def write(self, oid, size):
- node = self.cache.get(oid)
- if node is not None:
- node.unlink()
- assert self.head.next is not None
- self.size -= node.size
- node = Node(oid, size)
- self.cache[oid] = node
- node.linkbefore(self.head)
- self.size += size
- # Evict LRU nodes
- while self.size > self.cachelimit:
- self.evicts += 1
- self.total_evicts += 1
- node = self.head.next
- assert node is not self.head
- node.unlink()
- assert self.head.next is not None
- del self.cache[node.oid]
- self.size -= node.size
-
- def inval(self, oid):
- node = self.cache.get(oid)
- if node is not None:
- assert node.oid == oid
- self.invals += 1
- self.total_invals += 1
- node.unlink()
- assert self.head.next is not None
- del self.cache[oid]
- self.size -= node.size
- assert self.size >= 0
-
-class Node(object):
- """Node in a doubly-linked list, storing oid and size as payload.
-
- A node can be linked or unlinked; in the latter case, next and
- prev are None. Initially a node is unlinked.
- """
-
- __slots__ = ['prev', 'next', 'oid', 'size']
-
- def __init__(self, oid, size):
- self.oid = oid
- self.size = size
- self.prev = self.next = None
-
- def unlink(self):
- prev = self.prev
- next = self.next
- if prev is not None:
- assert next is not None
- assert prev.next is self
- assert next.prev is self
- prev.next = next
- next.prev = prev
- self.prev = self.next = None
- else:
- assert next is None
-
- def linkbefore(self, next):
- self.unlink()
- prev = next.prev
- if prev is None:
- assert next.next is None
- prev = next
- self.prev = prev
- self.next = next
- prev.next = next.prev = self
-
-am = object()
-a1in = object()
-a1out = object()
-
-class Node2Q(Node):
-
- __slots__ = ["kind", "hits"]
-
- def __init__(self, oid, size, kind=None):
- Node.__init__(self, oid, size)
- self.kind = kind
- self.hits = 0
-
- def linkbefore(self, next):
- if next.kind != self.kind:
- self.kind = next.kind
- Node.linkbefore(self, next)
-
-class TwoQSimulation(Simulation):
- # The original 2Q algorithm is page based and the authors offer
- # tuning guidelines based on a page-based cache. Our cache is
- # object based, so, for example, it's hard to compute the number
- # of oids to store in a1out based on the size of a1in.
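- #
- # Lifecycle sketch: a new object enters a1in (a FIFO). When evicted
- # from a1in, only its oid is remembered in a1out. A later hit in
- # a1out promotes the object to am (an LRU), provided it saw at least
- # `threshold` hits while it lived in a1in.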
-
- extras = "evicts", "hothit", "am_add"
-
- NodeClass = Node2Q
-
- def __init__(self, cachelimit, outlen=10000, threshold=0):
- Simulation.__init__(self, cachelimit)
-
- # The promotion threshold: If a hit occurs in a1out, it is
- # promoted to am if the number of hits on the object while it
- # was in a1in is at least threshold. The standard 2Q scheme
- # uses a threshold of 0.
- self.threshold = threshold
- self.am_limit = 3 * self.cachelimit / 4
- self.a1in_limit = self.cachelimit / 4
-
- self.cache = {}
- self.am_size = 0
- self.a1in_size = 0
- self.a1out_size = 0
-
- self.total_evicts = 0
- self.total_hothit = 0
- self.total_am_add = 0
- self.a1out_limit = outlen
-
- # An LRU queue of hot objects
- self.am = self.NodeClass(None, None, am)
- self.am.linkbefore(self.am)
- # A FIFO queue of recently referenced objects. Its purpose
- # is to absorb references to objects that are accessed a few
- # times in short order, then forgotten about.
- self.a1in = self.NodeClass(None, None, a1in)
- self.a1in.linkbefore(self.a1in)
- # A FIFO queue of recently referenced oids.
- # This queue only stores the oids, not any data. If we get a
- # hit in this queue, promote the object to am.
- self.a1out = self.NodeClass(None, None, a1out)
- self.a1out.linkbefore(self.a1out)
-
- def makespace(self, size):
- for space in 0, size:
- if self.enoughspace(size):
- return
- self.evict_a1in(space)
- if self.enoughspace(size):
- return
- self.evict_am(space)
-
- def enoughspace(self, size):
- totalsize = self.a1in_size + self.am_size
- return totalsize + size < self.cachelimit
-
- def evict_a1in(self, extra):
- while self.a1in_size + extra > self.a1in_limit:
- if self.a1in.next is self.a1in:
- return
- assert self.a1in.next is not None
- node = self.a1in.next
- self.evicts += 1
- self.total_evicts += 1
- node.linkbefore(self.a1out)
- self.a1out_size += 1
- self.a1in_size -= node.size
- if self.a1out_size > self.a1out_limit:
- assert self.a1out.next is not None
- node = self.a1out.next
- node.unlink()
- del self.cache[node.oid]
- self.a1out_size -= 1
-
- def evict_am(self, extra):
- while self.am_size + extra > self.am_limit:
- if self.am.next is self.am:
- return
- assert self.am.next is not None
- node = self.am.next
- self.evicts += 1
- self.total_evicts += 1
- # This node hasn't been accessed in a while, so just
- # forget about it.
- node.unlink()
- del self.cache[node.oid]
- self.am_size -= node.size
-
- def write(self, oid, size):
- # A write always follows a read (ZODB doesn't allow blind writes).
- # So this write must have followed a recent read of the object.
- # Don't change its position, but do update the size.
-
- # XXX For now, don't evict pages if the new version of the object
- # is big enough to require eviction.
- node = self.cache.get(oid)
- if node is None or node.kind is a1out:
- return
- if node.kind is am:
- self.am_size = self.am_size - node.size + size
- node.size = size
- else:
- self.a1in_size = self.a1in_size - node.size + size
- node.size = size
-
- def load(self, oid, size):
- node = self.cache.get(oid)
- if node is not None:
- if node.kind is am:
- self.hits += 1
- self.total_hits += 1
- self.hothit += 1
- self.total_hothit += 1
- node.hits += 1
- node.linkbefore(self.am)
- elif node.kind is a1in:
- self.hits += 1
- self.total_hits += 1
- node.hits += 1
- elif node.kind is a1out:
- self.a1out_size -= 1
- if node.hits >= self.threshold:
- self.makespace(node.size)
- self.am_size += node.size
- node.linkbefore(self.am)
- self.cache[oid] = node
- self.am_add += 1
- self.total_am_add += 1
- else:
- node.unlink()
- self.insert(oid, size)
- else:
- self.insert(oid, size)
-
- def insert(self, oid, size):
- # New objects enter the cache via a1in. If they
- # are frequently used over a long enough time, they
- # will be promoted to am -- but only via a1out.
- self.makespace(size)
- node = self.NodeClass(oid, size, a1in)
- node.linkbefore(self.a1in)
- self.cache[oid] = node
- self.a1in_size += node.size
-
- def inval(self, oid):
- # The original 2Q algorithm didn't have to deal with
- # invalidations. My own solution: Move it to the head of
- # a1out.
- node = self.cache.get(oid)
- if node is None:
- return
- self.invals += 1
- self.total_invals += 1
- # XXX Should an invalidation to a1out count?
- if node.kind is a1out:
- return
- node.linkbefore(self.a1out)
- if node.kind is am:
- self.am_size -= node.size
- else:
- self.a1in_size -= node.size
-
- def restart(self):
- Simulation.restart(self)
-
- self.evicts = 0
- self.hothit = 0
- self.am_add = 0
-
-lruT = object()
-lruB = object()
-fifoT = object()
-fifoB = object()
-
-class ARCCacheSimulation(Simulation):
-
- # Based on the paper ARC: A Self-Tuning, Low Overhead Replacement
- # Cache by Nimrod Megiddo and Dharmendra S. Modha, USENIX FAST
- # 2003. The paper describes a block-based cache. A lot of the
- # details need to be fiddled to work with an object-based cache.
- # For size issues, the key insight ended up being conditions
- # A.1-A.5 rather than the details of the algorithm in Fig. 4.
-
- extras = "lruThits", "evicts", "p"
-
- def __init__(self, cachelimit):
- Simulation.__init__(self, cachelimit)
- # There are two pairs of linked lists. Each pair has a top and
- # bottom half. The bottom half contains metadata, but not actual
- # objects.
-
- # LRU list of frequently used objects
- self.lruT = Node2Q(None, None, lruT)
- self.lruT.linkbefore(self.lruT)
- self.lruT_len = 0
- self.lruT_size = 0
-
- self.lruB = Node2Q(None, None, lruB)
- self.lruB.linkbefore(self.lruB)
- self.lruB_len = 0
- self.lruB_size = 0
-
- # FIFO list of objects seen once
- self.fifoT = Node2Q(None, None, fifoT)
- self.fifoT.linkbefore(self.fifoT)
- self.fifoT_len = 0
- self.fifoT_size = 0
-
- self.fifoB = Node2Q(None, None, fifoB)
- self.fifoB.linkbefore(self.fifoB)
- self.fifoB_len = 0
- self.fifoB_size = 0
-
- # maps oid to node
- self.cache = {}
-
- # The paper says that p should be adjusted by 1 at a minimum:
- # "The compound effect of such small increments and decrements
- # to p is quite profound as we will demonstrate in the next
- # section." Not really, as far as I can tell. In my traces
- # with a very small cache, it was taking far too long to adjust
- # towards favoring some FIFO component. I would guess that the
- # chief difference is that our caches are much bigger than the
- # ones they experimented with. Their biggest cache had 512K
- # entries, while our smallest cache will have 40 times that many
- # entries.
-
- self.p = 0
- # XXX multiply computed adjustments to p by walk_factor
- self.walk_factor = 500
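-
- # Worked example (illustrative numbers): on a hit in fifoB with
- # lruB_size = 30000 and fifoB_size = 10000, p grows by
- # max(1, 30000 / 10000) * 500 = 1500 bytes, shifting the balance
- # toward keeping more of the fifoT (once-seen) entries.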
-
- # statistics
- self.total_hits = 0
- self.total_lruThits = 0
- self.total_fifoThits = 0
- self.total_evicts = 0
-
- def restart(self):
- Simulation.restart(self)
- self.hits = 0
- self.lruThits = 0
- self.fifoThits = 0
- self.evicts = 0
-
- def write(self, oid, size):
- pass
-
- def replace(self, lruB=False):
- self.evicts += 1
- self.total_evicts += 1
- if self.fifoT_size > self.p or (lruB and self.fifoT_size == self.p):
- node = self.fifoT.next
- if node is self.fifoT:
- return 0
- assert node is not self.fifoT, self.stats()
- node.linkbefore(self.fifoB)
- self.fifoT_len -= 1
- self.fifoT_size -= node.size
- self.fifoB_len += 1
- self.fifoB_size += node.size
- else:
- node = self.lruT.next
- if node is self.lruT:
- return 0
- assert node is not self.lruT, self.stats()
- node.linkbefore(self.lruB)
- self.lruT_len -= 1
- self.lruT_size -= node.size
- self.lruB_len += 1
- self.lruB_size += node.size
- return node.size
-
- def stats(self):
- self.totalsize = self.lruT_size + self.fifoT_size
- self.allsize = self.totalsize + self.lruB_size + self.fifoB_size
- print "cachelimit = %s totalsize=%s allsize=%s" % (
- addcommas(self.cachelimit),
- addcommas(self.totalsize),
- addcommas(self.allsize))
-
- fmt = (
- "p=%(p)d\n"
- "lruT = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
- "fifoT = %(fifoT_len)5d / %(fifoT_size)8d / %(fifoThits)d\n"
- "lruB = %(lruB_len)5d / %(lruB_size)8d\n"
- "fifoB = %(fifoB_len)5d / %(fifoB_size)8d\n"
- "loads=%(loads)d hits=%(hits)d evicts=%(evicts)d\n"
- )
- print fmt % self.__dict__
-
- def report(self):
- self.total_p = self.p
- Simulation.report(self)
-## self.stats()
-
- def load(self, oid, size):
-## maybe(self.stats, p=0.002)
- node = self.cache.get(oid)
- if node is None:
- # cache miss: We're going to insert a new object in fifoT.
- # If fifo is full, we'll need to evict something to make
- # room for it.
-
- prev = need = size
- while need > 0:
- if size + self.fifoT_size + self.fifoB_size >= self.cachelimit:
- if need + self.fifoT_size >= self.cachelimit:
- node = self.fifoB.next
- assert node is not self.fifoB, self.stats()
- node.unlink()
- del self.cache[node.oid]
- self.fifoB_size -= node.size
- self.fifoB_len -= 1
- self.evicts += 1
- self.total_evicts += 1
- else:
- node = self.fifoB.next
- assert node is not self.fifoB, self.stats()
- node.unlink()
- del self.cache[node.oid]
- self.fifoB_size -= node.size
- self.fifoB_len -= 1
- if self.fifoT_size + self.lruT_size > self.cachelimit:
- need -= self.replace()
- else:
- incache_size = self.fifoT_size + self.lruT_size + need
- total_size = (incache_size + self.fifoB_size
- + self.lruB_size)
- if total_size >= self.cachelimit * 2:
- node = self.lruB.next
- if node is self.lruB:
- break
- assert node is not self.lruB
- node.unlink()
- del self.cache[node.oid]
- self.lruB_size -= node.size
- self.lruB_len -= 1
- elif incache_size > self.cachelimit:
- need -= self.replace()
- else:
- break
- if need == prev:
- # XXX hack, apparently we can't get rid of anything else
- break
- prev = need
-
- node = Node2Q(oid, size)
- node.linkbefore(self.fifoT)
- self.fifoT_len += 1
- self.fifoT_size += size
- self.cache[oid] = node
- else:
- # a cache hit, but possibly in a bottom list that doesn't
- # actually hold the object
- if node.kind is lruT:
- node.linkbefore(self.lruT)
-
- self.hits += 1
- self.total_hits += 1
- self.lruThits += 1
- self.total_lruThits += 1
-
- elif node.kind is fifoT:
- node.linkbefore(self.lruT)
- self.fifoT_len -= 1
- self.lruT_len += 1
- self.fifoT_size -= node.size
- self.lruT_size += node.size
-
- self.hits += 1
- self.total_hits += 1
- self.fifoThits += 1
- self.total_fifoThits += 1
-
- elif node.kind is fifoB:
- node.linkbefore(self.lruT)
- self.fifoB_len -= 1
- self.lruT_len += 1
- self.fifoB_size -= node.size
- self.lruT_size += node.size
-
- # XXX need a better min than 1?
-## print "adapt+", max(1, self.lruB_size // self.fifoB_size)
- delta = max(1, self.lruB_size / max(1, self.fifoB_size))
- self.p += delta * self.walk_factor
- if self.p > self.cachelimit:
- self.p = self.cachelimit
-
- need = node.size
- if self.lruT_size + self.fifoT_size + need > self.cachelimit:
- while need > 0:
- r = self.replace()
- if not r:
- break
- need -= r
-
- elif node.kind is lruB:
- node.linkbefore(self.lruT)
- self.lruB_len -= 1
- self.lruT_len += 1
- self.lruB_size -= node.size
- self.lruT_size += node.size
-
- # XXX need a better min than 1?
-## print "adapt-", max(1, self.fifoB_size // self.lruB_size)
- delta = max(1, self.fifoB_size / max(1, self.lruB_size))
- self.p -= delta * self.walk_factor
- if self.p < 0:
- self.p = 0
-
- need = node.size
- if self.lruT_size + self.fifoT_size + need > self.cachelimit:
- while need > 0:
- r = self.replace(lruB=True)
- if not r:
- break
- need -= r
-
- def inval(self, oid):
- pass
-
- def extraheader(self):
- pass
-
-class OracleSimulation(LRUCacheSimulation):
- # Not sure how to implement this yet. This is a cache where I
- # cheat to see how well we could actually do. The cache
- # replacement problem for multi-size caches is NP-hard, so we're
- # not going to have an optimal solution.
-
- # At the moment, the oracle is mostly blind. It knows which
- # objects will be referenced more than once, so that it can
- # ignore objects referenced only once. In most traces, these
- # objects account for about 20% of references.
-
- def __init__(self, cachelimit, filename):
- LRUCacheSimulation.__init__(self, cachelimit)
- self.count = {}
- self.scan(filename)
-
- def load(self, oid, size):
- node = self.cache.get(oid)
- if node is not None:
- self.hits += 1
- self.total_hits += 1
- node.linkbefore(self.head)
- else:
- if oid in self.count:
- self.write(oid, size)
-
- def scan(self, filename):
- # scan the file in advance to figure out which objects will
- # be referenced more than once.
- f = open(filename, "rb")
- struct_unpack = struct.unpack
- f_read = f.read
- offset = 0
- while 1:
- # Read a record and decode it
- r = f_read(8)
- if len(r) < 8:
- break
- offset += 8
- ts, code = struct_unpack(">ii", r)
- if ts == 0:
- # Must be a misaligned record caused by a crash
- ##print "Skipping 8 bytes at offset", offset-8
- continue
- r = f_read(16)
- if len(r) < 16:
- break
- offset += 16
- oid, serial = struct_unpack(">8s8s", r)
- if code & 0x70 == 0x20:
- # only look at loads
- self.count[oid] = self.count.get(oid, 0) + 1
-
- all = len(self.count)
-
- # Now remove everything with count == 1
- once = [oid for oid, count in self.count.iteritems()
- if count == 1]
- for oid in once:
- del self.count[oid]
-
- print "Scanned file, %d unique oids, %d repeats" % (
- all, len(self.count))
-
-class BuddyCacheSimulation(LRUCacheSimulation):
-
- def __init__(self, cachelimit):
- LRUCacheSimulation.__init__(self, roundup(cachelimit))
-
- def restart(self):
- LRUCacheSimulation.restart(self)
- self.allocator = self.allocatorFactory(self.cachelimit)
-
- def allocatorFactory(self, size):
- return BuddyAllocator(size)
-
- # LRUCacheSimulation.load() is just fine
-
- def write(self, oid, size):
- node = self.cache.get(oid)
- if node is not None:
- node.unlink()
- assert self.head.next is not None
- self.size -= node.size
- self.allocator.free(node)
- while 1:
- node = self.allocator.alloc(size)
- if node is not None:
- break
- # Failure to allocate. Evict something and try again.
- node = self.head.next
- assert node is not self.head
- self.evicts += 1
- self.total_evicts += 1
- node.unlink()
- assert self.head.next is not None
- del self.cache[node.oid]
- self.size -= node.size
- self.allocator.free(node)
- node.oid = oid
- self.cache[oid] = node
- node.linkbefore(self.head)
- self.size += node.size
-
- def inval(self, oid):
- node = self.cache.get(oid)
- if node is not None:
- assert node.oid == oid
- self.invals += 1
- self.total_invals += 1
- node.unlink()
- assert self.head.next is not None
- del self.cache[oid]
- self.size -= node.size
- assert self.size >= 0
- self.allocator.free(node)
-
-class SimpleCacheSimulation(BuddyCacheSimulation):
-
- def allocatorFactory(self, size):
- return SimpleAllocator(size)
-
- def finish(self):
- BuddyCacheSimulation.finish(self)
- self.allocator.report()
-
-MINSIZE = 256
-
-class BuddyAllocator:
-
- def __init__(self, cachelimit):
- cachelimit = roundup(cachelimit)
- self.cachelimit = cachelimit
- self.avail = {} # Map rounded-up sizes to free list node heads
- self.nodes = {} # Map address to node
- k = MINSIZE
- while k <= cachelimit:
- self.avail[k] = n = Node(None, None) # Not BlockNode; has no addr
- n.linkbefore(n)
- k += k
- node = BlockNode(None, cachelimit, 0)
- self.nodes[0] = node
- node.linkbefore(self.avail[cachelimit])
-
- def alloc(self, size):
- size = roundup(size)
- k = size
- while k <= self.cachelimit:
- head = self.avail[k]
- node = head.next
- if node is not head:
- break
- k += k
- else:
- return None # Store is full, or block is too large
- node.unlink()
- size2 = node.size
- while size2 > size:
- size2 = size2 / 2
- assert size2 >= size
- node.size = size2
- buddy = BlockNode(None, size2, node.addr + size2)
- self.nodes[buddy.addr] = buddy
- buddy.linkbefore(self.avail[size2])
- node.oid = 1 # Flag as in-use
- return node
-
- def free(self, node):
- assert node is self.nodes[node.addr]
- assert node.prev is node.next is None
- node.oid = None # Flag as free
- while node.size < self.cachelimit:
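- # A block's buddy lives at its address XOR its size: e.g. a
- # 512-byte block at address 1024 has its buddy at 1024 ^ 512 == 1536.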
- buddy_addr = node.addr ^ node.size
- buddy = self.nodes[buddy_addr]
- assert buddy.addr == buddy_addr
- if buddy.oid is not None or buddy.size != node.size:
- break
- # Merge node with buddy
- buddy.unlink()
- if buddy.addr < node.addr: # buddy prevails
- del self.nodes[node.addr]
- node = buddy
- else: # node prevails
- del self.nodes[buddy.addr]
- node.size *= 2
- assert node is self.nodes[node.addr]
- node.linkbefore(self.avail[node.size])
-
- def dump(self, msg=""):
- if msg:
- print msg,
- size = MINSIZE
- blocks = bytes = 0
- while size <= self.cachelimit:
- head = self.avail[size]
- node = head.next
- count = 0
- while node is not head:
- count += 1
- node = node.next
- if count:
- print "%d:%d" % (size, count),
- blocks += count
- bytes += count*size
- size += size
- print "-- %d, %d" % (bytes, blocks)
-
-def roundup(size):
- k = MINSIZE
- while k < size:
- k += k
- return k
-
-class SimpleAllocator:
-
- def __init__(self, arenasize):
- self.arenasize = arenasize
- self.avail = BlockNode(None, 0, 0) # Weird: empty block as list head
- self.rover = self.avail
- node = BlockNode(None, arenasize, 0)
- node.linkbefore(self.avail)
- self.taglo = {0: node}
- self.taghi = {arenasize: node}
- # Allocator statistics
- self.nallocs = 0
- self.nfrees = 0
- self.allocloops = 0
- self.freebytes = arenasize
- self.freeblocks = 1
- self.allocbytes = 0
- self.allocblocks = 0
-
- def report(self):
- print ("NA=%d AL=%d NF=%d ABy=%d ABl=%d FBy=%d FBl=%d" %
- (self.nallocs, self.allocloops,
- self.nfrees,
- self.allocbytes, self.allocblocks,
- self.freebytes, self.freeblocks))
-
- def alloc(self, size):
- self.nallocs += 1
- # First fit algorithm
- rover = stop = self.rover
- while 1:
- self.allocloops += 1
- if rover.size >= size:
- break
- rover = rover.next
- if rover is stop:
- return None # We went round the list without finding space
- if rover.size == size:
- self.rover = rover.next
- rover.unlink()
- del self.taglo[rover.addr]
- del self.taghi[rover.addr + size]
- self.freeblocks -= 1
- self.allocblocks += 1
- self.freebytes -= size
- self.allocbytes += size
- return rover
- # Take space from the beginning of the roving pointer
- assert rover.size > size
- node = BlockNode(None, size, rover.addr)
- del self.taglo[rover.addr]
- rover.size -= size
- rover.addr += size
- self.taglo[rover.addr] = rover
- #self.freeblocks += 0 # No change here
- self.allocblocks += 1
- self.freebytes -= size
- self.allocbytes += size
- return node
-
- def free(self, node):
- self.nfrees += 1
- self.freeblocks += 1
- self.allocblocks -= 1
- self.freebytes += node.size
- self.allocbytes -= node.size
- node.linkbefore(self.avail)
- self.taglo[node.addr] = node
- self.taghi[node.addr + node.size] = node
- x = self.taghi.get(node.addr)
- if x is not None:
- # Merge x into node
- x.unlink()
- self.freeblocks -= 1
- del self.taglo[x.addr]
- del self.taghi[x.addr + x.size]
- del self.taglo[node.addr]
- node.addr = x.addr
- node.size += x.size
- self.taglo[node.addr] = node
- x = self.taglo.get(node.addr + node.size)
- if x is not None:
- # Merge x into node
- x.unlink()
- self.freeblocks -= 1
- del self.taglo[x.addr]
- del self.taghi[x.addr + x.size]
- del self.taghi[node.addr + node.size]
- node.size += x.size
- self.taghi[node.addr + node.size] = node
- # It's possible that either one of the merges above invalidated
- # the rover.
- # It's simplest to reset the rover to the newly freed block.
- self.rover = node
-
- def dump(self, msg=""):
- if msg:
- print msg,
- count = 0
- bytes = 0
- node = self.avail.next
- while node is not self.avail:
- bytes += node.size
- count += 1
- node = node.next
- print count, "free blocks,", bytes, "free bytes"
- self.report()
-
-class BlockNode(Node):
-
- __slots__ = ['addr']
-
- def __init__(self, oid, size, addr):
- Node.__init__(self, oid, size)
- self.addr = addr
-
-def testallocator(factory=BuddyAllocator):
- # Run one of Knuth's experiments as a test
- import random
- import heapq # This only runs with Python 2.3, folks :-)
- reportfreq = 100
- cachelimit = 2**17
- cache = factory(cachelimit)
- queue = []
- T = 0
- blocks = 0
- while T < 5000:
- while queue and queue[0][0] <= T:
- time, node = heapq.heappop(queue)
- assert time == T
- ##print "free addr=%d, size=%d" % (node.addr, node.size)
- cache.free(node)
- blocks -= 1
- size = random.randint(100, 2000)
- lifetime = random.randint(1, 100)
- node = cache.alloc(size)
- if node is None:
- print "out of mem"
- cache.dump("T=%4d: %d blocks;" % (T, blocks))
- break
- else:
- ##print "alloc addr=%d, size=%d" % (node.addr, node.size)
- blocks += 1
- heapq.heappush(queue, (T + lifetime, node))
- T = T+1
- if T % reportfreq == 0:
- cache.dump("T=%4d: %d blocks;" % (T, blocks))
-
-def hitrate(loads, hits):
- return "%5.1f%%" % (100.0 * hits / max(1, loads))
-
-def duration(secs):
- mm, ss = divmod(secs, 60)
- hh, mm = divmod(mm, 60)
- if hh:
- return "%d:%02d:%02d" % (hh, mm, ss)
- if mm:
- return "%d:%02d" % (mm, ss)
- return "%d" % ss
-
-def addcommas(n):
- sign, s = '', str(n)
- if s[0] == '-':
- sign, s = '-', s[1:]
- i = len(s) - 3
- while i > 0:
- s = s[:i] + ',' + s[i:]
- i -= 3
- return sign + s
-
-import random
-
-def maybe(f, p=0.5):
- if random.random() < p:
- f()
-
-#############################################################################
-# Thor-like eviction scheme.
-#
-# The cache keeps a list of all objects, and uses a travelling pointer
-# to decay the worth of objects over time.
-
-class ThorNode(Node):
-
- __slots__ = ['worth']
-
- def __init__(self, oid, size, worth=None):
- Node.__init__(self, oid, size)
- self.worth = worth
-
-class ThorListHead(Node):
- def __init__(self):
- Node.__init__(self, 0, 0)
- self.next = self.prev = self
-
-class ThorSimulation(Simulation):
-
- extras = "evicts", "trips"
-
- def __init__(self, cachelimit):
- Simulation.__init__(self, cachelimit)
-
- # Maximum total of object sizes we keep in cache.
- self.maxsize = cachelimit
- # Current total of object sizes in cache.
- self.currentsize = 0
-
- # A worth byte maps to a set of all objects with that worth.
- # This is cheap to keep updated, and makes finding low-worth
- # objects for eviction trivial (just march over the worthsets
- # list, in order).
- self.worthsets = [Set() for dummy in range(256)]
-
- # We keep a circular list of all objects in cache. currentobj
- # walks around it forever. Each time _tick() is called, the
- # worth of currentobj is decreased, basically by shifting
- # right 1, and currentobj moves on to the next object. When
- # an object is first inserted, it enters the list right before
- # currentobj. When an object is accessed, its worth is
- # increased by or'ing in 0x80. This scheme comes from the
- # Thor system, and is an inexpensive way to account for both
- # recency and frequency of access: recency is reflected in
- # the leftmost bit set, and frequency by how many bits are
- # set.
- #
- # Note: because evictions are interleaved with ticks,
- # unlinking an object is tricky, lest we evict currentobj. The
- # class _unlink method takes care of this properly.
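- #
- # Worked example: a fresh 4KB object starts at worth 32 - 12 = 20
- # (0x14) in add(). An access ORs in 0x80, giving 0x94; each later
- # tick roughly halves it: 0x94 -> 0x4a -> 0x25 -> 0x13 ..., so
- # recency decays steadily unless accesses keep re-setting the high
- # bit.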
- self.listhead = ThorListHead()
- self.currentobj = self.listhead
-
- # Map an object.oid to its ThorNode.
- self.oid2object = {}
-
- self.total_evicts = self.total_trips = 0
-
- # Unlink object from the circular list, taking care not to lose
- # track of the current object. Always call this instead of
- # invoking obj.unlink() directly.
- def _unlink(self, obj):
- assert obj is not self.listhead
- if obj is self.currentobj:
- self.currentobj = obj.next
- obj.unlink()
-
- # Change obj.worth to newworth, maintaining invariants.
- def _change_worth(self, obj, newworth):
- if obj.worth != newworth:
- self.worthsets[obj.worth].remove(obj)
- obj.worth = newworth
- self.worthsets[newworth].add(obj)
-
- def add(self, object):
- assert object.oid not in self.oid2object
- self.oid2object[object.oid] = object
-
- newsize = self.currentsize + object.size
- if newsize > self.maxsize:
- self._evictbytes(newsize - self.maxsize)
- self.currentsize += object.size
- object.linkbefore(self.currentobj)
-
- if object.worth is None:
- # Give smaller objects higher initial worth. This favors kicking
- # out unreferenced large objects before kicking out unreferenced
- # small objects. On real life traces, this is a significant
- # win for the hit rate.
- object.worth = 32 - int(round(math.log(object.size, 2)))
- self.worthsets[object.worth].add(object)
-
- # Decrease the worth of the current object, and advance the
- # current object.
- def _tick(self):
- c = self.currentobj
- if c is self.listhead:
- c = c.next
- if c is self.listhead: # list is empty
- return
- self.total_trips += 1
- self.trips += 1
- self._change_worth(c, (c.worth + 1) >> 1)
- self.currentobj = c.next
-
- def access(self, oid):
- self._tick()
- obj = self.oid2object.get(oid)
- if obj is None:
- return None
- self._change_worth(obj, obj.worth | 0x80)
- return obj
-
- # Evict objects of least worth first, until at least nbytes bytes
- # have been freed.
- def _evictbytes(self, nbytes):
- for s in self.worthsets:
- while s:
- if nbytes <= 0:
- return
- obj = s.pop()
- nbytes -= obj.size
- self._evictobj(obj)
-
- def _evictobj(self, obj):
- self.currentsize -= obj.size
- self.worthsets[obj.worth].discard(obj)
- del self.oid2object[obj.oid]
- self._unlink(obj)
-
- self.evicts += 1
- self.total_evicts += 1
-
- def _evict_without_bumping_evict_stats(self, obj):
- self._evictobj(obj)
- self.evicts -= 1
- self.total_evicts -= 1
-
- # Simulator overrides from here on.
-
- def restart(self):
- # Reset base class
- Simulation.restart(self)
- # Reset additional per-run statistics
- self.evicts = self.trips = 0
-
- def write(self, oid, size):
- obj = self.oid2object.get(oid)
- worth = None
- if obj is not None:
- worth = obj.worth
- self._evict_without_bumping_evict_stats(obj)
- self.add(ThorNode(oid, size, worth))
-
- def load(self, oid, size):
- if self.access(oid) is not None:
- self.hits += 1
- self.total_hits += 1
- else:
- self.write(oid, size)
-
- def inval(self, oid):
- obj = self.oid2object.get(oid)
- if obj is not None:
- self.invals += 1
- self.total_invals += 1
- self._evict_without_bumping_evict_stats(obj)
-
- # Take the "x" off to see additional stats after each restart period.
- def xreport(self):
- Simulation.report(self)
- print 'non-empty worth sets', sum(map(bool, self.worthsets)),
- print 'objects', len(self.oid2object),
- print 'size', self.currentsize
-
-#############################################################################
-# Perfection: What if the cache were unbounded, and never forgot anything?
-# This simulator answers that question directly; the cache size parameter
-# isn't used.
-
-class UnboundedSimulation(Simulation):
-
- extraname = 'evicts' # for some reason we *have* to define >= 1 extra
-
- def __init__(self, cachelimit):
- Simulation.__init__(self, cachelimit)
- self.oids = Set()
- self.evicts = self.total_evicts = 0
-
- def write(self, oid, size):
- self.oids.add(oid)
-
- def load(self, oid, size):
- if oid in self.oids:
- self.hits += 1
- self.total_hits += 1
- else:
- self.oids.add(oid)
-
- def inval(self, oid):
- if oid in self.oids:
- self.invals += 1
- self.total_invals += 1
- self.oids.remove(oid)
-
-if __name__ == "__main__":
- sys.exit(main())
Deleted: ZODB/branches/jim-simulation/src/ZODB/scripts/stats.py
===================================================================
--- ZODB/branches/jim-simulation/src/ZODB/scripts/stats.py 2010-06-01 17:50:56 UTC (rev 112889)
+++ ZODB/branches/jim-simulation/src/ZODB/scripts/stats.py 2010-06-01 17:51:54 UTC (rev 112890)
@@ -1,387 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Trace file statistics analyzer.
-
-Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile
--h: print histogram of object load frequencies
--i: summarizing interval in minutes (default 15; max 60)
--q: quiet; don't print summaries
--s: print histogram of object sizes
--S: don't print statistics
--v: verbose; print each record
--X: enable heuristic checking for misaligned records: oids > 2**32
- will be rejected; this requires the tracefile to be seekable
-"""
-
-"""File format:
-
-Each record is 26 bytes, plus a variable number of bytes to store an oid,
-with the following layout. Numbers are big-endian integers.
-
-Offset Size Contents
-
-0 4 timestamp (seconds since 1/1/1970)
-4 3 data size, in 256-byte increments, rounded up
-7 1 code (see below)
-8 2 object id length
-10 8 start tid
-18 8 end tid
-26 variable object id
-
-The code at offset 7 packs three fields:
-
-Mask bits Contents
-
-0x80 1 set if there was a non-empty version string
-0x7e 6 function and outcome code
-0x01 1 current cache file (0 or 1)
-
-The "current cache file" bit is no longer used; it refers to a 2-file
-cache scheme used before ZODB 3.3.
-
-The function and outcome codes are documented in detail at the end of
-this file in the 'explain' dictionary. Note that the keys there (and
-also the arguments to _trace() in ClientStorage.py) are 'code & 0x7e',
-i.e. the low bit is always zero.
-"""
-
-import sys
-import time
-import getopt
-import struct
-from types import StringType
-
-def usage(msg):
- print >> sys.stderr, msg
- print >> sys.stderr, __doc__
-
-def main():
- # Parse options
- verbose = False
- quiet = False
- dostats = True
- print_size_histogram = False
- print_histogram = False
- interval = 15*60 # Every 15 minutes
- heuristic = False
- try:
- opts, args = getopt.getopt(sys.argv[1:], "hi:qsSvX")
- except getopt.error, msg:
- usage(msg)
- return 2
- for o, a in opts:
- if o == '-h':
- print_histogram = True
- elif o == "-i":
- interval = int(60 * float(a))
- if interval <= 0:
- interval = 60
- elif interval > 3600:
- interval = 3600
- elif o == "-q":
- quiet = True
- verbose = False
- elif o == "-s":
- print_size_histogram = True
- elif o == "-S":
- dostats = False
- elif o == "-v":
- verbose = True
- elif o == '-X':
- heuristic = True
- else:
- assert False, (o, opts)
-
- if len(args) != 1:
- usage("exactly one file argument required")
- return 2
- filename = args[0]
-
- # Open file
- if filename.endswith(".gz"):
- # Open gzipped file
- try:
- import gzip
- except ImportError:
- print >> sys.stderr, "can't read gzipped files (no module gzip)"
- return 1
- try:
- f = gzip.open(filename, "rb")
- except IOError, msg:
- print >> sys.stderr, "can't open %s: %s" % (filename, msg)
- return 1
- elif filename == '-':
- # Read from stdin
- f = sys.stdin
- else:
- # Open regular file
- try:
- f = open(filename, "rb")
- except IOError, msg:
- print >> sys.stderr, "can't open %s: %s" % (filename, msg)
- return 1
-
- rt0 = time.time()
- bycode = {} # map code to count of occurrences
- byinterval = {} # map code to count in current interval
- records = 0 # number of trace records read
- versions = 0 # number of trace records with versions
- datarecords = 0 # number of records with dlen set
- datasize = 0L # sum of dlen across records with dlen set
- oids = {} # map oid to number of times it was loaded
- bysize = {} # map data size to {oid: count of loads of that size}
- bysizew = {} # map data size to {oid: count of writes of that size}
- total_loads = 0
- t0 = None # first timestamp seen
- te = None # most recent timestamp seen
- h0 = None # timestamp at start of current interval
- he = None # timestamp at end of current interval
- thisinterval = None # generally te//interval
- f_read = f.read
- unpack = struct.unpack
- FMT = ">iiH8s8s"
- FMT_SIZE = struct.calcsize(FMT)
- assert FMT_SIZE == 26
- # Read file, gathering statistics, and printing each record if verbose.
- try:
- while 1:
- r = f_read(FMT_SIZE)
- if len(r) < FMT_SIZE:
- break
- ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
- if ts == 0:
- # Must be a misaligned record caused by a crash.
- if not quiet:
- print "Skipping 8 bytes at offset", f.tell() - FMT_SIZE
- f.seek(f.tell() - FMT_SIZE + 8)
- continue
- oid = f_read(oidlen)
- if len(oid) < oidlen:
- break
- records += 1
- if t0 is None:
- t0 = ts
- thisinterval = t0 // interval
- h0 = he = ts
- te = ts
- if ts // interval != thisinterval:
- if not quiet:
- dumpbyinterval(byinterval, h0, he)
- byinterval = {}
- thisinterval = ts // interval
- h0 = ts
- he = ts
- dlen, code = code & 0x7fffff00, code & 0xff
- if dlen:
- datarecords += 1
- datasize += dlen
- if code & 0x80:
- version = 'V'
- versions += 1
- else:
- version = '-'
- code &= 0x7e
- bycode[code] = bycode.get(code, 0) + 1
- byinterval[code] = byinterval.get(code, 0) + 1
- if dlen:
- if code & 0x70 == 0x20: # All loads
- bysize[dlen] = d = bysize.get(dlen) or {}
- d[oid] = d.get(oid, 0) + 1
- elif code & 0x70 == 0x50: # All stores
- bysizew[dlen] = d = bysizew.get(dlen) or {}
- d[oid] = d.get(oid, 0) + 1
- if verbose:
- print "%s %02x %s %016x %016x %c %s" % (
- time.ctime(ts)[4:-5],
- code,
- oid_repr(oid),
- U64(start_tid),
- U64(end_tid),
- version,
- dlen and str(dlen) or "")
- if code & 0x70 == 0x20:
- oids[oid] = oids.get(oid, 0) + 1
- total_loads += 1
- elif code == 0x00: # restart
- if not quiet:
- dumpbyinterval(byinterval, h0, he)
- byinterval = {}
- thisinterval = ts // interval
- h0 = he = ts
- if not quiet:
- print time.ctime(ts)[4:-5],
- print '='*20, "Restart", '='*20
- except KeyboardInterrupt:
- print "\nInterrupted. Stats so far:\n"
-
- end_pos = f.tell()
- f.close()
- rte = time.time()
- if not quiet:
- dumpbyinterval(byinterval, h0, he)
-
- # Error if nothing was read
- if not records:
- print >> sys.stderr, "No records processed"
- return 1
-
- # Print statistics
- if dostats:
- print
- print "Read %s trace records (%s bytes) in %.1f seconds" % (
- addcommas(records), addcommas(end_pos), rte-rt0)
- print "Versions: %s records used a version" % addcommas(versions)
- print "First time: %s" % time.ctime(t0)
- print "Last time: %s" % time.ctime(te)
- print "Duration: %s seconds" % addcommas(te-t0)
- print "Data recs: %s (%.1f%%), average size %.1f KB" % (
- addcommas(datarecords),
- 100.0 * datarecords / records,
- datasize / 1024.0 / datarecords)
- print "Hit rate: %.1f%% (load hits / loads)" % hitrate(bycode)
- print
- codes = bycode.keys()
- codes.sort()
- print "%13s %4s %s" % ("Count", "Code", "Function (action)")
- for code in codes:
- print "%13s %02x %s" % (
- addcommas(bycode.get(code, 0)),
- code,
- explain.get(code) or "*** unknown code ***")
-
- # Print histogram.
- if print_histogram:
- print
- print "Histogram of object load frequency"
- total = len(oids)
- print "Unique oids: %s" % addcommas(total)
- print "Total loads: %s" % addcommas(total_loads)
- s = addcommas(total)
- width = max(len(s), len("objects"))
- fmt = "%5d %" + str(width) + "s %5.1f%% %5.1f%% %5.1f%%"
- hdr = "%5s %" + str(width) + "s %6s %6s %6s"
- print hdr % ("loads", "objects", "%obj", "%load", "%cum")
- cum = 0.0
- for binsize, count in histogram(oids):
- obj_percent = 100.0 * count / total
- load_percent = 100.0 * count * binsize / total_loads
- cum += load_percent
- print fmt % (binsize, addcommas(count),
- obj_percent, load_percent, cum)
-
- # Print size histogram.
- if print_size_histogram:
- print
- print "Histograms of object sizes"
- print
- dumpbysize(bysizew, "written", "writes")
- dumpbysize(bysize, "loaded", "loads")
-
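For reference, the 26-byte fixed header that the loop in main() unpacks can be reproduced with struct. A minimal sketch with invented field values (oid 42, zero transaction ids); only FMT itself comes from this script:

    import struct, time

    FMT = ">iiH8s8s"    # ts, code, oidlen, start_tid, end_tid
    assert struct.calcsize(FMT) == 26

    # Pack a synthetic "load (hit)" record for a made-up 8-byte oid.
    oid = struct.pack(">Q", 42)
    record = struct.pack(FMT, int(time.time()), 0x22, len(oid),
                         "\0" * 8, "\0" * 8) + oid

    ts, code, oidlen, start_tid, end_tid = struct.unpack(FMT, record[:26])
    assert oidlen == 8 and record[26:26 + oidlen] == oid

    # A data length rides in the high bits of the same code field:
    field = (1024 & 0x7fffff00) | 0x22
    dlen, code = field & 0x7fffff00, field & 0xff   # -> 1024, 0x22
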
-def dumpbysize(bysize, how, how2):
- print
- print "Unique sizes %s: %s" % (how, addcommas(len(bysize)))
- print "%10s %6s %6s" % ("size", "objs", how2)
- sizes = bysize.keys()
- sizes.sort()
- for size in sizes:
- loads = 0
- for n in bysize[size].itervalues():
- loads += n
- print "%10s %6d %6d" % (addcommas(size),
- len(bysize.get(size, "")),
- loads)
-
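dumpbysize() expects the two-level mapping built in main(): data size -> {oid -> count}. A tiny hand-built sketch (the string oids are placeholders; real keys are 8-byte packed oids):

    bysize = {
        128: {"oid-a": 3, "oid-b": 1},   # two 128-byte objects, 4 loads
        4096: {"oid-c": 2},              # one 4 KB object, loaded twice
    }
    dumpbysize(bysize, "loaded", "loads")
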
-def dumpbyinterval(byinterval, h0, he):
- loads = hits = 0
- for code in byinterval:
- if code & 0x70 == 0x20:
- n = byinterval[code]
- loads += n
- if code in (0x22, 0x26):
- hits += n
- if not loads:
- return
- hr = 100.0 * hits / loads
- print "%s-%s %10s loads, %10s hits,%5.1f%% hit rate" % (
- time.ctime(h0)[4:-8], time.ctime(he)[14:-8],
- addcommas(loads), addcommas(hits), hr)
-
-def hitrate(bycode):
- loads = hits = 0
- for code in bycode:
- if code & 0x70 == 0x20:
- n = bycode[code]
- loads += n
- if code in (0x22, 0x26):
- hits += n
- if loads:
- return 100.0 * hits / loads
- else:
- return 0.0
-
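To recap the classification used above: any code with code & 0x70 == 0x20 is a load (0x20, 0x22, 0x24, 0x26), and of those only 0x22 and 0x26 are hits; stores and invalidations never count. A quick check against a made-up tally:

    # 10 loads: 7 hits (0x22) and 3 misses (0x20); the 5 stores (0x52)
    # are ignored by hitrate().
    print hitrate({0x20: 3, 0x22: 7, 0x52: 5})    # -> 70.0
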
-def histogram(d):
- bins = {}
- for v in d.itervalues():
- bins[v] = bins.get(v, 0) + 1
- L = bins.items()
- L.sort()
- return L
-
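histogram() inverts a mapping into sorted (value, frequency) pairs; for example, with made-up per-oid load counts:

    # Three oids loaded once each, one oid loaded five times:
    print histogram({"a": 1, "b": 1, "c": 1, "d": 5})
    # -> [(1, 3), (5, 1)]
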
-def U64(s):
- return struct.unpack(">Q", s)[0]
-
-def oid_repr(oid):
- if isinstance(oid, StringType) and len(oid) == 8:
- return '%16x' % U64(oid)
- else:
- return repr(oid)
-
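Both helpers deal in 8-byte big-endian oid strings; for example:

    oid = struct.pack(">Q", 255)
    print U64(oid)         # -> 255
    print oid_repr(oid)    # -> '              ff' (%16x pads to 16 columns)
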
-def addcommas(n):
- sign, s = '', str(n)
- if s[0] == '-':
- sign, s = '-', s[1:]
- i = len(s) - 3
- while i > 0:
- s = s[:i] + ',' + s[i:]
- i -= 3
- return sign + s
-
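addcommas() groups digits in threes and preserves the sign; for example:

    print addcommas(1234567)    # -> 1,234,567
    print addcommas(-42000)     # -> -42,000
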
-explain = {
- # The first hex digit shows the operation, the second the outcome.
- # If the second digit is in "02468" then it is a 'miss'.
- # If it is in "ACE" then it is a 'hit'.
-
- 0x00: "_setup_trace (initialization)",
-
- 0x10: "invalidate (miss)",
- 0x1A: "invalidate (hit, version)",
- 0x1C: "invalidate (hit, saving non-current)",
- # 0x1E can occur during startup verification.
- 0x1E: "invalidate (hit, discarding current or non-current)",
-
- 0x20: "load (miss)",
- 0x22: "load (hit)",
- 0x24: "load (non-current, miss)",
- 0x26: "load (non-current, hit)",
-
- 0x50: "store (version)",
- 0x52: "store (current, non-version)",
- 0x54: "store (non-current)",
- }
-
-if __name__ == "__main__":
- sys.exit(main())