[Zodb-checkins] SVN: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py Cleaned up code, including moving cache iteration into a generator for
Jim Fulton
jim at zope.com
Fri Jun 4 19:04:25 EDT 2010
Log message for revision 113120:
Cleaned up code, including moving cache iteration into a generator for
use in simulation and in analysis.
Added logic to simulate writes that would result from cache misses
when simulating smaller cache sizes.
Added an item size limit.
Added a temporary workaround for the cache tracing logic. :(
Changed:
U ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py
-=-
Modified: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py
===================================================================
--- ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py 2010-06-04 21:53:39 UTC (rev 113119)
+++ ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py 2010-06-04 23:04:25 UTC (rev 113120)
@@ -26,6 +26,8 @@
import struct
import math
import bisect
+import BTrees.OOBTree
+
from sets import Set
from ZODB.utils import z64
@@ -38,15 +40,18 @@
# Parse options.
MB = 1024**2
cachelimit = 20*MB
+ itemlimit = 1<<30
simclass = CircularCacheSimulation
try:
- opts, args = getopt.getopt(sys.argv[1:], "cs:")
+ opts, args = getopt.getopt(sys.argv[1:], "ci:s:")
except getopt.error, msg:
usage(msg)
return 2
for o, a in opts:
if o == '-s':
- cachelimit = int(float(a)*MB)
+ cachelimit = int(a)*MB
+ elif o == '-i':
+ itemlimit = int(a)
elif o == '-c':
simclass = CircularCacheSimulation
else:
@@ -57,36 +62,38 @@
return 2
filename = args[0]
- # Open file.
- if filename.endswith(".gz"):
- # Open gzipped file.
- try:
+ sim = simclass(cachelimit, itemlimit)
+
+ # Print output header.
+ sim.printheader()
+
+ for ts, dlen, code, oid, start_tid, end_tid in events(filename):
+ if (sim.ts0 is not None) and (ts/900 - sim.ts0/900):
+ sim.report()
+ sim.restart()
+ sim.event(ts, dlen, code, oid, start_tid, end_tid)
+
+ # Finish simulation.
+ sim.finish()
+
+ # Exit code from main().
+ return 0
+
+def events(f):
+ if isinstance(f, str):
+ # Open file.
+ filename = f
+ if filename.endswith(".gz"):
+ # Open gzipped file.
import gzip
- except ImportError:
- print >> sys.stderr, "can't read gzipped files (no module gzip)"
- return 1
- try:
f = gzip.open(filename, "rb")
- except IOError, msg:
- print >> sys.stderr, "can't open %s: %s" % (filename, msg)
- return 1
- elif filename == "-":
- # Read from stdin.
- f = sys.stdin
- else:
- # Open regular file.
- try:
+ elif filename == "-":
+ # Read from stdin.
+ f = sys.stdin
+ else:
+ # Open regular file.
f = open(filename, "rb")
- except IOError, msg:
- print >> sys.stderr, "can't open %s: %s" % (filename, msg)
- return 1
- sim = simclass(cachelimit)
-
- # Print output header.
- sim.printheader()
-
- # Read trace file, simulating cache behavior.
f_read = f.read
unpack = struct.unpack
FMT = ">iiH8s8s"
@@ -104,28 +111,24 @@
f.seek(f.tell() - FMT_SIZE + 8)
continue
-
- if (sim.ts0 is not None) and (ts - sim.ts0 > 900):
- sim.report()
- sim.restart()
-
oid = f_read(oidlen)
if len(oid) < oidlen:
break
+
# Decode the code.
dlen, version, code = (code & 0x7fffff00,
code & 0x80,
code & 0x7e)
- # And pass it to the simulation.
- sim.event(ts, dlen, version, code, oid, start_tid, end_tid)
+ # work around a trace bug
+ if dlen > 255:
+ dlen -= 127
+ assert not version
+
+ yield ts, dlen, code, oid, start_tid, end_tid
+
f.close()
- # Finish simulation.
- sim.finish()
- # Exit code from main().
- return 0
-
class Simulation(object):
"""Base class for simulations.
@@ -136,8 +139,9 @@
finish() method also calls report().
"""
- def __init__(self, cachelimit):
+ def __init__(self, cachelimit, itemlimit):
self.cachelimit = cachelimit
+ self.itemlimit = itemlimit
# Initialize global statistics.
self.epoch = None
self.total_loads = 0
@@ -158,8 +162,7 @@
self.writes = 0
self.ts0 = None
- def event(self, ts, dlen, _version, code, oid,
- start_tid, end_tid):
+ def event(self, ts, dlen, code, oid, start_tid, end_tid):
# Record first and last timestamp seen.
if self.ts0 is None:
self.ts0 = ts
@@ -180,7 +183,7 @@
self.total_loads += 1
# Asserting that dlen is 0 iff it's a load miss.
# assert (dlen == 0) == (code in (0x20, 0x24))
- self.load(oid, dlen, start_tid)
+ self.load(oid, dlen, start_tid, end_tid, code)
elif action == 0x50:
# Store.
assert dlen
@@ -198,7 +201,7 @@
def write(self, oid, size, start_tid, end_tid):
pass
- def load(self, oid, size, start_tid):
+ def load(self, oid, size, start_tid, end_tid, code):
# Must increment .hits and .total_hits as appropriate.
pass
@@ -293,10 +296,10 @@
extras = "evicts", "inuse"
- def __init__(self, cachelimit):
+ def __init__(self, cachelimit, itemlimit):
from ZEO import cache
- Simulation.__init__(self, cachelimit)
+ Simulation.__init__(self, cachelimit, itemlimit)
self.total_evicts = 0 # number of cache evictions
# Current offset in file.
@@ -320,23 +323,33 @@
# on disk (all bytes beyond those needed for the object pickle).
self.overhead = cache.allocated_record_overhead
+ self.write_calls = {}
+
def restart(self):
Simulation.restart(self)
self.evicts = 0
- def load(self, oid, size, tid):
- if tid == z64:
+ def load(self, oid, size, tid, end_tid, code):
+ if not (code & 4):
# Trying to load current revision.
if oid in self.current: # else it's a cache miss
self.hits += 1
self.total_hits += 1
- return
+ return
- # May or may not be trying to load current revision.
- cur_tid = self.current.get(oid)
- if cur_tid == tid:
- self.hits += 1
- self.total_hits += 1
+ # simulate the subsequent write, if we can. This is to
+ # overcome the fact that the recorded trace only has
+ # actual writes, which result, in part, from evictions. If
+ # we're simulating a smaller cache size we'll get more
+ # evictions, and we need to simulate the writes that would
+ # have occurred when loads following evictions happen.
+
+ oid_writes = self.write_calls.get(oid)
+ if oid_writes:
+ start_tid, (size, end_tid) = oid_writes.items()[-1]
+ if (oid, start_tid) not in self.key2entry:
+ self.write(oid, size, start_tid, end_tid)
+
return
# It's a load for non-current data. Do we know about this oid?
@@ -407,6 +420,15 @@
e.end_tid = tid
def write(self, oid, size, start_tid, end_tid):
+
+ if size > self.itemlimit:
+ return
+
+ oid_writes = self.write_calls.get(oid)
+ if not oid_writes:
+ oid_writes = self.write_calls[oid] = BTrees.OOBTree.Bucket()
+ oid_writes[start_tid] = size, end_tid
+
if end_tid == z64:
# Storing current revision.
if oid in self.current: # we already have it in cache
@@ -433,7 +455,7 @@
key = oid, start_tid
assert key not in self.key2entry
size += self.overhead
- avail = self.makeroom(size)
+ avail = self.makeroom(size+1)
e = CircularCacheEntry(key, end_tid, self.offset)
self.filemap[self.offset] = size, e
self.key2entry[key] = e
More information about the Zodb-checkins
mailing list