[Zodb-checkins] SVN: ZODB/trunk/src/ZEO/ Merge rev 37422 from 3.4 branch.
Tim Peters
tim.one at comcast.net
Mon Jul 25 21:15:21 EDT 2005
Log message for revision 37423:
Merge rev 37422 from 3.4 branch.
Massive rewrite to make simul.py's circular cache aware of
MVCC. It isn't blowing up, but it thinks the hit rate is
100% -- might have missed something there <wink>.
Changed:
U ZODB/trunk/src/ZEO/cache.py
U ZODB/trunk/src/ZEO/simul.py
-=-
Modified: ZODB/trunk/src/ZEO/cache.py
===================================================================
--- ZODB/trunk/src/ZEO/cache.py 2005-07-26 01:14:18 UTC (rev 37422)
+++ ZODB/trunk/src/ZEO/cache.py 2005-07-26 01:15:21 UTC (rev 37423)
@@ -205,17 +205,20 @@
if L is None:
self._trace(0x24, oid, tid)
return None
- # A pair with None as the second element will always be less
- # than any pair with the same first tid.
+ # A pair with None as the second element is less than any pair with
+ # the same first tid. Dubious: this relies on None being less than
+ # any comparable non-None object in recent Pythons.
i = bisect.bisect_left(L, (tid, None))
- # The least element left of tid was written before tid. If
- # there is no element, the cache doesn't have old enough data.
+ # Now L[i-1] < (tid, None) < L[i], and the start_tid for everything in
+ # L[:i] is < tid, and the start_tid for everything in L[i:] is >= tid.
+ # Therefore the largest start_tid < tid must be at L[i-1]. If i is 0,
+ # there is no start_tid < tid: we don't have any data old enough.
if i == 0:
self._trace(0x24, oid, tid)
return
lo, hi = L[i-1]
- # lo should always be less than tid
- if not lo < tid <= hi:
+ assert lo < tid
+ if tid > hi: # we don't have any data in the right range
self._trace(0x24, oid, tid)
return None
o = self.fc.access((oid, lo))
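
For illustration, a minimal standalone sketch of the sentinel trick the
comments above describe. It assumes Python 2 ordering, where None sorts
before any non-None object, and uses made-up integer tids in place of
ZODB's 8-byte tid strings:

    import bisect

    # Hypothetical sorted list of (start_tid, end_tid) validity ranges.
    L = [(10, 15), (20, 25), (30, 35)]
    tid = 22
    i = bisect.bisect_left(L, (tid, None))
    # Everything in L[:i] has start_tid < tid; everything in L[i:]
    # has start_tid >= tid.
    assert i == 2
    lo, hi = L[i-1]        # (20, 25): the largest start_tid < tid
    assert lo < tid <= hi  # tid falls inside this revision's range
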
@@ -284,7 +287,7 @@
p = start_tid, end_tid
if p in L:
return # duplicate store
- bisect.insort_left(L, (start_tid, end_tid))
+ bisect.insort_left(L, p)
self._trace(0x54, oid, version, start_tid, end_tid,
dlen=len(data))
self.fc.add(o)
@@ -457,7 +460,7 @@
self._trace = notrace
def _trace(self,
- code, oid="", version="", tid="", end_tid=z64, dlen=0,
+ code, oid="", version="", tid=z64, end_tid=z64, dlen=0,
# The next two are just speed hacks.
time_time=time.time, struct_pack=struct.pack):
# The code argument is two hex digits; bits 0 and 7 must be zero.
Modified: ZODB/trunk/src/ZEO/simul.py
===================================================================
--- ZODB/trunk/src/ZEO/simul.py 2005-07-26 01:14:18 UTC (rev 37422)
+++ ZODB/trunk/src/ZEO/simul.py 2005-07-26 01:15:21 UTC (rev 37423)
@@ -34,12 +34,14 @@
import getopt
import struct
import math
-
+import bisect
from sets import Set
+from ZODB.utils import z64
+
def usage(msg):
- print >>sys.stderr, msg
- print >>sys.stderr, __doc__
+ print >> sys.stderr, msg
+ print >> sys.stderr, __doc__
def main():
# Parse options.
@@ -143,7 +145,7 @@
code & 0x7e,
code & 0x01)
# And pass it to the simulation.
- sim.event(ts, dlen, version, code, current, oid, start_tid)
+ sim.event(ts, dlen, version, code, current, oid, start_tid, end_tid)
f.close()
# Finish simulation.
@@ -185,7 +187,8 @@
self.writes = 0
self.ts0 = None
- def event(self, ts, dlen, _version, code, _current, oid, _serial):
+ def event(self, ts, dlen, _version, code, _current, oid,
+ start_tid, end_tid):
# Record first and last timestamp seen.
if self.ts0 is None:
self.ts0 = ts
@@ -203,15 +206,14 @@
self.loads += 1
self.total_loads += 1
assert (dlen == 0) == (code in (0x20, 0x24))
- if dlen:
- self.load(oid, dlen)
+ self.load(oid, dlen, start_tid)
elif action == 0x50:
# Store.
assert dlen
- self.write(oid, dlen)
+ self.write(oid, dlen, start_tid, end_tid)
elif action == 0x10:
# Invalidate.
- self.inval(oid)
+ self.inval(oid, start_tid)
elif action == 0x00:
# Restart.
self.report()
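
The branches above suggest the dispatch keys on the high bits of the
trace code. A sketch of that decoding, assuming action is computed as
code & 0x70 (an assumption; the low bits carry per-action detail, e.g.
0x24 is the code cache.py traces on a non-current load miss):

    def classify(code):
        # Group trace codes by their high nibble.
        action = code & 0x70
        return {0x00: "restart", 0x10: "invalidate",
                0x20: "load", 0x50: "store"}.get(action, "unknown")

    assert classify(0x24) == "load"   # a miss still dispatches as a load
    assert classify(0x54) == "store"  # the non-current store code above
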
@@ -219,14 +221,14 @@
else:
raise ValueError("unknown trace code 0x%x" % code)
- def write(self, oid, size):
+ def write(self, oid, size, start_tid, end_tid):
pass
- def load(self, oid, size):
+ def load(self, oid, size, start_tid):
# Must increment .hits and .total_hits as appropriate.
pass
- def inval(self, oid):
+ def inval(self, oid, start_tid):
# Must increment .invals and .total_invals as appropriate.
pass
@@ -280,6 +282,255 @@
for name in self.extras])
print (self.format + " OVERALL") % args
+from ZEO.cache import ZEC3_HEADER_SIZE
+# An Entry just wraps a (key, offset) pair. A key is in turn an
+# (oid, tid) pair.
+from ZEO.cache import Entry
+
+class CircularCacheSimulation(Simulation):
+ # The cache is managed as a single file with a pointer that
+ # goes around the file, circularly, forever. New objects
+ # are written at the current pointer, evicting whatever was
+ # there previously.
+
+ extras = "evicts", "inuse"
+
+ def __init__(self, cachelimit):
+ from ZEO import cache
+ from BTrees.OIBTree import OIBTree
+
+ Simulation.__init__(self, cachelimit)
+ self.total_evicts = 0
+ # Current offset in file.
+ self.offset = ZEC3_HEADER_SIZE
+ # Map offset in file to (size, Entry) pair, or to (size, None) if
+ # the offset starts a free block.
+ self.filemap = {ZEC3_HEADER_SIZE: (self.cachelimit - ZEC3_HEADER_SIZE,
+ None)}
+ # Map key to Entry. A key is an (oid, tid) pair.
+ self.key2entry = {}
+
+ # Map oid to tid of current revision.
+ self.current = {}
+
+ # Map oid to list of (start_tid, end_tid) pairs in sorted order.
+ # Used to find matching key for load of non-current data.
+ self.noncurrent = {}
+
+ # Map key (an (oid, tid) pair) to the size of the object state.
+ # Unlike the others, this accumulates knowledge over time, regardless
+ # of what's in the cache. The problem: the trace file may have
+ # a load hit where we have a load miss. There won't be a store in
+ # the trace file "soon" since the actual cache had the data. What
+ # can the simulated cache do? If the object has ever been seen
+ # before, it can look up its size in this dict, and "pretend" to
+ # do a store. This isn't faithful in all cases, since we don't
+ # know the right tid: if we miss on a plain load(), the trace
+ # file contains no indication of the appropriate tid.
+ self.key2size = OIBTree()
+
+ # The number of overhead bytes needed to store an object pickle
+ # on disk (all bytes beyond those needed for the object pickle).
+ self.overhead = (cache.Object.TOTAL_FIXED_SIZE +
+ cache.OBJECT_HEADER_SIZE)
+
+ def restart(self):
+ Simulation.restart(self)
+ self.evicts = 0
+
+ def load(self, oid, size, tid):
+ if tid == z64:
+ # Trying to load current revision.
+ if oid in self.current:
+ self.hits += 1
+ self.total_hits += 1
+ else:
+ self._cache_miss(oid, tid)
+ return
+
+ # May or may not be trying to load current revision.
+ cur_tid = self.current.get(oid)
+ if cur_tid == tid:
+ self.hits += 1
+ self.total_hits += 1
+ return
+
+ # It's a load for non-current data. Do we know about this oid?
+ L = self.noncurrent.get(oid)
+ if L is None:
+ self._cache_miss(oid, tid)
+ return
+ i = bisect.bisect_left(L, (tid, None))
+ if i == 0:
+ # This tid is smaller than any we know about -- miss.
+ self._cache_miss(oid, tid)
+ return
+ lo, hi = L[i-1]
+ assert lo < tid
+ if tid > hi:
+ self._cache_miss(oid, tid)
+ return
+ # Cache hit.
+ self.hits += 1
+ self.total_hits += 1
+
+ def _cache_miss(self, oid, tid, HUGE64='\xff'*8):
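+ # HUGE64 is the largest possible 8-byte tid; it serves as an
+ # inclusive upper bound when range-searching key2size for
+ # revisions of this oid.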
+ have_data = False
+ if tid == z64:
+ # Miss on current data. Find the most recent revision we ever saw.
+ items = self.key2size.items(min=(oid, z64), max=(oid, HUGE64))
+ if items:
+ (oid, tid), size = items[-1] # most recent
+ have_data = True
+ else:
+ # Miss on non-current data. Find one "that fits", approximately.
+ items = self.key2size.items(min=(oid, tid), max=(oid, HUGE64))
+ if items:
+ (oid, tid), size = items[0] # first one at or after tid
+ have_data = True
+
+ if have_data:
+ # Pretend the cache miss was followed by a store.
+ self.writes += 1
+ self.total_writes += 1
+ self.add(oid, tid, size)
+
+ # (oid, tid) is in the cache. Remove it: take it out of key2entry,
+ # and in `filemap` mark the space it occupied as being free.
+ def _remove(self, oid, tid):
+ key = oid, tid
+ e = self.key2entry.pop(key)
+ pos = e.offset
+ size, _e = self.filemap[pos]
+ assert e is _e
+ self.filemap[pos] = size, None
+
+ def _remove_noncurrent_revisions(self, oid):
+ noncurrent_list = self.noncurrent.get(oid)
+ if noncurrent_list:
+ self.invals += len(noncurrent_list)
+ self.total_invals += len(noncurrent_list)
+ for start_tid, end_tid in noncurrent_list:
+ self._remove(oid, start_tid)
+ del self.noncurrent[oid]
+
+ def inval(self, oid, tid):
+ if tid == z64:
+ # This is part of startup cache verification.
+ self._remove_noncurrent_revisions(oid)
+
+ cur_tid = self.current.get(oid)
+ if cur_tid is None:
+ # We don't have current data, so nothing to do.
+ return
+
+ # We had current data for oid, but no longer.
+ self.invals += 1
+ self.total_invals += 1
+ del self.current[oid]
+ if tid == z64:
+ # Startup cache verification: forget this oid entirely.
+ self._remove(oid, cur_tid)
+ return
+
+ # Add the validity range to the list of non-current data for oid.
+ assert cur_tid < tid
+ L = self.noncurrent.setdefault(oid, [])
+ bisect.insort_left(L, (cur_tid, tid))
+
+ def write(self, oid, size, start_tid, end_tid):
+ if end_tid == z64:
+ # Storing current revision.
+ if oid in self.current: # we already have it in cache
+ return
+ self.current[oid] = start_tid
+ self.key2size[oid, start_tid] = size
+ self.writes += 1
+ self.total_writes += 1
+ self.add(oid, start_tid, size)
+ return
+ # Storing non-current revision.
+ L = self.noncurrent.setdefault(oid, [])
+ p = start_tid, end_tid
+ if p in L:
+ return # we already have it in cache
+ bisect.insort_left(L, p)
+ self.key2size[(oid, start_tid)] = size
+ self.writes += 1
+ self.total_writes += 1
+ self.add(oid, start_tid, size)
+
+ def add(self, oid, tid, size):
+ size += self.overhead
+ avail = self.makeroom(size)
+ key = oid, tid
+ assert key not in self.key2entry
+ e = Entry(key, self.offset)
+ self.filemap[self.offset] = size, e
+ self.key2entry[key] = e
+ self.offset += size
+ # All the space made available must be accounted for in filemap.
+ excess = avail - size
+ if excess:
+ self.filemap[self.offset] = excess, None
+
+ def makeroom(self, need):
+ # Evict enough objects to make the necessary space available.
+ if self.offset + need > self.cachelimit:
+ self.offset = ZEC3_HEADER_SIZE
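+ # Wrap around: evict from the front of the file. Blocks left
+ # near the tail stay in filemap until the write pointer reaches
+ # them again on a later pass.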
+ pos = self.offset
+ while need > 0:
+ assert pos < self.cachelimit
+ try:
+ size, e = self.filemap[pos]
+ except KeyError:
+ self.dump()
+ raise
+ del self.filemap[pos]
+ if e:
+ self.evicts += 1
+ self.total_evicts += 1
+ assert pos == e.offset
+ _e = self.key2entry.pop(e.key)
+ assert e is _e
+ need -= size
+ pos += size
+ return pos - self.offset # total number of bytes freed
+
+ def report(self):
+ self.check()
+ free = used = total = 0
+ for size, e in self.filemap.itervalues():
+ total += size
+ if e:
+ used += size
+ else:
+ free += size
+
+ self.inuse = round(100.0 * used / total, 1)
+ self.total_inuse = self.inuse
+ Simulation.report(self)
+
+ def check(self):
+ oidcount = 0
+ pos = ZEC3_HEADER_SIZE
+ while pos < self.cachelimit:
+ size, e = self.filemap[pos]
+ if e:
+ oidcount += 1
+ assert self.key2entry[e.key].offset == pos
+ pos += size
+ assert oidcount == len(self.key2entry)
+ assert pos == self.cachelimit
+
+ def dump(self):
+ print len(self.filemap)
+ L = list(self.filemap)
+ L.sort()
+ for k in L:
+ v = self.filemap[k]
+ print k, v[0], repr(v[1])
+
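
To make the new accounting concrete, here is a self-contained miniature
of the add()/makeroom() bookkeeping above, under simplifying assumptions
(a 100-byte file, no header block, string keys instead of Entry objects,
made-up sizes):

    filemap = {0: (100, None)}   # one free block spanning the whole file
    offset = 0
    LIMIT = 100

    def makeroom(need):
        # Evict whole blocks starting at the write pointer until
        # `need` bytes are available, wrapping at the file limit.
        global offset
        if offset + need > LIMIT:
            offset = 0
        pos, freed = offset, 0
        while freed < need:
            size, key = filemap.pop(pos)
            freed += size
            pos += size
        return freed

    def add(key, size):
        # Write at the pointer; re-record any leftover bytes from
        # the last evicted block as a new free block.
        global offset
        avail = makeroom(size)
        filemap[offset] = (size, key)
        offset += size
        if avail > size:
            filemap[offset] = (avail - size, None)

    add("obj-a", 30)
    add("obj-b", 50)
    assert filemap == {0: (30, "obj-a"), 30: (50, "obj-b"), 80: (20, None)}
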
class ZEOCacheSimulation(Simulation):
"""Simulate the ZEO 1.0 and 2.0cache behavior.
@@ -971,131 +1222,6 @@
print "Scanned file, %d unique oids, %d repeats" % (
all, len(self.count))
-
-from ZEO.cache import ZEC3_HEADER_SIZE
-
-class CircularCacheSimulation(Simulation):
- # The cache is managed as a single file with a pointer that
- # goes around the file, circularly, forever. New objects
- # are written at the current pointer, evicting whatever was
- # there previously.
-
- extras = "evicts", "inuse"
-
- def __init__(self, cachelimit):
- from ZEO import cache
-
- Simulation.__init__(self, cachelimit)
- self.total_evicts = 0
- # Current offset in file.
- self.offset = ZEC3_HEADER_SIZE
- # Map offset in file to (size, oid) pair.
- self.filemap = {ZEC3_HEADER_SIZE: (self.cachelimit - ZEC3_HEADER_SIZE,
- None)}
- # Map oid to file offset.
- self.oid2ofs = {}
-
- self.overhead = (cache.Object.TOTAL_FIXED_SIZE +
- cache.OBJECT_HEADER_SIZE)
-
- def restart(self):
- Simulation.restart(self)
- self.evicts = 0
-
- def load(self, oid, size):
- if oid in self.oid2ofs:
- self.hits += 1
- self.total_hits += 1
- elif size:
- self.writes += 1
- self.total_writes += 1
- self.add(oid, size)
- # Else it was a load miss in the trace file, and a load miss here too.
-
- def inval(self, oid):
- pos = self.oid2ofs.pop(oid, None)
- if pos is None:
- return
- self.invals += 1
- self.total_invals += 1
- size, _oid = self.filemap[pos]
- assert oid == _oid
- self.filemap[pos] = size, None
-
- def write(self, oid, size):
- if oid not in self.oid2ofs:
- self.writes += 1
- self.total_writes += 1
- self.add(oid, size)
-
- def add(self, oid, size):
- size += self.overhead
- avail = self.makeroom(size)
- assert oid not in self.oid2ofs
- self.filemap[self.offset] = size, oid
- self.oid2ofs[oid] = self.offset
- self.offset += size
- # All the space made available must be accounted for in filemap.
- excess = avail - size
- if excess:
- self.filemap[self.offset] = excess, None
-
- def makeroom(self, need):
- # Evict enough objects to make the necessary space available.
- if self.offset + need > self.cachelimit:
- self.offset = ZEC3_HEADER_SIZE
- pos = self.offset
- while need > 0:
- assert pos < self.cachelimit
- try:
- size, oid = self.filemap[pos]
- except KeyError:
- self.dump()
- raise
- del self.filemap[pos]
- if oid:
- self.evicts += 1
- self.total_evicts += 1
- _pos = self.oid2ofs.pop(oid)
- assert pos == _pos
- need -= size
- pos += size
- return pos - self.offset # total number of bytes freed
-
- def report(self):
- self.check()
- free = used = total = 0
- for size, oid in self.filemap.itervalues():
- total += size
- if oid:
- used += size
- else:
- free += size
-
- self.inuse = round(100.0 * used / total, 1)
- self.total_inuse = self.inuse
- Simulation.report(self)
-
- def check(self):
- oidcount = 0
- pos = ZEC3_HEADER_SIZE
- while pos < self.cachelimit:
- size, oid = self.filemap[pos]
- if oid:
- oidcount += 1
- assert self.oid2ofs[oid] == pos
- pos += size
- assert oidcount == len(self.oid2ofs)
- assert pos == self.cachelimit
-
- def dump(self):
- print len(self.filemap)
- L = list(self.filemap)
- L.sort()
- for k in L:
- v = self.filemap[k]
- print k, v[0], repr(v[1])
-
class BuddyCacheSimulation(LRUCacheSimulation):
def __init__(self, cachelimit):