[Zodb-checkins] CVS: ZODB3/ZEO - simul.py:1.12.8.2.18.10
Tim Peters
cvs-admin at zope.org
Thu Dec 4 21:16:42 EST 2003
Update of /cvs-repository/ZODB3/ZEO
In directory cvs.zope.org:/tmp/cvs-serv27791/ZEO
Modified Files:
Tag: Zope-2_6-branch
simul.py
Log Message:
Stirring the pot: added a Thor-like simulator. At the 50MB cache
size we've been using, it looks pretty dreadful (overall hit rate 27.2%
on zope.org-main).
Boost it to 100MB, though, and nothing beats it:
LRU 38.5%
Thor 55.1%
2Q 50.3%
More astonishing than that are the per-restart stats: Thor exceeded a 60%
hit rate in 8 of the 19 periods, while 2Q never did; Thor's max hit rate
was 78.1% (in the final period), and 2Q's was 58.9% (also in the final
period).
Note: our LRU simulation doesn't persist across restarts. At the 50MB
cache size, it doesn't seem to matter much (hit rate goes from 30.1% to
30.9% if I make the LRU simulation persistent).
Overall, more new questions than answers.
=== ZODB3/ZEO/simul.py 1.12.8.2.18.9 => 1.12.8.2.18.10 ===
--- ZODB3/ZEO/simul.py:1.12.8.2.18.9 Thu Dec 4 01:52:43 2003
+++ ZODB3/ZEO/simul.py Thu Dec 4 21:16:41 2003
@@ -34,6 +34,8 @@
import getopt
import struct
+from sets import Set
+
def usage(msg):
print >>sys.stderr, msg
print >>sys.stderr, __doc__
@@ -45,7 +47,7 @@
simclass = ZEOCacheSimulation
theta = omicron = None
try:
- opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOas:o:t:")
+ opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTs:o:t:")
except getopt.error, msg:
usage(msg)
return 2
@@ -74,6 +76,9 @@
simclass = OracleSimulation
if o == '-a':
simclass = ARCCacheSimulation
+ if o == '-T':
+ simclass = ThorSimulation
+
if len(args) != 1:
usage("exactly one file argument required")
return 2
@@ -171,8 +176,8 @@
# Initialize global statistics
self.epoch = None
self.total_loads = 0
- self.total_hits = 0 # Subclass must increment
- self.total_invals = 0
+ self.total_hits = 0 # subclass must increment
+ self.total_invals = 0 # subclass must increment
self.total_writes = 0
if not hasattr(self, "extras"):
self.extras = (self.extraname,)
@@ -183,8 +188,8 @@
def restart(self):
# Reset per-run statistics
self.loads = 0
- self.hits = 0 # Subclass must increment
- self.invals = 0
+ self.hits = 0 # subclass must increment
+ self.invals = 0 # subclass must increment
self.writes = 0
self.ts0 = None
@@ -222,9 +227,11 @@
pass
def load(self, oid, size):
+ # Must increment .hits and .total_hits as appropriate.
pass
def inval(self, oid):
+ # Must increment .invals and .total_invals as appropriate.
pass
format = "%12s %9s %8s %8s %6s %6s %7s"
@@ -476,15 +483,15 @@
def __init__(self, cachelimit, outlen=10000):
Simulation.__init__(self, cachelimit)
-
+
self.am_limit = 3 * self.cachelimit / 4
self.a1in_limit = self.cachelimit / 4
-
+
self.cache = {}
self.am_size = 0
self.a1in_size = 0
self.a1out_size = 0
-
+
self.total_evicts = 0
self.total_hothit = 0
self.total_am_add = 0
@@ -503,7 +510,7 @@
# hit in this queue, promote the object to am.
self.a1out = Node2Q(None, None, a1out)
self.a1out.linkbefore(self.a1out)
-
+
def makespace(self, size):
for space in 0, size:
if self.enoughspace(size):
@@ -654,7 +661,7 @@
# There are two pairs of linked lists. Each pair has a top and
# bottom half. The bottom half contains metadata, but not actual
# objects.
-
+
# LRU list of frequently used objects
self.lruT = Node2Q(None, None, lruT)
self.lruT.linkbefore(self.lruT)
@@ -690,7 +697,7 @@
# ones they experimented with. Their biggest cache had 512K
# entries, while our smallest cache will have 40 times that many
# entries.
-
+
self.p = 0
# XXX multiply computed adjustments to p by walk_factor
self.walk_factor = 500
@@ -743,7 +750,7 @@
addcommas(self.cachelimit),
addcommas(self.totalsize),
addcommas(self.allsize))
-
+
fmt = (
"p=%(p)d\n"
"lruT = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
@@ -820,19 +827,19 @@
# actually hold the object
if node.kind is lruT:
node.linkbefore(self.lruT)
-
+
self.hits += 1
self.total_hits += 1
self.lruThits += 1
self.total_lruThits += 1
-
+
elif node.kind is fifoT:
node.linkbefore(self.lruT)
self.fifoT_len -= 1
self.lruT_len += 1
self.fifoT_size -= node.size
self.lruT_size += node.size
-
+
self.hits += 1
self.total_hits += 1
self.fifoThits += 1
@@ -859,7 +866,7 @@
if not r:
break
need -= r
-
+
elif node.kind is lruB:
node.linkbefore(self.lruT)
self.lruB_len -= 1
@@ -881,7 +888,7 @@
if not r:
break
need -= r
-
+
def inval(self, oid):
pass
@@ -964,7 +971,7 @@
# pointer offset and the offset of the cached data record. The
# cache can be configured to copy objects to the current offset
# depending on how far away they are now. The omicron parameter
- # specifies a percentage
+ # specifies a percentage
extras = "evicts", "copies", "inuse", "skips"
@@ -1054,14 +1061,14 @@
except:
self.dump()
raise
-
+
if not evicted and self.skip and oid and self.closeenough(oid):
self.skips += 1
self.total_skips += 1
self.offset += size
pos += size
continue
-
+
evicted = True
del self.filemap[pos]
@@ -1472,6 +1479,176 @@
def maybe(f, p=0.5):
if random.random() < p:
f()
+
+#############################################################################
+# Thor-like eviction scheme.
+#
+# The cache keeps a circular list of all objects, and uses a travelling
+# pointer to decay the worth of objects over time.
+
+class ThorNode(Node):
+
+ __slots__ = ['worth']
+
+ def __init__(self, oid, size, worth=None):
+ Node.__init__(self, oid, size)
+ self.worth = worth
+
+class ThorListHead(Node):
+ def __init__(self):
+ Node.__init__(self, 0, 0)
+ self.next = self.prev = self
+
+class ThorSimulation(Simulation):
+
+ extraname = "evicts"
+
+ def __init__(self, cachelimit):
+ Simulation.__init__(self, cachelimit)
+
+ # Maximum total of object sizes we keep in cache.
+ self.maxsize = cachelimit
+ # Current total of object sizes in cache.
+ self.currentsize = 0
+
+ # A worth byte maps to a set of all objects with that worth.
+ # This is cheap to keep updated, and makes finding low-worth
+ # objects for eviction trivial (just march over the worthsets
+ # list, in order).
+ self.worthsets = [Set() for dummy in range(256)]
+
+ # We keep a circular list of all objects in cache. currentobj
+ # walks around it forever. Each time _tick() is called, the
+ # worth of currentobj is decreased, basically by shifting
+ # right 1, and currentobj moves on to the next object. When
+ # an object is first inserted, it enters the list right before
+ # currentobj. When an object is accessed, its worth is
+ # increased by or'ing in 0x80. This scheme comes from the
+ # Thor system, and is an inexpensive way to account for both
+ # recency and frequency of access: recency is reflected in
+ # the leftmost bit set, and frequency by how many bits are
+ # set.
+ #
+ # Note: because evictions are interleaved with ticks,
+ # unlinking an object is tricky, lest we evict currentobj. The
+ # class _unlink method takes care of this properly.
+ self.listhead = ThorListHead()
+ self.currentobj = self.listhead
+
+ # Map an object.oid to its ThorNode.
+ self.oid2object = {}
+
+ self.total_evicts = 0
+
+ # Unlink object from the circular list, taking care not to lose
+ # track of the current object. Always call this instead of
+ # invoking obj.unlink() directly.
+ def _unlink(self, obj):
+ assert obj is not self.listhead
+ if obj is self.currentobj:
+ self.currentobj = obj.next
+ obj.unlink()
+
+ # Change obj.worth to newworth, maintaining invariants.
+ def _change_worth(self, obj, newworth):
+ if obj.worth != newworth:
+ self.worthsets[obj.worth].remove(obj)
+ obj.worth = newworth
+ self.worthsets[newworth].add(obj)
+
+ def add(self, object):
+ assert object.oid not in self.oid2object
+ self.oid2object[object.oid] = object
+
+ newsize = self.currentsize + object.size
+ if newsize > self.maxsize:
+ self._evictbytes(newsize - self.maxsize)
+ self.currentsize += object.size
+ object.linkbefore(self.currentobj)
+
+ # Give new object an initial worth roughly equal to the log
+ # (base 2) of its size. The intuition is that larger objects
+ # are more expensive to fetch over the network, so are worth
+ # more (at least at first).
+ worth = 0
+ targetsize = 1
+ while object.size > targetsize:
+ worth += 1
+ targetsize <<= 1
+ object.worth = worth
+ self.worthsets[worth].add(object)
+
+ # Decrease the worth of the current object, and advance the
+ # current object.
+ def _tick(self):
+ c = self.currentobj
+ if c is self.listhead:
+ c = c.next
+ if c is self.listhead: # list is empty
+ return
+ self._change_worth(c, (c.worth + 1) >> 1)
+ self.currentobj = c.next
+
+ def access(self, oid):
+ self._tick()
+ obj = self.oid2object.get(oid)
+ if obj is None:
+ return None
+ self._change_worth(obj, obj.worth | 0x80)
+ return obj
+
+ # Evict objects of least worth first, until at least nbytes bytes
+ # have been freed.
+ def _evictbytes(self, nbytes):
+ for s in self.worthsets:
+ while s:
+ if nbytes <= 0:
+ return
+ obj = s.pop()
+ nbytes -= obj.size
+ self._evictobj(obj)
+
+ def _evictobj(self, obj):
+ self.currentsize -= obj.size
+ self.worthsets[obj.worth].discard(obj)
+ del self.oid2object[obj.oid]
+ self._unlink(obj)
+
+ self.evicts += 1
+ self.total_evicts += 1
+
+ def _evict_without_bumping_evict_stats(self, obj):
+ self._evictobj(obj)
+ self.evicts -= 1
+ self.total_evicts -= 1
+
+ # Simulator overrides from here on.
+
+ def restart(self):
+ # Reset base class
+ Simulation.restart(self)
+ # Reset additional per-run statistics
+ self.evicts = 0
+
+ def write(self, oid, size):
+ obj = self.oid2object.get(oid)
+ if obj is not None:
+ self._evict_without_bumping_evict_stats(obj)
+ self.add(ThorNode(oid, size))
+
+ def load(self, oid, size):
+ if self.access(oid) is not None:
+ self.hits += 1
+ self.total_hits += 1
+ else:
+ self.write(oid, size)
+
+ def inval(self, oid):
+ obj = self.oid2object.get(oid)
+ if obj is not None:
+ self.invals += 1
+ self.total_invals += 1
+ self._evict_without_bumping_evict_stats(obj)
if __name__ == "__main__":
sys.exit(main())
More information about the Zodb-checkins
mailing list