[Zope-Checkins] CVS: ZODB3/ZEO - simul.py:1.12.8.2.18.10
Tim Peters
cvs-admin at zope.org
Thu Dec 4 21:16:41 EST 2003
Update of /cvs-repository/ZODB3/ZEO
In directory cvs.zope.org:/tmp/cvs-serv27791/ZEO
Modified Files:
Tag: Zope-2_6-branch
simul.py
Log Message:
Stirring the pot: added a Thor-like simulator. At the 50MB cache
size we've been using, it looks pretty dreadful (overall hit rate 27.2%
on zope.org-main).
Boost it to 100MB, though, and nothing beats it:
    LRU    38.5%
    Thor   55.1%
    2Q     50.3%
More astonishing than that are the per-restart stats: Thor exceeded a 60%
hit rate in 8 of the 19 periods, while 2Q never did; Thor's max hit rate
was 78.1% (in the final period), and 2Q's was 58.9% (also in the final
period).
Note: our LRU simulation doesn't persist across restarts. At the 50MB
cache size, it doesn't seem to matter much (hit rate goes from 30.1% to
30.9% if I make the LRU simulation persistent).
Overall, more new questions than answers.
=== ZODB3/ZEO/simul.py 1.12.8.2.18.9 => 1.12.8.2.18.10 ===
--- ZODB3/ZEO/simul.py:1.12.8.2.18.9 Thu Dec 4 01:52:43 2003
+++ ZODB3/ZEO/simul.py Thu Dec 4 21:16:41 2003
@@ -34,6 +34,8 @@
import getopt
import struct
+from sets import Set
+
def usage(msg):
print >>sys.stderr, msg
print >>sys.stderr, __doc__
@@ -45,7 +47,7 @@
simclass = ZEOCacheSimulation
theta = omicron = None
try:
- opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOas:o:t:")
+ opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTs:o:t:")
except getopt.error, msg:
usage(msg)
return 2
@@ -74,6 +76,9 @@
simclass = OracleSimulation
if o == '-a':
simclass = ARCCacheSimulation
+ if o == '-T':
+ simclass = ThorSimulation
+
if len(args) != 1:
usage("exactly one file argument required")
return 2
@@ -171,8 +176,8 @@
# Initialize global statistics
self.epoch = None
self.total_loads = 0
- self.total_hits = 0 # Subclass must increment
- self.total_invals = 0
+ self.total_hits = 0 # subclass must increment
+ self.total_invals = 0 # subclass must increment
self.total_writes = 0
if not hasattr(self, "extras"):
self.extras = (self.extraname,)
@@ -183,8 +188,8 @@
def restart(self):
# Reset per-run statistics
self.loads = 0
- self.hits = 0 # Subclass must increment
- self.invals = 0
+ self.hits = 0 # subclass must increment
+ self.invals = 0 # subclass must increment
self.writes = 0
self.ts0 = None
@@ -222,9 +227,11 @@
pass
def load(self, oid, size):
+ # Must increment .hits and .total_hits as appropriate.
pass
def inval(self, oid):
+ # Must increment .invals and .total_invals as appropriate.
pass
format = "%12s %9s %8s %8s %6s %6s %7s"
@@ -476,15 +483,15 @@
def __init__(self, cachelimit, outlen=10000):
Simulation.__init__(self, cachelimit)
-
+
self.am_limit = 3 * self.cachelimit / 4
self.a1in_limit = self.cachelimit / 4
-
+
self.cache = {}
self.am_size = 0
self.a1in_size = 0
self.a1out_size = 0
-
+
self.total_evicts = 0
self.total_hothit = 0
self.total_am_add = 0
@@ -503,7 +510,7 @@
# hit in this queue, promote the object to am.
self.a1out = Node2Q(None, None, a1out)
self.a1out.linkbefore(self.a1out)
-
+
def makespace(self, size):
for space in 0, size:
if self.enoughspace(size):
@@ -654,7 +661,7 @@
# There are two pairs of linked lists. Each pair has a top and
# bottom half. The bottom half contains metadata, but not actual
# objects.
-
+
# LRU list of frequently used objects
self.lruT = Node2Q(None, None, lruT)
self.lruT.linkbefore(self.lruT)
@@ -690,7 +697,7 @@
# ones they experimented with. Their biggest cache had 512K
# entries, while our smallest cache will have 40 times that many
# entries.
-
+
self.p = 0
# XXX multiply computed adjustments to p by walk_factor
self.walk_factor = 500
@@ -743,7 +750,7 @@
addcommas(self.cachelimit),
addcommas(self.totalsize),
addcommas(self.allsize))
-
+
fmt = (
"p=%(p)d\n"
"lruT = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
@@ -820,19 +827,19 @@
# actually hold the object
if node.kind is lruT:
node.linkbefore(self.lruT)
-
+
self.hits += 1
self.total_hits += 1
self.lruThits += 1
self.total_lruThits += 1
-
+
elif node.kind is fifoT:
node.linkbefore(self.lruT)
self.fifoT_len -= 1
self.lruT_len += 1
self.fifoT_size -= node.size
self.lruT_size += node.size
-
+
self.hits += 1
self.total_hits += 1
self.fifoThits += 1
@@ -859,7 +866,7 @@
if not r:
break
need -= r
-
+
elif node.kind is lruB:
node.linkbefore(self.lruT)
self.lruB_len -= 1
@@ -881,7 +888,7 @@
if not r:
break
need -= r
-
+
def inval(self, oid):
pass
@@ -964,7 +971,7 @@
# pointer offset and the offset of the cached data record. The
# cache can be configured to copy objects to the current offset
# depending on how far away they are now. The omicron parameter
- # specifies a percentage
+ # specifies a percentage
extras = "evicts", "copies", "inuse", "skips"
@@ -1054,14 +1061,14 @@
except:
self.dump()
raise
-
+
if not evicted and self.skip and oid and self.closeenough(oid):
self.skips += 1
self.total_skips += 1
self.offset += size
pos += size
continue
-
+
evicted = True
del self.filemap[pos]
@@ -1472,6 +1479,176 @@
def maybe(f, p=0.5):
if random.random() < p:
f()
+
+#############################################################################
+# Thor-like eviction scheme.
+#
+# The cache keeps a circular list of all objects, and uses a travelling pointer
+# to decay the worth of objects over time.
+
+class ThorNode(Node):
+
+ __slots__ = ['worth']
+
+ def __init__(self, oid, size, worth=None):
+ Node.__init__(self, oid, size)
+ self.worth = worth
+
+class ThorListHead(Node):
+ def __init__(self):
+ Node.__init__(self, 0, 0)
+ self.next = self.prev = self
+
+class ThorSimulation(Simulation):
+
+ extraname = "evicts"
+
+ def __init__(self, cachelimit):
+ Simulation.__init__(self, cachelimit)
+
+ # Maximum total of object sizes we keep in cache.
+ self.maxsize = cachelimit
+ # Current total of object sizes in cache.
+ self.currentsize = 0
+
+ # A worth byte maps to a set of all objects with that worth.
+ # This is cheap to keep updated, and makes finding low-worth
+ # objects for eviction trivial (just march over the worthsets
+ # list, in order).
+ self.worthsets = [Set() for dummy in range(256)]
+
+ # We keep a circular list of all objects in cache. currentobj
+ # walks around it forever. Each time _tick() is called, the
+ # worth of currentobj is decreased, basically by shifting
+ # right 1, and currentobj moves on to the next object. When
+ # an object is first inserted, it enters the list right before
+ # currentobj. When an object is accessed, its worth is
+ # increased by or'ing in 0x80. This scheme comes from the
+ # Thor system, and is an inexpensive way to account for both
+ # recency and frequency of access: recency is reflected in
+ # the leftmost bit set, and frequency by how many bits are
+ # set.
+ #
+ # Note: because evictions are interleaved with ticks,
+ # unlinking an object is tricky, lest we lose track of currentobj.
+ # The class _unlink() method takes care of this properly.
+ self.listhead = ThorListHead()
+ self.currentobj = self.listhead
+
+ # Map an object.oid to its ThorNode.
+ self.oid2object = {}
+
+ self.total_evicts = 0
+
+ # Unlink object from the circular list, taking care not to lose
+ # track of the current object. Always call this instead of
+ # invoking obj.unlink() directly.
+ def _unlink(self, obj):
+ assert obj is not self.listhead
+ if obj is self.currentobj:
+ self.currentobj = obj.next
+ obj.unlink()
+
+ # Change obj.worth to newworth, maintaining invariants.
+ def _change_worth(self, obj, newworth):
+ if obj.worth != newworth:
+ self.worthsets[obj.worth].remove(obj)
+ obj.worth = newworth
+ self.worthsets[newworth].add(obj)
+
+ def add(self, object):
+ assert object.oid not in self.oid2object
+ self.oid2object[object.oid] = object
+
+ newsize = self.currentsize + object.size
+ if newsize > self.maxsize:
+ self._evictbytes(newsize - self.maxsize)
+ self.currentsize += object.size
+ object.linkbefore(self.currentobj)
+
+ # Give the new object an initial worth roughly equal to the log
+ # (base 2) of its size. The intuition is that larger objects
+ # are more expensive to fetch over the network, so are worth
+ # more (at least at first).
+ worth = 0
+ targetsize = 1
+ while object.size > targetsize:
+ worth += 1
+ targetsize <<= 1
+ object.worth = worth
+ self.worthsets[worth].add(object)
+
+ # Decrease the worth of the current object, and advance the
+ # current object.
+ def _tick(self):
+ c = self.currentobj
+ if c is self.listhead:
+ c = c.next
+ if c is self.listhead: # list is empty
+ return
+ self._change_worth(c, (c.worth + 1) >> 1)
+ self.currentobj = c.next
+
+ def access(self, oid):
+ self._tick()
+ obj = self.oid2object.get(oid)
+ if obj is None:
+ return None
+ self._change_worth(obj, obj.worth | 0x80)
+ return obj
+
+ # Evict objects of least worth first, until at least nbytes bytes
+ # have been freed.
+ def _evictbytes(self, nbytes):
+ for s in self.worthsets:
+ while s:
+ if nbytes <= 0:
+ return
+ obj = s.pop()
+ nbytes -= obj.size
+ self._evictobj(obj)
+
+ def _evictobj(self, obj):
+ self.currentsize -= obj.size
+ self.worthsets[obj.worth].discard(obj)
+ del self.oid2object[obj.oid]
+ self._unlink(obj)
+
+ self.evicts += 1
+ self.total_evicts += 1
+
+ def _evict_without_bumping_evict_stats(self, obj):
+ self._evictobj(obj)
+ self.evicts -= 1
+ self.total_evicts -= 1
+
+ # Simulator overrides from here on.
+
+ def restart(self):
+ # Reset base class
+ Simulation.restart(self)
+ # Reset additional per-run statistics
+ self.evicts = 0
+
+ def write(self, oid, size):
+ obj = self.oid2object.get(oid)
+ if obj is not None:
+ self._evict_without_bumping_evict_stats(obj)
+ self.add(ThorNode(oid, size))
+
+ def load(self, oid, size):
+ if self.access(oid) is not None:
+ self.hits += 1
+ self.total_hits += 1
+ else:
+ self.write(oid, size)
+
+ def inval(self, oid):
+ obj = self.oid2object.get(oid)
+ if obj is not None:
+ self.invals += 1
+ self.total_invals += 1
+ self._evict_without_bumping_evict_stats(obj)
if __name__ == "__main__":
sys.exit(main())
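
A minimal standalone sketch of the worth bookkeeping described in the
ThorSimulation comments, for anyone who wants to poke at the scheme outside
the simulator. It is not part of the checkin, and the names (ToyThorCache,
initial_worth) are invented for the example: the circular list is replaced by
a plain list with a moving index, and eviction falls back to a linear min()
scan instead of the per-worth buckets (worthsets) the real class keeps.

    # Toy model of the Thor-style worth bookkeeping; names are invented for
    # this example and do not appear in simul.py.

    def initial_worth(size):
        # Roughly log2(size): larger objects are more expensive to refetch,
        # so they start out worth more.
        worth, target = 0, 1
        while size > target:
            worth += 1
            target <<= 1
        return worth

    class ToyThorCache:
        def __init__(self, maxsize):
            self.maxsize = maxsize
            self.currentsize = 0
            self.worth = {}     # oid -> worth byte (0..255)
            self.sizes = {}     # oid -> object size
            self.ring = []      # stand-in for the circular list
            self.pointer = 0    # travelling pointer into self.ring

        def _tick(self):
            # Decay the worth of the object under the pointer (shift right
            # by one, rounding up), then advance the pointer.
            if not self.ring:
                return
            oid = self.ring[self.pointer % len(self.ring)]
            self.worth[oid] = (self.worth[oid] + 1) >> 1
            self.pointer += 1

        def load(self, oid, size):
            self._tick()
            if oid in self.worth:
                # Hit: or in the high bit to record the recent access.
                self.worth[oid] |= 0x80
                return True
            # Miss: evict lowest-worth objects until the new one fits.
            # (The real simulator marches over its per-worth buckets instead
            # of doing this linear min() scan.)
            while self.ring and self.currentsize + size > self.maxsize:
                victim = min(self.ring, key=self.worth.get)
                self.ring.remove(victim)
                self.currentsize -= self.sizes.pop(victim)
                del self.worth[victim]
            self.ring.append(oid)
            self.sizes[oid] = size
            self.currentsize += size
            self.worth[oid] = initial_worth(size)
            return False

    if __name__ == "__main__":
        cache = ToyThorCache(maxsize=100)
        print(cache.load("a", 40))   # False: miss, inserted with worth ~ log2(40)
        print(cache.load("a", 40))   # True: hit, 0x80 or'ed into "a"'s worth
        print(cache.load("b", 70))   # False: miss; "a" may be evicted to make room

The 256 per-worth buckets in the real simulator make eviction a march over at
most 256 sets rather than a scan of every cached object, which matters at the
cache sizes discussed above.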