[Zodb-checkins] CVS: ZODB3/ZEO - simul.py:1.5

Guido van Rossum guido@python.org
Fri, 6 Sep 2002 13:16:53 -0400


Update of /cvs-repository/ZODB3/ZEO
In directory cvs.zope.org:/tmp/cvs-serv11563

Modified Files:
	simul.py 
Log Message:
- Fix a bug introduced by the previous checkin which would count *all*
  invalidations instead of only the ones that hit the cache.

- Support reading gzipped files.  This is triggered automatically when
  the filename ends in .gz.  Also support reading from stdin (pass '-'
  as filename).

- Widen the FLIPS column by one to match the EVICTS column width of
  the LRU simulation.

- Support simulating alternative caching strategies.  For now, only
  LRU is supported, triggered by the -l option.

- Change the Simulation base class to store the cachelimit argument.
  (The constructor signature must be the same across all subclasses.)


=== ZODB3/ZEO/simul.py 1.4 => 1.5 ===
--- ZODB3/ZEO/simul.py:1.4	Fri Sep  6 12:09:10 2002
+++ ZODB3/ZEO/simul.py	Fri Sep  6 13:16:53 2002
@@ -14,8 +14,9 @@
 ##############################################################################
 """Cache simulation.
 
-Usage: simul.py [-s size] tracefile
+Usage: simul.py [-l] [-s size] tracefile
 
+-l: simulate idealized LRU cache (default: simulate ZEO 2.0 cache)
 -s size: cache size in MB (default 20 MB)
 """
 
@@ -31,12 +32,15 @@
 def main():
     # Parse options
     cachelimit = 20*1000*1000
+    simclass = ZEOCacheSimulation
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "s:")
+        opts, args = getopt.getopt(sys.argv[1:], "ls:")
     except getopt.error, msg:
         usage(msg)
         return 2
     for o, a in opts:
+        if o == '-l':
+            simclass = LRUCacheSimulation
         if o == '-s':
             cachelimit = int(float(a) * 1e6)
     if len(args) != 1:
@@ -45,14 +49,31 @@
     filename = args[0]
 
     # Open file
-    try:
-        f = open(filename, "rb")
-    except IOError, msg:
-        print "can't open %s: %s" % (filename, msg)
-        return 1
+    if filename.endswith(".gz"):
+        # Open gzipped file
+        try:
+            import gzip
+        except ImportError:
+            print >>sys.stderr,  "can't read gzipped files (no module gzip)"
+            return 1
+        try:
+            f = gzip.open(filename, "rb")
+        except IOError, msg:
+            print >>sys.stderr,  "can't open %s: %s" % (filename, msg)
+            return 1
+    elif filename == "-":
+        # Read from stdin
+        f = sys.stdin
+    else:
+        # Open regular file
+        try:
+            f = open(filename, "rb")
+        except IOError, msg:
+            print >>sys.stderr,  "can't open %s: %s" % (filename, msg)
+            return 1
 
     # Create simulation object
-    sim = ZEOCacheSimulation(cachelimit)
+    sim = simclass(cachelimit)
 
     # Print output header
     sim.printheader()
@@ -90,7 +111,8 @@
 
     """
 
-    def __init__(self):
+    def __init__(self, cachelimit):
+        self.cachelimit = cachelimit
         # Initialize global statistics
         self.epoch = None
         self.total_loads = 0
@@ -132,8 +154,6 @@
                 self.load(oid, dlen)
         elif code & 0x70 == 0x10:
             # Invalidate
-            self.invals += 1
-            self.total_invals += 1
             self.inval(oid)
         elif code == 0x00:
             # Restart
@@ -169,9 +189,7 @@
 
     def __init__(self, cachelimit):
         # Initialize base class
-        Simulation.__init__(self)
-        # Store simulation parameters
-        self.filelimit = cachelimit / 2
+        Simulation.__init__(self, cachelimit)
         # Initialize additional global statistics
         self.total_flips = 0
 
@@ -198,7 +216,7 @@
         # is header overhead per cache record; 127 is to compensate
         # for rounding up to multiples of 256.)
         size = size + 31 - 127
-        if self.filesize[self.current] + size > self.filelimit:
+        if self.filesize[self.current] + size > self.cachelimit / 2:
             # Cache flip
             self.flips += 1
             self.total_flips += 1
@@ -210,11 +228,15 @@
 
     def inval(self, oid):
         if self.fileoids[self.current].get(oid):
+            self.invals += 1
+            self.total_invals += 1
             del self.fileoids[self.current][oid]
         elif self.fileoids[1 - self.current].get(oid):
+            self.invals += 1
+            self.total_invals += 1
             del self.fileoids[1 - self.current][oid]
 
-    format = "%12s %9s %8s %8s %6s %6s %5s %6s"
+    format = "%12s %9s %8s %8s %6s %6s %6s %6s"
 
     def printheader(self):
         print self.format % (
@@ -230,8 +252,7 @@
                 hitrate(self.loads, self.hits))
 
     def finish(self):
-        if self.loads:
-            self.report()
+        self.report()
         if self.total_loads:
             print (self.format + " OVERALL") % (
                 time.ctime(self.epoch)[4:-8],
@@ -242,6 +263,135 @@
                 self.total_writes,
                 self.total_flips,
                 hitrate(self.total_loads, self.total_hits))
+
+class LRUCacheSimulation(Simulation):
+
+    def __init__(self, cachelimit):
+        # Initialize base class
+        Simulation.__init__(self, cachelimit)
+        # Initialize additional global statistics
+        self.total_evicts = 0
+
+    def restart(self):
+        # Reset base class
+        Simulation.restart(self)
+        # Reset additional per-run statistics
+        self.evicts = 0
+        # Set up simulation
+        self.cache = {}
+        self.size = 0
+        self.head = Node(None, None)
+        self.head.linkbefore(self.head)
+
+    def load(self, oid, size):
+        node = self.cache.get(oid)
+        if node is not None:
+            self.hits += 1
+            self.total_hits += 1
+            node.linkbefore(self.head)
+        else:
+            self.write(oid, size)
+
+    def write(self, oid, size):
+        node = self.cache.get(oid)
+        if node is not None:
+            node.unlink()
+            assert self.head.next is not None
+            self.size -= node.size
+        node = Node(oid, size)
+        self.cache[oid] = node
+        node.linkbefore(self.head)
+        self.size += size
+        # Evict LRU nodes
+        while self.size > self.cachelimit:
+            self.evicts += 1
+            self.total_evicts += 1
+            node = self.head.next
+            assert node is not self.head
+            node.unlink()
+            assert self.head.next is not None
+            del self.cache[node.oid]
+            self.size -= node.size
+
+    def inval(self, oid):
+        node = self.cache.get(oid)
+        if node is not None:
+            assert node.oid == oid
+            self.invals += 1
+            self.total_invals += 1
+            node.unlink()
+            assert self.head.next is not None
+            del self.cache[oid]
+            self.size -= node.size
+            assert self.size >= 0
+
+    format = "%12s %9s %8s %8s %6s %6s %6s %6s"
+
+    def printheader(self):
+        print self.format % (
+            "START TIME", "DURATION", "LOADS", "HITS",
+            "INVALS", "WRITES", "EVICTS", "HITRATE")
+
+    def report(self):
+        if self.loads:
+            print self.format % (
+                time.ctime(self.ts0)[4:-8],
+                duration(self.ts1 - self.ts0),
+                self.loads, self.hits, self.invals, self.writes, self.evicts,
+                hitrate(self.loads, self.hits))
+
+    def finish(self):
+        self.report()
+        if self.total_loads:
+            print (self.format + " OVERALL") % (
+                time.ctime(self.epoch)[4:-8],
+                duration(self.ts1 - self.epoch),
+                self.total_loads,
+                self.total_hits,
+                self.total_invals,
+                self.total_writes,
+                self.total_evicts,
+                hitrate(self.total_loads, self.total_hits))
+
+class Node:
+
+    """Node in a doubly-linked list, storing oid and size as payload.
+
+    A node can be linked or unlinked; in the latter case, next and
+    prev are None.  Initially a node is unlinked.
+
+    """
+    # Make it a new-style class in Python 2.2 and up; no effect in 2.1
+    __metaclass__ = type
+    __slots__ = ['prev', 'next', 'oid', 'size']
+
+    def __init__(self, oid, size):
+        self.oid = oid
+        self.size = size
+        self.prev = self.next = None
+
+    def unlink(self):
+        prev = self.prev
+        next = self.next
+        if prev is not None:
+            assert next is not None
+            assert prev.next is self
+            assert next.prev is self
+            prev.next = next
+            next.prev = prev
+            self.prev = self.next = None
+        else:
+            assert next is None
+
+    def linkbefore(self, next):
+        self.unlink()
+        prev = next.prev
+        if prev is None:
+            assert next.next is None
+            prev = next
+        self.prev = prev
+        self.next = next
+        prev.next = next.prev = self
 
 def hitrate(loads, hits):
     return "%5.1f%%" % (100.0 * hits / max(1, loads))