[Zodb-checkins] SVN: ZODB/trunk/src/ZEO/cache.py Merged rev 28671 from 3.3 branch.

Tue Dec 21 14:27:41 EST 2004

Log message for revision 28672:
  Merged rev 28671 from 3.3 branch.
  
  More cleanup.  Notable:
  
  - Moved the logger object to module scope.  There's virtually
    no logging of any kind done here, and I suspect that's partly
    because the logging object was clumsy to get at.
  
  - Figured out what the code actually does when the cache size
    asked for doesn't match the actual size of a pre-existing
    cache file.  It apparently wanted to ignore the requested
    size then.  It probably shouldn't, but leaving that for later.
    Still, it left its own idea of the file size out of synch
    with the actual file size, and that was a bug.  For now,
    logged a warning when this happens, and changed the code to
    respect the actual file size.
  

Changed:
  U   ZODB/trunk/src/ZEO/cache.py

-=-
Modified: ZODB/trunk/src/ZEO/cache.py
===================================================================

--- ZODB/trunk/src/ZEO/cache.py	2004-12-21 19:26:08 UTC (rev 28671)
+++ ZODB/trunk/src/ZEO/cache.py	2004-12-21 19:27:39 UTC (rev 28672)
@@ -31,6 +31,8 @@
 
 from ZODB.utils import z64, u64
 
+logger = logging.getLogger("zeo.cache")
+
 ##
 # A disk-based cache for ZEO clients.
 # <p>
@@ -73,7 +75,6 @@
     def __init__(self, path=None, size=None, trace=False):
         self.path = path
         self.size = size
-        self.log = logging.getLogger("zeo.cache")
 
         if trace and path:
             self._setup_trace()
@@ -109,8 +110,13 @@
     def open(self):
         self.fc.scan(self.install)
 
+    ##
+    # Callback for FileCache.scan(), when a pre-existing file cache is
+    # used.  For each object in the file, `install()` is invoked.  `f`
+    # is the file object, positioned at the start of the serialized Object.
+    # `ent` is an Entry giving the object's key ((oid, start_tid) pair).
     def install(self, f, ent):
-        # Called by cache storage layer to insert object
+        # Called by cache storage layer to insert object.
         o = Object.fromFile(f, ent.key, header_only=True)
         if o is None:
             return
@@ -372,8 +378,8 @@
             self._trace(0x00)
         except IOError, msg:
             self.tracefile = None
-            self.log.warning("Could not write to trace file %s: %s",
-                             tfn, msg)
+            logger.warning("Could not write to trace file %s: %s",
+                           tfn, msg)
 
     def _notrace(self, *arg, **kwargs):
         pass
@@ -417,6 +423,9 @@
 # <p>
 # The serialized format does not include the key, because it is stored
 # in the header used by the cache's storage format.
+# <p>
+# Instances of Object are generally short-lived -- they're really a way to
+# package data on the way to or from the disk file.
 
 class Object(object):
     __slots__ = (# pair (object id, txn id) -- something usable as a dict key;
@@ -469,7 +478,7 @@
         self.data = data
         self.start_tid = start_tid
         self.end_tid = end_tid
-        # The size of a the serialized object on disk, including the
+        # The size of the serialized object on disk, including the
         # 14-byte header, the lengths of data and version, and a
         # copy of the 8-byte oid.
         if data is not None:
@@ -483,7 +492,7 @@
                            len(self.data))
 
     def serialize(self, f):
-        # Write standard form of Object to file f.
+        # Write standard form of Object to file f, at its current offset.
         f.writelines([self.get_header(),
                       self.version,
                       self.data,
@@ -496,7 +505,7 @@
 
     # fromFile is a class constructor, unserializing an Object from the
     # current position in file f.  Exclusive access to f for the duration
-    # is assumed.  The key is a (start_tid, oid) pair, and the oid must
+    # is assumed.  The key is a (oid, start_tid) pair, and the oid must
     # match the serialized oid.  If header_only is true, .data is left
     # None in the Object returned.
     def fromFile(cls, f, key, header_only=False):
@@ -610,7 +619,10 @@
 class FileCache(object):
 
     def __init__(self, maxsize, fpath, parent, reuse=True):
-        # - `maxsize`:  total size of the cache file, in bytes
+        # - `maxsize`:  total size of the cache file, in bytes; this is
+        #   ignored if reuse is true and fpath names an existing file;
+        #   perhaps we should attempt to change the cache size in that
+        #   case
         # - `fpath`:  filepath for the cache file, or None; see `reuse`
         # - `parent`:  the ClientCache this FileCache is part of
         # - `reuse`:  If true, and fpath is not None, and fpath names a
@@ -625,6 +637,13 @@
         # stored near the start of the file.
         self.tid = None
 
+        # There's one Entry instance, kept in memory, for each currently
+        # allocated block in the file, and there's one allocated block in the
+        # file per serialized Object.  filemap retrieves the Entry given the
+        # starting offset of a block, and key2entry retrieves the Entry given
+        # an object revision's key (an (oid, start_tid) pair).  From an
+        # Entry, we can get the Object's key and file offset.
+
         # Map offset in file to pair (data record size, Entry).
         # Entry is None iff the block starting at offset is free.
         # filemap always contains a complete account of what's in the
@@ -632,15 +651,15 @@
         # of the relevant invariants.  An offset is at the start of a
         # block iff it's a key in filemap.  The data record size is
         # stored in the file too, so we could just seek to the offset
-        # and read it up; keeping it in memory too is an optimization.
+        # and read it up; keeping it in memory is an optimization.
         self.filemap = {}
 
-        # Map key to Entry.  There's one Entry for each object in the
-        # cache file.  After
+        # Map key to Entry.  After
         #     obj = key2entry[key]
         # then
         #     obj.key == key
-        # is true.
+        # is true.  An object is currently stored on disk iff its key is in
+        # key2entry.
         self.key2entry = {}
 
         # Always the offset into the file of the start of a block.
@@ -648,15 +667,17 @@
         # currentofs.
         self.currentofs = ZEC3_HEADER_SIZE
 
-        # self.new is false iff we're reusing an existing file.
         # self.f is the open file object.
         # When we're not reusing an existing file, self.f is left None
         # here -- the scan() method must be called then to open the file
         # (and it sets self.f).
 
         self.fpath = fpath
-        if not reuse or not fpath or not os.path.exists(fpath):
-            self.new = True
+        if reuse and fpath and os.path.exists(fpath):
+            # Reuse an existing file.  scan() will open & read it.
+            assert fpath
+            self.f = None
+        else:
             if fpath:
                 self.f = open(fpath, 'wb+')
             else:
@@ -675,11 +696,6 @@
             self.sync()
             self.filemap[ZEC3_HEADER_SIZE] = (self.maxsize - ZEC3_HEADER_SIZE,
                                               None)
-        else:
-            # Reuse an existing file.  scan() will open & read it.
-            self.new = False
-            assert fpath
-            self.f = None
 
         # Statistics:  _n_adds, _n_added_bytes,
         #              _n_evicts, _n_evicted_bytes,
@@ -687,13 +703,18 @@
         self.clearStats()
 
     ##
-    # Scan the current contents of the cache file, calling install
+    # Scan the current contents of the cache file, calling `install`
     # for each object found in the cache.  This method should only
     # be called once to initialize the cache from disk.
     def scan(self, install):
-        if self.new:
+        if self.f is not None:
             return
         fsize = os.path.getsize(self.fpath)
+        if fsize != self.maxsize:
+            logger.warning("existing cache file %s has size %d; "
+                           "requested size %d ignored", self.fpath,
+                           fsize, self.maxsize)
+            self.maxsize = fsize
         self.f = open(self.fpath, 'rb+')
         _magic = self.f.read(4)
         if _magic != magic:
@@ -701,8 +722,11 @@
         self.tid = self.f.read(8)
         if len(self.tid) != 8:
             raise ValueError("cache file too small -- no tid at start")
-        # Remember the largest free block.  That seems a
-        # decent place to start currentofs.
+
+        # Populate .filemap and .key2entry to reflect what's currently in the
+        # file, and tell our parent about it too (via the `install` callback).
+        # Remember the location of the largest free block  That seems a decent
+        # place to start currentofs.
         max_free_size = max_free_offset = 0
         ofs = ZEC3_HEADER_SIZE
         while ofs < fsize: