[Zodb-checkins] SVN: ZODB/trunk/src/ZEO/ Merged cache fixes from
3.8 branch:
Jim Fulton
jim at zope.com
Thu May 15 10:20:32 EDT 2008
Log message for revision 86773:
Merged cache fixes from 3.8 branch:
- The cache used an excessive amount of memory, causing applications
with large caches to exhaust available memory.
- Fixed a number of bugs in the handling of persistent ZEO caches:
- Cache records are written in several steps. If a process exits
after writing begins and before it finishes, the cache will be
corrupt on restart. The way records are written was changed to
make cache record updates atomic (sketched below).
- There was no lock file to prevent opening a cache multiple times
at once, which would lead to corruption. Persistent caches now
use lock files, in the same way that file storages do.
- A bug in the cache-opening logic led to cache failure in the
unlikely event that a cache has no free blocks.
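
To illustrate the atomic-update approach, here is a rough sketch of the
write ordering used by the new store() code (Python 2, to match the diff;
the helper name and the single-record simplification are invented for
illustration, this is not the committed code). The record is first covered
by a free-block header, the body is written behind it, and only a final
small write flips the block to "allocated", so a crash at any earlier point
leaves a well-formed free block rather than a torn record:

    from struct import pack

    def _write_record_atomically(f, ofs, oid, start_tid, end_tid, data):
        # Hypothetical helper mirroring the new ClientCache.store().
        # Record layout: 'a', 4-byte size, oid, start_tid, end_tid,
        # 4-byte data length, data, trailing oid -- 41 bytes of overhead.
        size = 41 + len(data)
        f.seek(ofs)
        # 1. Cover the first 13 bytes with a free-block header ('f',
        #    8-byte size, 4 filler bytes). A crash after this point
        #    leaves a valid free block, not a partial record.
        f.write('f' + pack(">Q", size) + 'xxxx')
        # 2. Write the rest of the record body (end_tid is z64, i.e.
        #    8 NUL bytes, for current data in the real code).
        f.write(pack(">8s8sI", start_tid, end_tid, len(data)))
        f.write(data)
        f.write(oid)
        f.flush()
        # 3. One last 13-byte write flips the block from free to
        #    allocated; only now does the record become visible.
        f.seek(ofs)
        f.write('a' + pack(">I8s", size, oid))
        f.flush()

The lock-file fix acquires ZODB.lock_file.LockFile(path + '.lock') in
ClientCache.__init__ (see the diff below), the same mechanism file
storages use.
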
Changed:
U ZODB/trunk/src/ZEO/cache.py
D ZODB/trunk/src/ZEO/tests/filecache.txt
U ZODB/trunk/src/ZEO/tests/test_cache.py
-=-
Modified: ZODB/trunk/src/ZEO/cache.py
===================================================================
--- ZODB/trunk/src/ZEO/cache.py 2008-05-15 14:20:25 UTC (rev 86772)
+++ ZODB/trunk/src/ZEO/cache.py 2008-05-15 14:20:31 UTC (rev 86773)
@@ -22,568 +22,64 @@
FileCache.
"""
+from struct import pack, unpack
+
import bisect
+import BTrees.LLBTree
+import BTrees.LOBTree
import logging
import os
-import struct
import tempfile
import time
-from ZODB.utils import z64, u64
+import ZODB.fsIndex
+import ZODB.lock_file
+from ZODB.utils import p64, u64, z64
logger = logging.getLogger("ZEO.cache")
-max32 = (1 << 32) - 1
-
-##
# A disk-based cache for ZEO clients.
-# <p>
+#
# This class provides an interface to a persistent, disk-based cache
# used by ZEO clients to store copies of database records from the
# server.
-# <p>
+#
# The details of the constructor are unspecified at this point.
-# <p>
+#
# Each entry in the cache is valid for a particular range of transaction
# ids. The lower bound is the transaction that wrote the data. The
# upper bound is the next transaction that wrote a revision of the
# object. If the data is current, the upper bound is stored as None;
# the data is considered current until an invalidate() call is made.
-# <p>
+#
# It is an error to call store() twice with the same object without an
# intervening invalidate() to set the upper bound on the first cache
-# entry. <em>Perhaps it will be necessary to have a call the removes
+# entry. Perhaps it will be necessary to have a call that removes
# something from the cache outright, without keeping a non-current
-# entry.</em>
-# <h3>Cache verification</h3>
-# <p>
+# entry.
+
+# Cache verification
+#
# When the client is connected to the server, it receives
# invalidations every time an object is modified. When the client is
# disconnected then reconnects, it must perform cache verification to make
# sure its cached data is synchronized with the storage's current state.
-# <p>
+#
# quick verification
# full verification
-# <p>
+#
-class ClientCache(object):
- """A simple in-memory cache."""
- ##
- # Do we put the constructor here?
- # @param path path of persistent snapshot of cache state (a file path)
- # @param size size of cache file, in bytes
-
- # The default size of 200MB makes a lot more sense than the traditional
- # default of 20MB. The default here is misleading, though, since
- # ClientStorage is the only user of ClientCache, and it always passes an
- # explicit size of its own choosing.
- def __init__(self, path=None, size=200*1024**2):
- self.path = path
- self.size = size
-
- # The cache stores objects in a dict mapping (oid, tid) pairs
- # to Object() records (see below). The tid is the transaction
- # id that wrote the object. An object record includes data,
- # serialno, and end tid. It has auxillary data structures to
- # compute the appropriate tid, given the oid and a transaction id
- # representing an arbitrary point in history.
- #
- # The serialized form of the cache just stores the Object()
- # records. The in-memory form can be reconstructed from these
- # records.
-
- # Maps oid to current tid. Used to compute key for objects.
- self.current = {}
-
- # Maps oid to list of (start_tid, end_tid) pairs in sorted order.
- # Used to find matching key for load of non-current data.
- self.noncurrent = {}
-
- # A FileCache instance does all the low-level work of storing
- # and retrieving objects to/from the cache file.
- self.fc = FileCache(size, self.path, self)
-
- self._setup_trace(self.path)
-
- def open(self):
- self.fc.scan(self.install)
-
- ##
- # Callback for FileCache.scan(), when a pre-existing file cache is
- # used. For each object in the file, `install()` is invoked. `f`
- # is the file object, positioned at the start of the serialized Object.
- # `ent` is an Entry giving the object's key ((oid, start_tid) pair).
- def install(self, f, ent):
- # Called by cache storage layer to insert object.
- o = Object.fromFile(f, ent.key, skip_data=True)
- if o is None:
- return
- oid = o.key[0]
- if o.end_tid is None:
- self.current[oid] = o.start_tid
- else:
- assert o.start_tid < o.end_tid
- this_span = o.start_tid, o.end_tid
- span_list = self.noncurrent.get(oid)
- if span_list:
- bisect.insort_left(span_list, this_span)
- else:
- self.noncurrent[oid] = [this_span]
-
- def close(self):
- self.fc.close()
- if self._tracefile:
- sync(self._tracefile)
- self._tracefile.close()
- self._tracefile = None
-
- ##
- # Set the last transaction seen by the cache.
- # @param tid a transaction id
- # @exception ValueError attempt to set a new tid less than the current tid
-
- def setLastTid(self, tid):
- self.fc.settid(tid)
-
- ##
- # Return the last transaction seen by the cache.
- # @return a transaction id
- # @defreturn string, or None if no transaction is yet known
-
- def getLastTid(self):
- if self.fc.tid == z64:
- return None
- else:
- return self.fc.tid
-
- ##
- # Return the current data record for oid.
- # @param oid object id
- # @return (data record, serial number), or None if the object is not
- # in the cache
- # @defreturn 2-tuple: (string, string)
-
- def load(self, oid):
- tid = None
- if tid is None:
- tid = self.current.get(oid)
- if tid is None:
- self._trace(0x20, oid)
- return None
- o = self.fc.access((oid, tid))
- if o is None:
- self._trace(0x20, oid)
- return None
- self._trace(0x22, oid, o.start_tid, o.end_tid, len(o.data))
- return o.data, tid
-
- ##
- # Return a non-current revision of oid that was current before tid.
- # @param oid object id
- # @param tid id of transaction that wrote next revision of oid
- # @return data record, serial number, start tid, and end tid
- # @defreturn 4-tuple: (string, string, string, string)
-
- def loadBefore(self, oid, tid):
- L = self.noncurrent.get(oid)
- if L is None:
- self._trace(0x24, oid, "", tid)
- return None
- # A pair with None as the second element is less than any pair with
- # the same first tid. Dubious: this relies on that None is less
- # than any comparable non-None object in recent Pythons.
- i = bisect.bisect_left(L, (tid, None))
- # Now L[i-1] < (tid, None) < L[i], and the start_tid for everything in
- # L[:i] is < tid, and the start_tid for everything in L[i:] is >= tid.
- # Therefore the largest start_tid < tid must be at L[i-1]. If i is 0,
- # there is no start_tid < tid: we don't have any data old enougn.
- if i == 0:
- self._trace(0x24, oid, "", tid)
- return
- lo, hi = L[i-1]
- assert lo < tid
- if tid > hi: # we don't have any data in the right range
- self._trace(0x24, oid, "", tid)
- return None
- o = self.fc.access((oid, lo))
- self._trace(0x26, oid, "", tid)
- return o.data, o.start_tid, o.end_tid
-
-
- ##
- # Store a new data record in the cache.
- # @param oid object id
- # @param start_tid the id of the transaction that wrote this revision
- # @param end_tid the id of the transaction that created the next
- # revision of oid. If end_tid is None, the data is
- # current.
- # @param data the actual data
-
- def store(self, oid, start_tid, end_tid, data):
- # It's hard for the client to avoid storing the same object
- # more than once.
- if (oid, start_tid) in self.fc:
- return
- o = Object((oid, start_tid), data, start_tid, end_tid)
- if end_tid is None:
- _cur_start = self.current.get(oid)
- if _cur_start:
- if _cur_start != start_tid:
- raise ValueError(
- "already have current data for oid")
- else:
- return
- if not self.fc.add(o):
- return # too large
- self.current[oid] = start_tid
- self._trace(0x52, oid, start_tid, dlen=len(data))
- else:
- L = self.noncurrent.setdefault(oid, [])
- p = start_tid, end_tid
- if p in L:
- return # duplicate store
- if not self.fc.add(o):
- return # too large
- bisect.insort_left(L, p)
- self._trace(0x54, oid, start_tid, end_tid, dlen=len(data))
-
- ##
- # Remove all knowledge of noncurrent revisions of oid, both in
- # self.noncurrent and in our FileCache. `tid` is used
- # only for trace records.
- def _remove_noncurrent_revisions(self, oid, tid):
- noncurrent_list = self.noncurrent.get(oid)
- if noncurrent_list:
- # Note: must iterate over a copy of noncurrent_list. The
- # FileCache remove() calls our _evicted() method, and that
- # mutates the list.
- for old_tid, dummy in noncurrent_list[:]:
- # 0x1E = invalidate (hit, discarding current or non-current)
- self._trace(0x1E, oid, tid)
- self.fc.remove((oid, old_tid))
- # fc.remove() calling back to _evicted() should have removed
- # the list from noncurrent when the last non-current revision
- # was removed.
- assert oid not in self.noncurrent
-
- ##
- # If `tid` is None, forget all knowledge of `oid`. (`tid` can be
- # None only for invalidations generated by startup cache
- # verification.) If `tid` isn't None, and we had current data for
- # `oid`, stop believing we have current data, and mark the data we
- # had as being valid only up to `tid`. In all other cases, do
- # nothing.
- # @param oid object id
- # @param tid the id of the transaction that wrote a new revision of oid,
- # or None to forget all cached info about oid (current
- # revision, and non-current revisions)
- def invalidate(self, oid, tid):
- if tid > self.fc.tid and tid is not None:
- self.fc.settid(tid)
-
- remove_all_knowledge_of_oid = tid is None
-
- if remove_all_knowledge_of_oid:
- self._remove_noncurrent_revisions(oid, tid)
-
- # Only current data remains to be handled.
-
- cur_tid = self.current.get(oid)
- if not cur_tid:
- # 0x10 == invalidate (miss)
- self._trace(0x10, oid, tid)
- return
-
- # We had current data for oid, but no longer.
-
- if remove_all_knowledge_of_oid:
- # 0x1E = invalidate (hit, discarding current or non-current)
- self._trace(0x1E, oid, tid)
- self.fc.remove((oid, cur_tid))
- assert cur_tid not in self.current # .remove() got rid of it
- return
-
- # Add the data we have to the list of non-current data for oid.
- assert tid is not None and cur_tid <= tid
- # 0x1C = invalidate (hit, saving non-current)
- self._trace(0x1C, oid, tid)
- del self.current[oid] # because we no longer have current data
-
- # Update the end_tid half of oid's validity range on disk.
- # TODO: Want to fetch object without marking it as accessed.
- o = self.fc.access((oid, cur_tid))
- assert o is not None
- assert o.end_tid is None # i.e., o was current
- if o is None:
- # TODO: Since we asserted o is not None above, this block
- # should be removed; waiting on time to prove it can't happen.
- return
- o.end_tid = tid
- self.fc.update(o) # record the new end_tid on disk
- # Add to oid's list of non-current data.
- L = self.noncurrent.setdefault(oid, [])
- bisect.insort_left(L, (cur_tid, tid))
-
- ##
- # Return the number of object revisions in the cache.
- #
- # Or maybe better to just return len(self.cache)? Needs clearer use case.
- def __len__(self):
- n = len(self.current)
- if self.noncurrent:
- n += sum(map(len, self.noncurrent))
- return n
-
- ##
- # Generates (oid, serial) pairs for all objects in the
- # cache. This generator is used by cache verification.
- def contents(self):
- # May need to materialize list instead of iterating;
- # depends on whether the caller may change the cache.
- for o in self.fc:
- oid, tid = o.key
- yield oid, tid
-
- def dump(self):
- from ZODB.utils import oid_repr
- print "cache size", len(self)
- L = list(self.contents())
- L.sort()
- for oid, tid in L:
- print oid_repr(oid), oid_repr(tid)
- print "dll contents"
- L = list(self.fc)
- L.sort(lambda x, y: cmp(x.key, y.key))
- for x in L:
- end_tid = x.end_tid or z64
- print oid_repr(x.key[0]), oid_repr(x.key[1]), oid_repr(end_tid)
- print
-
- def _evicted(self, o):
- # Called by the FileCache to signal that Object o has been evicted.
- oid, tid = o.key
- if o.end_tid is None:
- del self.current[oid]
- else:
- # Although we use bisect to keep the list sorted,
- # we never expect the list to be very long. So the
- # brute force approach should normally be fine.
- L = self.noncurrent[oid]
- element = (o.start_tid, o.end_tid)
- if len(L) == 1:
- # We don't want to leave an empty list in the dict: if
- # the oid is never referenced again, it would consume RAM
- # forever more for no purpose.
- assert L[0] == element
- del self.noncurrent[oid]
- else:
- L.remove(element)
-
- # If `path` isn't None (== we're using a persistent cache file), and
- # envar ZEO_CACHE_TRACE is set to a non-empty value, try to open
- # path+'.trace' as a trace file, and store the file object in
- # self._tracefile. If not, or we can't write to the trace file, disable
- # tracing by setting self._trace to a dummy function, and set
- # self._tracefile to None.
- def _setup_trace(self, path):
- self._tracefile = None
- if path and os.environ.get("ZEO_CACHE_TRACE"):
- tfn = path + ".trace"
- try:
- self._tracefile = open(tfn, "ab")
- self._trace(0x00)
- except IOError, msg:
- self._tracefile = None
- logger.warning("cannot write tracefile %r (%s)", tfn, msg)
- else:
- logger.info("opened tracefile %r", tfn)
-
- if self._tracefile is None:
- def notrace(*args, **kws):
- pass
- self._trace = notrace
-
- def _trace(self,
- code, oid="", tid=z64, end_tid=z64, dlen=0,
- # The next two are just speed hacks.
- time_time=time.time, struct_pack=struct.pack):
- # The code argument is two hex digits; bits 0 and 7 must be zero.
- # The first hex digit shows the operation, the second the outcome.
- # This method has been carefully tuned to be as fast as possible.
- # Note: when tracing is disabled, this method is hidden by a dummy.
- encoded = (dlen + 255) & 0x7fffff00 | code
- if tid is None:
- tid = z64
- if end_tid is None:
- end_tid = z64
- try:
- self._tracefile.write(
- struct_pack(">iiH8s8s",
- time_time(),
- encoded,
- len(oid),
- tid, end_tid) + oid)
- except:
- print `tid`, `end_tid`
- raise
-
-##
-# An Object stores the cached data for a single object.
-# <p>
-# The cached data includes the actual object data, the key, and two
-# data fields that describe the validity period of the
-# object. The key contains the oid and a redundant start_tid. The
-# actual size of an object is variable, depending on the size of the
-# data.
-# <p>
-# The serialized format does not include the key, because it is stored
-# in the header used by the cache file's storage format.
-# <p>
-# Instances of Object are generally short-lived -- they're really a way to
-# package data on the way to or from the disk file.
-
-class Object(object):
- __slots__ = (# pair (object id, txn id) -- something usable as a dict key;
- # the second part of the pair is equal to start_tid
- "key",
-
- # string, tid of txn that wrote the data
- "start_tid",
-
- # string, tid of txn that wrote next revision, or None
- # if the data is current; if not None, end_tid is strictly
- # greater than start_tid
- "end_tid",
-
- # string, the actual data record for the object
- "data",
-
- # total size of serialized object; this includes the
- # data and all overhead (header) bytes.
- "size",
- )
-
- # A serialized Object on disk looks like:
- #
- # offset # bytes value
- # ------ ------- -----
- # 0 8 end_tid; string
- # 8 4 len(data); 4-byte signed int
- # 12 len(data) the object pickle; string
- # 12+len(data) 8 oid; string
-
- # The serialization format uses an end tid of "\0"*8 (z64), the least
- # 8-byte string, to represent None. It isn't possible for an end_tid
- # to be 0, because it must always be strictly greater than the start_tid.
-
- fmt = ">8si" # end_tid, len(self.data)
- FIXED_HEADER_SIZE = struct.calcsize(fmt)
- assert FIXED_HEADER_SIZE == 12
- TOTAL_FIXED_SIZE = FIXED_HEADER_SIZE + 8 # +8 for the oid at the end
-
- def __init__(self, key, data, start_tid, end_tid):
- self.key = key
- self.data = data
- self.start_tid = start_tid
- self.end_tid = end_tid
- # The size of the serialized object on disk, including the
- # 14-byte header, the length of data, and a
- # copy of the 8-byte oid.
- if data is not None:
- self.size = self.TOTAL_FIXED_SIZE + len(data)
-
- ##
- # Return the fixed-sized serialization header as a string: pack end_tid,
- # and the length of the .data members.
- def get_header(self):
- return struct.pack(self.fmt,
- self.end_tid or z64,
- len(self.data))
-
- ##
- # Write the serialized representation of self to file f, at its current
- # position.
- def serialize(self, f):
- f.writelines([self.get_header(),
- self.data,
- self.key[0]])
-
- ##
- # Write the fixed-size header for self, to file f at its current position.
- # The only real use for this is when the current revision of an object
- # in cache is invalidated. Then the end_tid field gets set to the tid
- # of the transaction that caused the invalidation.
- def serialize_header(self, f):
- f.write(self.get_header())
-
- ##
- # fromFile is a class constructor, unserializing an Object from the
- # current position in file f. Exclusive access to f for the duration
- # is assumed. The key is a (oid, start_tid) pair, and the oid must
- # match the serialized oid. If `skip_data` is true, .data is left
- # None in the Object returned, but all the other fields are populated.
- # Else (`skip_data` is false, the default), all fields including .data
- # are populated. .data can be big, so it's prudent to skip it when it
- # isn't needed.
- def fromFile(cls, f, key, skip_data=False):
- s = f.read(cls.FIXED_HEADER_SIZE)
- if not s:
- return None
- oid, start_tid = key
-
- end_tid, dlen = struct.unpack(cls.fmt, s)
- if end_tid == z64:
- end_tid = None
-
- if skip_data:
- data = None
- f.seek(dlen, 1)
- else:
- data = f.read(dlen)
- if dlen != len(data):
- raise ValueError("corrupted record, data")
-
- s = f.read(8)
- if s != oid:
- raise ValueError("corrupted record, oid")
-
- return cls((oid, start_tid), data, start_tid, end_tid)
-
- fromFile = classmethod(fromFile)
-
-
-# Entry just associates a key with a file offset. It's used by FileCache.
-class Entry(object):
- __slots__ = (# object key -- something usable as a dict key.
- 'key',
-
- # Offset from start of file to the object's data
- # record; this includes all overhead bytes (status
- # byte, size bytes, etc). The size of the data
- # record is stored in the file near the start of the
- # record, but for efficiency we also keep size in a
- # dict (filemap; see later).
- 'offset',
- )
-
- def __init__(self, key=None, offset=None):
- self.key = key
- self.offset = offset
-
-
-
-##
# FileCache stores a cache in a single on-disk file.
#
# On-disk cache structure.
#
# The file begins with a 12-byte header. The first four bytes are the
-# file's magic number - ZEC4 - indicating zeo cache version 4. The
+# file's magic number - ZEC3 - indicating zeo cache version 4. The
# next eight bytes are the last transaction id.
magic = "ZEC4"
-ZEC4_HEADER_SIZE = 12
+ZEC_HEADER_SIZE = 12
# After the header, the file contains a contiguous sequence of blocks. All
# blocks begin with a one-byte status indicator:
@@ -596,8 +92,8 @@
# Free. The block is free; the next 8 bytes are >Q format total
# block size.
#
-# '1', '2', '3', '4', '5', '6', '7', '8'
-# The block is free, and consists of 1-8 bytes total.
+# '1', '2', '3', '4'
+# The block is free, and consists of 1, 2, 3 or 4 bytes total.
#
# "Total" includes the status byte, and size bytes. There are no
# empty (size 0) blocks.
@@ -607,12 +103,12 @@
#
# 1 byte allocation status ('a').
# 4 bytes block size, >I format.
-# 16 bytes oid + tid, string.
-# size-OBJECT_HEADER_SIZE bytes, the serialization of an Object (see
-# class Object for details).
+# 8 byte oid
+# 8 byte start_tid
+# 8 byte end_tid
+# 4 byte data size
+# data
-OBJECT_HEADER_SIZE = 1 + 4 + 16
-
# The cache's currentofs goes around the file, circularly, forever.
# It's always the starting offset of some block.
#
@@ -621,77 +117,64 @@
# blocks needed to make enough room for the new object are evicted,
# starting at currentofs. Exception: if currentofs is close enough
# to the end of the file that the new object can't fit in one
-# contiguous chunk, currentofs is reset to ZEC4_HEADER_SIZE first.
+# contiguous chunk, currentofs is reset to ZEC_HEADER_SIZE first.
-# Do all possible to ensure that the bytes we wrote to file f are really on
-# disk.
-def sync(f):
- f.flush()
- if hasattr(os, 'fsync'):
- os.fsync(f.fileno())
-class FileCache(object):
+class ClientCache(object):
+ """A simple in-memory cache."""
- def __init__(self, maxsize, fpath, parent):
+ # The default size of 200MB makes a lot more sense than the traditional
+ # default of 20MB. The default here is misleading, though, since
+ # ClientStorage is the only user of ClientCache, and it always passes an
+ # explicit size of its own choosing.
+ def __init__(self, path=None, size=200*1024**2):
+
+ # - `path`: filepath for the cache file, or None (in which case
+ # a temp file will be created)
+ self.path = path
+
# - `maxsize`: total size of the cache file, in bytes; this is
# ignored if path names an existing file; perhaps we should attempt
# to change the cache size in that case
- # - `fpath`: filepath for the cache file, or None (in which case
- # a temp file will be created)
- # - `parent`: the ClientCache instance; its `_evicted()` method
- # is called whenever we need to evict an object to make room in
- # the file
- self.maxsize = maxsize
- self.parent = parent
+ self.maxsize = size
+ # The number of records in the cache.
+ self._len = 0
+
+ # {oid -> pos}
+ self.current = ZODB.fsIndex.fsIndex()
+
+ # {oid -> {tid->pos}}
+ # Note that caches in the wild seem to have very little non-current
+ # data, so this would seem to have little impact on memory consumption.
+ # I wonder if we even need to store non-current data in the cache.
+ self.noncurrent = BTrees.LOBTree.LOBTree()
+
# tid for the most recent transaction we know about. This is also
# stored near the start of the file.
self.tid = None
- # There's one Entry instance, kept in memory, for each currently
- # allocated block in the file, and there's one allocated block in the
- # file per serialized Object. filemap retrieves the Entry given the
- # starting offset of a block, and key2entry retrieves the Entry given
- # an object revision's key (an (oid, start_tid) pair). From an
- # Entry, we can get the Object's key and file offset.
-
- # Map offset in file to pair (data record size, Entry).
- # Entry is None iff the block starting at offset is free.
- # filemap always contains a complete account of what's in the
- # file -- study method _verify_filemap for executable checking
- # of the relevant invariants. An offset is at the start of a
- # block iff it's a key in filemap. The data record size is
- # stored in the file too, so we could just seek to the offset
- # and read it up; keeping it in memory is an optimization.
- self.filemap = {}
-
- # Map key to Entry. After
- # obj = key2entry[key]
- # then
- # obj.key == key
- # is true. An object is currently stored on disk iff its key is in
- # key2entry.
- self.key2entry = {}
-
# Always the offset into the file of the start of a block.
# New and relocated objects are always written starting at
# currentofs.
- self.currentofs = ZEC4_HEADER_SIZE
+ self.currentofs = ZEC_HEADER_SIZE
# self.f is the open file object.
# When we're not reusing an existing file, self.f is left None
# here -- the scan() method must be called then to open the file
# (and it sets self.f).
- self.fpath = fpath
- self.f = None
- if fpath and os.path.exists(fpath):
+ if path:
+ self._lock_file = ZODB.lock_file.LockFile(path + '.lock')
+
+ if path and os.path.exists(path):
# Reuse an existing file. scan() will open & read it.
- logger.info("reusing persistent cache file %r", fpath)
- elif self.maxsize >= 12:
- if fpath:
- self.f = open(fpath, 'wb+')
- logger.info("created persistent cache file %r", fpath)
+ self.f = None
+ logger.info("reusing persistent cache file %r", path)
+ else:
+ if path:
+ self.f = open(path, 'wb+')
+ logger.info("created persistent cache file %r", path)
else:
self.f = tempfile.TemporaryFile()
logger.info("created temporary cache file %r", self.f.name)
@@ -704,35 +187,42 @@
self.f.write(magic)
self.f.write(z64)
# and one free block.
- self.f.write('f' + struct.pack(">Q", self.maxsize -
- ZEC4_HEADER_SIZE))
- self.sync()
- self.filemap[ZEC4_HEADER_SIZE] = (self.maxsize - ZEC4_HEADER_SIZE,
- None)
+ self.f.write('f' + pack(">Q", self.maxsize - ZEC_HEADER_SIZE))
+ sync(self.f)
# Statistics: _n_adds, _n_added_bytes,
# _n_evicts, _n_evicted_bytes,
# _n_accesses
self.clearStats()
+ self._setup_trace(path)
+
+ # Backward compatibility. Client code used to have to use the fc
+ # attr to get to the file cache to get cache stats.
+ @property
+ def fc(self):
+ return self
+
##
# Scan the current contents of the cache file, calling `install`
# for each object found in the cache. This method should only
# be called once to initialize the cache from disk.
- def scan(self, install):
+ def open(self):
if self.f is not None: # we're not (re)using a pre-existing file
return
- fsize = os.path.getsize(self.fpath)
+ fsize = os.path.getsize(self.path)
if fsize != self.maxsize:
logger.warning("existing cache file %r has size %d; "
- "requested size %d ignored", self.fpath,
+ "requested size %d ignored", self.path,
fsize, self.maxsize)
self.maxsize = fsize
- self.f = open(self.fpath, 'rb+')
- _magic = self.f.read(4)
+ self.f = open(self.path, 'rb+')
+ read = self.f.read
+ seek = self.f.seek
+ _magic = read(4)
if _magic != magic:
raise ValueError("unexpected magic number: %r" % _magic)
- self.tid = self.f.read(8)
+ self.tid = read(8)
if len(self.tid) != 8:
raise ValueError("cache file too small -- no tid at start")
@@ -740,39 +230,49 @@
# file, and tell our parent about it too (via the `install` callback).
# Remember the location of the largest free block. That seems a
# decent place to start currentofs.
- max_free_size = max_free_offset = 0
- ofs = ZEC4_HEADER_SIZE
+ max_free_size = l = 0
+ ofs = max_free_offset = ZEC_HEADER_SIZE
+ current = self.current
while ofs < fsize:
- self.f.seek(ofs)
- ent = None
- status = self.f.read(1)
+ seek(ofs)
+ status = read(1)
if status == 'a':
- size, rawkey = struct.unpack(">I16s", self.f.read(20))
- key = rawkey[:8], rawkey[8:]
- assert key not in self.key2entry
- self.key2entry[key] = ent = Entry(key, ofs)
- install(self.f, ent)
+ size, oid, start_tid, end_tid = unpack(">I8s8s8s", read(28))
+
+ if end_tid == z64:
+ current[oid] = ofs
+ else:
+ assert start_tid < end_tid
+ self._set_noncurrent(oid, start_tid, ofs)
+ l += 1
elif status == 'f':
- size, = struct.unpack(">Q", self.f.read(8))
+ size, = unpack(">Q", read(8))
elif status in '12345678':
size = int(status)
else:
raise ValueError("unknown status byte value %s in client "
"cache file" % 0, hex(ord(status)))
-
- self.filemap[ofs] = size, ent
- if ent is None and size > max_free_size:
- max_free_size, max_free_offset = size, ofs
-
ofs += size
if ofs != fsize:
raise ValueError("final offset %s != file size %s in client "
"cache file" % (ofs, fsize))
- if __debug__:
- self._verify_filemap()
self.currentofs = max_free_offset
+ self._len = l
+ def _set_noncurrent(self, oid, tid, ofs):
+ noncurrent_for_oid = self.noncurrent.get(u64(oid))
+ if noncurrent_for_oid is None:
+ noncurrent_for_oid = BTrees.LLBTree.LLBucket()
+ self.noncurrent[u64(oid)] = noncurrent_for_oid
+ noncurrent_for_oid[u64(tid)] = ofs
+
+ def _del_noncurrent(self, oid, tid):
+ noncurrent_for_oid = self.noncurrent[u64(oid)]
+ del noncurrent_for_oid[u64(tid)]
+ if not noncurrent_for_oid:
+ del self.noncurrent[u64(oid)]
+
def clearStats(self):
self._n_adds = self._n_added_bytes = 0
self._n_evicts = self._n_evicted_bytes = 0
@@ -787,37 +287,23 @@
##
# The number of objects currently in the cache.
def __len__(self):
- return len(self.key2entry)
+ return self._len
##
- # Iterate over the objects in the cache, producing an Entry for each.
- def __iter__(self):
- return self.key2entry.itervalues()
-
- ##
- # Test whether an (oid, tid) pair is in the cache.
- def __contains__(self, key):
- return key in self.key2entry
-
- ##
- # Do all possible to ensure all bytes written to the file so far are
- # actually on disk.
- def sync(self):
- sync(self.f)
-
- ##
# Close the underlying file. No methods accessing the cache should be
# used after this.
def close(self):
+ if hasattr(self,'_lock_file'):
+ self._lock_file.close()
if self.f:
- self.sync()
+ sync(self.f)
self.f.close()
self.f = None
##
# Evict objects as necessary to free up at least nbytes bytes,
# starting at currentofs. If currentofs is closer than nbytes to
- # the end of the file, currentofs is reset to ZEC4_HEADER_SIZE first.
+ # the end of the file, currentofs is reset to ZEC_HEADER_SIZE first.
# The number of bytes actually freed may be (and probably will be)
# greater than nbytes, and is _makeroom's return value. The file is not
# altered by _makeroom. filemap and key2entry are updated to reflect the
@@ -826,34 +312,170 @@
# freed (starting at currentofs when _makeroom returns, and
# spanning the number of bytes returned by _makeroom).
def _makeroom(self, nbytes):
- assert 0 < nbytes <= self.maxsize - ZEC4_HEADER_SIZE
- assert nbytes <= max32
+ assert 0 < nbytes <= self.maxsize - ZEC_HEADER_SIZE
if self.currentofs + nbytes > self.maxsize:
- self.currentofs = ZEC4_HEADER_SIZE
+ self.currentofs = ZEC_HEADER_SIZE
ofs = self.currentofs
+ seek = self.f.seek
+ read = self.f.read
+ current = self.current
while nbytes > 0:
- size, e = self.filemap.pop(ofs)
- if e is not None:
- del self.key2entry[e.key]
+ seek(ofs)
+ status = read(1)
+ if status == 'a':
+ size, oid, start_tid, end_tid = unpack(">I8s8s8s", read(28))
self._n_evicts += 1
self._n_evicted_bytes += size
- # Load the object header into memory so we know how to
- # update the parent's in-memory data structures.
- self.f.seek(e.offset + OBJECT_HEADER_SIZE)
- o = Object.fromFile(self.f, e.key, skip_data=True)
- self.parent._evicted(o)
+ if end_tid == z64:
+ del current[oid]
+ else:
+ self._del_noncurrent(oid, start_tid)
+ self._len -= 1
+ else:
+ if status == 'f':
+ size = unpack(">Q", read(8))[0]
+ else:
+ assert status in '12345678'
+ size = int(status)
ofs += size
nbytes -= size
return ofs - self.currentofs
##
- # Write Object obj, with data, to file starting at currentofs.
- # nfreebytes are already available for overwriting, and it's
- # guranteed that's enough. obj.offset is changed to reflect the
- # new data record position, and filemap and key2entry are updated to
- # match.
- def _writeobj(self, obj, nfreebytes):
- size = OBJECT_HEADER_SIZE + obj.size
+ # Update our idea of the most recent tid. This is stored in the
+ # instance, and also written out near the start of the cache file. The
+ # new tid must be strictly greater than our current idea of the most
+ # recent tid.
+ def setLastTid(self, tid):
+ if self.tid is not None and tid <= self.tid:
+ raise ValueError("new last tid (%s) must be greater than "
+ "previous one (%s)" % (u64(tid),
+ u64(self.tid)))
+ assert isinstance(tid, str) and len(tid) == 8
+ self.tid = tid
+ self.f.seek(len(magic))
+ self.f.write(tid)
+ self.f.flush()
+
+ ##
+ # Return the last transaction seen by the cache.
+ # @return a transaction id
+ # @defreturn string, or None if no transaction is yet known
+ def getLastTid(self):
+ if self.tid == z64:
+ return None
+ else:
+ return self.tid
+
+ ##
+ # Return the current data record for oid.
+ # @param oid object id
+ # @return (data record, serial number, tid), or None if the object is not
+ # in the cache
+ # @defreturn 3-tuple: (string, string, string)
+
+ def load(self, oid):
+ ofs = self.current.get(oid)
+ if ofs is None:
+ self._trace(0x20, oid)
+ return None
+ self.f.seek(ofs)
+ read = self.f.read
+ assert read(1) == 'a'
+ size, saved_oid, tid, end_tid, ldata = unpack(
+ ">I8s8s8sI", read(32))
+ assert saved_oid == oid
+
+ data = read(ldata)
+ assert len(data) == ldata
+ assert read(8) == oid
+
+ self._n_accesses += 1
+ self._trace(0x22, oid, tid, end_tid, ldata)
+ return data, tid
+
+ ##
+ # Return a non-current revision of oid that was current before tid.
+ # @param oid object id
+ # @param tid id of transaction that wrote next revision of oid
+ # @return data record, serial number, start tid, and end tid
+ # @defreturn 4-tuple: (string, string, string, string)
+
+ def loadBefore(self, oid, before_tid):
+ noncurrent_for_oid = self.noncurrent.get(u64(oid))
+ if noncurrent_for_oid is None:
+ self._trace(0x24, oid, "", before_tid)
+ return None
+
+ items = noncurrent_for_oid.items(None, u64(before_tid)-1)
+ if not items:
+ self._trace(0x24, oid, "", before_tid)
+ return None
+ tid, ofs = items[-1]
+
+ self.f.seek(ofs)
+ read = self.f.read
+ assert read(1) == 'a'
+ size, saved_oid, saved_tid, end_tid, ldata = unpack(
+ ">I8s8s8sI", read(32))
+ assert saved_oid == oid
+ assert saved_tid == p64(tid)
+ assert end_tid != z64
+ data = read(ldata)
+ assert len(data) == ldata
+ assert read(8) == oid
+
+ if end_tid < before_tid:
+ self._trace(0x24, oid, "", before_tid)
+ return None
+
+ self._n_accesses += 1
+ self._trace(0x26, oid, "", saved_tid)
+ return data, saved_tid, end_tid
+
+ ##
+ # Store a new data record in the cache.
+ # @param oid object id
+ # @param start_tid the id of the transaction that wrote this revision
+ # @param end_tid the id of the transaction that created the next
+ # revision of oid. If end_tid is None, the data is
+ # current.
+ # @param data the actual data
+
+ def store(self, oid, start_tid, end_tid, data):
+ seek = self.f.seek
+ if end_tid is None:
+ ofs = self.current.get(oid)
+ if ofs:
+ seek(ofs)
+ read = self.f.read
+ assert read(1) == 'a'
+ size, saved_oid, saved_tid, end_tid = unpack(
+ ">I8s8s8s", read(28))
+ assert saved_oid == oid
+ assert end_tid == z64
+ if saved_tid == start_tid:
+ return
+ raise ValueError("already have current data for oid")
+ else:
+ noncurrent_for_oid = self.noncurrent.get(u64(oid))
+ if noncurrent_for_oid and (u64(start_tid) in noncurrent_for_oid):
+ return
+
+ size = 41 + len(data)
+
+ # A number of cache simulation experiments all concluded that the
+ # 2nd-level ZEO cache got a much higher hit rate if "very large"
+ # objects simply weren't cached. For now, we ignore the request
+ # only if the entire cache file is too small to hold the object.
+ if size > self.maxsize - ZEC_HEADER_SIZE:
+ return
+
+ self._n_adds += 1
+ self._n_added_bytes += size
+ self._len += 1
+
+ nfreebytes = self._makeroom(size)
assert size <= nfreebytes
excess = nfreebytes - size
# If there's any excess (which is likely), we need to record a
@@ -864,144 +486,158 @@
elif excess < 9:
extra = "012345678"[excess]
else:
- extra = 'f' + struct.pack(">Q", excess)
+ extra = 'f' + pack(">Q", excess)
- self.f.seek(self.currentofs)
- self.f.writelines(('a',
- struct.pack(">I8s8s", size,
- obj.key[0], obj.key[1])))
- obj.serialize(self.f)
- self.f.write(extra)
- e = Entry(obj.key, self.currentofs)
- self.key2entry[obj.key] = e
- self.filemap[self.currentofs] = size, e
- self.currentofs += size
- if excess:
- # We need to record the free block in filemap, but there's
- # no need to advance currentofs beyond it. Instead it
- # gives some breathing room for the next object to get
- # written.
- self.filemap[self.currentofs] = excess, None
+ ofs = self.currentofs
+ seek(ofs)
+ write = self.f.write
- ##
- # Add Object object to the cache. This may evict existing objects, to
- # make room (and almost certainly will, in steady state once the cache
- # is first full). The object must not already be in the cache. If the
- # object is too large for the cache, False is returned, otherwise True.
- def add(self, object):
- size = OBJECT_HEADER_SIZE + object.size
- # A number of cache simulation experiments all concluded that the
- # 2nd-level ZEO cache got a much higher hit rate if "very large"
- # objects simply weren't cached. For now, we ignore the request
- # only if the entire cache file is too small to hold the object.
- if size > self.maxsize - ZEC4_HEADER_SIZE:
- return False
+ # Before writing data, we'll write a free block for the space freed.
+ # We'll come back with a last atomic write to rewrite the start of the
+ # allocated-block header.
+ write('f'+pack(">Q", nfreebytes)+'xxxx')
- assert object.key not in self.key2entry
- assert len(object.key[0]) == 8
- assert len(object.key[1]) == 8
+ # Now write the rest of the allocation block header and object data.
+ write(pack(">8s8sI", start_tid, end_tid or z64, len(data)))
+ write(data)
+ write(oid)
+ write(extra)
- self._n_adds += 1
- self._n_added_bytes += size
+ # Now, we'll go back and rewrite the beginning of the
+ # allocated block header.
+ seek(ofs)
+ write('a'+pack(">I8s", size, oid))
- available = self._makeroom(size)
- self._writeobj(object, available)
- return True
+ if end_tid:
+ self._set_noncurrent(oid, start_tid, ofs)
+ self._trace(0x54, oid, start_tid, end_tid, dlen=len(data))
+ else:
+ self.current[oid] = ofs
+ self._trace(0x52, oid, start_tid, dlen=len(data))
+
+ self.currentofs += size
##
- # Return Object for key, or None if not in cache.
- def access(self, key):
- self._n_accesses += 1
- e = self.key2entry.get(key)
- if e is None:
- return None
- offset = e.offset
- size, e2 = self.filemap[offset]
- assert e is e2
+ # If `tid` is None,
+ # forget all knowledge of `oid`. (`tid` can be None only for
+ # invalidations generated by startup cache verification.) If `tid`
+ # isn't None, and we had current
+ # data for `oid`, stop believing we have current data, and mark the
+ # data we had as being valid only up to `tid`. In all other cases, do
+ # nothing.
+ # @param oid object id
+ # @param tid the id of the transaction that wrote a new revision of oid,
+ # or None to forget all cached info about oid.
+ def invalidate(self, oid, tid):
+ if tid > self.tid and tid is not None:
+ self.setLastTid(tid)
- self.f.seek(offset + OBJECT_HEADER_SIZE)
- return Object.fromFile(self.f, key)
+ ofs = self.current.get(oid)
+ if ofs is None:
+ # 0x10 == invalidate (miss)
+ self._trace(0x10, oid, tid)
+ return
+ self.f.seek(ofs)
+ read = self.f.read
+ assert read(1) == 'a'
+ size, saved_oid, saved_tid, end_tid = unpack(">I8s8s8s", read(28))
+ assert saved_oid == oid
+ assert end_tid == z64
+ del self.current[oid]
+ if tid is None:
+ self.f.seek(ofs)
+ self.f.write('f'+pack(">Q", size))
+ # 0x1E = invalidate (hit, discarding current or non-current)
+ self._trace(0x1E, oid, tid)
+ self._len -= 1
+ else:
+ self.f.seek(ofs+21)
+ self.f.write(tid)
+ self._set_noncurrent(oid, saved_tid, ofs)
+ # 0x1C = invalidate (hit, saving non-current)
+ self._trace(0x1C, oid, tid)
+
##
- # Remove Object for key from cache, if present.
- def remove(self, key):
- # If an object is being explicitly removed, we need to load
- # its header into memory and write a free block marker to the
- # disk where the object was stored. We need to load the
- # header to update the in-memory data structures held by
- # ClientCache.
+    # Generates (oid, serial) pairs for all objects in the
+ # cache. This generator is used by cache verification.
+ def contents(self):
+ # May need to materialize list instead of iterating;
+ # depends on whether the caller may change the cache.
+ seek = self.f.seek
+ read = self.f.read
+ for oid, ofs in self.current.iteritems():
+ seek(ofs)
+ assert read(1) == 'a'
+ size, saved_oid, tid, end_tid = unpack(">I8s8s8s", read(28))
+ assert saved_oid == oid
+ assert end_tid == z64
+ yield oid, tid
- # We could instead just keep the header in memory at all times.
+ def dump(self):
+ from ZODB.utils import oid_repr
+ print "cache size", len(self)
+ L = list(self.contents())
+ L.sort()
+ for oid, tid in L:
+ print oid_repr(oid), oid_repr(tid)
+ print "dll contents"
+ L = list(self)
+ L.sort(lambda x, y: cmp(x.key, y.key))
+ for x in L:
+ end_tid = x.end_tid or z64
+ print oid_repr(x.key[0]), oid_repr(x.key[1]), oid_repr(end_tid)
+ print
- e = self.key2entry.pop(key, None)
- if e is None:
+ # If `path` isn't None (== we're using a persistent cache file), and
+ # envar ZEO_CACHE_TRACE is set to a non-empty value, try to open
+ # path+'.trace' as a trace file, and store the file object in
+ # self._tracefile. If not, or we can't write to the trace file, disable
+ # tracing by setting self._trace to a dummy function, and set
+ # self._tracefile to None.
+ def _setup_trace(self, path):
+ _tracefile = None
+ if path and os.environ.get("ZEO_CACHE_TRACE"):
+ tfn = path + ".trace"
+ try:
+ _tracefile = open(tfn, "ab")
+ except IOError, msg:
+ logger.warning("cannot write tracefile %r (%s)", tfn, msg)
+ else:
+ logger.info("opened tracefile %r", tfn)
+
+ if _tracefile is None:
+ self._trace = lambda *a, **k: None
return
- offset = e.offset
- size, e2 = self.filemap[offset]
- assert e is e2
- self.filemap[offset] = size, None
- self.f.seek(offset + OBJECT_HEADER_SIZE)
- o = Object.fromFile(self.f, key, skip_data=True)
- assert size >= 9 # only free blocks are tiny
- self.f.seek(offset)
- self.f.write('f' + struct.pack(">Q", size))
- self.f.flush()
- self.parent._evicted(o)
- ##
- # Update on-disk representation of Object obj.
- #
- # This method should be called when the object header is modified.
- # obj must be in the cache. The only real use for this is during
- # invalidation, to set the end_tid field on a revision that was current
- # (and so had an end_tid of None, but no longer does).
- def update(self, obj):
- e = self.key2entry[obj.key]
- self.f.seek(e.offset + OBJECT_HEADER_SIZE)
- obj.serialize_header(self.f)
+ now = time.time
+ def _trace(code, oid="", tid=z64, end_tid=z64, dlen=0):
+ # The code argument is two hex digits; bits 0 and 7 must be zero.
+ # The first hex digit shows the operation, the second the outcome.
+ # This method has been carefully tuned to be as fast as possible.
+ # Note: when tracing is disabled, this method is hidden by a dummy.
+ encoded = (dlen + 255) & 0x7fffff00 | code
+ if tid is None:
+ tid = z64
+ if end_tid is None:
+ end_tid = z64
+ try:
+ _tracefile.write(
+ pack(">iiH8s8s",
+ now(), encoded, len(oid), tid, end_tid) + oid,
+ )
+ except:
+ print `tid`, `end_tid`
+ raise
+
+ self._trace = _trace
+ _trace(0x00)
- ##
- # Update our idea of the most recent tid. This is stored in the
- # instance, and also written out near the start of the cache file. The
- # new tid must be strictly greater than our current idea of the most
- # recent tid.
- def settid(self, tid):
- if self.tid is not None and tid <= self.tid:
- raise ValueError("new last tid (%s) must be greater than "
- "previous one (%s)" % (u64(tid),
- u64(self.tid)))
- assert isinstance(tid, str) and len(tid) == 8
- self.tid = tid
- self.f.seek(len(magic))
- self.f.write(tid)
- self.f.flush()
- ##
- # This debug method marches over the entire cache file, verifying that
- # the current contents match the info in self.filemap and self.key2entry.
- def _verify_filemap(self, display=False):
- a = ZEC4_HEADER_SIZE
- f = self.f
- while a < self.maxsize:
- f.seek(a)
- status = f.read(1)
- if status == 'a':
- size, = struct.unpack(">I", f.read(4))
- elif status == 'f':
- size, = struct.unpack(">Q", f.read(8))
- else:
- size = int(status)
- if display:
- if a == self.currentofs:
- print '*****',
- print "%c%d" % (status, size),
- size2, obj = self.filemap[a]
- assert size == size2
- assert (obj is not None) == (status == 'a')
- if obj is not None:
- assert obj.offset == a
- assert self.key2entry[obj.key] is obj
- a += size
- if display:
- print
- assert a == self.maxsize
+def sync(f):
+ f.flush()
+
+if hasattr(os, 'fsync'):
+ def sync(f):
+ f.flush()
+ os.fsync(f.fileno())
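
As a reading aid for the new cache.py above, here is a minimal sketch
(illustrative only; the function name is invented) of how an allocated
block in the new on-disk format is parsed, matching the unpack calls in
load() and loadBefore():

    from struct import unpack

    z64 = '\0' * 8

    def _read_record(f, ofs):
        # Parse one allocated block at file offset ofs.
        f.seek(ofs)
        assert f.read(1) == 'a'       # allocation status byte
        size, oid, start_tid, end_tid, ldata = unpack(
            ">I8s8s8sI", f.read(32))  # size, oid, start_tid, end_tid, dlen
        data = f.read(ldata)
        assert len(data) == ldata
        assert f.read(8) == oid       # trailing oid, used as a sanity check
        # An end_tid of z64 means the revision is still current.
        return oid, start_tid, end_tid, data
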
Deleted: ZODB/trunk/src/ZEO/tests/filecache.txt
===================================================================
--- ZODB/trunk/src/ZEO/tests/filecache.txt 2008-05-15 14:20:25 UTC (rev 86772)
+++ ZODB/trunk/src/ZEO/tests/filecache.txt 2008-05-15 14:20:31 UTC (rev 86773)
@@ -1,440 +0,0 @@
-====================================
-The client cache file implementation
-====================================
-
-This test exercises the FileCache implementation which is responsible for
-maintaining the ZEO client cache on disk. Specifics of persistent cache files
-are not tested.
-
-As the FileCache calls back to the client cache we'll use a dummy to monitor
-those calls:
-
- >>> from ZEO.tests.test_cache import ClientCacheDummy, oid
- >>> tid = oid
- >>> cache_dummy = ClientCacheDummy()
-
-We'll instanciate a FileCache with 200 bytes of space:
-
- >>> from ZEO.cache import FileCache
- >>> fc = FileCache(maxsize=200, fpath=None, parent=cache_dummy)
-
-Initially the cache is empty:
-
- >>> len(fc)
- 0
- >>> list(fc)
- []
- >>> fc.getStats()
- (0, 0, 0, 0, 0)
-
-
-Basic usage
-===========
-
-Objects are represented in the cache using a special `Object` object. Let's
-start with an object of the size 100 bytes:
-
- >>> from ZEO.cache import Object
- >>> obj1_1 = Object(key=(oid(1), tid(1)), data='#'*100,
- ... start_tid=tid(1), end_tid=None)
-
-Notice that the actual object size is a bit larger because of the headers that
-are written for each object:
-
- >>> obj1_1.size
- 120
-
-Initially the object is not in the cache:
-
- >>> (oid(1), tid(1)) in fc
- False
-
-We can add it to the cache:
-
- >>> fc.add(obj1_1)
- True
-
-And now it's in the cache:
-
- >>> (oid(1), tid(1)) in fc
- True
- >>> len(fc)
- 1
-
-We can get it back and the object will be equal but not identical to the one we
-stored:
-
- >>> obj1_1_copy = fc.access((oid(1), tid(1)))
- >>> obj1_1_copy.data == obj1_1.data
- True
- >>> obj1_1_copy.key == obj1_1.key
- True
- >>> obj1_1_copy is obj1_1
- False
-
-The cache allows us to iterate over all entries in it:
-
- >>> list(fc) # doctest: +ELLIPSIS
- [<ZEO.cache.Entry object at 0x...>]
-
-
-When an object gets superseded we can update it. This only modifies the header,
-not the actual data. This is useful when invalidations tell us about the
-`end_tid` of an object:
-
- >>> obj1_1.data = '.' * 100
- >>> obj1_1.end_tid = tid(2)
- >>> fc.update(obj1_1)
-
-When loading it again we can see that the data was not changed:
-
- >>> obj1_1_copy = fc.access((oid(1), tid(1)))
- >>> obj1_1_copy.data # doctest: +ELLIPSIS
- '#############...################'
- >>> obj1_1_copy.end_tid
- '\x00\x00\x00\x00\x00\x00\x00\x02'
-
-Objects can be explicitly removed from the cache:
-
- >>> fc.remove((oid(1), tid(1)))
- >>> len(fc)
- 0
- >>> (oid(1), tid(1)) in fc
- False
-
-Evicting objects
-================
-
-When the cached data consumes the whole cache file and more objects need to be
-stored the oldest stored objects are evicted until enough space is available.
-In the next sections we'll exercise some of the special cases of the file
-format and look at the cache after each step.
-
-
-The current state is a cache with two records: the one object which we removed
-from the cache and another free record the reaches to the end of the file.
-
-The first record has a size of 141 bytes:
-
- 141 = 1 ('f') + 4 (size) + 8 (OID) + 8 (TID) + 8 (end_tid) +
- 4 (data length) + 100 (old data) + 8 (OID)
-
-The second record has a size of 47 bytes:
-
- 47 = 1 ('f') + 8 (size) + 38 (free space)
-
-Note that the last byte is an 'x' because the initialisation of the cache file
-forced the absolute size of the file by seeking to byte 200 and writing an 'x'.
-
- >>> from ZEO.tests.test_cache import hexprint
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 66 00 00 00 |ZEC4........f...|
- 00000010 00 00 00 00 8d 00 00 00 01 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 02 00 00 00 64 23 23 23 |............d###|
- 00000030 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000040 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000050 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000060 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000070 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000080 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000090 23 00 00 00 00 00 00 00 01 66 00 00 00 00 00 00 |#........f......|
- 000000a0 00 2f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |./..............|
- 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
- 000000c0 00 00 00 00 00 00 00 78 |.......x |
-
-Case 1: Allocating a new block that fits after the last used one
-
- >>> obj2_1 = Object(key=(oid(2), tid(1)), data='******',
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj2_1)
- True
-
-The new block fits exactly in the remaining 47 bytes (41 bytes header + 6
-bytes payload) so the beginning of the data is the same except for the last 47
-bytes:
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 66 00 00 00 |ZEC4........f...|
- 00000010 00 00 00 00 8d 00 00 00 01 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 02 00 00 00 64 23 23 23 |............d###|
- 00000030 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000040 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000050 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000060 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000070 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000080 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################|
- 00000090 23 00 00 00 00 00 00 00 01 61 00 00 00 2f 00 00 |#........a.../..|
- 000000a0 00 00 00 00 00 02 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 06 2a 2a 2a 2a 2a 2a |..........******|
- 000000c0 00 00 00 00 00 00 00 02 |........ |
-
-Case 2: Allocating a block that wraps around and frees *exactly* one block
-
- >>> obj3_1 = Object(key=(oid(3), tid(1)), data='@'*100,
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj3_1)
- True
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC4........a...|
- 00000010 8d 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 00 00 00 00 64 40 40 40 |............d@@@|
- 00000030 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000040 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000050 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000060 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000070 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000080 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000090 40 00 00 00 00 00 00 00 03 61 00 00 00 2f 00 00 |@........a.../..|
- 000000a0 00 00 00 00 00 02 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 06 2a 2a 2a 2a 2a 2a |..........******|
- 000000c0 00 00 00 00 00 00 00 02 |........ |
-
-Case 3: Allocating a block that requires 1 byte less than the next block
-
- >>> obj4_1 = Object(key=(oid(4), tid(1)), data='~~~~~',
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj4_1)
- True
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC4........a...|
- 00000010 8d 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 00 00 00 00 64 40 40 40 |............d@@@|
- 00000030 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000040 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000050 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000060 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000070 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000080 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@|
- 00000090 40 00 00 00 00 00 00 00 03 61 00 00 00 2e 00 00 |@........a......|
- 000000a0 00 00 00 00 00 04 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 05 7e 7e 7e 7e 7e 00 |..........~~~~~.|
- 000000c0 00 00 00 00 00 00 04 31 |.......1 |
-
-Case 4: Allocating a block that requires 2 bytes less than the next block
-
- >>> obj4_1 = Object(key=(oid(5), tid(1)), data='^'*98,
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj4_1)
- True
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC4........a...|
- 00000010 8b 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 00 00 00 00 62 5e 5e 5e |............b^^^|
- 00000030 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^|
- 00000040 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^|
- 00000050 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^|
- 00000060 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^|
- 00000070 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^|
- 00000080 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 00 |^^^^^^^^^^^^^^^.|
- 00000090 00 00 00 00 00 00 05 32 03 61 00 00 00 2e 00 00 |.......2.a......|
- 000000a0 00 00 00 00 00 04 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 05 7e 7e 7e 7e 7e 00 |..........~~~~~.|
- 000000c0 00 00 00 00 00 00 04 31 |.......1 |
-
-Case 5: Allocating a block that requires 3 bytes less than the next block
-
-The end of the file is already a bit crowded and would create a rather complex
-situation to work on. We create an entry with the size of 95 byte which will
-be inserted at the beginning of the file, leaving a 3 byte free space after
-it.
-
- >>> obj4_1 = Object(key=(oid(6), tid(1)), data='+'*95,
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj4_1)
- True
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC4........a...|
- 00000010 88 00 00 00 00 00 00 00 06 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 00 00 00 00 5f 2b 2b 2b |............_+++|
- 00000030 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++|
- 00000040 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++|
- 00000050 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++|
- 00000060 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++|
- 00000070 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++|
- 00000080 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 00 00 00 00 |++++++++++++....|
- 00000090 00 00 00 06 33 00 05 32 03 61 00 00 00 2e 00 00 |....3..2.a......|
- 000000a0 00 00 00 00 00 04 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 05 7e 7e 7e 7e 7e 00 |..........~~~~~.|
- 000000c0 00 00 00 00 00 00 04 31 |.......1 |
-
-Case 6: Allocating a block that requires 6 bytes less than the next block
-
-As in our previous case, we'll write a block that only fits in the first
-block's place to avoid dealing with the cluttering at the end of the cache
-file.
-
- >>> obj4_1 = Object(key=(oid(7), tid(1)), data='-'*89,
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj4_1)
- True
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC4........a...|
- 00000010 82 00 00 00 00 00 00 00 07 00 00 00 00 00 00 00 |................|
- 00000020 01 00 00 00 00 00 00 00 00 00 00 00 59 2d 2d 2d |............Y---|
- 00000030 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------|
- 00000040 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------|
- 00000050 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------|
- 00000060 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------|
- 00000070 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------|
- 00000080 2d 2d 2d 2d 2d 2d 00 00 00 00 00 00 00 07 36 00 |------........6.|
- 00000090 00 00 00 06 33 00 05 32 03 61 00 00 00 2e 00 00 |....3..2.a......|
- 000000a0 00 00 00 00 00 04 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 05 7e 7e 7e 7e 7e 00 |..........~~~~~.|
- 000000c0 00 00 00 00 00 00 04 31 |.......1 |
-
-Case 7: Allocating a block that requires >= 5 bytes less than the next block
-
-Again, we replace the block at the beginning of the cache (a sketch of the
-general leftover-handling rule follows after the hex dump below).
-
- >>> obj4_1 = Object(key=(oid(8), tid(1)), data='='*80,
- ... start_tid=tid(1), end_tid=None)
- >>> fc.add(obj4_1)
- True
-
- >>> hexprint(fc.f)
- 00000000 5a 45 43 34 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC4........a...|
- 00000010 79 00 00 00 00 00 00 00 08 00 00 00 00 00 00 00 |y...............|
- 00000020 01 00 00 00 00 00 00 00 00 00 00 00 50 3d 3d 3d |............P===|
- 00000030 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================|
- 00000040 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================|
- 00000050 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================|
- 00000060 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================|
- 00000070 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 00 00 00 |=============...|
- 00000080 00 00 00 00 08 66 00 00 00 00 00 00 00 09 36 00 |.....f........6.|
- 00000090 00 00 00 06 33 00 05 32 03 61 00 00 00 2e 00 00 |....3..2.a......|
- 000000a0 00 00 00 00 00 04 00 00 00 00 00 00 00 01 00 00 |................|
- 000000b0 00 00 00 00 00 00 00 00 00 05 7e 7e 7e 7e 7e 00 |..........~~~~~.|
- 000000c0 00 00 00 00 00 00 04 31 |.......1 |
-
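The three cases above all come down to how the cache describes the space left
over when a replacement block is smaller than the block it reuses.  The sketch
below is only an illustrative model of that decision, not the ZEO.cache
implementation; the 5-byte threshold and the record names are assumptions made
for the example.  The data sizes mirror the cases above (98, 95, 89 and 80
bytes); the fixed per-record overhead cancels out of the difference.

    # Illustrative model only: how a file cache might record the gap left when
    # a new block is smaller than the block it replaces.  Threshold and record
    # names are assumptions, not ZEO.cache constants.
    def leftover_record(old_data_size, new_data_size):
        leftover = old_data_size - new_data_size
        if leftover == 0:
            return None                      # exact fit: nothing to record
        if leftover < 5:
            # Too small to hold an explicit size field; a single marker byte
            # is enough to describe the gap.
            return ('tiny-gap marker', leftover)
        # Larger gaps get a real free-block record carrying an explicit size.
        return ('free block', leftover)

    for old, new in [(98, 98), (98, 95), (95, 89), (89, 80)]:
        print(leftover_record(old, new))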
-Statistics functions
-====================
-
-The `getStats` method reports the number of objects added, bytes added,
-objects evicted, bytes evicted, and accesses to the cache (a small helper for
-reading the tuple is sketched after the `clearStats` example below):
-
- >>> fc.getStats()
- (8, 901, 5, 593, 2)
-
-We can reset the stats by calling the `clearStats` method:
-
- >>> fc.clearStats()
- >>> fc.getStats()
- (0, 0, 0, 0, 0)
-
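A small helper like the following can make the tuple easier to read; the field
order is taken from the description above, and nothing else is assumed.

    # Interpret the 5-tuple returned by getStats(), using the field order
    # described above.
    def describe_stats(stats):
        added_objects, added_bytes, evicted_objects, evicted_bytes, accesses = stats
        return ("%d objects (%d bytes) added, %d objects (%d bytes) evicted, "
                "%d accesses" % (added_objects, added_bytes,
                                 evicted_objects, evicted_bytes, accesses))

    print(describe_stats((8, 901, 5, 593, 2)))
    # 8 objects (901 bytes) added, 5 objects (593 bytes) evicted, 2 accesses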
-Small file cache sizes
-======================
-
-The file cache reserves a few bytes at the beginning of the file for its own
-header, and each record carries a fixed amount of overhead.  Cache sizes below
-the resulting threshold therefore cannot hold even a one-byte object; adding
-to such a cache simply fails.  (The arithmetic is sketched after the example
-below.)
-
- >>> obj_small = Object(key=(oid(1), tid(1)), data='#',
- ... start_tid=tid(1), end_tid=None)
- >>> sizes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 53, 54]
- >>> for i in sizes: # doctest: +ELLIPSIS
- ... print "*" * 20
- ... print "Cache file size", i
- ... try:
- ... fc = FileCache(maxsize=i, fpath=None, parent=cache_dummy)
- ... except Exception, v:
- ... print i, v
- ... continue
- ... print "Added", fc.add(obj_small)
- ... print "Length", len(fc)
- ... print "Content", list(fc)
- ... print "Statistics", fc.getStats()
- ********************
- Cache file size 0
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 1
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 2
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 3
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 4
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 5
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 6
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 7
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 8
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 9
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 10
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 53
- Added False
- Length 0
- Content []
- Statistics (0, 0, 0, 0, 0)
- ********************
- Cache file size 54
- Added True
- Length 1
- Content [<ZEO.cache.Entry object at 0x...>]
- Statistics (1, 42, 0, 0, 0)
-
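The 54-byte threshold seen above can be reconstructed from numbers visible in
this file: the 'ZEC4' file header occupies 12 bytes (the magic plus an 8-byte
tid, as the hex dumps show), and a one-byte object reports 42 added bytes,
i.e. 41 bytes of fixed per-object overhead.  The sketch below just does that
arithmetic; treat both constants as values read off this file rather than as
authoritative format constants.

    # Smallest cache size that can hold the one-byte object used above.
    ZEC4_FILE_HEADER = 12      # 'ZEC4' magic + 8-byte tid, per the hex dumps
    PER_OBJECT_OVERHEAD = 41   # a 1-byte object adds 42 bytes in the stats
    data_size = 1

    print(ZEC4_FILE_HEADER + PER_OBJECT_OVERHEAD + data_size)   # 54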
-
-Cleanup
-=======
-
-As the cache is non-persistent, its file will be gone from disk after closing
-the cache:
-
- >>> fc.f # doctest: +ELLIPSIS
- <open file '<fdopen>', mode 'w+b' at 0x...>
- >>> fc.close()
- >>> fc.f
Modified: ZODB/trunk/src/ZEO/tests/test_cache.py
===================================================================
--- ZODB/trunk/src/ZEO/tests/test_cache.py 2008-05-15 14:20:25 UTC (rev 86772)
+++ ZODB/trunk/src/ZEO/tests/test_cache.py 2008-05-15 14:20:31 UTC (rev 86773)
@@ -13,18 +13,20 @@
##############################################################################
"""Basic unit tests for a client cache."""
+from ZODB.utils import p64, repr_to_oid
+from zope.testing import doctest
import os
import random
+import string
+import sys
import tempfile
import unittest
-import doctest
-import string
-import sys
+import ZEO.cache
+import zope.testing.setupstack
import ZEO.cache
-from ZODB.utils import p64, repr_to_oid
+from ZODB.utils import p64, u64
-
n1 = p64(1)
n2 = p64(2)
n3 = p64(3)
@@ -53,16 +55,6 @@
offset += 16
-class ClientCacheDummy(object):
-
- def __init__(self):
- self.objects = {}
-
- def _evicted(self, o):
- if o.key in self.objects:
- del self.objects[o.key]
-
-
def oid(o):
repr = '%016x' % o
return repr_to_oid(repr)
@@ -85,9 +77,10 @@
self.assertEqual(self.cache.getLastTid(), None)
self.cache.setLastTid(n2)
self.assertEqual(self.cache.getLastTid(), n2)
- self.cache.invalidate(None, n1)
+ self.cache.invalidate(n1, n1)
+ self.cache.invalidate(n1, n1)
self.assertEqual(self.cache.getLastTid(), n2)
- self.cache.invalidate(None, n3)
+ self.cache.invalidate(n1, n3)
self.assertEqual(self.cache.getLastTid(), n3)
self.assertRaises(ValueError, self.cache.setLastTid, n2)
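A minimal sketch of the new invalidate() calling convention exercised above:
the first argument is now an oid rather than None, and an invalidation with a
newer tid advances the cache's last tid.  This assumes ClientCache accepts
(path, size) positionally and that open()/close() behave as in these tests.

    import ZEO.cache
    from ZODB.utils import p64

    cache = ZEO.cache.ClientCache(None, 1 << 20)   # anonymous temporary cache
    cache.open()
    cache.setLastTid(p64(2))
    cache.invalidate(p64(1), p64(3))               # (oid, tid)
    print(cache.getLastTid() == p64(3))            # True
    cache.close()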
@@ -122,101 +115,31 @@
def testException(self):
self.cache.store(n1, n2, None, "data")
+ self.cache.store(n1, n2, None, "data")
self.assertRaises(ValueError,
self.cache.store,
n1, n3, None, "data")
def testEviction(self):
# Manually override the current maxsize
- maxsize = self.cache.size = self.cache.fc.maxsize = 3295 # 1245
- self.cache.fc = ZEO.cache.FileCache(3295, None, self.cache)
+ cache = ZEO.cache.ClientCache(None, 3295)
# Trivial test of eviction code. Doesn't test non-current
# eviction.
data = ["z" * i for i in range(100)]
for i in range(50):
n = p64(i)
- self.cache.store(n, n, None, data[i])
- self.assertEquals(len(self.cache), i + 1)
- # The cache now uses 1225 bytes. The next insert
+ cache.store(n, n, None, data[i])
+ self.assertEquals(len(cache), i + 1)
+ # The cache now uses 3287 bytes. The next insert
# should delete some objects.
n = p64(50)
- self.cache.store(n, n, None, data[51])
- self.assert_(len(self.cache) < 51)
+ cache.store(n, n, None, data[51])
+ self.assert_(len(cache) < 51)
# TODO: Need to make sure eviction of non-current data
# are handled correctly.
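For reference, the 3287 bytes mentioned in the comment above are consistent
with a 12-byte file header plus 41 bytes of fixed per-record overhead, the same
figures that appear elsewhere in this change; both constants are assumptions
here, not values read from the new cache.py.

    HEADER = 12
    PER_RECORD_OVERHEAD = 41
    payload = sum(len("z" * i) for i in range(50))      # 0 + 1 + ... + 49 = 1225

    print(HEADER + 50 * PER_RECORD_OVERHEAD + payload)  # 3287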
- def _run_fuzzing(self):
- current_tid = 1
- current_oid = 1
- def log(*args):
- #print args
- pass
- cache = self.fuzzy_cache
- objects = self.fuzzy_cache_client.objects
- for operation in xrange(10000):
- op = random.choice(['add', 'access', 'remove', 'update', 'settid'])
- if not objects:
- op = 'add'
- log(op)
- if op == 'add':
- current_oid += 1
- key = (oid(current_oid), tid(current_tid))
- object = ZEO.cache.Object(
- key=key, data='*'*random.randint(1,60*1024),
- start_tid=tid(current_tid), end_tid=None)
- assert key not in objects
- log(key, len(object.data), current_tid)
- cache.add(object)
- if (object.size + ZEO.cache.OBJECT_HEADER_SIZE >
- cache.maxsize - ZEO.cache.ZEC4_HEADER_SIZE):
- assert key not in cache
- else:
- objects[key] = object
- assert key in cache, key
- elif op == 'access':
- key = random.choice(objects.keys())
- log(key)
- object = objects[key]
- found = cache.access(key)
- assert object.data == found.data
- assert object.key == found.key
- assert object.size == found.size == (len(object.data)+object.TOTAL_FIXED_SIZE)
- elif op == 'remove':
- key = random.choice(objects.keys())
- log(key)
- cache.remove(key)
- assert key not in cache
- assert key not in objects
- elif op == 'update':
- key = random.choice(objects.keys())
- object = objects[key]
- log(key, object.key)
- if not object.end_tid:
- object.end_tid = tid(current_tid)
- log(key, current_tid)
- cache.update(object)
- elif op == 'settid':
- current_tid += 1
- log(current_tid)
- cache.settid(tid(current_tid))
- cache.close()
-
- def testFuzzing(self):
- random.seed()
- seed = random.randint(0, sys.maxint)
- random.seed(seed)
- self.fuzzy_cache_client = ClientCacheDummy()
- self.fuzzy_cache = ZEO.cache.FileCache(
- random.randint(100, 50*1024), None, self.fuzzy_cache_client)
- try:
- self._run_fuzzing()
- except:
- print "Error in fuzzing with seed", seed
- hexprint(self.fuzzy_cache.f)
- raise
-
def testSerialization(self):
self.cache.store(n1, n2, None, "data for n1")
self.cache.store(n3, n3, n4, "non-current data for n3")
@@ -226,9 +149,9 @@
# Copy data from self.cache into path, reaching into the cache
# guts to make the copy.
dst = open(path, "wb+")
- src = self.cache.fc.f
+ src = self.cache.f
src.seek(0)
- dst.write(src.read(self.cache.fc.maxsize))
+ dst.write(src.read(self.cache.maxsize))
dst.close()
copy = ZEO.cache.ClientCache(path)
copy.open()
@@ -238,8 +161,10 @@
eq = self.assertEqual
eq(copy.getLastTid(), self.cache.getLastTid())
eq(len(copy), len(self.cache))
- eq(copy.current, self.cache.current)
- eq(copy.noncurrent, self.cache.noncurrent)
+ eq(dict(copy.current), dict(self.cache.current))
+ eq(dict([(k, dict(v)) for (k, v) in copy.noncurrent.items()]),
+ dict([(k, dict(v)) for (k, v) in self.cache.noncurrent.items()]),
+ )
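The dict() conversions above are needed because, as far as I know, BTrees do
not define content-based equality, so two trees holding the same items do not
compare equal.  A short sketch using a generic BTree flavour (an assumption;
the cache uses other flavours internally):

    import BTrees.OOBTree

    a = BTrees.OOBTree.OOBTree({'x': 1})
    b = BTrees.OOBTree.OOBTree({'x': 1})

    print(a == b)              # not a content comparison
    print(dict(a) == dict(b))  # True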
def testCurrentObjectLargerThanCache(self):
if self.cache.path:
@@ -260,20 +185,98 @@
def testOldObjectLargerThanCache(self):
if self.cache.path:
os.remove(self.cache.path)
- self.cache = ZEO.cache.ClientCache(size=50)
- self.cache.open()
+ cache = ZEO.cache.ClientCache(size=50)
+ cache.open()
# We store an object that is a bit larger than the cache can handle.
- self.cache.store(n1, n2, n3, "x"*64)
+ cache.store(n1, n2, n3, "x"*64)
# We can see that it was not stored.
- self.assertEquals(None, self.cache.load(n1))
+ self.assertEquals(None, cache.load(n1))
# If an object cannot be stored in the cache, it must not be
# recorded as non-current.
- self.assert_((n2, n3) not in self.cache.noncurrent[n1])
+ self.assert_(1 not in cache.noncurrent)
+__test__ = dict(
+ kill_does_not_cause_cache_corruption =
+ r"""
+ If we kill a process while a cache is being written to, the cache
+ isn't corrupted. To see this, we'll write a little script that
+ writes records to a cache file repeatedly.
+
+ >>> import os, random, sys, time
+ >>> open('t', 'w').write('''
+ ... import os, random, sys, thread, time
+ ... sys.path = %r
+ ...
+ ... def suicide():
+ ... time.sleep(random.random()/10)
+ ... os._exit(0)
+ ...
+ ... import ZEO.cache
+ ... from ZODB.utils import p64
+ ... cache = ZEO.cache.ClientCache('cache')
+ ... oid = 0
+ ... t = 0
+ ... thread.start_new_thread(suicide, ())
+ ... while 1:
+ ... oid += 1
+ ... t += 1
+ ... data = 'X' * random.randint(5000,25000)
+ ... cache.store(p64(oid), p64(t), None, data)
+ ...
+ ... ''' % sys.path)
+
+ >>> for i in range(10):
+ ... _ = os.spawnl(os.P_WAIT, sys.executable, sys.executable, 't')
+ ... if os.path.exists('cache'):
+ ... cache = ZEO.cache.ClientCache('cache')
+ ... cache.open()
+ ... cache.close()
+ ... os.remove('cache')
+ ... os.remove('cache.lock')
+
+
+ """,
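The commit message says record writes were made atomic; the doctest above only
checks the observable effect.  Purely as an illustration of one way to get that
effect (not necessarily what cache.py does), a record can be written body
first, with the one-byte marker that makes it visible written last:

    import os, struct, tempfile

    def write_record(f, offset, data):
        f.seek(offset)
        f.write('\0')                                  # record not valid yet
        f.write(struct.pack(">I", len(data)) + data)
        f.flush()
        os.fsync(f.fileno())
        # Only now flip the single byte that marks the record as allocated,
        # so a crash before this point leaves the record invisible rather
        # than torn.
        f.seek(offset)
        f.write('a')
        f.flush()

    f = tempfile.TemporaryFile()
    write_record(f, 0, 'some object state')
    f.close()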
+
+ full_cache_is_valid =
+ r"""
+
+ If we fill up the cache without any free space, the cache can
+ still be used.
+
+ >>> import ZEO.cache
+ >>> cache = ZEO.cache.ClientCache('cache', 1000)
+ >>> data = 'X' * (1000 - ZEO.cache.ZEC_HEADER_SIZE - 41)
+ >>> cache.store(p64(1), p64(1), None, data)
+ >>> cache.close()
+ >>> cache = ZEO.cache.ClientCache('cache', 1000)
+ >>> cache.open()
+ >>> cache.store(p64(2), p64(2), None, 'XXX')
+
+ >>> cache.close()
+ """,
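The data size used above is simply the file size minus the file header and the
per-record overhead, so the single record fills the cache exactly.  The 41 is
an assumption consistent with the other numbers in this change;
ZEO.cache.ZEC_HEADER_SIZE is the module's own constant, as used in the doctest.

    import ZEO.cache

    FILE_SIZE = 1000
    PER_RECORD_OVERHEAD = 41    # assumed fixed overhead per record
    payload = FILE_SIZE - ZEO.cache.ZEC_HEADER_SIZE - PER_RECORD_OVERHEAD
    print(payload)              # length of the 'X' string stored above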
+
+ cannot_open_same_cache_file_twice =
+ r"""
+ >>> import ZEO.cache
+ >>> cache = ZEO.cache.ClientCache('cache', 1000)
+ >>> cache2 = ZEO.cache.ClientCache('cache', 1000)
+ Traceback (most recent call last):
+ ...
+ LockError: Couldn't lock 'cache.lock'
+
+ >>> cache.close()
+ """,
+ )
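The lock behaviour relied on by the last doctest comes from the same
ZODB.lock_file module the cache now imports.  As a sketch (the LockFile name is
taken from that module; treat the details as assumptions and check
ZODB/lock_file.py if they differ), a second attempt to lock the same path fails
instead of silently sharing the file:

    import os
    import ZODB.lock_file

    lock = ZODB.lock_file.LockFile('example.lock')   # hypothetical path
    try:
        ZODB.lock_file.LockFile('example.lock')
    except Exception:       # a LockError in practice, per the doctest above
        print('second lock attempt refused')
    lock.close()
    os.remove('example.lock')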
+
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(CacheTests))
- suite.addTest(doctest.DocFileSuite('filecache.txt'))
+ suite.addTest(
+ doctest.DocTestSuite(
+ setUp=zope.testing.setupstack.setUpDirectory,
+ tearDown=zope.testing.setupstack.tearDown,
+ )
+ )
return suite