[Zodb-checkins] SVN: ZODB/trunk/src/ Cache sizes can now be changed. (Previously, you couldn't change the size of an existing cache file.)
Jim Fulton
jim at zope.com
Thu Nov 13 14:37:46 EST 2008
Log message for revision 92915:
Cache sizes can now be changed. (Previously, you couldn't change the
size of an existing cache file.)
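For illustration, a minimal sketch of what this enables, using only the ClientCache constructor and close() calls exercised in the tests below; the path and sizes are arbitrary examples:

import os
import ZEO.cache

# Create a persistent cache file at one size.
cache = ZEO.cache.ClientCache('cache', size=10*1024*1024)
cache.close()

# Reopen the same file with a different size. Previously the requested
# size was ignored in favor of the existing file's size; now the file
# is grown (padded with free blocks) or shrunk (truncated) on open.
cache = ZEO.cache.ClientCache('cache', size=20*1024*1024)
cache.close()
os.remove('cache')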
Changed:
U ZODB/trunk/src/CHANGES.txt
U ZODB/trunk/src/ZEO/cache.py
U ZODB/trunk/src/ZEO/tests/test_cache.py
-=-
Modified: ZODB/trunk/src/CHANGES.txt
===================================================================
--- ZODB/trunk/src/CHANGES.txt 2008-11-13 18:53:08 UTC (rev 92914)
+++ ZODB/trunk/src/CHANGES.txt 2008-11-13 19:37:46 UTC (rev 92915)
@@ -39,7 +39,8 @@
- The previous (ZODB 3.8) ZEO client-cache format is supported.
The newer cache format introduced in ZODB 3.9.0a1 is no longer
- supported. Cache files can still be larger than 4G.
+ supported. Cache files can still be larger than 4G. Cache file
+ sizes can now be changed.
3.9.0a4 (2008-11-06)
====================
Modified: ZODB/trunk/src/ZEO/cache.py
===================================================================
--- ZODB/trunk/src/ZEO/cache.py 2008-11-13 18:53:08 UTC (rev 92914)
+++ ZODB/trunk/src/ZEO/cache.py 2008-11-13 19:37:46 UTC (rev 92915)
@@ -88,6 +88,7 @@
# while opening.
max_block_size = (1<<31) - 1
+
# After the header, the file contains a contiguous sequence of blocks. All
# blocks begin with a one-byte status indicator:
#
@@ -116,6 +117,8 @@
# 2 byte version length must be 0
# 4 byte data size
# data
+# 8 byte redundant oid for error detection.
+allocated_record_overhead = 43
# The cache's currentofs goes around the file, circularly, forever.
# It's always the starting offset of some block.
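The new allocated_record_overhead constant can be checked against the allocated-block layout documented above; the arithmetic, as a quick sanity check:

# Per-record overhead of an allocated ('a') block, from the layout above:
# 1 status byte + 4 block size + 8 oid + 8 start tid + 8 end tid
# + 2 version length + 4 data size + 8 redundant trailing oid
assert 1 + 4 + 8 + 8 + 8 + 2 + 4 + 8 == 43  # == allocated_record_overhead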
@@ -187,35 +190,25 @@
# here -- the scan() method must be called then to open the file
# (and it sets self.f).
+ fsize = ZEC_HEADER_SIZE
if path:
self._lock_file = zc.lockfile.LockFile(path + '.lock')
-
- if path and os.path.exists(path):
- # Reuse an existing file. scan() will open & read it.
- self.f = None
- logger.info("reusing persistent cache file %r", path)
- else:
- if path:
+ if not os.path.exists(path):
+ # Create a small empty file. We'll make it bigger in _initfile.
self.f = open(path, 'wb+')
+ self.f.write(magic+z64)
logger.info("created persistent cache file %r", path)
else:
- self.f = tempfile.TemporaryFile()
- logger.info("created temporary cache file %r", self.f.name)
- # Make sure the OS really saves enough bytes for the file.
- self.f.seek(self.maxsize - 1)
- self.f.write('x')
- self.f.truncate()
- # Start with one magic header block
- self.f.seek(0)
- self.f.write(magic)
- self.f.write(z64)
- # add as many free blocks as are needed to fill the space
- nfree = self.maxsize - ZEC_HEADER_SIZE
- for i in range(0, nfree, max_block_size):
- block_size = min(max_block_size, nfree-i)
- self.f.write('f' + pack(">I", block_size))
- self.f.seek(block_size-5, 1)
- sync(self.f)
+ fsize = os.path.getsize(self.path)
+ self.f = open(path, 'rb+')
+ logger.info("reusing persistent cache file %r", path)
+ else:
+ # Create a small empty file. We'll make it bigger in _initfile.
+ self.f = tempfile.TemporaryFile()
+ self.f.write(magic+z64)
+ logger.info("created temporary cache file %r", self.f.name)
+
+ self._initfile(self.f, fsize)
# Statistics: _n_adds, _n_added_bytes,
# _n_evicts, _n_evicted_bytes,
@@ -224,8 +217,6 @@
self._setup_trace(path)
- self.open()
-
self._lock = threading.RLock()
# Backward compatibility. Client code used to have to use the fc
@@ -238,20 +229,13 @@
# Scan the current contents of the cache file, calling `install`
# for each object found in the cache. This method should only
# be called once to initialize the cache from disk.
- def open(self):
- if self.f is not None: # we're not (re)using a pre-existing file
- return
- fsize = os.path.getsize(self.path)
- if fsize != self.maxsize:
- logger.warning("existing cache file %r has size %d; "
- "requested size %d ignored", self.path,
- fsize, self.maxsize)
- self.maxsize = fsize
- self.f = open(self.path, 'rb+')
- read = self.f.read
- seek = self.f.seek
- _magic = read(4)
- if _magic != magic:
+ def _initfile(self, f, fsize):
+ maxsize = self.maxsize
+ read = f.read
+ seek = f.seek
+ write = f.write
+ seek(0)
+ _magic = read(4)
+ if _magic != magic:
raise ValueError("unexpected magic number: %r" % _magic)
self.tid = read(8)
if len(self.tid) != 8:
@@ -264,8 +248,9 @@
self.current = ZODB.fsIndex.fsIndex()
self.noncurrent = BTrees.LOBTree.LOBTree()
- max_free_size = l = 0
- ofs = max_free_offset = ZEC_HEADER_SIZE
+ l = 0
+ ofs = ZEC_HEADER_SIZE
+ first_free_offset = 0
current = self.current
while ofs < fsize:
seek(ofs)
@@ -273,35 +258,77 @@
if status == 'a':
size, oid, start_tid, end_tid, lver = unpack(
">I8s8s8sH", read(30))
- if end_tid == z64:
- assert oid not in current, (ofs, self.f.tell())
- current[oid] = ofs
+ if ofs+size <= maxsize:
+ if end_tid == z64:
+ assert oid not in current, (ofs, f.tell())
+ current[oid] = ofs
+ else:
+ assert start_tid < end_tid, (ofs, f.tell())
+ self._set_noncurrent(oid, start_tid, ofs)
+ assert lver == 0, "Versions aren't supported"
+ l += 1
+ else:
+ # free block
+ if first_free_offset == 0:
+ first_free_offset = ofs
+ if status == 'f':
+ size, = unpack(">I", read(4))
+ if size > max_block_size:
+ # Oops, we either have an old cache, or we
+ # crashed while storing. Split this block into two.
+ assert size <= max_block_size*2
+ seek(ofs+max_block_size)
+ write('f'+pack(">I", size-max_block_size))
+ seek(ofs)
+ write('f'+pack(">I", max_block_size))
+ sync(f)
+ elif status in '1234':
+ size = int(status)
else:
- assert start_tid < end_tid, (ofs, self.f.tell())
- self._set_noncurrent(oid, start_tid, ofs)
- assert lver == 0, "Versions aren't supported"
- l += 1
- elif status == 'f':
- size, = unpack(">I", read(4))
- if size > max_block_size:
- # Oops, we either have an old cache, or a we
- # crashed while storing. Split this block into two.
- assert size <= max_block_size*2
- seek(ofs+max_block_size)
- self.f.write('f'+pack(">I", size-max_block_size))
+ raise ValueError("unknown status byte value %s in client "
+ "cache file" % 0, hex(ord(status)))
+
+ if ofs + size >= maxsize:
+ # Oops, the file was bigger before.
+ if ofs+size > maxsize:
+ # The last record is too big. Replace it with a smaller
+ # free record
+ size = maxsize-ofs
seek(ofs)
- self.f.write('f'+pack(">I", max_block_size))
- elif status in '1234':
- size = int(status)
- else:
- raise ValueError("unknown status byte value %s in client "
- "cache file" % 0, hex(ord(status)))
+ if size > 4:
+ write('f'+pack(">I", size))
+ else:
+ write("012345"[size])
+ sync(f)
+ ofs += size
+ break
+
ofs += size
- if ofs != fsize:
- raise ValueError("final offset %s != file size %s in client "
- "cache file" % (ofs, fsize))
- self.currentofs = max_free_offset
+ if fsize < maxsize:
+ assert ofs==fsize
+ # Make sure the OS really saves enough bytes for the file.
+ seek(maxsize - 1)
+ write('x')
+
+ # add as many free blocks as are needed to fill the space
+ seek(ofs)
+ nfree = maxsize - ofs
+ for i in range(0, nfree, max_block_size):
+ block_size = min(max_block_size, nfree-i)
+ write('f' + pack(">I", block_size))
+ seek(block_size-5, 1)
+ sync(f)
+ first_free_offset = ofs
+ else:
+ assert ofs==maxsize
+ if maxsize < fsize:
+ seek(maxsize)
+ f.truncate()
+
+ # We use the first_free_offset because it is most likely the
+ # place where we last wrote.
+ self.currentofs = first_free_offset or ZEC_HEADER_SIZE
self._len = l
def _set_noncurrent(self, oid, tid, ofs):
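Taken together, _initfile now reconciles the file with the requested size in both directions. A rough standalone sketch of the two resize paths, assuming the 12-byte header (4-byte magic plus 8-byte tid, as written in __init__ above) and the 'f' + 4-byte big-endian length free-block format; the function name and signature are illustrative, not the module's API:

from struct import pack

ZEC_HEADER_SIZE = 12            # 4-byte magic + 8-byte tid
max_block_size = (1 << 31) - 1

def resize(f, ofs, fsize, maxsize):
    # ofs is where the scan stopped: fsize when growing (the whole old
    # file was read) and maxsize when shrinking (the scan stopped early
    # and rewrote the last record as a free block).
    if fsize < maxsize:
        # Grow: reserve the space, then cover the tail with free blocks.
        f.seek(maxsize - 1)
        f.write('x')
        f.seek(ofs)
        nfree = maxsize - ofs
        for i in range(0, nfree, max_block_size):
            block_size = min(max_block_size, nfree - i)
            f.write('f' + pack(">I", block_size))
            f.seek(block_size - 5, 1)  # skip over the block body
    elif maxsize < fsize:
        # Shrink: drop everything past the new size.
        f.seek(maxsize)
        f.truncate()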
@@ -518,7 +545,7 @@
if noncurrent_for_oid and (u64(start_tid) in noncurrent_for_oid):
return
- size = 43 + len(data)
+ size = allocated_record_overhead + len(data)
# A number of cache simulation experiments all concluded that the
# 2nd-level ZEO cache got a much higher hit rate if "very large"
Modified: ZODB/trunk/src/ZEO/tests/test_cache.py
===================================================================
--- ZODB/trunk/src/ZEO/tests/test_cache.py 2008-11-13 18:53:08 UTC (rev 92914)
+++ ZODB/trunk/src/ZEO/tests/test_cache.py 2008-11-13 19:37:46 UTC (rev 92915)
@@ -134,7 +134,7 @@
n = p64(i)
cache.store(n, n, None, data[i])
self.assertEquals(len(cache), i + 1)
- # The cache now uses 3287 bytes. The next insert
+ # The cache is now almost full. The next insert
# should delete some objects.
n = p64(50)
cache.store(n, n, None, data[51])
@@ -197,10 +197,10 @@
self.assert_(1 not in cache.noncurrent)
def testVeryLargeCaches(self):
- cache = ZEO.cache.ClientCache('cache', size=(1<<33))
+ cache = ZEO.cache.ClientCache('cache', size=(1<<32)+(1<<20))
cache.store(n1, n2, None, "x")
cache.close()
- cache = ZEO.cache.ClientCache('cache', size=(1<<33))
+ cache = ZEO.cache.ClientCache('cache', size=(1<<33)+(1<<20))
self.assertEquals(cache.load(n1), ('x', n2))
cache.close()
@@ -224,7 +224,78 @@
ZEO.cache.max_block_size)
f.close()
+ def testChangingCacheSize(self):
+ # start with a small cache
+ data = 'x'
+ recsize = ZEO.cache.allocated_record_overhead+len(data)
+ for extra in (0, 2, recsize-2):
+
+ cache = ZEO.cache.ClientCache(
+ 'cache', size=ZEO.cache.ZEC_HEADER_SIZE+100*recsize+extra)
+ for i in range(100):
+ cache.store(p64(i), n1, None, data)
+ self.assertEquals(len(cache), 100)
+ self.assertEquals(os.path.getsize(
+ 'cache'), ZEO.cache.ZEC_HEADER_SIZE+100*recsize+extra)
+
+ # Now make it smaller
+ cache.close()
+ small = 50
+ cache = ZEO.cache.ClientCache(
+ 'cache', size=ZEO.cache.ZEC_HEADER_SIZE+small*recsize+extra)
+ self.assertEquals(len(cache), small)
+ self.assertEquals(os.path.getsize(
+ 'cache'), ZEO.cache.ZEC_HEADER_SIZE+small*recsize+extra)
+ self.assertEquals(set(u64(oid) for (oid, tid) in cache.contents()),
+ set(range(small)))
+ for i in range(100, 110):
+ cache.store(p64(i), n1, None, data)
+ self.assertEquals(len(cache), small)
+ expected_oids = set(range(10, 50)+range(100, 110))
+ self.assertEquals(
+ set(u64(oid) for (oid, tid) in cache.contents()),
+ expected_oids)
+
+ # Make sure we can reopen with same size
+ cache.close()
+ cache = ZEO.cache.ClientCache(
+ 'cache', size=ZEO.cache.ZEC_HEADER_SIZE+small*recsize+extra)
+ self.assertEquals(len(cache), small)
+ self.assertEquals(set(u64(oid) for (oid, tid) in cache.contents()),
+ expected_oids)
+
+ # Now make it bigger
+ cache.close()
+ large = 150
+ cache = ZEO.cache.ClientCache(
+ 'cache', size=ZEO.cache.ZEC_HEADER_SIZE+large*recsize+extra)
+ self.assertEquals(len(cache), small)
+ self.assertEquals(os.path.getsize(
+ 'cache'), ZEO.cache.ZEC_HEADER_SIZE+large*recsize+extra)
+ self.assertEquals(set(u64(oid) for (oid, tid) in cache.contents()),
+ expected_oids)
+
+ for i in range(200, 305):
+ cache.store(p64(i), n1, None, data)
+ self.assertEquals(len(cache), large)
+ expected_oids = set(range(10, 50)+range(105, 110)+range(200, 305))
+ self.assertEquals(set(u64(oid) for (oid, tid) in cache.contents()),
+ expected_oids)
+
+ # Make sure we can reopen with same size
+ cache.close()
+ cache = ZEO.cache.ClientCache(
+ 'cache', size=ZEO.cache.ZEC_HEADER_SIZE+large*recsize+extra)
+ self.assertEquals(len(cache), large)
+ self.assertEquals(set(u64(oid) for (oid, tid) in cache.contents()),
+ expected_oids)
+
+ # Cleanup
+ cache.close()
+ os.remove('cache')
+
+
__test__ = dict(
kill_does_not_cause_cache_corruption =
r"""