[Zodb-checkins] SVN: ZODB/branches/3.3/ Port from ZODB 3.2.
Tim Peters
tim.one at comcast.net
Fri Feb 25 16:51:33 EST 2005
Log message for revision 29302:
Port from ZODB 3.2.
Stop believing the maximum oid cached in a FileStorage's .index file.
This is a critical bugfix, although the problems it addresses are
(a) rare and (b) not entirely fixed yet (more checkins to come).
The true max oid is found efficiently now by exploiting the recently-added
fsIndex.maxKey() method (which was, of course, added for this purpose).
Also fix that the .index file could get updated on disk when the
FileStorage was opened in read-only mode. The code was trying to prevent
this, but missed the most obvious rewrite path.
Incidentally improved many obsolete and/or incorrect comments.
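For orientation, a minimal sketch (not the committed code) of the new
strategy, assuming the fsIndex and ZODB.utils APIs the diff below relies
on:

    from ZODB.fsIndex import fsIndex
    from ZODB.utils import p64, u64, z64

    def true_max_oid(index):
        # fsIndex has no cheap __len__/__nonzero__, so probe maxKey()
        # and fall back to z64 (eight NUL bytes) for an empty index.
        try:
            return index.maxKey()
        except ValueError:
            return z64

    index = fsIndex()
    assert true_max_oid(index) == z64
    index[p64(7)] = 1234          # oid -> data record file offset
    assert u64(true_max_oid(index)) == 7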
Changed:
U ZODB/branches/3.3/NEWS.txt
U ZODB/branches/3.3/src/ZODB/FileStorage/FileStorage.py
U ZODB/branches/3.3/src/ZODB/tests/testFileStorage.py
-=-
Modified: ZODB/branches/3.3/NEWS.txt
===================================================================
--- ZODB/branches/3.3/NEWS.txt 2005-02-25 20:46:19 UTC (rev 29301)
+++ ZODB/branches/3.3/NEWS.txt 2005-02-25 21:51:33 UTC (rev 29302)
@@ -28,6 +28,34 @@
depending on platform, and should suffer far fewer (if any) intermittent
"timed out waiting for storage to connect" failures.
+
+FileStorage
+-----------
+
+- A FileStorage's index file tried to maintain the index's largest oid as a
+ separate piece of data, incrementally updated over the storage's lifetime.
+ This scheme was more complicated than necessary, so was also more brittle
+ and slower than necessary. It indirectly participated in a rare but
+ critical bug: when a FileStorage was created via
+ ``copyTransactionsFrom()``, the "maximum oid" saved in the index file was
+ always 0. Use that FileStorage, and it could then create "new" oids
+ starting over at 0 again, even though those oids were already in use by
+ old objects in the database. Packing a FileStorage has no reason to
+ try to update the maximum oid in the index file either, so this kind of
+ damage could (and did) persist even across packing.
+
+ The index file's maximum-oid data is ignored now, but is still written
+ out so that ``.index`` files can be read by older versions of ZODB.
+ Finding the true maximum oid is done now by exploiting that the main
+ index is really a kind of BTree (long ago, this wasn't true), and finding
+ the largest key in a BTree is inexpensive.
+
+- A FileStorage's index file could be updated on disk even if the storage
+ was opened in read-only mode. That bug has been repaired.
+
+- An efficient ``maxKey()`` implementation was added to class ``fsIndex``.
+
+
Pickle (in-memory Connection) Cache
-----------------------------------
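To make the first NEWS entry above concrete, a hypothetical sketch of the
failure mode (the allocator class is illustrative, not FileStorage's real
oid logic):

    from ZODB.utils import p64, u64, z64

    class OidAllocator:
        # Pre-3.2.6 behavior: blindly trust the max oid cached in .index.
        def __init__(self, cached_max_oid):
            self._oid = cached_max_oid

        def new_oid(self):
            self._oid = p64(u64(self._oid) + 1)
            return self._oid

    # A storage built via copyTransactionsFrom() cached a max oid of 0
    # (z64), although its data really holds oids 1..N, so the first
    # "new" oid collides with a live object:
    alloc = OidAllocator(z64)
    assert u64(alloc.new_oid()) == 1   # oid 1 is already in use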
Modified: ZODB/branches/3.3/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/branches/3.3/src/ZODB/FileStorage/FileStorage.py 2005-02-25 20:46:19 UTC (rev 29301)
+++ ZODB/branches/3.3/src/ZODB/FileStorage/FileStorage.py 2005-02-25 21:51:33 UTC (rev 29302)
@@ -40,13 +40,8 @@
import FileStorageFormatter, DataHeader, TxnHeader, DATA_HDR, \
DATA_HDR_LEN, TRANS_HDR, TRANS_HDR_LEN, CorruptedDataError
from ZODB.loglevels import BLATHER
+from ZODB.fsIndex import fsIndex
-try:
- from ZODB.fsIndex import fsIndex
-except ImportError:
- def fsIndex():
- return {}
-
t32 = 1L << 32
packed_version = "FS21"
@@ -161,14 +156,13 @@
r = self._restore_index()
if r is not None:
self._used_index = 1 # Marker for testing
- index, vindex, start, maxoid, ltid = r
+ index, vindex, start, ltid = r
self._initIndex(index, vindex, tindex, tvindex,
oid2tid, toid2tid, toid2tid_delete)
self._pos, self._oid, tid = read_index(
self._file, file_name, index, vindex, tindex, stop,
- ltid=ltid, start=start, maxoid=maxoid,
- read_only=read_only,
+ ltid=ltid, start=start, read_only=read_only,
)
else:
self._used_index = 0 # Marker for testing
@@ -221,9 +215,8 @@
# stores 4000 objects, and each random seek + read takes 7ms
# (that was approximately true on Linux and Windows tests in
# mid-2003), that's 28 seconds just to find the old tids.
- # XXX Probably better to junk this and redefine _index as mapping
- # XXX oid to (offset, tid) pair, via a new memory-efficient
- # XXX BTree type.
+ # TODO: Probably better to junk this and redefine _index as mapping
+ # oid to (offset, tid) pair, via a new memory-efficient BTree type.
self._oid2tid = oid2tid
# oid->tid map to transactionally add to _oid2tid.
self._toid2tid = toid2tid
@@ -243,12 +236,18 @@
def _save_index(self):
"""Write the database index to a file to support quick startup."""
+ if self._is_read_only:
+ return
+
index_name = self.__name__ + '.index'
tmp_name = index_name + '.index_tmp'
f=open(tmp_name,'wb')
p=Pickler(f,1)
+ # Note: starting with ZODB 3.2.6, the 'oid' value stored is ignored
+ # by the code that reads the index. We still write it, so that
+ # .index files can still be read by older ZODBs.
info={'index': self._index, 'pos': self._pos,
'oid': self._oid, 'vindex': self._vindex}
@@ -340,11 +339,22 @@
def _restore_index(self):
"""Load database index to support quick startup."""
+ # Returns (index, vindex, pos, tid), or None in case of
+ # error.
+ # Starting with ZODB 3.2.6, the 'oid' value stored in the index
+ # is ignored.
+ # The index returned is always an instance of fsIndex. If the
+ # index cached in the file is a Python dict, it's converted to
+ # fsIndex here, and, if we're not in read-only mode, the .index
+ # file is rewritten with the converted fsIndex so we don't need to
+ # convert it again the next time.
file_name=self.__name__
index_name=file_name+'.index'
- try: f=open(index_name,'rb')
- except: return None
+ try:
+ f = open(index_name, 'rb')
+ except:
+ return None
p=Unpickler(f)
@@ -356,34 +366,31 @@
return None
index = info.get('index')
pos = info.get('pos')
- oid = info.get('oid')
vindex = info.get('vindex')
- if index is None or pos is None or oid is None or vindex is None:
+ if index is None or pos is None or vindex is None:
return None
pos = long(pos)
- if isinstance(index, DictType) and not self._is_read_only:
- # Convert to fsIndex
+ if isinstance(index, DictType):
+ # Convert to fsIndex.
newindex = fsIndex()
- if type(newindex) is not type(index):
- # And we have fsIndex
- newindex.update(index)
-
- # Now save the index
+ newindex.update(index)
+ index = newindex
+ if not self._is_read_only:
+ # Save the converted index.
f = open(index_name, 'wb')
p = Pickler(f, 1)
- info['index'] = newindex
+ info['index'] = index
p.dump(info)
f.close()
-
- # Now call this method again to get the new data
+ # Now call this method again to get the new data.
return self._restore_index()
tid = self._sane(index, pos)
if not tid:
return None
- return index, vindex, pos, oid, tid
+ return index, vindex, pos, tid
def close(self):
self._file.close()
@@ -1548,7 +1555,7 @@
def read_index(file, name, index, vindex, tindex, stop='\377'*8,
ltid=z64, start=4L, maxoid=z64, recover=0, read_only=0):
- """Scan the entire file storage and recreate the index.
+ """Scan the file storage and update the index.
Returns file position, max oid, and last transaction id. It also
stores index information in the three dictionary arguments.
@@ -1556,18 +1563,28 @@
Arguments:
file -- a file object (the Data.fs)
name -- the name of the file (presumably file.name)
- index -- dictionary, oid -> data record
- vindex -- dictionary, oid -> data record for version data
- tindex -- dictionary, oid -> data record
- XXX tindex is cleared before return, so it will be empty
+ index -- fsIndex, oid -> data record file offset
+ vindex -- dictionary, oid -> data record offset for version data
+ tindex -- dictionary, oid -> data record offset
+ tindex is cleared before return
There are several default arguments that affect the scan or the
- return values. XXX should document them.
+ return values. TODO: document them.
+ start -- the file position at which to start scanning for oids added
+ beyond the ones the passed-in indices know about. The .index
+ file caches the highest ._pos FileStorage knew about when
+ the .index file was last saved, and that's the intended value
+ to pass in for start; accept the default (and pass empty
+ indices) to recreate the index from scratch.
+ maxoid -- ignored (it meant something prior to ZODB 3.2.6; the argument
+ still exists just so the signature of read_index() stayed the
+ same)
+
The file position returned is the position just after the last
valid transaction record. The oid returned is the maximum object
- id in the data. The transaction id is the tid of the last
- transaction.
+ id in `index`, or z64 if the index is empty. The transaction id is the
+ tid of the last transaction, or ltid if the index is empty.
"""
read = file.read
@@ -1577,14 +1594,15 @@
fmt = TempFormatter(file)
if file_size:
- if file_size < start: raise FileStorageFormatError, file.name
+ if file_size < start:
+ raise FileStorageFormatError, file.name
seek(0)
if read(4) != packed_version:
raise FileStorageFormatError, name
else:
if not read_only:
file.write(packed_version)
- return 4L, maxoid, ltid
+ return 4L, z64, ltid
index_get=index.get
@@ -1651,18 +1669,18 @@
if tid >= stop:
break
- tpos=pos
- tend=tpos+tl
+ tpos = pos
+ tend = tpos + tl
if status=='u':
# Undone transaction, skip it
seek(tend)
- h=u64(read(8))
+ h = u64(read(8))
if h != tl:
if recover: return tpos, None, None
panic('%s has inconsistent transaction length at %s',
name, pos)
- pos=tend+8
+ pos = tend + 8
continue
pos = tpos+ TRANS_HDR_LEN + ul + dl + el
@@ -1690,27 +1708,34 @@
logger.warning("%s incorrect previous pointer at %s",
name, pos)
- pos=pos+dlen
+ pos += dlen
if pos != tend:
- if recover: return tpos, None, None
+ if recover:
+ return tpos, None, None
panic("%s data records don't add up at %s",name,tpos)
# Read the (intentionally redundant) transaction length
seek(pos)
h = u64(read(8))
if h != tl:
- if recover: return tpos, None, None
+ if recover:
+ return tpos, None, None
panic("%s redundant transaction length check failed at %s",
name, pos)
- pos=pos+8
+ pos += 8
- if tindex: # avoid the pathological empty transaction case
- _maxoid = max(tindex.keys()) # in 2.2, just max(tindex)
- maxoid = max(_maxoid, maxoid)
- index.update(tindex)
- tindex.clear()
+ index.update(tindex)
+ tindex.clear()
+ # Caution: fsIndex doesn't have an efficient __nonzero__ or __len__.
+ # That's why we do try/except instead. fsIndex.maxKey() is fast.
+ try:
+ maxoid = index.maxKey()
+ except ValueError:
+ # The index is empty.
+ maxoid = z64
+
return pos, maxoid, ltid
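A rough sketch of the dict-to-fsIndex conversion _restore_index() now
performs for old-format .index files (assuming, as the diff above does,
that fsIndex.update() accepts a plain dict):

    from ZODB.fsIndex import fsIndex

    def ensure_fsIndex(index):
        # Old .index files pickled the main index as a Python dict;
        # convert it so the fast maxKey() path is always available.
        if isinstance(index, dict):
            newindex = fsIndex()
            newindex.update(index)   # copy oid -> offset entries
            index = newindex
        return index

In the real method the converted index is also re-pickled to the .index
file, but only when the storage was not opened read-only -- skipping that
rewrite in read-only mode is the second bug fixed by this checkin.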
Modified: ZODB/branches/3.3/src/ZODB/tests/testFileStorage.py
===================================================================
--- ZODB/branches/3.3/src/ZODB/tests/testFileStorage.py 2005-02-25 20:46:19 UTC (rev 29301)
+++ ZODB/branches/3.3/src/ZODB/tests/testFileStorage.py 2005-02-25 21:51:33 UTC (rev 29302)
@@ -77,67 +77,66 @@
self.assertEqual(self._storage._index.__class__, fsIndex)
- # XXX We could really use some tests for sanity checking
+ # A helper for checking that when a .index file contains a dict for
+ # the index, it's converted to an fsIndex when the file is opened.
+ def convert_index_to_dict(self):
+ # Convert the index in the current .index file to a Python dict.
+ # Return the index originally found.
+ import cPickle as pickle
- def check_conversion_to_fsIndex_not_if_readonly(self):
+ f = open('FileStorageTests.fs.index', 'r+b')
+ p = pickle.Unpickler(f)
+ data = p.load()
+ index = data['index']
- self.tearDown()
+ newindex = dict(index)
+ data['index'] = newindex
- class OldFileStorage(ZODB.FileStorage.FileStorage):
- def _newIndexes(self):
- return {}, {}, {}, {}, {}, {}, {}
+ f.seek(0)
+ f.truncate()
+ p = pickle.Pickler(f, 1)
+ p.dump(data)
+ f.close()
+ return index
-
+ def check_conversion_to_fsIndex(self, read_only=False):
from ZODB.fsIndex import fsIndex
- # Hack FileStorage to create dictionary indexes
- self._storage = OldFileStorage('FileStorageTests.fs')
-
- self.assertEqual(type(self._storage._index), type({}))
+ # Create some data, and remember the index.
for i in range(10):
self._dostore()
+ oldindex_as_dict = dict(self._storage._index)
- # Should save the index
+ # Save the index.
self._storage.close()
- self._storage = ZODB.FileStorage.FileStorage(
- 'FileStorageTests.fs', read_only=1)
- self.assertEqual(type(self._storage._index), type({}))
+ # Convert it to a dict.
+ old_index = self.convert_index_to_dict()
+ self.assert_(isinstance(old_index, fsIndex))
+ new_index = self.convert_index_to_dict()
+ self.assert_(isinstance(new_index, dict))
- def check_conversion_to_fsIndex(self):
+ # Verify it's converted to fsIndex in memory upon open.
+ self.open(read_only=read_only)
+ self.assert_(isinstance(self._storage._index, fsIndex))
- self.tearDown()
+ # Verify it has the right content.
+ newindex_as_dict = dict(self._storage._index)
+ self.assertEqual(oldindex_as_dict, newindex_as_dict)
- class OldFileStorage(ZODB.FileStorage.FileStorage):
- def _newIndexes(self):
- return {}, {}, {}, {}, {}, {}, {}
-
-
- from ZODB.fsIndex import fsIndex
-
- # Hack FileStorage to create dictionary indexes
- self._storage = OldFileStorage('FileStorageTests.fs')
-
- self.assertEqual(type(self._storage._index), type({}))
- for i in range(10):
- self._dostore()
-
- oldindex = self._storage._index.copy()
-
- # Should save the index
+ # Check that the type on disk has changed iff read_only is False.
self._storage.close()
+ current_index = self.convert_index_to_dict()
+ if read_only:
+ self.assert_(isinstance(current_index, dict))
+ else:
+ self.assert_(isinstance(current_index, fsIndex))
- self._storage = ZODB.FileStorage.FileStorage('FileStorageTests.fs')
- self.assertEqual(self._storage._index.__class__, fsIndex)
- self.failUnless(self._storage._used_index)
+ def check_conversion_to_fsIndex_readonly(self):
+ # Same thing, but the disk .index should continue to hold a
+ # Python dict.
+ self.check_conversion_to_fsIndex(read_only=True)
- index = {}
- for k, v in self._storage._index.items():
- index[k] = v
-
- self.assertEqual(index, oldindex)
-
-
def check_save_after_load_with_no_index(self):
for i in range(10):
self._dostore()
@@ -146,7 +145,46 @@
self.open()
self.assertEqual(self._storage._saved, 1)
+ def check_index_oid_ignored(self):
+ # Prior to ZODB 3.2.6, the 'oid' value stored in the .index file
+ # was believed. But there were cases where adding larger oids
+ # didn't update the FileStorage ._oid attribute -- the restore()
+ # method in particular didn't update it, and that's about the only
+ # method copyTransactionsFrom() uses. A database copy created that
+ # way then stored an 'oid' of z64 in the .index file. This created
+ # torturous problems, as when that file was opened, "new" oids got
+ # generated starting over from 0 again.
+ # Now the cached 'oid' value is ignored: verify that this is so.
+ import cPickle as pickle
+ from ZODB.utils import z64
+ from ZODB.DB import DB
+ # Create some data.
+ db = DB(self._storage)
+ conn = db.open()
+ conn.root()['xyz'] = 1
+ get_transaction().commit()
+ true_max_oid = self._storage._oid
+
+ # Save away the index, and poke in a bad 'oid' value by hand.
+ db.close()
+ f = open('FileStorageTests.fs.index', 'r+b')
+ p = pickle.Unpickler(f)
+ data = p.load()
+ saved_oid = data['oid']
+ self.assertEqual(true_max_oid, saved_oid)
+ data['oid'] = z64
+ f.seek(0)
+ f.truncate()
+ p = pickle.Pickler(f, 1)
+ p.dump(data)
+ f.close()
+
+ # Verify that we get the correct oid again when we reopen, despite
+ # that we stored nonsense in the .index file's 'oid'.
+ self.open()
+ self.assertEqual(self._storage._oid, true_max_oid)
+
# This would make the unit tests too slow
# check_save_after_load_that_worked_hard(self)
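The tests above exploit the fact that the .index file is just a pickle of
a dict with 'index', 'pos', 'oid', and 'vindex' keys. A condensed sketch
of that rewrite trick (the helper name and the mutate callback are
illustrative):

    import cPickle as pickle

    def rewrite_index_file(path, mutate):
        # Load the pickled index metadata, let the caller tamper with
        # it, and write it back over the original file.
        f = open(path, 'r+b')
        data = pickle.Unpickler(f).load()
        mutate(data)   # e.g. data['oid'] = z64, or
                       # data['index'] = dict(data['index'])
        f.seek(0)
        f.truncate()
        pickle.Pickler(f, 1).dump(data)
        f.close()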