[Zope-Checkins] CVS: Packages/ZODB - FileStorage.py:1.135.6.7
Tim Peters
tim.one at comcast.net
Fri Feb 25 15:31:34 EST 2005
Update of /cvs-repository/Packages/ZODB
In directory cvs.zope.org:/tmp/cvs-serv24962/ZODB
Modified Files:
Tag: Zope-2_7-branch
FileStorage.py
Log Message:
Stop believing the maximum oid cached in a FileStorage's .index file.
This is a critical bugfix, although the problems it addresses are
(a) rare; and, (b) not entirely fixed yet (more checkins to come).
The true max oid is found efficiently now by exploiting the recently-added
fsIndex.maxKey() method (which was, of course, added for this purpose).
Also fix that the .index file could get updated on disk when the
FileStorage was opened in read-only mode. The code was trying to prevent
this, but missed the most obvious rewrite path.
Incidentally improved many obsolete and/or incorrect comments.
=== Packages/ZODB/FileStorage.py 1.135.6.6 => 1.135.6.7 ===
--- Packages/ZODB/FileStorage.py:1.135.6.6 Mon Jun 21 22:10:44 2004
+++ Packages/ZODB/FileStorage.py Fri Feb 25 15:31:04 2005
@@ -136,12 +136,7 @@
from ZODB.lock_file import LockFile
from ZODB.utils import p64, u64, cp, z64
from ZODB.fspack import FileStoragePacker
-
-try:
- from ZODB.fsIndex import fsIndex
-except ImportError:
- def fsIndex():
- return {}
+from ZODB.fsIndex import fsIndex
from zLOG import LOG, BLATHER, WARNING, ERROR, PANIC
@@ -273,14 +268,13 @@
r = self._restore_index()
if r is not None:
self._used_index = 1 # Marker for testing
- index, vindex, start, maxoid, ltid = r
+ index, vindex, start, ltid = r
self._initIndex(index, vindex, tindex, tvindex,
oid2serial, toid2serial, toid2serial_delete)
self._pos, self._oid, tid = read_index(
self._file, file_name, index, vindex, tindex, stop,
- ltid=ltid, start=start, maxoid=maxoid,
- read_only=read_only,
+ ltid=ltid, start=start, read_only=read_only,
)
else:
self._used_index = 0 # Marker for testing
@@ -333,9 +327,9 @@
# random seek + read takes 7ms (that was approximately true on
# Linux and Windows tests in mid-2003), that's 28 seconds just to
# find the old serials.
- # XXX Probably better to junk this and redefine _index as mapping
- # XXX oid to (offset, serialno) pair, via a new memory-efficient
- # XXX BTree type.
+ # TODO: Probably better to junk this and redefine _index as mapping
+ # oid to (offset, serialno) pair, via a new memory-efficient
+ # BTree type.
self._oid2serial = oid2serial
# oid->serialno map to transactionally add to _oid2serial.
self._toid2serial = toid2serial
@@ -355,12 +349,18 @@
def _save_index(self):
"""Write the database index to a file to support quick startup."""
+ if self._is_read_only:
+ return
+
index_name = self.__name__ + '.index'
tmp_name = index_name + '.index_tmp'
f=open(tmp_name,'wb')
p=Pickler(f,1)
+ # Note: starting with ZODB 3.2.6, the 'oid' value stored is ignored
+ # by the code that reads the index. We still write it, so that
+ # .index files can still be read by older ZODBs.
info={'index': self._index, 'pos': self._pos,
'oid': self._oid, 'vindex': self._vindex}
@@ -459,11 +459,22 @@
def _restore_index(self):
"""Load database index to support quick startup."""
+ # Returns (index, vindex, pos, tid), or None in case of
+ # error.
+ # Starting with ZODB 3.2.6, the 'oid' value stored in the index
+ # is ignored.
+ # The index returned is always an instance of fsIndex. If the
+ # index cached in the file is a Python dict, it's converted to
+ # fsIndex here, and, if we're not in read-only mode, the .index
+ # file is rewritten with the converted fsIndex so we don't need to
+ # convert it again the next time.
file_name=self.__name__
index_name=file_name+'.index'
- try: f=open(index_name,'rb')
- except: return None
+ try:
+ f = open(index_name, 'rb')
+ except:
+ return None
p=Unpickler(f)
@@ -476,34 +487,31 @@
return None
index = info.get('index')
pos = info.get('pos')
- oid = info.get('oid')
vindex = info.get('vindex')
- if index is None or pos is None or oid is None or vindex is None:
+ if index is None or pos is None or vindex is None:
return None
pos = long(pos)
- if isinstance(index, DictType) and not self._is_read_only:
- # Convert to fsIndex
+ if isinstance(index, DictType):
+ # Convert to fsIndex.
newindex = fsIndex()
- if type(newindex) is not type(index):
- # And we have fsIndex
- newindex.update(index)
-
- # Now save the index
+ newindex.update(index)
+ index = newindex
+ if not self._is_read_only:
+ # Save the converted index.
f = open(index_name, 'wb')
p = Pickler(f, 1)
- info['index'] = newindex
+ info['index'] = index
p.dump(info)
f.close()
-
- # Now call this method again to get the new data
+ # Now call this method again to get the new data.
return self._restore_index()
tid = self._sane(index, pos)
if not tid:
return None
- return index, vindex, pos, oid, tid
+ return index, vindex, pos, tid
def close(self):
self._file.close()
@@ -1784,7 +1792,7 @@
def read_index(file, name, index, vindex, tindex, stop='\377'*8,
ltid=z64, start=4L, maxoid=z64, recover=0, read_only=0):
- """Scan the entire file storage and recreate the index.
+ """Scan the file storage and update the index.
Returns file position, max oid, and last transaction id. It also
stores index information in the three dictionary arguments.
@@ -1792,18 +1800,28 @@
Arguments:
file -- a file object (the Data.fs)
name -- the name of the file (presumably file.name)
- index -- dictionary, oid -> data record
- vindex -- dictionary, oid -> data record for version data
- tindex -- dictionary, oid -> data record
- XXX tindex is cleared before return, so it will be empty
+ index -- fsIndex, oid -> data record file offset
+ vindex -- dictionary, oid -> data record offset for version data
+ tindex -- dictionary, oid -> data record offset
+ tindex is cleared before return
There are several default arguments that affect the scan or the
- return values. XXX should document them.
+ return values. TODO: document them.
+
+ start -- the file position at which to start scanning for oids added
+ beyond the ones the passed-in indices know about. The .index
+ file caches the highest ._pos FileStorage knew about when the
+ .index file was last saved, and that's the intended value
+ to pass in for start; accept the default (and pass empty
+ indices) to recreate the index from scratch
+ maxoid -- ignored (it meant something prior to ZODB 3.2.6; the argument
+ still exists just so the signature of read_index() stayed the
+ same)
The file position returned is the position just after the last
valid transaction record. The oid returned is the maximum object
- id in the data. The transaction id is the tid of the last
- transaction.
+ id in `index`, or z64 if the index is empty. The transaction id is the
+ tid of the last transaction, or ltid if the index is empty.
"""
read = file.read
@@ -1812,12 +1830,15 @@
file_size=file.tell()
if file_size:
- if file_size < start: raise FileStorageFormatError, file.name
+ if file_size < start:
+ raise FileStorageFormatError, file.name
seek(0)
- if read(4) != packed_version: raise FileStorageFormatError, name
+ if read(4) != packed_version:
+ raise FileStorageFormatError, name
else:
- if not read_only: file.write(packed_version)
- return 4L, maxoid, ltid
+ if not read_only:
+ file.write(packed_version)
+ return 4L, z64, ltid
index_get=index.get
@@ -1949,11 +1970,16 @@
name, pos)
pos=pos+8
- if tindex: # avoid the pathological empty transaction case
- _maxoid = max(tindex.keys()) # in 2.2, just max(tindex)
- maxoid = max(_maxoid, maxoid)
- index.update(tindex)
- tindex.clear()
+ index.update(tindex)
+ tindex.clear()
+
+ # Caution: fsIndex doesn't have an efficient __nonzero__ or __len__.
+ # That's why we do try/except instead. fsIndex.maxKey() is fast.
+ try:
+ maxoid = index.maxKey()
+ except ValueError:
+ # The index is empty.
+ maxoid == z64
return pos, maxoid, ltid
More information about the Zope-Checkins
mailing list