[Zodb-checkins] CVS: Packages/ZODB - FileStorage.py:1.135.6.7

Fri Feb 25 15:31:34 EST 2005

Update of /cvs-repository/Packages/ZODB
In directory cvs.zope.org:/tmp/cvs-serv24962/ZODB

Modified Files:
      Tag: Zope-2_7-branch
	FileStorage.py 
Log Message:
Stop believing the maximum oid cached in a FileStorage's .index file.

This is a critical bugfix, although the problems it addresses are
(a) rare; and, (b) not entirely fixed yet (more checkins to come).

The true max oid is found efficiently now by exploiting the recently-added
fsIndex.maxKey() method (which was, of course, added for this purpose).

Also fix that the .index file could get updated on disk when the
FileStorage was opened in read-only mode.  The code was trying to prevent
this, but missed the most obvious rewrite path.

Incidentally improved many obsolete and/or incorrect comments.


=== Packages/ZODB/FileStorage.py 1.135.6.6 => 1.135.6.7 ===

--- Packages/ZODB/FileStorage.py:1.135.6.6	Mon Jun 21 22:10:44 2004
+++ Packages/ZODB/FileStorage.py	Fri Feb 25 15:31:04 2005
@@ -136,12 +136,7 @@
 from ZODB.lock_file import LockFile
 from ZODB.utils import p64, u64, cp, z64
 from ZODB.fspack import FileStoragePacker
-
-try:
-    from ZODB.fsIndex import fsIndex
-except ImportError:
-    def fsIndex():
-        return {}
+from ZODB.fsIndex import fsIndex
 
 from zLOG import LOG, BLATHER, WARNING, ERROR, PANIC
 
@@ -273,14 +268,13 @@
         r = self._restore_index()
         if r is not None:
             self._used_index = 1 # Marker for testing
-            index, vindex, start, maxoid, ltid = r
+            index, vindex, start, ltid = r
 
             self._initIndex(index, vindex, tindex, tvindex,
                             oid2serial, toid2serial, toid2serial_delete)
             self._pos, self._oid, tid = read_index(
                 self._file, file_name, index, vindex, tindex, stop,
-                ltid=ltid, start=start, maxoid=maxoid,
-                read_only=read_only,
+                ltid=ltid, start=start, read_only=read_only,
                 )
         else:
             self._used_index = 0 # Marker for testing
@@ -333,9 +327,9 @@
         # random seek + read takes 7ms (that was approximately true on
         # Linux and Windows tests in mid-2003), that's 28 seconds just to
         # find the old serials.
-        # XXX Probably better to junk this and redefine _index as mapping
-        # XXX oid to (offset, serialno) pair, via a new memory-efficient
-        # XXX BTree type.
+        # TODO:  Probably better to junk this and redefine _index as mapping
+        # oid to (offset, serialno) pair, via a new memory-efficient
+        # BTree type.
         self._oid2serial = oid2serial
         # oid->serialno map to transactionally add to _oid2serial.
         self._toid2serial = toid2serial
@@ -355,12 +349,18 @@
     def _save_index(self):
         """Write the database index to a file to support quick startup."""
 
+        if self._is_read_only:
+            return
+
         index_name = self.__name__ + '.index'
         tmp_name = index_name + '.index_tmp'
 
         f=open(tmp_name,'wb')
         p=Pickler(f,1)
 
+        # Note:  starting with ZODB 3.2.6, the 'oid' value stored is ignored
+        # by the code that reads the index.  We still write it, so that
+        # .index files can still be read by older ZODBs.
         info={'index': self._index, 'pos': self._pos,
               'oid': self._oid, 'vindex': self._vindex}
 
@@ -459,11 +459,22 @@
 
     def _restore_index(self):
         """Load database index to support quick startup."""
+        # Returns (index, vindex, pos, tid), or None in case of
+        # error.
+        # Starting with ZODB 3.2.6, the 'oid' value stored in the index
+        # is ignored.
+        # The index returned is always an instance of fsIndex.  If the
+        # index cached in the file is a Python dict, it's converted to
+        # fsIndex here, and, if we're not in read-only mode, the .index
+        # file is rewritten with the converted fsIndex so we don't need to
+        # convert it again the next time.
         file_name=self.__name__
         index_name=file_name+'.index'
 
-        try: f=open(index_name,'rb')
-        except: return None
+        try:
+            f = open(index_name, 'rb')
+        except:
+            return None
 
         p=Unpickler(f)
 
@@ -476,34 +487,31 @@
             return None
         index = info.get('index')
         pos = info.get('pos')
-        oid = info.get('oid')
         vindex = info.get('vindex')
-        if index is None or pos is None or oid is None or vindex is None:
+        if index is None or pos is None or vindex is None:
             return None
         pos = long(pos)
 
-        if isinstance(index, DictType) and not self._is_read_only:
-            # Convert to fsIndex
+        if isinstance(index, DictType):
+            # Convert to fsIndex.
             newindex = fsIndex()
-            if type(newindex) is not type(index):
-                # And we have fsIndex
-                newindex.update(index)
-
-                # Now save the index
+            newindex.update(index)
+            index = newindex
+            if not self._is_read_only:
+                # Save the converted index.
                 f = open(index_name, 'wb')
                 p = Pickler(f, 1)
-                info['index'] = newindex
+                info['index'] = index
                 p.dump(info)
                 f.close()
-
-                # Now call this method again to get the new data
+                # Now call this method again to get the new data.
                 return self._restore_index()
 
         tid = self._sane(index, pos)
         if not tid:
             return None
 
-        return index, vindex, pos, oid, tid
+        return index, vindex, pos, tid
 
     def close(self):
         self._file.close()
@@ -1784,7 +1792,7 @@
 
 def read_index(file, name, index, vindex, tindex, stop='\377'*8,
                ltid=z64, start=4L, maxoid=z64, recover=0, read_only=0):
-    """Scan the entire file storage and recreate the index.
+    """Scan the file storage and update the index.
 
     Returns file position, max oid, and last transaction id.  It also
     stores index information in the three dictionary arguments.
@@ -1792,18 +1800,28 @@
     Arguments:
     file -- a file object (the Data.fs)
     name -- the name of the file (presumably file.name)
-    index -- dictionary, oid -> data record
-    vindex -- dictionary, oid -> data record for version data
-    tindex -- dictionary, oid -> data record
-       XXX tindex is cleared before return, so it will be empty
+    index -- fsIndex, oid -> data record file offset
+    vindex -- dictionary, oid -> data record offset for version data
+    tindex -- dictionary, oid -> data record offset
+              tindex is cleared before return
 
     There are several default arguments that affect the scan or the
-    return values.  XXX should document them.
+    return values.  TODO:  document them.
+
+    start -- the file position at which to start scanning for oids added
+             beyond the ones the passed-in indices know about.  The .index
+             file caches the highest ._pos FileStorage knew about when the
+             .index file was last saved, and that's the intended value
+             to pass in for start; accept the default (and pass empty
+             indices) to recreate the index from scratch
+    maxoid -- ignored (it meant something prior to ZODB 3.2.6; the argument
+              still exists just so the signature of read_index() stayed the
+              same)
 
     The file position returned is the position just after the last
     valid transaction record.  The oid returned is the maximum object
-    id in the data.  The transaction id is the tid of the last
-    transaction.
+    id in `index`, or z64 if the index is empty.  The transaction id is the
+    tid of the last transaction, or ltid if the index is empty.
     """
 
     read = file.read
@@ -1812,12 +1830,15 @@
     file_size=file.tell()
 
     if file_size:
-        if file_size < start: raise FileStorageFormatError, file.name
+        if file_size < start:
+            raise FileStorageFormatError, file.name
         seek(0)
-        if read(4) != packed_version: raise FileStorageFormatError, name
+        if read(4) != packed_version:
+            raise FileStorageFormatError, name
     else:
-        if not read_only: file.write(packed_version)
-        return 4L, maxoid, ltid
+        if not read_only:
+            file.write(packed_version)
+        return 4L, z64, ltid
 
     index_get=index.get
 
@@ -1949,11 +1970,16 @@
                   name, pos)
         pos=pos+8
 
-        if tindex: # avoid the pathological empty transaction case
-            _maxoid = max(tindex.keys()) # in 2.2, just max(tindex)
-            maxoid = max(_maxoid, maxoid)
-            index.update(tindex)
-            tindex.clear()
+        index.update(tindex)
+        tindex.clear()
+
+    # Caution:  fsIndex doesn't have an efficient __nonzero__ or __len__.
+    # That's why we do try/except instead.  fsIndex.maxKey() is fast.
+    try:
+        maxoid = index.maxKey()
+    except ValueError:
+        # The index is empty:  ignore any passed-in maxoid and report z64.
+        maxoid = z64
 
     return pos, maxoid, ltid