[Zope-Checkins] CVS: ZODB3/ZODB - FileStorage.py:1.105.2.10.2.1
Jim Fulton
jim@zope.com
Tue, 17 Dec 2002 18:09:07 -0500
Update of /cvs-repository/ZODB3/ZODB
In directory cvs.zope.org:/tmp/cvs-serv2822
Modified Files:
Tag: ZODB3-fast-restart-branch
FileStorage.py
Log Message:
Barry and Jim
Several startup performance improvements:
- When sanity checking saved indexes, only check a few objects
in the last transaction, rather than checking every object.
Otherwise, really large transactions could cause the sanity check
to take a long time (28 seconds for 320000 objects on my machine.)
- Changed to use fsIndex (BTree-based) indexes. This not only saves
memory, but it also speeds index loading by a factor of 4.
o Included code to automatically convert old dictionary indexes to
use fsIndex.
- Save the index on startup when no usable saved index was found.
- Periodically save indexes on commit when the number of records
(including transaction records) written since the last save exceeds
the number of objects in the database (as of the previous save).
This is somewhat conservative, since it seems to take about 10 times
longer to write and read an object in the index than it does to read a
record.
=== ZODB3/ZODB/FileStorage.py 1.105.2.10 => 1.105.2.10.2.1 ===
--- ZODB3/ZODB/FileStorage.py:1.105.2.10 Mon Dec 9 18:47:04 2002
+++ ZODB3/ZODB/FileStorage.py Tue Dec 17 18:09:06 2002
@@ -202,6 +202,8 @@
# default pack time is 0
_packt = z64
+ _records_before_save = 10000
+
def __init__(self, file_name, create=0, read_only=0, stop=None,
quota=None):
@@ -269,7 +271,11 @@
r = self._restore_index()
if r is not None:
+ self._used_index = 1 # Marker for testing
+
+
index, vindex, start, maxoid, ltid = r
+
self._initIndex(index, vindex, tindex, tvindex)
self._pos, self._oid, tid = read_index(
self._file, file_name, index, vindex, tindex, stop,
@@ -277,10 +283,18 @@
read_only=read_only,
)
else:
+ self._used_index = 0 # Marker for testing
self._pos, self._oid, tid = read_index(
self._file, file_name, index, vindex, tindex, stop,
read_only=read_only,
)
+ self._save_index()
+
+
+ self._records_before_save = max(self._records_before_save,
+ len(self._index))
+
+
self._ltid = tid
self._ts = tid = TimeStamp(tid)
@@ -307,8 +321,9 @@
def _newIndexes(self):
# hook to use something other than builtin dict
- return {}, {}, {}, {}
+ return fsIndex(), {}, {}, {}
+ _saved = 0
def _save_index(self):
"""Write the database index to a file to support quick startup
"""
@@ -325,6 +340,7 @@
p.dump(info)
f.flush()
f.close()
+
try:
try:
os.remove(index_name)
@@ -333,6 +349,8 @@
os.rename(tmp_name, index_name)
except: pass
+ self._saved += 1
+
def _clear_index(self):
index_name=self.__name__+'.index'
if os.path.exists(index_name):
@@ -350,48 +368,65 @@
object positions cause zero to be returned.
"""
- if pos < 100: return 0
- file=self._file
- seek=file.seek
- read=file.read
+ if pos < 100:
+ return 0 # insane
+ file = self._file
+ seek = file.seek
+ read = file.read
seek(0,2)
- if file.tell() < pos: return 0
- ltid=None
+ if file.tell() < pos:
+ return 0 # insane
+ ltid = None
+
+ max_checked = 5
+ checked = 0
- while 1:
+ while checked < max_checked:
seek(pos-8)
- rstl=read(8)
- tl=U64(rstl)
- pos=pos-tl-8
- if pos < 4: return 0
+ rstl = read(8)
+ tl = U64(rstl)
+ pos = pos-tl-8
+ if pos < 4:
+ return 0 # insane
seek(pos)
s = read(TRANS_HDR_LEN)
tid, stl, status, ul, dl, el = unpack(TRANS_HDR, s)
- if not ltid: ltid=tid
- if stl != rstl: return 0 # inconsistent lengths
- if status == 'u': continue # undone trans, search back
- if status not in ' p': return 0
- if tl < (TRANS_HDR_LEN + ul + dl + el): return 0
- tend=pos+tl
- opos=pos+(TRANS_HDR_LEN + ul + dl + el)
- if opos==tend: continue # empty trans
+ if not ltid:
+ ltid = tid
+ if stl != rstl:
+ return 0 # inconsistent lengths
+ if status == 'u':
+ continue # undone trans, search back
+ if status not in ' p':
+ return 0 # insane
+ if tl < (TRANS_HDR_LEN + ul + dl + el):
+ return 0 # insane
+ tend = pos+tl
+ opos = pos+(TRANS_HDR_LEN + ul + dl + el)
+ if opos == tend:
+ continue # empty trans
- while opos < tend:
+ while opos < tend and checked < max_checked:
# Read the data records for this transaction
seek(opos)
- h=read(DATA_HDR_LEN)
- oid,serial,sprev,stloc,vlen,splen = unpack(DATA_HDR, h)
- tloc=U64(stloc)
- plen=U64(splen)
+ h = read(DATA_HDR_LEN)
+ oid, serial, sprev, stloc, vlen, splen = unpack(DATA_HDR, h)
+ tloc = U64(stloc)
+ plen = U64(splen)
+
+ dlen = DATA_HDR_LEN+(plen or 8)
+ if vlen:
+ dlen = dlen+(16+vlen)
- dlen=DATA_HDR_LEN+(plen or 8)
- if vlen: dlen=dlen+(16+vlen)
+ if opos+dlen > tend or tloc != pos:
+ return 0 # insane
- if opos+dlen > tend or tloc != pos: return 0
+ if index.get(oid, 0) != opos:
+ return 0 # insane
- if index.get(oid, 0) != opos: return 0
+ checked += 1
- opos=opos+dlen
+ opos = opos+dlen
return ltid
@@ -421,6 +456,23 @@
return None
pos = long(pos)
+ if type(index) is type({}) and (not self._is_read_only):
+ # Convert to fsIndex
+ newindex = fsIndex()
+ if type(newindex) is not type(index):
+ # And we have fsIndex
+ newindex.update(index)
+
+ # Now save the index
+ f=open(index_name,'wb')
+ p=Pickler(f,1)
+ info['index'] = newindex
+ p.dump(info)
+ f.close()
+
+ # Now call this method again to get the new data
+ return self._restore_index()
+
tid=self._sane(index, pos)
if not tid: return None
@@ -946,6 +998,9 @@
finally:
self._lock_release()
+ # Keep track of the number of records that we've written
+ _records_written = 0
+
def _finish(self, tid, u, d, e):
nextpos=self._nextpos
if nextpos:
@@ -962,6 +1017,17 @@
self._index.update(self._tindex)
self._vindex.update(self._tvindex)
+
+
+ # Update the number of records that we've written
+ # +1 for the transaction record
+ self._records_written += len(self._tindex) + 1
+ if self._records_written >= self._records_before_save:
+ self._save_index()
+ self._records_written = 0
+ self._records_before_save = max(self._records_before_save,
+ len(self._index))
+
self._ltid = tid
def _abort(self):