[Zodb-checkins] CVS: Zope3/src/zodb/storage - file.py:1.23.2.1
Jeremy Hylton
jeremy@zope.com
Thu, 10 Apr 2003 18:41:13 -0400
Update of /cvs-repository/Zope3/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv6526
Modified Files:
Tag: jeremy-new-pack-branch
file.py
Log Message:
Incomplete progress on the new pack for FileStorage.
=== Zope3/src/zodb/storage/file.py 1.23 => 1.23.2.1 ===
--- Zope3/src/zodb/storage/file.py:1.23 Wed Apr 9 14:00:43 2003
+++ Zope3/src/zodb/storage/file.py Thu Apr 10 18:41:12 2003
@@ -132,6 +132,7 @@
from __future__ import generators
import os
+import stat
import sys
import time
import errno
@@ -452,8 +453,18 @@
s = struct.pack(">QQ", pnv, vprev)
file.write(s + version)
- def _read_txn_header(self, pos):
- pass
+ def _read_txn_header(self, pos, tid=None):
+ self._file.seek(pos)
+ s = self._file.read(TRANS_HDR_LEN)
+ if len(s) != TRANS_HDR_LEN:
+ raise CorruptedDataError(tid, s, pos)
+ h = TxnHeader.fromString(s)
+ if tid is not None and tid != h.tid:
+ raise CorruptedDataError(tid, s, pos)
+ h.user = self._file.read(h.ulen)
+ h.descr = self._file.read(h.dlen)
+ h.ext = self._file.read(h.elen)
+ return h
def _loadBack_impl(self, oid, back):
# shared implementation used by various _loadBack methods
@@ -889,11 +900,8 @@
# Return backpointer to oid in data record for in transaction at tpos.
# It should contain a pickle identical to data. Returns 0 on failure.
# Must call with lock held.
- self._file.seek(tpos)
- h = self._file.read(TRANS_HDR_LEN)
- tid, tl, status, ul, dl, el = struct.unpack(TRANS_HDR, h)
- self._file.read(ul + dl + el)
- tend = tpos + tl
+ h = self._read_txn_header(tpos)
+ tend = tpos + h.tlen
pos = self._file.tell()
while pos < tend:
h = self._read_data_header(pos)
@@ -1327,14 +1335,18 @@
otloc = self._pos
here = self._pos + self._tfile.tell() + self._thl
# Let's move the file pointer back to the start of the txn record.
- self._file.seek(tpos)
- h = self._file.read(TRANS_HDR_LEN)
- if h[16] != ' ':
+ th = self._read_txn_header(tpos)
+## self._file.seek(tpos)
+## h = self._file.read(TRANS_HDR_LEN)
+## if h[16] != ' ':
+## raise UndoError(None, "non-undoable transaction")
+## tl = u64(h[8:16])
+## ul, dl, el = struct.unpack(">HHH", h[17:TRANS_HDR_LEN])
+ if th.status != " ":
raise UndoError(None, "non-undoable transaction")
- tl = u64(h[8:16])
- ul, dl, el = struct.unpack(">HHH", h[17:TRANS_HDR_LEN])
- tend = tpos + tl
- pos = tpos + (TRANS_HDR_LEN + ul + dl + el)
+
+ tend = tpos + th.tlen
+ pos = tpos + th.headerlen()
tindex = {}
# keep track of failures, cause we may succeed later
@@ -1523,6 +1535,21 @@
self._commit_lock_acquire = cla
self._commit_lock_release = clr
+ # The packer will use several indexes.
+ # index: oid -> pos
+ # vindex: version -> pos of XXX
+ # tindex: oid -> pos, for current txn
+ # tvindex: version -> pos of XXX, for current txn
+
+ self.index = fsIndex()
+ self.vindex = {}
+ self.tindex = {}
+ self.tvindex = {}
+
+ # Index for non-version data. This is a temporary structure
+ # to reduce I/O during packing
+ self.nvindex = fsIndex()
+
def _loada(self, oid, _index):
"""Read any version and return the version"""
try:
@@ -1546,55 +1573,166 @@
return self._file.read(1) != ' ' # XXX or == "p"?
def _pack_index(self, index):
- # Return an index for everything reachable at the pack time.
+ """Return packpos and an index of objects reachable at the pack time.
+
+ If the storage is empty or it has been packed to a later time,
+ return None.
+ """
+ # index is a complete index as of the pack time.
+ index = fsIndex()
+ vindex = {}
+ tindex = {}
+ packpos, maxoid, ltid = self._read_index(index, vindex, tindex,
+ self._stop, read_only=1)
+ if packpos == self._metadata_size:
+ return None, None
+ if self._redundant_pack(packpos):
+ return None, None
+ del vindex, tindex
+
+ # Now traverse all objects starting from the root and add
+ # them to pindex. Any object in index but not in pindex
+ # is unreachable and should not be copied.
rootl = [ZERO]
pindex = fsIndex()
while rootl:
oid = rootl.pop()
if oid in pindex:
continue
- # XXX We might need to catch exceptions raised by _loada().
p, refs, v = self._loada(oid, index)
rootl.extend(refs)
pindex[oid] = index[oid]
- return pindex
+ return packpos, pindex
def pack(self):
+ # Pack copies all data reachable at the pack time or later.
+ #
+ # Copying occurs in two phases. In the first phase, txns
+ # before the pack time are copied if they contain any reachable
+ # data. In the second phase, all txns after the pack time
+ # are copied.
+ #
+ # Txn and data records contain pointers to previous records.
+ # Because these pointers are stored as file offsets, they
+ # must be updated when we copy data.
+
packing = True
+ file_end = os.stat(self._file.name)[stat.ST_SIZE]
+ packpos, pindex = self._pack_index()
+ assert packpos <= file_end
- ##################################################################
- # Step 1, get index as of pack time that has only referenced objects.
- index = fsIndex()
- vindex = {}
- # tindex is the index for the current transaction
- tindex = {}
- tvindex = {}
- packpos, maxoid, ltid = self._read_index(index, vindex, tindex,
- self._stop, read_only=1)
- if packpos == self._metadata_size:
- return
- if self._redundant_pack(packpos):
- return
- pindex = self._pack_index(index)
+ # Setup the destination file and copy the metadata.
+ self.ofile = open(self._name + ".pack", "w+b")
+ self._file.seek(0)
+ self.ofile.write(self._file.read(self._metadata_size))
- ##################################################################
- # Step 2, copy data and compute new index based on new positions.
- index = fsIndex()
- vindex = {}
- tindex = {}
- tvindex = {}
+ self.copyToPacktime(packpos)
+ self.copyRest()
- # Index for non-version data. This is a temporary structure
- # to reduce I/O during packing
- nvindex = fsIndex()
+ # OK, we've copied everything. Now we need to wrap things up.
+ self.ofile.flush()
+ self.ofile.close()
+ self._file.close()
+
+ # XXX probably don't need to return indexes, since caller
+ # has object
+ return opos, self.index, self.vindex, self.tindex, self.tvindex
+
+ def isCurNonversion(self, h, nvpos, curpos):
+ """Return True if h is current non-version data,
+
+ where the current data is at curpos. Always restores the file
+ to the position it was at before the call.
+ """
+ pos = self._file.tell()
+ try:
+ # XXX probably don't want to read this data every time.
+ # instead, keep a cache of whether curpos is in a version.
+ # note that entries can be thrown out after passing curpos.
+ cur = self._read_data_header(curpos)
+ assert cur.oid == h.oid
+ if not cur.version:
+ return False
+ # If cur has a pnv, we need to chase backpointers until
+ # we get to the actual data.
+ self._file.read(cur.nrefs * 8)
+ pnv = self._loadBackPOS(cur.oid, cur.pnv)
+ return pnv == nvpos
+ finally:
+ self._file.seek(pos)
- ofile = open(self._name + '.pack', 'w+b')
- pv = ZERO
+ def copyToPacktime(self, packpos):
offset = 0L # the amount of space freed by packing
- pos = opos = self._metadata_size
- # Copy the metadata from the old file to the new one.
- self._file.seek(0)
- ofile.write(self._file.read(self._metadata_size))
+ pos = self._metadata_size
+ new_pos = pos
+
+ while pos <= packpos:
+ th = self._read_txn_header(pos)
+ copy = False
+ tend = pos + th.tlen
+ pos += th.headerlen()
+ while pos < tend:
+ h = self._read_data_header(pos)
+ cur_nonversion = False
+
+ # If this data record isn't current, don't copy it.
+ # If the current record is in a version, this may be
+ # the current non-version data.
+ curpos = pindex.get(h.oid, 0)
+ if curpos != pos:
+ if self.isCurNonversion(h, curpos):
+ cur_nonversion = True
+ else:
+ pos += h.recordlen()
+ continue
+
+ # If we are going to copy any data, we need to copy
+ # the transaction header. Note that we will need to
+ # patch up the transaction length when we are done.
+ if not copy:
+ th.status = "p"
+ s = th.asString()
+ self.ofile.write(s)
+ new_tpos = new_pos
+ new_pos += len(s)
+ copy = True
+
+ if cur_nonversion:
+ self.nvindex[h.oid] = new_pos
+
+ if h.plen:
+ refs = self._file.read(8 * h.nrefs)
+ data = self._file.read(h.plen)
+ else:
+ # If a current record has a backpointer, fetch
+ # refs and data from the backpointer. We need
+ # to write the data in the new record.
+ data, refs, serial, tid = self._loadBackTxn(h.oid, h.back)
+
+ self.writeDataRecord(h, data, refs, new_pos)
+ new_pos = self.ofile.tell()
+
+ if copy:
+ # now fix up the transaction header
+ pass
+
+
+ def writeDataRecord(self, h, data, refs, new_pos):
+ # Update the header to reflect current information, then write
+ # it to the output file.
+ h.prev = 0
+ h.back = 0
+ h.plen = len(data)
+ h.nrefs = len(refs) / 8
+ h.tloc = new_pos
+ if h.version:
+ h.pnv = self.nvindex[h.oid]
+ h.vprev = 0
+ self.ofile.write(h.asString())
+ self.ofile.write(refs)
+ self.ofile.write(data)
+
+ def XXX(self):
# Copy the data in two stages. In the packing stage, we skip
# records that are non-current or that are for unreferenced
@@ -1624,28 +1762,22 @@
self.locked = True
# Read the transaction record
- self._file.seek(pos)
- h = self._file.read(TRANS_HDR_LEN)
- if len(h) < TRANS_HDR_LEN:
+ if pos == file_end:
break
- tid, tl, status, ul, dl, el = unpack(TRANS_HDR, h)
- if status == 'c':
+ th = self._read_txn_header(pos)
+ if th.status == 'c':
# Oops. we found a checkpoint flag.
break
tpos = pos
- tend = tpos + tl
+ tend = tpos + th.tlen
otpos = opos # start pos of output trans
# write out the transaction record
status = packing and 'p' or ' '
- ofile.write(h[:16] + status + h[17:])
- thl = ul + dl + el
- h = self._file.read(thl)
- if len(h) != thl:
- raise PackError(opos)
- ofile.write(h)
- thl = TRANS_HDR_LEN + thl
+ th.status = status
+ ofile.write(th.asString())
+ thl = th.headerlen()
pos = tpos + thl
opos = otpos + thl
@@ -1780,7 +1912,7 @@
# Now, maybe we need to hack or delete the transaction
otl = opos - otpos
- if otl != tl:
+ if otl != th.tlen:
# Oops, what came out is not what came in!
# Check for empty:
@@ -2139,6 +2271,15 @@
fromString = classmethod(fromString)
+ def asString(self):
+ s = struct.pack(DATA_HDR, self.oid, self.serial, self.tloc,
+ self.vlen, self.plen, self.nrefs, self.back)
+ if self.version:
+ v = struct.pack(">QQ", self.pnv, self.vprev)
+ return s + v + self.version
+ else:
+ return s
+
def parseVersion(self, buf):
self.pnv, self.vprev = struct.unpack(">QQ", buf[:16])
self.version = buf[16:]
@@ -2148,6 +2289,33 @@
if self.version:
rlen += 16 + self.vlen
return rlen
+
+class TxnHeader:
+ """Header for a transaction record."""
+
+ __slots__ = ("tid", "tlen", "status", "user", "descr", "ext",
+ "ulen", "dlen", "elen")
+
+ def __init__(self, tid, tlen, status, ulen, dlen, elen):
+ self.tid = tid
+ self.tlen = tlen
+ self.status = status
+ self.ulen = ulen
+ self.dlen = dlen
+ self.elen = elen
+
+ def fromString(cls, s):
+ return cls(*struct.unpack(TRANS_HDR, s))
+
+ fromString = classmethod(fromString)
+
+ def asString(self):
+ s = struct.pack(TRANS_HDR, self.tid, self.tlen, self.status,
+ self.ulen, self.dlen, self.elen)
+ return "".join([s, self.user, self.descr, self.ext])
+
+ def headerlen(self):
+ return TRANS_HDR_LEN + self.ulen + self.dlen + self.elen
def cleanup(filename):