[Zope3-checkins] CVS: Zope3/src/zodb/storage - memory.py:1.2 interfaces.py:1.17 fsdump.py:1.6 bdbminimal.py:1.18 bdbfull.py:1.24 base.py:1.27 fsindex.py:NONE file.py:NONE
Jeremy Hylton
jeremy@zope.com
Tue, 22 Apr 2003 11:23:14 -0400
Update of /cvs-repository/Zope3/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv13741
Modified Files:
memory.py interfaces.py fsdump.py bdbminimal.py bdbfull.py
base.py
Removed Files:
fsindex.py file.py
Log Message:
Merge the jeremy-new-pack-branch to the trunk.
The primary change is a completely new implementation of file storage pack.
=== Zope3/src/zodb/storage/memory.py 1.1 => 1.2 ===
--- Zope3/src/zodb/storage/memory.py:1.1 Thu Mar 20 17:58:16 2003
+++ Zope3/src/zodb/storage/memory.py Tue Apr 22 11:23:13 2003
@@ -224,8 +224,8 @@
raise db.DBNotFoundError
while self._keys[i] == key and self._vals[i] <> val:
i += 1
- if i > len(self):
- raise db.DBNotFoundError
+ if i >= len(self):
+ raise db.DBNotFoundError
return self._getrec(i)
def set(self, key):
@@ -293,3 +293,6 @@
def append(self, val, txn=None):
super(FakeQueue, self).append(val)
+
+ def values(self):
+ return self
=== Zope3/src/zodb/storage/interfaces.py 1.16 => 1.17 ===
--- Zope3/src/zodb/storage/interfaces.py:1.16 Tue Apr 8 10:45:55 2003
+++ Zope3/src/zodb/storage/interfaces.py Tue Apr 22 11:23:13 2003
@@ -186,8 +186,8 @@
incremental pack, only old object revisions are removed. In a full gc
pack, cyclic garbage detection and removal is also performed.
- t is the pack time. All non-current object revisions older than t
- will be removed in an incremental pack.
+ t is the pack time. All non-current object revisions older than
+ or the same age as t will be removed in an incremental pack.
pack() always performs an incremental pack. If the gc flag is True,
then pack() will also perform a garbage collection. Some storages
=== Zope3/src/zodb/storage/fsdump.py 1.5 => 1.6 ===
--- Zope3/src/zodb/storage/fsdump.py:1.5 Mon Mar 17 15:18:27 2003
+++ Zope3/src/zodb/storage/fsdump.py Tue Apr 22 11:23:13 2003
@@ -11,98 +11,4 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
-"""A low-level utility to dump the internal FileStorage representation."""
-
-import struct
-from zodb.storage.file \
- import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
-from zodb.utils import u64
-from zodb.storage.base import splitrefs
-from zodb.storage.tests.base import zodb_unpickle
-
-def fmt(p64):
- # Return a nicely formatted string for a packaged 64-bit value
- return "%016x" % u64(p64)
-
-def dump(path, dest=None):
- Dumper(path, dest).dump()
-
-class Dumper:
- """A very verbose dumper for debugging FileStorage problems."""
-
- def __init__(self, path, dest=None):
- self.file = open(path, "rb")
- self.dest = dest
-
- def dump(self):
- fid = self.file.read(1024)
- print >> self.dest, "*" * 60
- print >> self.dest, "file identifier: %r" % fid[:4]
- print >> self.dest, "database version: %r" % fid[4:8]
- # XXX perhaps verify that the rest of the metadata is nulls?
- while self.dump_txn():
- pass
-
- def dump_txn(self):
- pos = self.file.tell()
- h = self.file.read(TRANS_HDR_LEN)
- if not h:
- return False
- tid, tlen, status, ul, dl, el = struct.unpack(TRANS_HDR, h)
- end = pos + tlen
- print >> self.dest, "=" * 60
- print >> self.dest, "offset: %d" % pos
- print >> self.dest, "end pos: %d" % end
- print >> self.dest, "transaction id: %s" % fmt(tid)
- print >> self.dest, "trec len: %d" % tlen
- print >> self.dest, "status: %r" % status
- user = descr = extra = ""
- if ul:
- user = self.file.read(ul)
- if dl:
- descr = self.file.read(dl)
- if el:
- extra = self.file.read(el)
- print >> self.dest, "user: %r" % user
- print >> self.dest, "description: %r" % descr
- print >> self.dest, "len(extra): %d" % el
- while self.file.tell() < end:
- self.dump_data(pos)
- tlen2 = u64(self.file.read(8))
- print >> self.dest, "redundant trec len: %d" % tlen2
- return True
-
- def dump_data(self, tloc):
- pos = self.file.tell()
- h = self.file.read(DATA_HDR_LEN)
- assert len(h) == DATA_HDR_LEN
- oid, revid, prev, tloc, vlen, nrefs, dlen = struct.unpack(DATA_HDR, h)
- print >> self.dest, "-" * 60
- print >> self.dest, "offset: %d" % pos
- print >> self.dest, "oid: %s" % fmt(oid)
- print >> self.dest, "revid: %s" % fmt(revid)
- print >> self.dest, "previous record offset: %d" % prev
- print >> self.dest, "transaction offset: %d" % tloc
- if vlen:
- pnv = self.file.read(8)
- sprevdata = self.file.read(8)
- version = self.file.read(vlen)
- print >> self.dest, "version: %r" % version
- print >> self.dest, "non-version data offset: %d" % u64(pnv)
- print >> self.dest, \
- "previous version data offset: %d" % u64(sprevdata)
- print >> self.dest, 'numrefs:', nrefs
- for ref in splitrefs(self.file.read(nrefs * 8)):
- print >> self.dest, '\t%s' % fmt(ref)
- print >> self.dest, "len(data): %d" % dlen
- data = self.file.read(dlen)
- # A debugging feature for use with the test suite.
- if data.startswith("(czodb.storage.tests.minpo\nMinPO\n"):
- print >> self.dest, "value: %r" % zodb_unpickle(data).value
- if not dlen:
- sbp = self.file.read(8)
- print >> self.dest, "backpointer: %d" % u64(sbp)
-
-if __name__ == "__main__":
- import sys
- Dumper(sys.argv[1]).dump()
+from zodb.storage.file.dump import *
=== Zope3/src/zodb/storage/bdbminimal.py 1.17 => 1.18 ===
--- Zope3/src/zodb/storage/bdbminimal.py:1.17 Wed Apr 9 13:58:20 2003
+++ Zope3/src/zodb/storage/bdbminimal.py Tue Apr 22 11:23:13 2003
@@ -23,6 +23,8 @@
from zodb.conflict import ResolvedSerial
from zodb.storage.base import db, BerkeleyBase, PackStop, _WorkThread
from zodb.storage.base import splitrefs
+# For debugging
+from zodb.interfaces import _fmt_oid as fo
ABORT = 'A'
COMMIT = 'C'
@@ -101,7 +103,7 @@
# It is also used during pack to list objects for which no more
# references exist, such that the objects can be completely packed
# away.
- pass
+ self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
def _version_check(self, txn):
version = self._info.get('version')
@@ -472,6 +474,7 @@
finally:
c.close()
# We're done with the mark table
+ self._packmark.truncate(txn=txn)
def _collect_objs(self, txn):
orec = self._oidqueue.consume(txn)
=== Zope3/src/zodb/storage/bdbfull.py 1.23 => 1.24 ===
--- Zope3/src/zodb/storage/bdbfull.py:1.23 Thu Apr 10 15:06:53 2003
+++ Zope3/src/zodb/storage/bdbfull.py Tue Apr 22 11:23:13 2003
@@ -31,6 +31,8 @@
from zodb.storage.base import db, BerkeleyBase, PackStop, _WorkThread, \
splitrefs
from zodb.storage._helper import incr
+# For debugging
+from zodb.interfaces import _fmt_oid as fo
ABORT = 'A'
COMMIT = 'C'
@@ -120,7 +122,7 @@
# pending table is empty, the oids, pvids, and prevrevids tables
# must also be empty.
#
- # packmark -- [oid]
+ # packmark -- oid -> [tid]
# Every object reachable from the root during a classic pack
# operation will have its oid present in this table.
#
@@ -232,6 +234,7 @@
# Tables to support packing.
self._objrevs = self._setupDB('objrevs', db.DB_DUP)
self._delqueue = self._setupDB('delqueue', 0, db.DB_QUEUE, 8)
+ self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 16)
def _version_check(self, txn):
version = self._info.get('version')
@@ -452,7 +455,7 @@
# created in the interim.
if self._packing:
for oid in self._oids.keys():
- self._packmark.put(oid, PRESENT, txn=txn)
+ self._packmark.put(oid, tid, txn=txn)
self._oids.truncate(txn)
def _dobegin(self, txn, tid):
@@ -1422,8 +1425,6 @@
if self._metadata.has_key(orevid):
metadata = self._metadata[orevid]
self._metadata.delete(orevid, txn=txn)
- if self._references.has_key(orevid):
- self._references.delete(orevid, txn=txn)
# Decref the pickle
self._decrefPickle(oid, metadata[16:24], txn)
try:
@@ -1452,7 +1453,7 @@
refcount = u64(self._pickleRefcounts.get(revid, ZERO)) - 1
assert refcount >= 0
if refcount == 0:
- # We can collect this pickle
+ # We can collect this pickle and the references
self._pickleRefcounts.delete(revid, txn=txn)
self._pickles.delete(revid, txn=txn)
# And decref all objects pointed to by this pickle
@@ -1461,6 +1462,7 @@
deltas = {}
self._update(deltas, references, -1)
self._decref(deltas, txn)
+ self._references.delete(revid, txn=txn)
else:
self._pickleRefcounts.put(revid, p64(refcount), txn=txn)
@@ -1550,7 +1552,7 @@
# BAW: This could probably be made more efficient by not doing so
# much searching, but it would also be more complicated, so the
# tradeoff should be measured.
- serial = None
+ serial, tid = self._getSerialAndTid(oid)
c = self._metadata.cursor(txn=txn)
try:
rec = c.set_range(oid)
@@ -1568,9 +1570,60 @@
c.close()
return serial
+ def _rootset(self, packtid, txn):
+ c = self._txnoids.cursor(txn)
+ try:
+ rec = c.first()
+ while rec:
+ tid, oid = rec
+ rec = c.next()
+ finally:
+ c.close()
+ # Find the root set for reachability purposes. Each member of the
+ # root set is an (oid, tid) pair. First, the current root object as of
+ # the pack time is always in the root set. Second, any object revision
+ # after the pack time that has a back pointer (lrevid) to before the
+ # pack time serves as another root because some future undo could then
+ # revive any referenced objects.
+ try:
+ zerorev = self._findrev(ZERO, packtid, txn)
+ except KeyError:
+ # There's no root object
+ return
+ self._oidqueue.append(ZERO+zerorev, txn)
+ c = self._txnoids.cursor(txn)
+ try:
+ try:
+ rec = c.set_range(packtid)
+ except db.DBNotFoundError:
+ rec = None
+ while rec:
+ tid, oid = rec
+ revid = oid + tid
+ rec = c.next()
+ lrevid = self._metadata[revid][16:24]
+ if lrevid < packtid:
+ self._oidqueue.append(revid, txn)
+ finally:
+ c.close()
+
+ # tid is None if all we care about is that any object revision is present.
+ def _packmark_has(self, oid, tid, txn):
+ if tid is None:
+ return self._packmark.has_key(oid)
+ c = self._packmark.cursor(txn)
+ try:
+ try:
+ c.set_both(oid, tid)
+ return True
+ except db.DBNotFoundError:
+ return False
+ finally:
+ c.close()
+
def _mark(self, txn, packtid):
# Find the oids for all the objects reachable from the root, as of the
- # pack time. To reduce the amount of in-core memory we need do do a
+ # pack time. To reduce the amount of in-core memory we need to do a
# pack operation, we'll save the mark data in the packmark table. The
# oidqueue is a BerkeleyDB Queue that holds the list of object ids to
# look at next, and by using this we don't need to keep an in-memory
@@ -1579,20 +1632,23 @@
# Quick exit for empty storages
if not self._serials:
return
- # The oid of the object we're looking at, starting at the root
- oid = ZERO
- # Start at the root, find all the objects the current revision of the
- # root references, and then for each of those, find all the objects it
- # references, and so on until we've traversed the entire object graph.
- while oid:
+ self._rootset(packtid, txn)
+ rec = self._oidqueue.consume(txn)
+ while rec:
if self._stop:
raise PackStop, 'stopped in _mark()'
- if not self._packmark.has_key(oid):
- # We haven't seen this object yet
- self._packmark.put(oid, PRESENT, txn=txn)
- # Get the list of references for the most current revision of
- # this object as of the pack time.
- tid = self._findrev(oid, packtid, txn)
+ revid = rec[1]
+ oid = revid[:8]
+ tid = revid[8:]
+ # See if this revision is already in the packmark
+ if not self._packmark_has(oid, tid, txn):
+ # BAW: We are more conservative than FileStorage here, since
+ # any reference to an object keeps all the object references
+ # alive. FileStorage will collect individual object
+ # revisions. I think our way is fine since we'll eventually
+ # collect everything incrementally anyway, and for Berkeley,
+ # all object revisions add to the refcount total.
+ self._packmark.put(oid, tid, txn=txn)
# Say there's no root object (as is the case in some of the
# unit tests), and we're looking up oid ZERO. Then serial
# will be None.
@@ -1602,11 +1658,13 @@
# object revision
references = self._references.get(oid+lrevid)
if references:
- for oid in splitrefs(references):
- self._oidqueue.append(oid, txn)
+ for roid in splitrefs(references):
+ # Find the most recent object revision as of the
+ # timestamp of the under-focus revision.
+ rtid = self._findrev(roid, tid, txn)
+ self._oidqueue.append(roid+rtid, txn)
# Pop the next oid off the queue and do it all again
rec = self._oidqueue.consume(txn)
- oid = rec and rec[1]
assert len(self._oidqueue) == 0
def _sweep(self, txn, packtid):
@@ -1627,7 +1685,7 @@
# Otherwise, if packmark (which knows about all the root
# reachable objects) doesn't have a record for this guy, then
# we can zap it. Do so by appending to delqueue.
- if not self._packmark.has_key(oid):
+ if not self._packmark_has(oid, None, txn):
self._delqueue.append(oid, txn)
finally:
c.close()
=== Zope3/src/zodb/storage/base.py 1.26 => 1.27 ===
--- Zope3/src/zodb/storage/base.py:1.26 Wed Apr 9 13:54:51 2003
+++ Zope3/src/zodb/storage/base.py Tue Apr 22 11:23:13 2003
@@ -80,6 +80,7 @@
_vote()
_abort()
_finish()
+ _clear_temp()
If the subclass wants to implement IUndoStorage, it must implement
all the methods in that interface.
@@ -535,8 +536,7 @@
self._references = self._setupDB('references')
self._oids = self._setupDB('oids')
self._pending = self._setupDB('pending')
- self._packmark = self._setupDB('packmark')
- self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
+ self._packmark = self._setupDB('packmark', db.DB_DUP)
# Do storage specific initialization
self._init()
self._withtxn(self._version_check)
=== Removed File Zope3/src/zodb/storage/fsindex.py ===
=== Removed File Zope3/src/zodb/storage/file.py ===