[Checkins] SVN: zc.zodbdgc/branches/dev/src/zc/zodbdgc/ Fixed several bugs:
Jim Fulton
jim at zope.com
Thu May 14 13:06:06 EDT 2009
Log message for revision 99956:
Fixed several bugs:
- The days option default wasn't set up properly. The script failed
if no -d option was used.
- oid paths weren't computed properly due to the flavor of base74
encoding used.
- Deleted records weren't handled properly when doing multiple GCs
between packs.
Changed:
U zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test
U zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
-=-
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test 2009-05-14 16:39:09 UTC (rev 99955)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/README.test 2009-05-14 17:06:06 UTC (rev 99956)
@@ -128,6 +128,7 @@
3 4
>>> del conn2.root.a
>>> del conn2.root.b
+ >>> transaction.commit()
More time passes.
@@ -266,9 +267,17 @@
Removed 2 objects from db3
Removed 1 objects from db2
- >>> sorted(bad2.iterator()) == sorted(bad2.iterator())
+ >>> sorted(bad2.iterator()) == sorted(bad.iterator())
True
+We can gc again, even with deleted records:
+
+ >>> sorted(zc.zodbdgc.gc_command(['-d2', 'config']).iterator())
+ Removed 0 objects from db1
+ Removed 0 objects from db3
+ Removed 0 objects from db2
+ []
+
>>> db = ZODB.config.databaseFromFile(open('config'))
>>> conn1 = db.open()
>>> conn2 = conn1.get_connection('db2')
@@ -291,6 +300,31 @@
>>> zc.zodbdgc.check_command(['config'])
+Make sure we can not specify days on the command line:
+
+ >>> for n in range(1, 4):
+ ... shutil.copyfile('%s.fs' % n, '%s.fs-2' %n)
+
+ >>> sorted(zc.zodbdgc.gc_command(['config', 'config2']).iterator())
+ Using secondary configuration, config2, for analysis
+ Ignoring index for 1.fs-2
+ Ignoring index for 2.fs-2
+ Ignoring index for 3.fs-2
+ Removed 0 objects from db1
+ Removed 0 objects from db3
+ Removed 0 objects from db2
+ []
+
+ >>> now += 90000
+
+ >>> sorted((name, u64(oid)) for (name, oid) in
+ ... zc.zodbdgc.gc_command(['config', 'config2']).iterator())
+ Using secondary configuration, config2, for analysis
+ Removed 0 objects from db1
+ Removed 0 objects from db3
+ Removed 2 objects from db2
+ [('db2', 3L), ('db2', 4L)]
+
.. cleanup
>>> logging.getLogger().setLevel(old_level)
Modified: zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py 2009-05-14 16:39:09 UTC (rev 99955)
+++ zc.zodbdgc/branches/dev/src/zc/zodbdgc/__init__.py 2009-05-14 17:06:06 UTC (rev 99956)
@@ -32,13 +32,13 @@
logger = logging.getLogger(__name__)
def gc(conf, days=1, conf2=None):
- db = ZODB.config.databaseFromFile(open(conf))
+ db1 = ZODB.config.databaseFromFile(open(conf))
if conf2 is None:
- db2 = db
+ db2 = db1
else:
logger.info("Using secondary configuration, %s, for analysis", conf2)
db2 = ZODB.config.databaseFromFile(open(conf2))
- if set(db.databases) != set(db2.databases):
+ if set(db1.databases) != set(db2.databases):
raise ValueError("primary and secondary databases don't match.")
databases = db2.databases
@@ -51,6 +51,8 @@
# Pre-populate good with roots and recently-written objects
good = oidset(databases)
bad = oidset(databases)
+ both = good, bad
+ deleted = oidset(databases)
baddir = tempfile.mkdtemp()
for name in storages:
os.mkdir(os.path.join(baddir, name))
@@ -60,41 +62,70 @@
_ = storage.load(z64, '')
good.insert(name, z64)
- # All new records are good
+ # All non-deleted new records are good
for trans in storage.iterator(ptid):
for record in trans:
- good.insert(name, record.oid)
- # and anything they reference
- for ref in getrefs(record.data, name):
- good.insert(*ref)
+ oid = record.oid
+ data = record.data
+ if data:
+ if deleted.has(name, oid):
+ raise AssertionError(
+ "Non-deleted record after deleted")
+ good.insert(name, oid)
+ # and anything they reference
+ for ref in getrefs(data, name):
+ if not deleted.has(*ref):
+ good.insert(*ref)
+ else:
+ # deleted record
+ deleted.insert(name, oid)
+ if good.has(name, oid):
+ good.remove(name, oid)
+
# Now iterate over older records
for trans in storage.iterator(None, ptid):
for record in trans:
oid = record.oid
data = record.data
- if good.has(name, oid):
- if not data:
+ if data:
+ if deleted.has(name, oid):
continue
- for ref in getrefs(data, name):
- if good.insert(*ref) and bad.has(*ref):
- bad_to_good(baddir, bad, good, *ref)
+ if good.has(name, oid):
+ for ref in getrefs(data, name):
+ if deleted.has(*ref):
+ continue
+ if good.insert(*ref) and bad.has(*ref):
+ bad_to_good(baddir, bad, good, *ref)
+ else:
+ bad.insert(name, oid)
+ refs = tuple(ref for ref in getrefs(data, name)
+ if not (good.has(*ref) or
+ deleted.has(*ref)
+ )
+ )
+ if not refs:
+ continue # leaves are common
+ f = open(bad_path(baddir, name, oid), 'ab')
+ marshal.dump(refs, f)
+ f.close()
else:
- bad.insert(name, oid)
- if not data:
- continue
- refs = tuple(ref for ref in getrefs(data, name)
- if not good.has(*ref))
- if not refs:
- continue # leaves are common
- f = open(os.path.join(baddir, name,
- base64.urlsafe_b64encode(oid)),
- 'ab')
- marshal.dump(refs, f)
- f.close()
+ # deleted record
+ if good.has(name, oid):
+ good.remove(name, oid)
+ elif bad.has(name, oid):
+ bad.remove(name, oid)
+ path = bad_path(baddir, name, oid)
+ if os.path.exists(path):
+ os.remove(path)
+ deleted.insert(name, oid)
+ if conf2 is not None:
+ for db in db2.databases.itervalues():
+ db.close()
+
# Now, we have the garbage in bad. Remove it.
- for name, db in db.databases.iteritems():
+ for name, db in db1.databases.iteritems():
storage = db.storage
t = transaction.begin()
storage.tpc_begin(t)
@@ -107,20 +138,23 @@
if nd:
storage.tpc_vote(t)
storage.tpc_finish(t)
- transaction.commit()
+ t.commit()
else:
storage.tpc_abort(t)
- transaction.abort()
+ t.abort()
db.close()
shutil.rmtree(baddir)
return bad
+def bad_path(baddir, name, oid):
+ return os.path.join(baddir, name, base64.urlsafe_b64encode(oid))
+
def bad_to_good(baddir, bad, good, name, oid):
bad.remove(name, oid)
- path = os.path.join(baddir, name, base64.urlsafe_b64encode(oid))
+ path = bad_path(baddir, name, oid)
if not os.path.exists(path):
return
@@ -201,7 +235,7 @@
args = sys.argv[1:]
parser = optparse.OptionParser("usage: %prog [options] config1 [config2]")
parser.add_option(
- '-d', '--days', dest='days', type='int',
+ '-d', '--days', dest='days', type='int', default=1,
help='Number of trailing days to treat as non-garbage')
options, args = parser.parse_args(args)
More information about the Checkins
mailing list