[Checkins] SVN: zc.zodbdgc/trunk/ - Added an option to ignore references to some databases.
Jim Fulton
jim at zope.com
Mon Jun 15 14:15:51 EDT 2009
Log message for revision 101023:
- Added an option to ignore references to some databases.
Changed:
U zc.zodbdgc/trunk/README.txt
U zc.zodbdgc/trunk/src/zc/zodbdgc/README.test
U zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt
U zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py
-=-
Modified: zc.zodbdgc/trunk/README.txt
===================================================================
--- zc.zodbdgc/trunk/README.txt 2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/README.txt 2009-06-15 18:15:51 UTC (rev 101023)
@@ -1,11 +1 @@
-***********************
-Title Here
-***********************
-
-Changes
-*******
-
-0.1 (yyyy-mm-dd)
-================
-
-Initial release
+See src/zc/zodbdgc/README.txt
Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/README.test
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/README.test 2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/README.test 2009-06-15 18:15:51 UTC (rev 101023)
@@ -291,6 +291,8 @@
-h, --help show this help message and exit
-d DAYS, --days=DAYS Number of trailing days (defaults to 1) to treat as
non-garbage
+ -i IGNORE, --ignore-database=IGNORE
+ Ignore references to the given database name.
-l LEVEL, --log-level=LEVEL
The logging level. The default is WARNING.
@@ -511,6 +513,70 @@
!!! db2 2 ?
POSKeyError: 0x02
+
+Ignoring databases
+------------------
+
+Sometimes, when doing garbage collection, you want to ignore references
+to some databases, for example ones that are not in the configuration.
+
+ >>> db = ZODB.config.databaseFromString("""
+ ... <zodb db1>
+ ... <filestorage>
+ ... path one.fs
+ ... pack-gc false
+ ... </filestorage>
+ ... </zodb>
+ ... <zodb db2>
+ ... <filestorage>
+ ... path two.fs
+ ... pack-gc false
+ ... </filestorage>
+ ... </zodb>
+ ... """)
+
+ >>> conn = db.open()
+ >>> conn.get_connection('db2').root.x = C()
+ >>> transaction.commit()
+ >>> conn.root.x = C()
+ >>> conn.root.x.x = conn.get_connection('db2').root.x
+ >>> transaction.commit()
+ >>> conn.root.a = C()
+ >>> transaction.commit()
+ >>> conn.root.b = C()
+ >>> conn.root.a.b = conn.root.b
+ >>> conn.root.b.a = conn.root.a
+ >>> transaction.commit()
+ >>> del conn.root.a
+ >>> del conn.root.b
+ >>> transaction.commit()
+
+ >>> now += 2*86400
+
+ >>> db.pack()
+
+ >>> _ = [db.close() for db in db.databases.itervalues()]
+
+ >>> open('config', 'w').write("""
+ ... <zodb db1>
+ ... <filestorage>
+ ... path one.fs
+ ... </filestorage>
+ ... </zodb>
+ ... """)
+
+ >>> sorted(zc.zodbdgc.gc_command(['config']).iterator())
+ Traceback (most recent call last):
+ ...
+ KeyError: 'db2'
+
+ >>> sorted(zc.zodbdgc.gc_command(['-idb2', 'config']).iterator())
+ ... # doctest: +NORMALIZE_WHITESPACE
+ Removed 2 objects from db1
+ [('db1', '\x00\x00\x00\x00\x00\x00\x00\x02'),
+ ('db1', '\x00\x00\x00\x00\x00\x00\x00\x03')]
+
+
.. cleanup
>>> logging.getLogger().setLevel(old_level)
Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt 2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/README.txt 2009-06-15 18:15:51 UTC (rev 101023)
@@ -61,3 +61,18 @@
You can run the script with the ``--help`` option to get usage
information.
+
+Change History
+==============
+
+0.2.0 2009-06-15
+----------------
+
+- Added an option to ignore references to some databases.
+
+- Fixed a bug in handling of the logging level option.
+
+0.1.0 2009-06-11
+----------------
+
+Initial release
Modified: zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py
===================================================================
--- zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py 2009-06-15 18:10:52 UTC (rev 101022)
+++ zc.zodbdgc/trunk/src/zc/zodbdgc/__init__.py 2009-06-15 18:15:51 UTC (rev 101023)
@@ -44,13 +44,24 @@
logger = logging.getLogger(__name__)
-def gc(conf, days=1, conf2=None, batch_size=10000):
+def gc(conf, days=1, ignore=(), conf2=None, batch_size=10000):
+ close = []
+ try:
+ return gc_(close, conf, days, ignore, conf2, batch_size)
+ finally:
+ for db in close:
+ for db in db.databases.itervalues():
+ db.close()
+
+def gc_(close, conf, days, ignore, conf2, batch_size):
db1 = ZODB.config.databaseFromFile(open(conf))
+ close.append(db1)
if conf2 is None:
db2 = db1
else:
logger.info("Using secondary configuration, %s, for analysis", conf2)
db2 = ZODB.config.databaseFromFile(open(conf2))
+ close.append(db1)
if set(db1.databases) != set(db2.databases):
raise ValueError("primary and secondary databases don't match.")
@@ -71,7 +82,7 @@
# Make sure we can get the roots
data, s = storage.load(z64, '')
good.insert(name, z64)
- for ref in getrefs(data, name):
+ for ref in getrefs(data, name, ignore):
good.insert(*ref)
if days:
@@ -87,7 +98,7 @@
good.insert(name, oid)
# and anything they reference
- for ref in getrefs(data, name):
+ for ref in getrefs(data, name, ignore):
if not deleted.has(*ref):
good.insert(*ref)
else:
@@ -105,11 +116,11 @@
if deleted.has(name, oid):
continue
if good.has(name, oid):
- for ref in getrefs(data, name):
+ for ref in getrefs(data, name, ignore):
if deleted.has(*ref):
continue
if good.insert(*ref) and bad.has(*ref):
- bad_to_good(storages, bad, good, *ref)
+ bad_to_good(storages, ignore, bad, good, *ref)
else:
bad.insert(name, oid)
else:
@@ -123,6 +134,7 @@
if conf2 is not None:
for db in db2.databases.itervalues():
db.close()
+ close.pop()
# Now, we have the garbage in bad. Remove it.
for name, db in db1.databases.iteritems():
@@ -149,14 +161,13 @@
else:
storage.tpc_abort(t)
t.abort()
- db.close()
return bad
def bad_path(baddir, name, oid):
return os.path.join(baddir, name, base64.urlsafe_b64encode(oid))
-def bad_to_good(storages, bad, good, name, oid):
+def bad_to_good(storages, ignore, bad, good, name, oid):
to_do = [(name, oid)]
while to_do:
@@ -166,25 +177,26 @@
for h in storage.history(oid, size=1<<99):
data = storage.loadSerial(oid, h['tid'])
- for ref in getrefs(data, name):
+ for ref in getrefs(data, name, ignore):
if good.insert(*ref) and bad.has(*ref):
to_do.append(ref)
-def getrefs(p, rname):
+def getrefs(p, rname, ignore):
refs = []
u = cPickle.Unpickler(cStringIO.StringIO(p))
u.persistent_load = refs
u.noload()
u.noload()
for ref in refs:
- name = rname
if isinstance(ref, tuple):
yield rname, ref[0]
elif isinstance(ref, str):
yield rname, ref
else:
assert isinstance(ref, list)
- yield ref[1][:2]
+ ref = ref[1]
+ if ref[0] not in ignore:
+ yield ref[:2]
class oidset(dict):
@@ -258,6 +270,9 @@
'-d', '--days', dest='days', type='int', default=1,
help='Number of trailing days (defaults to 1) to treat as non-garbage')
parser.add_option(
+ '-i', '--ignore-database', dest='ignore', action='append',
+ help='Ignore references to the given database name.')
+ parser.add_option(
'-l', '--log-level', dest='level',
help='The logging level. The default is WARNING.')
@@ -276,7 +291,7 @@
level = getattr(logging, level)
logging.basicConfig(level=level)
- return gc(args[0], options.days, *args[1:])
+ return gc(args[0], options.days, options.ignore or (), *args[1:])
@@ -377,7 +392,7 @@
print "%s: %s" % (t.__name__, v)
continue
- for ref in getrefs(p, name):
+ for ref in getrefs(p, name, ()):
if (ref[0] != name) and not databases[name].xrefs:
print 'bad xref', ref[0], u64(ref[1]), name, u64(oid)
More information about the Checkins
mailing list