[Checkins] SVN: relstorage/trunk/ Added logging and periodic
commits to the pack code. This should help diagnose packing
speed issues.
Shane Hathaway
shane at hathawaymix.org
Sat Mar 29 18:13:49 EDT 2008
Log message for revision 85014:
Added logging and periodic commits to the pack code. This should help diagnose packing speed issues.
Changed:
U relstorage/trunk/CHANGELOG.txt
U relstorage/trunk/relstorage/adapters/common.py
U relstorage/trunk/relstorage/adapters/mysql.py
-=-
Modified: relstorage/trunk/CHANGELOG.txt
===================================================================
--- relstorage/trunk/CHANGELOG.txt 2008-03-29 18:50:37 UTC (rev 85013)
+++ relstorage/trunk/CHANGELOG.txt 2008-03-29 22:13:49 UTC (rev 85014)
@@ -6,7 +6,12 @@
an issue with converting databases. Thanks to Kevin Smith for
discovering this.
+- Added logging to the pack code to help diagnose performance issues.
+- Additions to the object_ref table are now periodically committed
+ during pre_pack so that the work is not lost if pre_pack fails.
+
+
RelStorage 1.0.1
- The speedtest script failed if run on a test database that has no tables.
Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py 2008-03-29 18:50:37 UTC (rev 85013)
+++ relstorage/trunk/relstorage/adapters/common.py 2008-03-29 22:13:49 UTC (rev 85014)
@@ -362,11 +362,18 @@
self.close(conn, cursor)
+ def open_for_pre_pack(self):
+ """Open a connection to be used for the pre-pack phase.
+ Returns (conn, cursor).
+
+ Subclasses may override this.
+ """
+ return self.open()
+
+
def pre_pack(self, pack_tid, get_references, gc):
"""Decide what to pack.
- Subclasses may override this.
-
tid specifies the most recent transaction to pack.
get_references is a function that accepts a pickled state and
@@ -377,23 +384,29 @@
even if nothing refers to it. Packing with gc disabled can be
much faster.
"""
- conn, cursor = self.open()
+ conn, cursor = self.open_for_pre_pack()
try:
try:
if gc:
- self._pre_pack_with_gc(cursor, pack_tid, get_references)
+ log.info("pre_pack: start with gc enabled")
+ self._pre_pack_with_gc(
+ conn, cursor, pack_tid, get_references)
else:
- self._pre_pack_without_gc(cursor, pack_tid)
+ log.info("pre_pack: start without gc")
+ self._pre_pack_without_gc(
+ conn, cursor, pack_tid)
except:
+ log.exception("pre_pack: failed")
conn.rollback()
raise
else:
+ log.info("pre_pack: finished successfully")
conn.commit()
finally:
self.close(conn, cursor)
- def _pre_pack_without_gc(self, cursor, pack_tid):
+ def _pre_pack_without_gc(self, conn, cursor, pack_tid):
"""Determine what to pack, without garbage collection.
With garbage collection disabled, there is no need to follow
@@ -401,6 +414,8 @@
"""
# Fill the pack_object table with OIDs, but configure them
# all to be kept by setting keep and keep_tid.
+ log.debug("pre_pack: populating pack_object")
+ subselect = self._scripts['select_keep_tid']
stmt = """
DELETE FROM pack_object;
@@ -409,20 +424,20 @@
FROM object_state
WHERE tid <= %(pack_tid)s;
- UPDATE pack_object SET keep_tid = (@select_keep_tid@)
+ UPDATE pack_object SET keep_tid = (""" + subselect + """)
"""
- stmt = stmt.replace(
- '@select_keep_tid@', self._scripts['select_keep_tid'])
self._run_script(cursor, stmt, {'pack_tid': pack_tid})
- def _pre_pack_with_gc(self, cursor, pack_tid, get_references):
+ def _pre_pack_with_gc(self, conn, cursor, pack_tid, get_references):
"""Determine what to pack, with garbage collection.
"""
+ log.info("pre_pack: following references after the pack point")
# Fill object_ref with references from object states
# in transactions that will not be packed.
- self._fill_nonpacked_refs(cursor, pack_tid, get_references)
+ self._fill_nonpacked_refs(conn, cursor, pack_tid, get_references)
+ log.debug("pre_pack: populating pack_object")
# Fill the pack_object table with OIDs that either will be
# removed (if nothing references the OID) or whose history will
# be cut.
@@ -468,13 +483,17 @@
# include objects currently set to be removed, keep
# those objects as well. Do this
# repeatedly until all references have been satisfied.
+ pass_num = 1
while True:
+ log.info("pre_pack: following references before the pack point, "
+ "pass %d", pass_num)
# Make a list of all parent objects that still need
# to be visited. Then set keep_tid for all pack_object
# rows with keep = true.
# keep_tid must be set before _fill_pack_object_refs examines
# references.
+ subselect = self._scripts['select_keep_tid']
stmt = """
DELETE FROM temp_pack_visit;
@@ -484,14 +503,17 @@
WHERE keep = %(TRUE)s
AND keep_tid IS NULL;
- UPDATE pack_object SET keep_tid = (@select_keep_tid@)
- WHERE keep = %(TRUE)s AND keep_tid IS NULL
+ UPDATE pack_object SET keep_tid = (""" + subselect + """)
+ WHERE keep = %(TRUE)s AND keep_tid IS NULL;
+
+ SELECT COUNT(1) FROM temp_pack_visit
"""
- stmt = stmt.replace(
- '@select_keep_tid@', self._scripts['select_keep_tid'])
self._run_script(cursor, stmt, {'pack_tid': pack_tid})
+ visit_count = cursor.fetchone()[0]
+ log.debug("pre_pack: checking references from %d object(s)",
+ visit_count)
- self._fill_pack_object_refs(cursor, get_references)
+ self._fill_pack_object_refs(conn, cursor, get_references)
# Visit the children of all parent objects that were
# just visited.
@@ -505,12 +527,18 @@
)
"""
self._run_script_stmt(cursor, stmt)
- if not cursor.rowcount:
+ found_count = cursor.rowcount
+
+ log.info("pre_pack: found %d more referenced object(s) in "
+ "pass %d", found_count, pass_num)
+ if not found_count:
# No new references detected.
break
+ else:
+ pass_num += 1
- def _fill_nonpacked_refs(self, cursor, pack_tid, get_references):
+ def _fill_nonpacked_refs(self, conn, cursor, pack_tid, get_references):
"""Fill object_ref for all transactions that will not be packed."""
stmt = """
SELECT DISTINCT tid
@@ -523,11 +551,11 @@
)
"""
self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
- for (tid,) in cursor.fetchall():
- self._add_refs_for_tid(cursor, tid, get_references)
+ tids = [tid for (tid,) in cursor]
+ self._add_refs_for_tids(conn, cursor, tids, get_references)
- def _fill_pack_object_refs(self, cursor, get_references):
+ def _fill_pack_object_refs(self, conn, cursor, get_references):
"""Fill object_ref for all pack_object rows that have keep_tid."""
stmt = """
SELECT DISTINCT keep_tid
@@ -540,8 +568,8 @@
)
"""
cursor.execute(stmt)
- for (tid,) in cursor.fetchall():
- self._add_refs_for_tid(cursor, tid, get_references)
+ tids = [tid for (tid,) in cursor]
+ self._add_refs_for_tids(conn, cursor, tids, get_references)
def _add_object_ref_rows(self, cursor, add_rows):
@@ -560,7 +588,12 @@
def _add_refs_for_tid(self, cursor, tid, get_references):
"""Fill object_refs with all states for a transaction.
+
+ Returns the number of references added.
"""
+ log.debug("pre_pack: transaction %d: computing references ", tid)
+ from_count = 0
+
stmt = """
SELECT zoid, state
FROM object_state
@@ -574,6 +607,7 @@
# Oracle
state = state.read()
if state:
+ from_count += 1
to_oids = get_references(str(state))
for to_oid in to_oids:
add_rows.append((from_oid, tid, to_oid))
@@ -588,7 +622,26 @@
"""
self._run_script_stmt(cursor, stmt, {'tid': tid})
+ to_count = len(add_rows)
+ log.debug("pre_pack: transaction %d: has %d reference(s) "
+ "from %d object(s)", tid, to_count, from_count)
+ return to_count
+
+ def _add_refs_for_tids(self, conn, cursor, tids, get_references):
+ """Fill object_refs with all states for multiple transactions."""
+ if tids:
+ added = 0
+ log.info("pre_pack: examining all references from objects in %d "
+ "transaction(s)" % len(tids))
+ for tid in tids:
+ added += self._add_refs_for_tid(cursor, tid, get_references)
+ if added >= 1000:
+ # save the work done so far
+ conn.commit()
+ added = 0
+
+
def _hold_commit_lock(self, cursor):
"""Hold the commit lock for packing"""
cursor.execute("LOCK TABLE commit_lock IN EXCLUSIVE MODE")
@@ -601,26 +654,61 @@
conn, cursor = self.open()
try:
try:
+ log.info("pack: start, pack_tid = %d", pack_tid)
+
+ stmt = """
+ select COUNT(1)
+ FROM transaction
+ WHERE tid > 0
+ AND tid <= %(pack_tid)s
+ """
+ self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
+ transaction_count = cursor.fetchone()[0]
+
+ stmt = """
+ SELECT COUNT(1)
+ FROM pack_object
+ WHERE keep = %(FALSE)s
+ """
+ self._run_script_stmt(cursor, stmt)
+ delete_count = cursor.fetchone()[0]
+
+ stmt = """
+ SELECT COUNT(1)
+ FROM pack_object
+ WHERE keep = %(TRUE)s
+ """
+ self._run_script_stmt(cursor, stmt)
+ trim_count = cursor.fetchone()[0]
+
+ log.info(
+ "pack: will pack %d transaction(s), delete %s object(s),"
+ " and trim %s old object(s)",
+ transaction_count, delete_count, trim_count)
+
# hold the commit lock for a moment to prevent deadlocks.
self._hold_commit_lock(cursor)
for table in ('object_ref', 'current_object', 'object_state'):
- # Remove objects that are in pack_object and have keep
- # set to false.
- stmt = """
- DELETE FROM %s
- WHERE zoid IN (
- SELECT zoid
- FROM pack_object
- WHERE keep = %%(FALSE)s
- )
- """ % table
- self._run_script_stmt(cursor, stmt)
+ if delete_count > 0:
+ # Remove objects that are in pack_object and have keep
+ # set to false.
+ log.debug("pack: deleting objects from %s", table)
+ stmt = """
+ DELETE FROM %s
+ WHERE zoid IN (
+ SELECT zoid
+ FROM pack_object
+ WHERE keep = %%(FALSE)s
+ )
+ """ % table
+ self._run_script_stmt(cursor, stmt)
- if table != 'current_object':
+ if trim_count > 0 and table != 'current_object':
# Cut the history of objects in pack_object that
# have keep set to true.
+ log.debug("pack: trimming objects in %s", table)
stmt = """
DELETE FROM %s
WHERE zoid IN (
@@ -636,14 +724,18 @@
""" % (table, table)
self._run_script_stmt(cursor, stmt)
+ log.debug("pack: terminating prev_tid chains")
stmt = """
- -- Terminate prev_tid chains
UPDATE object_state SET prev_tid = 0
WHERE tid <= %(pack_tid)s
- AND prev_tid != 0;
+ AND prev_tid != 0
+ """
+ self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
- -- For each tid to be removed, delete the corresponding row in
- -- object_refs_added.
+ # For each tid to be removed, delete the corresponding row in
+ # object_refs_added.
+ log.debug("pack: deleting from object_refs_added")
+ stmt = """
DELETE FROM object_refs_added
WHERE tid > 0
AND tid <= %(pack_tid)s
@@ -651,9 +743,12 @@
SELECT 1
FROM object_state
WHERE tid = object_refs_added.tid
- );
+ )
+ """
+ self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
- -- Delete transactions no longer used.
+ log.debug("pack: deleting transactions")
+ stmt = """
DELETE FROM transaction
WHERE tid > 0
AND tid <= %(pack_tid)s
@@ -661,24 +756,29 @@
SELECT 1
FROM object_state
WHERE tid = transaction.tid
- );
+ )
+ """
+ self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
- -- Mark the remaining packable transactions as packed
+ log.debug("pack: marking transactions as packed")
+ stmt = """
UPDATE transaction SET packed = %(TRUE)s
WHERE tid > 0
AND tid <= %(pack_tid)s
- AND packed = %(FALSE)s;
-
- -- Clean up.
- DELETE FROM pack_object;
+ AND packed = %(FALSE)s
"""
- self._run_script(cursor, stmt, {'pack_tid': pack_tid})
+ self._run_script_stmt(cursor, stmt, {'pack_tid': pack_tid})
+ log.debug("pack: clearing pack_object")
+ cursor.execute("DELETE FROM pack_object")
+
except:
+ log.exception("pack: failed")
conn.rollback()
raise
else:
+ log.info("pack: finished successfully")
conn.commit()
finally:
Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py 2008-03-29 18:50:37 UTC (rev 85013)
+++ relstorage/trunk/relstorage/adapters/mysql.py 2008-03-29 22:13:49 UTC (rev 85014)
@@ -543,8 +543,9 @@
cursor.execute(stmt)
- def pre_pack(self, pack_tid, get_references, gc):
- """Decide what to pack.
+ def open_for_pre_pack(self):
+ """Open a connection to be used for the pre-pack phase.
+ Returns (conn, cursor).
This overrides the method by the same name in common.Adapter.
"""
@@ -553,12 +554,10 @@
# This phase of packing works best with transactions
# disabled. It changes no user-facing data.
conn.autocommit(True)
- if gc:
- self._pre_pack_with_gc(cursor, pack_tid, get_references)
- else:
- self._pre_pack_without_gc(cursor, pack_tid)
- finally:
+ return conn, cursor
+ except:
self.close(conn, cursor)
+ raise
def _hold_commit_lock(self, cursor):
More information about the Checkins
mailing list