[Checkins] SVN: relstorage/trunk/ more pack optimizations, primarily for MySQL
Shane Hathaway
shane at hathawaymix.org
Mon Dec 22 16:16:54 EST 2008
Log message for revision 94251:
more pack optimizations, primarily for MySQL
Changed:
U relstorage/trunk/CHANGES.txt
U relstorage/trunk/relstorage/adapters/common.py
U relstorage/trunk/relstorage/adapters/mysql.py
U relstorage/trunk/relstorage/adapters/oracle.py
U relstorage/trunk/relstorage/relstorage.py
-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt 2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/CHANGES.txt 2008-12-22 21:16:54 UTC (rev 94251)
@@ -1,4 +1,4 @@
-Version 1.1.1
+Version 1.1.1 (unreleased)
- Worked around MySQL performance bugs in packing. Used temporary
tables and another column in the pack_object table. The
@@ -8,7 +8,7 @@
bringing write speed back up to where it was in version 1.0.
-Version 1.1
+Version 1.1 (2008-12-19)
- Normalized poll-invalidation patches as Solaris' patch command would not
accept the current format. The patches now apply with:
Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py 2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/adapters/common.py 2008-12-22 21:16:54 UTC (rev 94251)
@@ -98,9 +98,43 @@
JOIN temp_pack_visit USING (zoid)
)
""",
+
+ 'pack_current_object': """
+ DELETE FROM current_object
+ WHERE tid = %(tid)s
+ AND zoid in (
+ SELECT pack_state.zoid
+ FROM pack_state
+ WHERE pack_state.tid = %(tid)s
+ )
+ """,
+
+ 'pack_object_state': """
+ DELETE FROM object_state
+ WHERE tid = %(tid)s
+ AND zoid in (
+ SELECT pack_state.zoid
+ FROM pack_state
+ WHERE pack_state.tid = %(tid)s
+ )
+ """,
+
+ 'pack_object_ref': """
+ DELETE FROM object_refs_added
+ WHERE tid IN (
+ SELECT tid
+ FROM transaction
+ WHERE empty = %(TRUE)s
+ );
+ DELETE FROM object_ref
+ WHERE tid IN (
+ SELECT tid
+ FROM transaction
+ WHERE empty = %(TRUE)s
+ )
+ """,
}
-
def _run_script_stmt(self, cursor, generic_stmt, generic_params=()):
"""Execute a statement from a script with the given parameters.
@@ -185,7 +219,7 @@
"""Iterate over the transactions in the given range, oldest first.
Includes packed transactions.
- Yields (tid, packed, username, description, extension)
+ Yields (tid, username, description, extension, packed)
for each transaction.
"""
stmt = """
@@ -740,6 +774,10 @@
log.info("pack: will pack %d transaction(s)", len(tid_rows))
+ stmt = self._scripts['create_temp_pack_visit']
+ if stmt:
+ self._run_script(cursor, stmt)
+
# Hold the commit lock while packing to prevent deadlocks.
# Pack in small batches of transactions in order to minimize
# the interruption of concurrent write operations.
@@ -785,22 +823,18 @@
has_removable):
"""Pack one transaction. Requires populated pack tables."""
log.debug("pack: transaction %d: packing", tid)
- counters = {}
+ removed_objects = 0
+ removed_states = 0
if has_removable:
- for _table in ('current_object', 'object_state'):
- stmt = """
- DELETE FROM _table
- WHERE tid = %(tid)s
- AND zoid IN (
- SELECT pack_state.zoid
- FROM pack_state
- WHERE pack_state.tid = %(tid)s
- )
- """.replace('_table', _table)
- self._run_script_stmt(cursor, stmt, {'tid': tid})
- counters[_table] = cursor.rowcount
+ stmt = self._scripts['pack_current_object']
+ self._run_script_stmt(cursor, stmt, {'tid': tid})
+ removed_objects = cursor.rowcount
+ stmt = self._scripts['pack_object_state']
+ self._run_script_stmt(cursor, stmt, {'tid': tid})
+ removed_states = cursor.rowcount
+
# Terminate prev_tid chains
stmt = """
UPDATE object_state SET prev_tid = 0
@@ -828,9 +862,7 @@
log.debug(
"pack: transaction %d (%s): removed %d object(s) and %d state(s)",
- tid, state,
- counters.get('current_object', 0),
- counters.get('object_state', 0))
+ tid, state, removed_objects, removed_states)
def _pack_cleanup(self, conn, cursor):
@@ -839,7 +871,13 @@
conn.commit()
self._release_commit_lock(cursor)
self._hold_commit_lock(cursor)
- log.info("pack: removing empty packed transactions")
+ log.info("pack: cleaning up")
+
+ log.debug("pack: removing unused object references")
+ stmt = self._scripts['pack_object_ref']
+ self._run_script(cursor, stmt)
+
+ log.debug("pack: removing empty packed transactions")
stmt = """
DELETE FROM transaction
WHERE packed = %(TRUE)s
@@ -856,28 +894,7 @@
stmt = '%(TRUNCATE)s ' + _table
self._run_script_stmt(cursor, stmt)
- log.debug("pack: removing unused object references")
- stmt = """
- DELETE FROM object_ref
- WHERE tid IN (
- SELECT tid
- FROM transaction
- WHERE empty = %(TRUE)s
- )
- """
- self._run_script_stmt(cursor, stmt)
- stmt = """
- DELETE FROM object_refs_added
- WHERE tid IN (
- SELECT tid
- FROM transaction
- WHERE empty = %(TRUE)s
- )
- """
- self._run_script_stmt(cursor, stmt)
-
-
def poll_invalidations(self, conn, cursor, prev_polled_tid, ignore_tid):
"""Polls for new transactions.
@@ -902,7 +919,8 @@
return (), new_polled_tid
stmt = "SELECT 1 FROM transaction WHERE tid = %(tid)s"
- cursor.execute(stmt % self._script_vars, {'tid': prev_polled_tid})
+ cursor.execute(intern(stmt % self._script_vars),
+ {'tid': prev_polled_tid})
rows = cursor.fetchall()
if not rows:
# Transaction not found; perhaps it has been packed.
@@ -916,10 +934,11 @@
WHERE tid > %(tid)s
"""
if ignore_tid is None:
- cursor.execute(stmt % self._script_vars, {'tid': prev_polled_tid})
+ cursor.execute(intern(stmt % self._script_vars),
+ {'tid': prev_polled_tid})
else:
stmt += " AND tid != %(self_tid)s"
- cursor.execute(stmt % self._script_vars,
+ cursor.execute(intern(stmt % self._script_vars),
{'tid': prev_polled_tid, 'self_tid': ignore_tid})
oids = [oid for (oid,) in cursor]
Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py 2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/adapters/mysql.py 2008-12-22 21:16:54 UTC (rev 94251)
@@ -67,33 +67,62 @@
# Work around a MySQL performance bug by avoiding an expensive subquery.
# See: http://mail.zope.org/pipermail/zodb-dev/2008-May/011880.html
# http://bugs.mysql.com/bug.php?id=28257
- _scripts['create_temp_pack_visit'] = """
- CREATE TEMPORARY TABLE temp_pack_visit (
- zoid BIGINT NOT NULL
- );
- CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
- CREATE TEMPORARY TABLE temp_pack_child (
- zoid BIGINT NOT NULL
- );
- CREATE UNIQUE INDEX temp_pack_child_zoid ON temp_pack_child (zoid);
- """
+ _scripts.update({
+ 'create_temp_pack_visit': """
+ CREATE TEMPORARY TABLE temp_pack_visit (
+ zoid BIGINT NOT NULL
+ );
+ CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
+ CREATE TEMPORARY TABLE temp_pack_child (
+ zoid BIGINT NOT NULL
+ );
+ CREATE UNIQUE INDEX temp_pack_child_zoid ON temp_pack_child (zoid);
+ """,
- # Note: UPDATE must be the last statement in the script
- # because it returns a value.
- _scripts['prepack_follow_child_refs'] = """
- %(TRUNCATE)s temp_pack_child;
+ # Note: UPDATE must be the last statement in the script
+ # because it returns a value.
+ 'prepack_follow_child_refs': """
+ %(TRUNCATE)s temp_pack_child;
- INSERT INTO temp_pack_child
- SELECT DISTINCT to_zoid
- FROM object_ref
- JOIN temp_pack_visit USING (zoid);
+ INSERT INTO temp_pack_child
+ SELECT DISTINCT to_zoid
+ FROM object_ref
+ JOIN temp_pack_visit USING (zoid);
- -- MySQL-specific syntax for table join in update
- UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
- WHERE keep = %(FALSE)s
- AND pack_object.zoid = temp_pack_child.zoid;
- """
+ -- MySQL-specific syntax for table join in update
+ UPDATE pack_object, temp_pack_child SET keep = %(TRUE)s
+ WHERE keep = %(FALSE)s
+ AND pack_object.zoid = temp_pack_child.zoid;
+ """,
+ # MySQL optimizes deletion far better when using a join syntax.
+ 'pack_current_object': """
+ DELETE FROM current_object
+ USING current_object
+ JOIN pack_state USING (zoid, tid)
+ WHERE current_object.tid = %(tid)s
+ """,
+
+ 'pack_object_state': """
+ DELETE FROM object_state
+ USING object_state
+ JOIN pack_state USING (zoid, tid)
+ WHERE object_state.tid = %(tid)s
+ """,
+
+ 'pack_object_ref': """
+ DELETE FROM object_refs_added
+ USING object_refs_added
+ JOIN transaction USING (tid)
+ WHERE transaction.empty = true;
+
+ DELETE FROM object_ref
+ USING object_ref
+ JOIN transaction USING (tid)
+ WHERE transaction.empty = true
+ """,
+ })
+
def __init__(self, **params):
self._params = params.copy()
@@ -142,6 +171,7 @@
tid BIGINT NOT NULL,
FOREIGN KEY (zoid, tid) REFERENCES object_state (zoid, tid)
) ENGINE = InnoDB;
+ CREATE INDEX current_object_tid ON current_object (tid);
-- A list of referenced OIDs from each object_state.
-- This table is populated as needed during packing.
Modified: relstorage/trunk/relstorage/adapters/oracle.py
===================================================================
--- relstorage/trunk/relstorage/adapters/oracle.py 2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/adapters/oracle.py 2008-12-22 21:16:54 UTC (rev 94251)
@@ -61,7 +61,8 @@
'max_tid': ':max_tid',
}
- _scripts = {
+ _scripts = Adapter._scripts.copy()
+ _scripts.update({
'choose_pack_transaction': """
SELECT MAX(tid)
FROM transaction
@@ -79,11 +80,8 @@
FROM object_state
WHERE tid = %(tid)s
""",
+ })
- 'prepack_follow_child_refs':
- Adapter._scripts['prepack_follow_child_refs'],
- }
-
def __init__(self, user, password, dsn, twophase=False, arraysize=64,
use_inline_lobs=None):
"""Create an Oracle adapter.
Modified: relstorage/trunk/relstorage/relstorage.py
===================================================================
--- relstorage/trunk/relstorage/relstorage.py 2008-12-22 17:59:40 UTC (rev 94250)
+++ relstorage/trunk/relstorage/relstorage.py 2008-12-22 21:16:54 UTC (rev 94251)
@@ -834,6 +834,9 @@
# Nothing needs to be packed.
return
+ s = time.ctime(TimeStamp(p64(tid_int)).timeTime())
+ log.info("packing transactions committed %s or before", s)
+
# In pre_pack, the adapter fills tables with
# information about what to pack. The adapter
# should not actually pack anything yet.
More information about the Checkins
mailing list