[Checkins] SVN: relstorage/trunk/ 1. Valery Suhomlinov discovered a
problem with non-ASCII data in transaction
Shane Hathaway
shane at hathawaymix.org
Fri Feb 22 02:35:46 EST 2008
Log message for revision 84129:
1. Valery Suhomlinov discovered a problem with non-ASCII data in transaction
metadata. The problem has been fixed for all supported databases.
2. Made it possible to disable garbage collection during packing.
Exposed the option in zope.conf.
Changed:
U relstorage/trunk/CHANGELOG.txt
U relstorage/trunk/notes/oracle_notes.txt
U relstorage/trunk/relstorage/adapters/common.py
U relstorage/trunk/relstorage/adapters/mysql.py
U relstorage/trunk/relstorage/adapters/oracle.py
U relstorage/trunk/relstorage/adapters/postgresql.py
U relstorage/trunk/relstorage/component.xml
U relstorage/trunk/relstorage/config.py
U relstorage/trunk/relstorage/relstorage.py
U relstorage/trunk/relstorage/tests/reltestbase.py
-=-
Modified: relstorage/trunk/CHANGELOG.txt
===================================================================
--- relstorage/trunk/CHANGELOG.txt 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/CHANGELOG.txt 2008-02-22 07:35:44 UTC (rev 84129)
@@ -1,5 +1,5 @@
-relstorage 0.9
+relstorage 1.0 beta
- Renamed to reflect expanding database support.
@@ -41,10 +41,18 @@
- Implemented the storage iterator protocol, making it possible to copy
transactions to and from FileStorage and other RelStorage instances.
-- Fixed a bug that caused OIDs to be reused after importing objects.
+- Fixed a bug that caused OIDs to be reused after importing transactions.
Added a corresponding test.
+- Made it possible to disable garbage collection during packing.
+ Exposed the option in zope.conf.
+- Valery Suhomlinov discovered a problem with non-ASCII data in transaction
+ metadata. The problem has been fixed for all supported databases.
+
+
+
+
PGStorage 0.4
- Began using the PostgreSQL LISTEN and NOTIFY statements as a shortcut
Modified: relstorage/trunk/notes/oracle_notes.txt
===================================================================
--- relstorage/trunk/notes/oracle_notes.txt 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/notes/oracle_notes.txt 2008-02-22 07:35:44 UTC (rev 84129)
@@ -1,4 +1,8 @@
+If you are using Oracle 10g XE, use the "universal" version,
+since the smaller Western European version has minimal support
+for Unicode and will not pass all of the RelStorage tests.
+
Docs:
http://www.oracle.com/pls/db102/homepage
@@ -6,11 +10,18 @@
http://www.davidpashley.com/articles/oracle-install.html
Work around session limit (fixes ORA-12520):
- ALTER SYSTEM SET PROCESSES=150 SCOPE=SPFILE
- ALTER SYSTEM SET SESSIONS=150 SCOPE=SPFILE
+ ALTER SYSTEM SET PROCESSES=150 SCOPE=SPFILE;
+ ALTER SYSTEM SET SESSIONS=150 SCOPE=SPFILE;
(then restart Oracle)
Manually rollback an in-dispute transaction:
select local_tran_id, state from DBA_2PC_PENDING;
rollback force '$local_tran_id';
+It might be necessary to add the following lines to adapters/oracle.py,
+before all imports, to solve Oracle encoding issues. (Let me know
+if you have to do this!)
+
+ import os
+ os.environ["NLS_LANG"] = ".AL32UTF8"
+
Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/common.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -20,6 +20,14 @@
log = logging.getLogger("relstorage.adapters.common")
+# Notes about adapters:
+#
+# An adapter must not hold a connection, cursor, or database state, because
+# RelStorage opens multiple concurrent connections using a single adapter
+# instance.
+# Within the context of an adapter, all OID and TID values are integers,
+# not binary strings, except as noted.
+
class Adapter(object):
"""Common code for a database adapter.
@@ -122,6 +130,18 @@
self._run_script_stmt(cursor, stmt, params)
+ def _transaction_iterator(self, cursor):
+ """Iterate over a list of transactions returned from the database.
+
+ Each row begins with (tid, username, description, extension)
+ and may have other columns.
+
+ The default implementation returns the cursor unmodified.
+ Subclasses can override this.
+ """
+ return cursor
+
+
def iter_transactions(self, cursor):
"""Iterate over the transaction log, newest first.
@@ -136,7 +156,7 @@
ORDER BY tid DESC
"""
self._run_script_stmt(cursor, stmt)
- return iter(cursor)
+ return self._transaction_iterator(cursor)
def iter_transactions_range(self, cursor, start=None, stop=None):
@@ -144,12 +164,11 @@
Includes packed transactions.
Yields (tid, packed, username, description, extension)
- for each transaction.
+ for each transaction.
"""
stmt = """
- SELECT tid,
- CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END,
- username, description, extension
+ SELECT tid, username, description, extension,
+ CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END
FROM transaction
WHERE tid >= 0
"""
@@ -160,7 +179,7 @@
stmt += " ORDER BY tid"
self._run_script_stmt(cursor, stmt,
{'min_tid': start, 'max_tid': stop})
- return iter(cursor)
+ return self._transaction_iterator(cursor)
def iter_object_history(self, cursor, oid):
@@ -186,7 +205,7 @@
ORDER BY tid DESC
"""
self._run_script_stmt(cursor, stmt, {'oid': oid})
- return iter(cursor)
+ return self._transaction_iterator(cursor)
def iter_objects(self, cursor, tid):
@@ -332,7 +351,7 @@
self.close(conn, cursor)
- def pre_pack(self, pack_tid, get_references, gc=True):
+ def pre_pack(self, pack_tid, get_references, gc):
"""Decide what to pack.
Subclasses may override this.
Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/mysql.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -64,7 +64,9 @@
"""MySQL adapter for RelStorage."""
def __init__(self, **params):
- self._params = params
+ self._params = params.copy()
+ self._params['use_unicode'] = True
+ self._params['charset'] = 'utf8'
def create_schema(self, cursor):
"""Create the database tables."""
@@ -76,7 +78,7 @@
username VARCHAR(255) NOT NULL,
description TEXT NOT NULL,
extension BLOB
- ) ENGINE = InnoDB;
+ ) ENGINE = InnoDB CHARACTER SET utf8;
-- Create a special transaction to represent object creation. This
-- row is often referenced by object_state.prev_tid, but never by
@@ -97,7 +99,7 @@
tid BIGINT NOT NULL REFERENCES transaction,
PRIMARY KEY (zoid, tid),
prev_tid BIGINT NOT NULL REFERENCES transaction,
- md5 CHAR(32),
+ md5 CHAR(32) CHARACTER SET ascii,
state LONGBLOB,
CHECK (tid > 0)
) ENGINE = InnoDB;
@@ -537,7 +539,7 @@
cursor.execute(stmt)
- def pre_pack(self, pack_tid, get_references, gc=True):
+ def pre_pack(self, pack_tid, get_references, gc):
"""Decide what to pack.
This overrides the method by the same name in common.Adapter.
Modified: relstorage/trunk/relstorage/adapters/oracle.py
===================================================================
--- relstorage/trunk/relstorage/adapters/oracle.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/oracle.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -105,8 +105,8 @@
CREATE TABLE transaction (
tid NUMBER(20) NOT NULL PRIMARY KEY,
packed CHAR DEFAULT 'N' CHECK (packed IN ('N', 'Y')),
- username VARCHAR2(255),
- description VARCHAR2(4000),
+ username NVARCHAR2(255),
+ description NVARCHAR2(2000),
extension RAW(2000)
);
@@ -512,9 +512,10 @@
(tid, packed, username, description, extension)
VALUES (:1, :2, :3, :4, :5)
"""
+ encoding = cursor.connection.encoding
cursor.execute(stmt, (
- tid, packed and 'Y' or 'N', username, description,
- cx_Oracle.Binary(extension)))
+ tid, packed and 'Y' or 'N', username.encode(encoding),
+ description.encode(encoding), cx_Oracle.Binary(extension)))
def detect_conflict(self, cursor):
"""Find one conflict in the data about to be committed.
@@ -618,6 +619,7 @@
"""Abort the commit. If txn is not None, phase 1 is also aborted."""
cursor.connection.rollback()
+
def new_oid(self, cursor):
"""Return a new, unused OID."""
stmt = "SELECT zoid_seq.nextval FROM DUAL"
@@ -625,6 +627,24 @@
return cursor.fetchone()[0]
+ def _transaction_iterator(self, cursor):
+ """Iterate over a list of transactions returned from the database.
+
+ Each row begins with (tid, username, description, extension)
+ and may have other columns.
+
+ This overrides the default implementation.
+ """
+ encoding = cursor.connection.encoding
+ for row in cursor:
+ tid, username, description = row[:3]
+ if username is not None:
+ username = username.decode(encoding)
+ if description is not None:
+ description = description.decode(encoding)
+ yield (tid, username, description) + tuple(row[3:])
+
+
def hold_pack_lock(self, cursor):
"""Try to acquire the pack lock.
@@ -720,4 +740,3 @@
def __getitem__(self, key):
self.used.add(key)
return self.source[key]
-
Modified: relstorage/trunk/relstorage/adapters/postgresql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/postgresql.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/postgresql.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -22,15 +22,9 @@
log = logging.getLogger("relstorage.adapters.postgresql")
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
-# Notes about adapters:
-#
-# An adapter must not hold a connection, cursor, or database state, because
-# RelStorage opens multiple concurrent connections using a single adapter
-# instance.
-# All OID and TID values are integers, not binary strings, except as noted.
-
class PostgreSQLAdapter(Adapter):
"""PostgreSQL adapter for RelStorage."""
@@ -179,6 +173,7 @@
"""Open a database connection and return (conn, cursor)."""
try:
conn = psycopg2.connect(self._dsn)
+ conn.set_client_encoding('UNICODE')
conn.set_isolation_level(isolation)
cursor = conn.cursor()
cursor.arraysize = 64
Modified: relstorage/trunk/relstorage/component.xml
===================================================================
--- relstorage/trunk/relstorage/component.xml 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/component.xml 2008-02-22 07:35:44 UTC (rev 84129)
@@ -40,6 +40,21 @@
the performance of servers with high write volume.
</description>
</key>
+ <key name="pack-gc" datatype="boolean" default="true">
+ <description>
+ If pack-gc is false, pack operations do not perform
+ garbage collection. Garbage collection is enabled by default.
+
+ If garbage collection is disabled, pack operations keep at least one
+ revision of every object. With garbage collection disabled, the
+ pack code does not need to follow object references, making
+ packing conceivably much faster. However, some of that benefit
+ may be lost due to an ever increasing number of unused objects.
+
+ Disabling garbage collection is also a hack that ensures
+ inter-database references never break.
+ </description>
+ </key>
</sectiontype>
<sectiontype name="postgresql" implements="relstorage.adapter"
Modified: relstorage/trunk/relstorage/config.py
===================================================================
--- relstorage/trunk/relstorage/config.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/config.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -24,7 +24,8 @@
config = self.config
adapter = config.adapter.open()
return RelStorage(adapter, name=config.name, create=config.create,
- read_only=config.read_only, poll_interval=config.poll_interval)
+ read_only=config.read_only, poll_interval=config.poll_interval,
+ pack_gc=config.pack_gc)
class PostgreSQLAdapterFactory(BaseConfig):
Modified: relstorage/trunk/relstorage/relstorage.py
===================================================================
--- relstorage/trunk/relstorage/relstorage.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/relstorage.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -43,7 +43,7 @@
"""Storage to a relational database, based on invalidation polling"""
def __init__(self, adapter, name=None, create=True,
- read_only=False, poll_interval=0):
+ read_only=False, poll_interval=0, pack_gc=True):
if name is None:
name = 'RelStorage on %s' % adapter.__class__.__name__
@@ -51,6 +51,7 @@
self._name = name
self._is_read_only = read_only
self._poll_interval = poll_interval
+ self._pack_gc = pack_gc
if create:
self._adapter.prepare_schema()
@@ -667,6 +668,22 @@
self._lock_release()
+ def set_pack_gc(self, pack_gc):
+ """Configures whether garbage collection during packing is enabled.
+
+ Garbage collection is enabled by default. If GC is disabled,
+ packing keeps at least one revision of every object.
+ With GC disabled, the pack code does not need to follow object
+ references, making packing conceivably much faster.
+ However, some of that benefit may be lost due to an ever
+ increasing number of unused objects.
+
+ Disabling garbage collection is also a hack that ensures
+ inter-database references never break.
+ """
+ self._pack_gc = pack_gc
+
+
def pack(self, t, referencesf):
if self._is_read_only:
raise POSException.ReadOnlyError()
@@ -700,7 +717,7 @@
# In pre_pack, the adapter fills tables with
# information about what to pack. The adapter
# should not actually pack anything yet.
- adapter.pre_pack(tid_int, get_references)
+ adapter.pre_pack(tid_int, get_references, self._pack_gc)
# Now pack.
adapter.pack(tid_int)
@@ -734,7 +751,7 @@
# self._zodb_conn = zodb_conn
RelStorage.__init__(self, adapter=parent._adapter, name=parent._name,
create=False, read_only=parent._is_read_only,
- poll_interval=parent._poll_interval)
+ poll_interval=parent._poll_interval, pack_gc=parent._pack_gc)
# _prev_polled_tid contains the tid at the previous poll
self._prev_polled_tid = None
self._showed_disconnect = False
@@ -841,7 +858,7 @@
else:
stop_int = None
- # _transactions: [(tid, packed, username, description, extension)]
+ # _transactions: [(tid, username, description, extension, packed)]
self._transactions = list(adapter.iter_transactions_range(
self._cursor, start_int, stop_int))
self._index = 0
@@ -871,7 +888,7 @@
class RecordIterator(object):
"""Iterate over the objects in a transaction."""
- def __init__(self, trans_iter, tid_int, packed, user, desc, ext):
+ def __init__(self, trans_iter, tid_int, user, desc, ext, packed):
self.tid = p64(tid_int)
self.status = packed and 'p' or ' '
self.user = user or ''
Modified: relstorage/trunk/relstorage/tests/reltestbase.py
===================================================================
--- relstorage/trunk/relstorage/tests/reltestbase.py 2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/tests/reltestbase.py 2008-02-22 07:35:44 UTC (rev 84129)
@@ -13,6 +13,7 @@
##############################################################################
"""A foundation for relstorage adapter tests"""
+import time
import unittest
from relstorage.relstorage import RelStorage
@@ -30,6 +31,7 @@
from ZODB.tests.MinPO import MinPO
from ZODB.tests.StorageTestBase import zodb_unpickle, zodb_pickle
+from ZODB.serialize import referencesf
class BaseRelStorageTests(StorageTestBase.StorageTestBase):
@@ -371,7 +373,63 @@
self.assertRaises(IndexError, iter.__getitem__, offset)
+ def checkNonASCIITransactionMetadata(self):
+ # Verify the database stores and retrieves non-ASCII text
+ # in transaction metadata.
+ db = DB(self._storage)
+ try:
+ c1 = db.open()
+ r1 = c1.root()
+ r1['alpha'] = 1
+ user = u"\u65e5\u672c\u8e86"
+ transaction.get().setUser(user)
+ transaction.commit()
+ r1['alpha'] = 2
+ desc = u"Japanese: \u65e5\u672c\u8e86"
+ transaction.get().note(desc)
+ transaction.commit()
+ info = self._storage.undoInfo()
+ self.assertEqual(info[0]['description'], desc)
+ self.assertEqual(info[1]['user_name'], '/ ' + user)
+ finally:
+ db.close()
+
+ def checkPackGC(self, gc_enabled=True):
+ db = DB(self._storage)
+ try:
+ c1 = db.open()
+ r1 = c1.root()
+ r1['alpha'] = PersistentMapping()
+ transaction.commit()
+
+ oid = r1['alpha']._p_oid
+ r1['alpha'] = None
+ transaction.commit()
+
+ # The object should still exist
+ self._storage.load(oid, '')
+
+ # Pack
+ now = packtime = time.time()
+ while packtime <= now:
+ packtime = time.time()
+ self._storage.pack(packtime, referencesf)
+
+ if gc_enabled:
+ # The object should now be gone
+ self.assertRaises(KeyError, self._storage.load, oid, '')
+ else:
+ # The object should still exist
+ self._storage.load(oid, '')
+ finally:
+ db.close()
+
+ def checkPackGCDisabled(self):
+ self._storage.set_pack_gc(False)
+ self.checkPackGC(gc_enabled=False)
+
+
class IteratorDeepCompareUnordered:
# Like IteratorDeepCompare, but compensates for OID order
# differences in transactions.
More information about the Checkins
mailing list