[Zodb-checkins] SVN: ZODB/trunk/src/ZEO/ Add sanity check for closed but still locked connections.
Jim Fulton
jim at zope.com
Tue May 4 14:13:56 EDT 2010
Log message for revision 111936:
Add sanity check for closed but still locked connections.
Changed:
U ZODB/trunk/src/ZEO/StorageServer.py
U ZODB/trunk/src/ZEO/tests/testZEO2.py
-=-
Modified: ZODB/trunk/src/ZEO/StorageServer.py
===================================================================
--- ZODB/trunk/src/ZEO/StorageServer.py 2010-05-04 16:50:15 UTC (rev 111935)
+++ ZODB/trunk/src/ZEO/StorageServer.py 2010-05-04 18:13:55 UTC (rev 111936)
@@ -120,8 +120,6 @@
self.log_label = _addr_label(conn.addr)
def notifyDisconnected(self):
- self.connection = None
-
# When this storage closes, we must ensure that it aborts
# any pending transaction.
if self.transaction is not None:
@@ -131,6 +129,8 @@
else:
self.log("disconnected")
+ self.connection = None
+
def __repr__(self):
tid = self.transaction and repr(self.transaction.id)
if self.storage:
@@ -1176,9 +1176,23 @@
if storage_id in self._commit_locks:
# The lock is held by another zeostore
- assert self._commit_locks[storage_id] is not zeostore, (
- storage_id, delay)
+ locked = self._commit_locks[storage_id]
+ assert locked is not zeostore, (storage_id, delay)
+
+ if locked.connection is None:
+ locked.log("Still locked after disconnected. Unlocking.",
+ logging.CRITICAL)
+ if locked.transaction:
+ locked.storage.tpc_abort(locked.transaction)
+ del self._commit_locks[storage_id]
+ # yuck: have to manipulate lock to appease with :(
+ self._lock.release()
+ try:
+ return self.lock_storage(zeostore, delay)
+ finally:
+ self._lock.acquire()
+
if delay is None:
# New request, queue it
assert not [i for i in waiting if i[0] is zeostore
Modified: ZODB/trunk/src/ZEO/tests/testZEO2.py
===================================================================
--- ZODB/trunk/src/ZEO/tests/testZEO2.py 2010-05-04 16:50:15 UTC (rev 111935)
+++ ZODB/trunk/src/ZEO/tests/testZEO2.py 2010-05-04 18:13:55 UTC (rev 111936)
@@ -395,7 +395,67 @@
>>> logging.getLogger('ZEO').removeHandler(handler)
"""
+def lock_sanity_check():
+ r"""
+On one occasion with 3.10.0a1 in production, we had a case where a
+transaction lock wasn't released properly. One possibility, fron
+scant log information, is that the server and ZEOStorage had different
+ideas about whether the ZEOStorage was locked. The timeout thread
+properly closed the ZEOStorage's connection, but the ZEOStorage didn't
+release it's lock, presumably because it thought it wasn't locked. I'm
+not sure why this happened. I've refactored the logic quite a bit to
+try to deal with this, but the consequences of this failure are so
+severe, I'm adding some sanity checking when queueing lock requests.
+Helper to manage transactions:
+
+ >>> itid = 0
+ >>> def start_trans(zs):
+ ... global itid
+ ... itid += 1
+ ... tid = str(itid)
+ ... zs.tpc_begin(tid, '', '', {})
+ ... zs.storea(ZODB.utils.p64(99), ZODB.utils.z64, 'x', tid)
+ ... return tid
+
+Set up server and logging:
+
+ >>> server = ZEO.tests.servertesting.StorageServer()
+
+ >>> handler = logging.StreamHandler(sys.stdout)
+ >>> handler.setFormatter(logging.Formatter(
+ ... '%(name)s %(levelname)s\n%(message)s'))
+ >>> logging.getLogger('ZEO').addHandler(handler)
+ >>> logging.getLogger('ZEO').setLevel(logging.DEBUG)
+
+Now, we'll start a transaction, get the lock and then mark the
+ZEOStorage as closed and see if trying to get a lock cleans it up:
+
+ >>> zs1 = ZEO.tests.servertesting.client(server, '1')
+ >>> tid1 = start_trans(zs1)
+ >>> zs1.vote(tid1) # doctest: +ELLIPSIS
+ ZEO.StorageServer DEBUG
+ (test-addr-1) ('1') lock: transactions waiting: 0
+ ZEO.StorageServer BLATHER
+ (test-addr-1) Preparing to commit transaction: 1 objects, 36 bytes
+ 1 callAsync serialnos ...
+
+ >>> zs1.connection = None
+
+ >>> zs2 = ZEO.tests.servertesting.client(server, '2')
+ >>> tid2 = start_trans(zs2)
+ >>> zs2.vote(tid2) # doctest: +ELLIPSIS
+ ZEO.StorageServer CRITICAL
+ (test-addr-1) Still locked after disconnected. Unlocking.
+ ZEO.StorageServer DEBUG
+ (test-addr-2) ('1') lock: transactions waiting: 0
+ ZEO.StorageServer BLATHER
+ (test-addr-2) Preparing to commit transaction: 1 objects, 36 bytes
+ 2 callAsync serialnos ...
+
+ """
+
+
def test_suite():
return unittest.TestSuite((
doctest.DocTestSuite(
More information about the Zodb-checkins
mailing list