[Zope-Checkins] CVS: ZODB3/ZEO - StorageServer.py:1.77

Jeremy Hylton jeremy@zope.com
Wed, 30 Oct 2002 16:20:01 -0500


Update of /cvs-repository/ZODB3/ZEO
In directory cvs.zope.org:/tmp/cvs-serv31302/ZEO

Modified Files:
	StorageServer.py 
Log Message:
New timeout feature.

If a transaction takes a long time to complete after it has the
storage lock, close the connection and abort the transaction.  The
abort may cause the client to be hosed.

XXX There should be a configuration option to enable / disable this
feature.  That should be done before too long.

When a transaction takes a long time to complete, it is often a sign
of deadlock involving multiple storages.  Until the possibility of
deadlock is eliminated, this feature will prevent an entire ZEO
cluster from deadlocking (on writes).



=== ZODB3/ZEO/StorageServer.py 1.76 => 1.77 ===
--- ZODB3/ZEO/StorageServer.py:1.76	Fri Oct  4 20:35:25 2002
+++ ZODB3/ZEO/StorageServer.py	Wed Oct 30 16:20:00 2002
@@ -25,6 +25,7 @@
 import os
 import sys
 import threading
+import time
 
 from ZEO import ClientStub
 from ZEO.CommitLog import CommitLog
@@ -210,9 +211,12 @@
         self.storage_id = "uninitialized"
         self.transaction = None
         self.read_only = read_only
+        self.timeout = TimeoutThread()
+        self.timeout.start()
 
     def notifyConnected(self, conn):
         self.client = self.ClientStorageStubClass(conn)
+        self.timeout.notifyConnected(conn)
 
     def notifyDisconnected(self):
         # When this storage closes, we must ensure that it aborts
@@ -222,6 +226,7 @@
             self.abort()
         else:
             self.log("disconnected")
+        self.timeout.notifyDisconnected()
 
     def __repr__(self):
         tid = self.transaction and repr(self.transaction.id)
@@ -393,6 +398,7 @@
         if self.storage._transaction is None:
             self.strategy = self.ImmediateCommitStrategyClass(self.storage,
                                                               self.client)
+            self.timeout.begin()
         else:
             self.strategy = self.DelayedCommitStrategyClass(self.storage,
                                                             self.wait)
@@ -409,6 +415,7 @@
     def tpc_finish(self, id):
         if not self.check_tid(id):
             return
+        self.timeout.end()
         invalidated = self.strategy.tpc_finish()
         if invalidated:
             self.server.invalidate(self, self.storage_id,
@@ -420,6 +427,7 @@
     def tpc_abort(self, id):
         if not self.check_tid(id):
             return
+        self.timeout.end()
         strategy = self.strategy
         strategy.tpc_abort()
         self.transaction = None
@@ -440,7 +448,9 @@
 
     def vote(self, id):
         self.check_tid(id, exc=StorageTransactionError)
-        return self.strategy.tpc_vote()
+        r = self.strategy.tpc_vote()
+        self.timeout.begin()
+        return r
 
     def abortVersion(self, src, id):
         self.check_tid(id, exc=StorageTransactionError)
@@ -736,6 +746,79 @@
             self.delay.error(sys.exc_info())
         else:
             self.delay.reply(result)
+
+class TimeoutThread(threading.Thread):
+    # A TimeoutThread is associated with a ZEOStorage.  It trackes
+    # how long transactions take to commit.  If a transaction takes
+    # too long, it will close the connection.
+
+    TIMEOUT = 30
+
+    def __init__(self):
+        threading.Thread.__init__(self)
+        self._lock = threading.Lock()
+        self._timestamp = None
+        self._conn = None
+
+    def begin(self):
+        self._lock.acquire()
+        try:
+            self._timestamp = time.time()
+        finally:
+            self._lock.release()
+
+    def end(self):
+        self._lock.acquire()
+        try:
+            self._timestamp = None
+        finally:
+            self._lock.release()
+
+    # There's a race here, but I hope it is harmless.
+
+    def notifyConnected(self, conn):
+        self._conn = conn
+
+    def notifyDisconnected(self):
+        self._conn = None
+
+    def run(self):
+        timeout = self.TIMEOUT
+        while self._conn is not None:
+            time.sleep(timeout)
+            
+            self._lock.acquire()
+            try:
+                if self._timestamp is not None:
+                    deadline = self._timestamp + self.TIMEOUT
+                else:
+                    log("TimeoutThread no current transaction",
+                        zLOG.BLATHER)
+                    timeout = self.TIMEOUT
+                    continue
+            finally:
+                self._lock.release()
+                
+            timeout = deadline - time.time()
+            if deadline < time.time():
+                self._abort()
+                break
+            else:
+                elapsed = self.TIMEOUT - timeout
+                log("TimeoutThread transaction has %0.2f sec to complete"
+                    " (%.2f elapsed)" % (timeout, elapsed), zLOG.BLATHER)
+        log("TimeoutThread exiting.  Connection closed.", zLOG.BLATHER)
+
+    def _abort(self):
+        # It's possible for notifyDisconnected to remove the connection
+        # just before we use it.  I think that's harmless, since it means
+        # the connection was closed.
+        log("TimeoutThread aborting transaction", zLOG.WARNING)
+        try:
+            self._conn.close()
+        except AttributeError, msg:
+            log(msg)
+
 
 # Patch up class references
 StorageServer.ZEOStorageClass = ZEOStorage