[Zope-Checkins] CVS: Zope/lib/python/nt_svcutils - service.py:1.1.2.2

Sidnei da Silva sidnei at awkly.org
Tue Apr 12 23:42:04 EDT 2005


Update of /cvs-repository/Zope/lib/python/nt_svcutils
In directory cvs.zope.org:/tmp/cvs-serv27740/lib/python/nt_svcutils

Modified Files:
      Tag: Zope-2_7-branch
	service.py 
Log Message:

Major service enhancements.  The service now cleanly shuts down its child
process, and if the child fails, the tail of the process output (which
generally contains a traceback) is written to the event log.

Minor tweaks to the Windows build 'clean' process and documentation tweaks.


=== Zope/lib/python/nt_svcutils/service.py 1.1.2.1 => 1.1.2.2 ===
--- Zope/lib/python/nt_svcutils/service.py:1.1.2.1	Wed Aug 20 10:45:07 2003
+++ Zope/lib/python/nt_svcutils/service.py	Tue Apr 12 23:41:34 2005
@@ -12,16 +12,12 @@
 #
 ##############################################################################
 
-""" Windows NT/2K service installer/controller for Zope/ZEO/ZRS instance
-homes """
+"""Windows Services installer/controller for Zope/ZEO/ZRS instance homes"""
 
-import win32serviceutil
-import win32service
-import win32event
-import win32process
-import pywintypes
-import time
-import os
+import sys, os, time, threading, signal
+import win32api, win32event, win32file, win32pipe, win32process, win32security
+import win32service, win32serviceutil, servicemanager
+import pywintypes, winerror, win32con
 
 # the max seconds we're allowed to spend backing off
 BACKOFF_MAX = 300
@@ -32,52 +28,98 @@
 # a dead process)
 BACKOFF_INITIAL_INTERVAL = 5
 
+# We execute a new thread that captures the tail of the output from our child
+# process. If the child fails, it is written to the event log.
+# This process is unconditional, and the output is never written to disk
+# (except obviously via the event log entry)
+# Size of the blocks we read from the child process's output.
+CHILDCAPTURE_BLOCK_SIZE = 80
+# The number of BLOCKSIZE blocks we keep as process output.
+CHILDCAPTURE_MAX_BLOCKS = 200
+
 class Service(win32serviceutil.ServiceFramework):
-    """ A class representing a Windows NT service that can manage an
-    instance-home-based Zope/ZEO/ZRS processes """
+    """Base class for a Windows Server to manage an external process.
+
+    Subclasses can be used to managed an instance home-based Zope or
+    ZEO process.  The win32 Python service module registers a specific
+    file and class for a service.  To manage an instance, a subclass
+    should be created in the instance home.
+    """
 
     # The PythonService model requires that an actual on-disk class declaration
-    # represent a single service.  Thus, the below definition of start_cmd,
+    # represent a single service.  Thus, the definitions below for the instance
     # must be overridden in a subclass in a file within the instance home for
-    # each instance.  The below-defined start_cmd (and _svc_display_name_
-    # and _svc_name_) are just examples.
-
+    # each instance.
+    # The values below are just examples.
     _svc_name_ = r'Zope-Instance'
     _svc_display_name_ = r'Zope instance at C:\Zope-Instance'
 
-    start_cmd = (
-        r'"C:\Program Files\Zope-2.7.0-a1\bin\python.exe" '
-        r'"C:\Program Files\Zope-2.7.0-a1\lib\python\Zope\Startup\run.py" '
-        r'-C "C:\Zope-Instance\etc\zope.conf"'
-        )
+    process_runner = r'C:\Program Files\Zope-2.7.0-a1\bin\python.exe'
+    process_args = r'{path_to}\run.py -C {path_to}\zope.conf'
+    evtlog_name = 'Zope'
 
     def __init__(self, args):
         win32serviceutil.ServiceFramework.__init__(self, args)
+        # Just say "Zope", instead of "Zope_-xxxxx"
+        try:
+            servicemanager.SetEventSourceName(self.evtlog_name)
+        except AttributeError:
+            # old pywin32 - that's ok.
+            pass
         # Create an event which we will use to wait on.
         # The "service stop" request will set this event.
-        self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
+        # We create it inheritable so we can pass it to the child process, so
+        # it too can act on the stop event.
+        sa = win32security.SECURITY_ATTRIBUTES()
+        sa.bInheritHandle = True
+
+        self.hWaitStop = win32event.CreateEvent(sa, 0, 0, None)
+        self.redirect_thread = None
 
     def SvcStop(self):
         # Before we do anything, tell the SCM we are starting the stop process.
         self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
-        # stop the process if necessary
-        try:
-            win32process.TerminateProcess(self.hZope, 0)
-        except pywintypes.error:
-            # the process may already have been terminated
-            pass
-        # And set my event.
+        self.onStop()
+        # Set the stop event - the main loop takes care of termination.
         win32event.SetEvent(self.hWaitStop)
 
+    def onStop(self):
+        # A hook for subclasses to override
+        pass
+
     def createProcess(self, cmd):
-        return win32process.CreateProcess(
-            None, cmd, None, None, 0, 0, None, None,
-            win32process.STARTUPINFO())
+        self.start_time = time.time()
+        return self.createProcessCaptureIO(cmd)
+
+    def logmsg(self, event):
+        # log a service event using servicemanager.LogMsg
+        from servicemanager import LogMsg, EVENTLOG_INFORMATION_TYPE
+        LogMsg(EVENTLOG_INFORMATION_TYPE, event,
+               (self._svc_name_, " (%s)" % self._svc_display_name_))
+
+    def info(self, s):
+        from servicemanager import LogInfoMsg
+        LogInfoMsg("%s (%s): %s" %
+                   (self._svc_name_, self._svc_display_name_, s))
+
+    def warning(self, s):
+        from servicemanager import LogWarningMsg
+        LogWarningMsg("%s (%s): %s" %
+                      (self._svc_name_, self._svc_display_name_, s))
+
+    def error(self, s):
+        from servicemanager import LogErrorMsg
+        LogErrorMsg("%s (%s): %s" %
+                    (self._svc_name_, self._svc_display_name_, s))
 
     def SvcDoRun(self):
         # indicate to Zope that the process is daemon managed (restartable)
         os.environ['ZMANAGED'] = '1'
 
+        # XXX the restart behavior is different here than it is for
+        # zdaemon.zdrun.  we should probably do the same thing in both
+        # places.
+
         # daemon behavior:  we want to to restart the process if it
         # dies, but if it dies too many times, we need to give up.
 
@@ -91,89 +133,218 @@
         # BACKOFF_CLEAR_TIME seconds, the backoff stats are reset.
 
         # the initial number of seconds between process start attempts
-        backoff_interval = BACKOFF_INITIAL_INTERVAL
+        self.backoff_interval = BACKOFF_INITIAL_INTERVAL
         # the cumulative backoff seconds counter
-        backoff_cumulative = 0
+        self.backoff_cumulative = 0
+
+        self.logmsg(servicemanager.PYS_SERVICE_STARTED)
 
-        import servicemanager
-        
-        # log a service started message
-        servicemanager.LogMsg(
-            servicemanager.EVENTLOG_INFORMATION_TYPE,
-            servicemanager.PYS_SERVICE_STARTED,
-            (self._svc_name_, ' (%s)' % self._svc_display_name_))
-        
         while 1:
-            start_time = time.time()
-            info = self.createProcess(self.start_cmd)
-            self.hZope = info[0] # the pid
-            if backoff_interval > BACKOFF_INITIAL_INTERVAL:
-                # if we're in a backoff state, log a message about
-                # starting a new process
-                servicemanager.LogInfoMsg(
-                    '%s (%s): recovering from died process, new process '
-                    'started' % (self._svc_name_, self._svc_display_name_)
-                    )
-            rc = win32event.WaitForMultipleObjects(
-                (self.hWaitStop, self.hZope), 0, win32event.INFINITE)
+            # We pass *this* file and the handle as the first 2 params, then
+            # the 'normal' startup args.
+            # See the bottom of this script for how that is handled.
+            cmd = '"%s" %s' % (self.process_runner, self.process_args)
+            info = self.createProcess(cmd)
+            # info is (hProcess, hThread, pid, tid)
+            self.hZope = info[0] # process handle
+            # XXX why the test before the log message?
+            if self.backoff_interval > BACKOFF_INITIAL_INTERVAL:
+                self.info("created process")
+            if not (self.run() and self.checkRestart()):            
+                break
+
+        self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
+        # Stop the child process by opening the special named event.
+        # We give it 90 seconds to shutdown normally.  If that doesn't
+        # stop things, we give it 30 seconds to do a "fast" shutdown.
+        # After that, we just knock it on the head.
+        winver = sys.getwindowsversion()
+        for sig, timeout in ((signal.SIGINT, 30), (signal.SIGTERM, 10)):
+            event_name = "Zope-%d-%d" % (info[2], sig)
+            # sys.getwindowsversion() -> major, minor, build, platform_id, ver_string
+            # for platform_id, 2==VER_PLATFORM_WIN32_NT
+            if winver[0] >= 5 and winver[3] == 2:
+                event_name = "Global\\" + event_name
+            try:
+                he = win32event.OpenEvent(win32event.EVENT_MODIFY_STATE, 0,
+                                          event_name)
+            except win32event.error, details:
+                if details[0] == winerror.ERROR_FILE_NOT_FOUND:
+                    # process already dead!
+                    break
+                # no other expected error - report it.
+                self.warning("Failed to open child shutdown event %s"
+                             % (event_name,))
+                continue
+
+            win32event.SetEvent(he)
+            # It should be shutting down now - wait for termination, reporting
+            # progress as we go.
+            for i in range(timeout):
+                self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
+                rc = win32event.WaitForSingleObject(self.hZope, 3000)
+                if rc == win32event.WAIT_OBJECT_0:
+                    break
+            # Process terminated - no need to try harder.
             if rc == win32event.WAIT_OBJECT_0:
-                # user sent a stop service request
-                self.SvcStop()
                 break
-            else:
-                # user did not send a service stop request, but
-                # the process died; this may be an error condition
-                status = win32process.GetExitCodeProcess(self.hZope)
-                if status == 0:
-                    # the user shut the process down from the web
-                    # interface (or it otherwise exited cleanly)
+
+        self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
+        # If necessary, kill it
+        if win32process.GetExitCodeProcess(self.hZope)==win32con.STILL_ACTIVE:
+            win32api.TerminateProcess(self.hZope, 3)
+        self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
+
+        # Wait for the redirect thread - it should have died as the remote 
+        # process terminated.
+        # As we are shutting down, we do the join with a little more care,
+        # reporting progress as we wait (even though we never will <wink>)
+        if self.redirect_thread is not None:
+            for i in range(5):
+                self.redirect_thread.join(1)
+                self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
+                if not self.redirect_thread.isAlive():
                     break
-                else:
-                    # this was an abormal shutdown.  if we can, we want to
-                    # restart the process but if it seems hopeless,
-                    # don't restart an infinite number of times.
-                    if backoff_cumulative > BACKOFF_MAX:
-                        # it's hopeless
-                        servicemanager.LogErrorMsg(
-                          '%s (%s): process could not be restarted due to max '
-                          'restart attempts exceeded' % (
-                            self._svc_display_name_, self._svc_name_
-                          ))
-                        self.SvcStop()
-                        break
-                    servicemanager.LogWarningMsg(
-                       '%s (%s): process died unexpectedly.  Will attempt '
-                       'restart after %s seconds.' % (
-                            self._svc_name_, self._svc_display_name_,
-                            backoff_interval
-                            )
-                       )
-                    # if BACKOFF_CLEAR_TIME seconds have elapsed since we last
-                    # started the process, reset the backoff interval
-                    # and the cumulative backoff time to their original
-                    # states
-                    if time.time() - start_time > BACKOFF_CLEAR_TIME:
-                        backoff_interval = BACKOFF_INITIAL_INTERVAL
-                        backoff_cumulative = 0
-                    # we sleep for the backoff interval.  since this is async
-                    # code, it would be better done by sending and
-                    # catching a timed event (a service
-                    # stop request will need to wait for us to stop sleeping),
-                    # but this works well enough for me.
-                    time.sleep(backoff_interval)
-                    # update backoff_cumulative with the time we spent
-                    # backing off.
-                    backoff_cumulative = backoff_cumulative + backoff_interval
-                    # bump the backoff interval up by 2* the last interval
-                    backoff_interval = backoff_interval * 2
-
-                    # loop and try to restart the process
-
-        # log a service stopped message
-        servicemanager.LogMsg(
-            servicemanager.EVENTLOG_INFORMATION_TYPE, 
-            servicemanager.PYS_SERVICE_STOPPED,
-            (self._svc_name_, ' (%s) ' % self._svc_display_name_))
+            else:
+                self.warning("Redirect thread did not stop!")
+        self.logmsg(servicemanager.PYS_SERVICE_STOPPED)
+
+    def run(self):
+        """Monitor the daemon process.
 
-if __name__=='__main__':
-    win32serviceutil.HandleCommandLine(Service)
+        Returns True if the service should continue running and
+        False if the service process should exit.  On True return,
+        the process exited unexpectedly and the caller should restart
+        it.
+        """
+        keep_running = True
+        rc = win32event.WaitForMultipleObjects([self.hWaitStop, self.hZope],
+                                               0, # bWaitAll
+                                               win32event.INFINITE)
+        if rc == win32event.WAIT_OBJECT_0:
+            # user sent a stop service request
+            self.SvcStop()
+            keep_running = False
+        elif rc == win32event.WAIT_OBJECT_0 + 1:
+            # user did not send a service stop request, but
+            # the process died; this may be an error condition
+            status = win32process.GetExitCodeProcess(self.hZope)
+            # exit status 0 means the user caused a clean shutdown,
+            # presumably via the web interface.  Any other status
+            # is an error that gets written to the event log.
+            if status != 0:
+                # This should never block - the child process terminating
+                # has closed the redirection pipe, so our thread dies.
+                self.redirect_thread.join(5)
+                if self.redirect_thread.isAlive():
+                    self.warning("Redirect thread did not stop!")
+                self.warning("process terminated with exit code %d.\n%s" \
+                             % (status, "".join(self.captured_blocks)))
+            keep_running = status != 0
+        else:
+            # No other valid return codes.
+            assert 0, rc
+        return keep_running
+
+    def checkRestart(self):
+        # this was an abormal shutdown.
+        if self.backoff_cumulative > BACKOFF_MAX:
+            self.error("restarting too frequently; quit")
+            self.SvcStop()
+            return False
+        self.warning("sleep %s to avoid rapid restarts"
+                     % self.backoff_interval)
+        if time.time() - self.start_time > BACKOFF_CLEAR_TIME:
+            self.backoff_interval = BACKOFF_INITIAL_INTERVAL
+            self.backoff_cumulative = 0
+        # sleep for our backoff, but still respond to stop requests.
+        if win32event.WAIT_OBJECT_0 == \
+           win32event.WaitForSingleObject(self.hWaitStop,
+                                          self.backoff_interval * 1000):
+            return False
+        self.backoff_cumulative += self.backoff_interval
+        self.backoff_interval *= 2
+        return True
+        
+    def createProcessCaptureIO(self, cmd):
+        hInputRead, hInputWriteTemp = self.newPipe()
+        hOutReadTemp, hOutWrite = self.newPipe()
+        pid = win32api.GetCurrentProcess()
+        # This one is duplicated as inheritable.
+        hErrWrite = win32api.DuplicateHandle(pid, hOutWrite, pid, 0, 1,
+                                       win32con.DUPLICATE_SAME_ACCESS)
+
+        # These are non-inheritable duplicates.
+        hOutRead = self.dup(hOutReadTemp)
+        hInputWrite = self.dup(hInputWriteTemp)
+        # dup() closed hOutReadTemp, hInputWriteTemp
+
+        si = win32process.STARTUPINFO()
+        si.hStdInput = hInputRead
+        si.hStdOutput = hOutWrite
+        si.hStdError = hErrWrite
+        si.dwFlags = win32process.STARTF_USESTDHANDLES | \
+                     win32process.STARTF_USESHOWWINDOW
+        si.wShowWindow = win32con.SW_HIDE
+
+        # pass True to allow handles to be inherited.  Inheritance is
+        # problematic in general, but should work in the controlled
+        # circumstances of a service process.
+        create_flags = win32process.CREATE_NEW_CONSOLE
+        info = win32process.CreateProcess(None, cmd, None, None, True, 
+                                          create_flags, None, None, si)
+        # (NOTE: these really aren't necessary for Python - they are closed
+        # as soon as they are collected)
+        hOutWrite.Close()
+        hErrWrite.Close()
+        hInputRead.Close()
+        # We don't use stdin
+        hInputWrite.Close()
+
+        # start a thread collecting output
+        t = threading.Thread(target=self.redirectCaptureThread,
+                             args = (hOutRead,))
+        t.start()
+        self.redirect_thread = t
+        return info
+
+    def redirectCaptureThread(self, handle):
+        # Only one of these running at a time, and handling both stdout and
+        # stderr on a single handle.  The read data is never referenced until
+        # the thread dies - so no need for locks around self.captured_blocks.
+        self.captured_blocks = []
+        #self.info("Redirect thread starting")
+        while 1:
+            try:
+                ec, data = win32file.ReadFile(handle, CHILDCAPTURE_BLOCK_SIZE)
+            except pywintypes.error, err:
+                # ERROR_BROKEN_PIPE means the child process closed the
+                # handle - ie, it terminated.
+                if err[0] != winerror.ERROR_BROKEN_PIPE:
+                    self.warning("Error reading output from process: %s" % err)
+                break
+            self.captured_blocks.append(data)
+            del self.captured_blocks[CHILDCAPTURE_MAX_BLOCKS:]
+        handle.Close()
+        #self.info("Redirect capture thread terminating")
+
+    def newPipe(self):
+        sa = win32security.SECURITY_ATTRIBUTES()
+        sa.bInheritHandle = True
+        return win32pipe.CreatePipe(sa, 0)
+
+    def dup(self, pipe):
+        # create a duplicate handle that is not inherited, so that
+        # it can be closed in the parent.  close the original pipe in
+        # the process.
+        pid = win32api.GetCurrentProcess()
+        dup = win32api.DuplicateHandle(pid, pipe, pid, 0, 0,
+                                       win32con.DUPLICATE_SAME_ACCESS)
+        pipe.Close()
+        return dup
+
+# Real __main__ bootstrap code is in the instance's service module.
+if __name__ == '__main__':
+    print "This is a framework module - you don't run it directly."
+    print "See your $SOFTWARE_HOME\bin directory for the service script."
+    sys.exit(1)



More information about the Zope-Checkins mailing list