Author: tmckay
Date: 2013-01-18 18:02:31 +0000 (Fri, 18 Jan 2013)
New Revision: 5658

Modified:
   trunk/cumin/bin/cumin
Log:
Re-insert SIGKILL for children after timeout.
Include child termination status in log messages.
Wait additional time after SIGKILL to report child termination before exiting.
BZ800065
Modified: trunk/cumin/bin/cumin
===================================================================
--- trunk/cumin/bin/cumin 2013-01-18 15:32:12 UTC (rev 5657)
+++ trunk/cumin/bin/cumin 2013-01-18 18:02:31 UTC (rev 5658)
@@ -320,17 +320,18 @@
                  % app[PROCESS].pid)

     # Wait for children to exit so we can record the result and so that
-    # the initd script can track the pid correctly. Exit regardless after
+    # the initd script can track the pid correctly. Issue SIGKILL after
     # timeout+0.5 seconds, even if the children are still running. Each child
     # will have a hard exit after timeout seconds from its own main thread,
     # even if its shutdown activities have not completed, so in most
     # cases the children *should* exit within timeout+0.5 seconds
     then = datetime.now()
+    sent_kill = False
     while complete != 0:
         for app in apps:
             poll = app[PROCESS] and app[PROCESS].poll()
             if poll is not None:
-                log.info("Subprocess (%s) exited", app[PROCESS].pid)
+                log.info("Subprocess (%s) exited (%s)" % (app[PROCESS].pid, poll))
                 app[PROCESS] = None
                 complete -= 1

@@ -340,12 +341,24 @@

         sleep(0.25)
         if datetime.now() - then > timedelta(seconds=options.timeout+0.5):
-            log.warn("Timed out waiting for children, exiting")
-            for app in apps: # just to be paranoid
-                if app[PROCESS]:
-                    log.warn("Subprocess (%s) failed to stop"\
-                             % app[PROCESS].pid)
-            break
+            if sent_kill:
+                log.warn("Timed out waiting for children, exiting")
+                for app in apps:
+                    if app[PROCESS]:
+                        log.warn("Subprocess (%s) failed to stop"\
+                                 % app[PROCESS].pid)
+                break
+            else:
+                log.warn("Timed out waiting for children, sending SIGKILL")
+                for app in apps: # just to be paranoid
+                    if app[PROCESS]:
+                        os.kill(app[PROCESS].pid, signal.SIGKILL)
+                        log.warn("Subprocess (%s) failed to stop, "\
+                                 "sending SIGKILL" % (app[PROCESS].pid))
+
+                # Okay, let's loop one more time and report
+                sent_kill = True
+                then = datetime.now()

     write_status_path(config.get_init_status_path(), "exit\n", append=True)
     return return_code
cumin-developers@lists.fedorahosted.org