From 815e668209042825731ebbde92342b943f3f8b3b Mon Sep 17 00:00:00 2001 From: Julien Gilli <julien.gilli@joyent.com> Date: Tue, 31 Jan 2017 09:34:30 -0800 Subject: [PATCH] test: add --abort-on-timeout option to test.py Currently, when a process times out, it is terminated by sending it the SIGTERM signal. Sending SIGABRT instead allows the operating system to generate a core file that can be investigated later using post-mortem debuggers such as llnode or mdb_v8. This can be very useful when investigating flaky tests that time out, since in that case the failure is difficult to reproduce, and being able to look at a core file makes a big difference. With these changes, passing the --abort-on-timeout command line option to tools/test.py now sends SIGABRT to processes timing out on all platforms but Windows. PR-URL: https://github.com/nodejs/node/pull/11086 Ref: https://github.com/nodejs/node/issues/11026 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Gibson Fahnestock <gibfahn@gmail.com> Reviewed-By: Sakthipriyan Vairamani <thechargingvolcano@gmail.com> Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> --- tools/test.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tools/test.py b/tools/test.py index b6db2cd68e3f7e..c900b1a83a88e0 100755 --- a/tools/test.py +++ b/tools/test.py @@ -565,11 +565,11 @@ def HasFailed(self): return execution_failed -def KillProcessWithID(pid): +def KillProcessWithID(pid, signal_to_send=signal.SIGTERM): if utils.IsWindows(): os.popen('taskkill /T /F /PID %d' % pid) else: - os.kill(pid, signal.SIGTERM) + os.kill(pid, signal_to_send) MAX_SLEEP_TIME = 0.1 @@ -588,6 +588,17 @@ def Win32SetErrorMode(mode): pass return prev_error_mode + +def KillTimedOutProcess(context, pid): + 
signal_to_send = signal.SIGTERM + if context.abort_on_timeout: + # Using SIGABRT here allows the OS to generate a core dump that can be + # looked at post-mortem, which helps for investigating failures that are + # difficult to reproduce. + signal_to_send = signal.SIGABRT + KillProcessWithID(pid, signal_to_send) + + def RunProcess(context, timeout, args, **rest): if context.verbose: print "#", " ".join(args) popen_args = args @@ -627,7 +638,7 @@ def RunProcess(context, timeout, args, **rest): while True: if time.time() >= end_time: # Kill the process and wait for it to exit. - KillProcessWithID(process.pid) + KillTimedOutProcess(context, process.pid) exit_code = process.wait() timed_out = True break @@ -648,7 +659,7 @@ def RunProcess(context, timeout, args, **rest): while exit_code is None: if (not end_time is None) and (time.time() >= end_time): # Kill the process and wait for it to exit. - KillProcessWithID(process.pid) + KillTimedOutProcess(context, process.pid) exit_code = process.wait() timed_out = True else: @@ -851,7 +862,7 @@ class Context(object): def __init__(self, workspace, buildspace, verbose, vm, args, expect_fail, timeout, processor, suppress_dialogs, - store_unexpected_output, repeat): + store_unexpected_output, repeat, abort_on_timeout): self.workspace = workspace self.buildspace = buildspace self.verbose = verbose @@ -863,6 +874,7 @@ def __init__(self, workspace, buildspace, verbose, vm, args, expect_fail, self.suppress_dialogs = suppress_dialogs self.store_unexpected_output = store_unexpected_output self.repeat = repeat + self.abort_on_timeout = abort_on_timeout def GetVm(self, arch, mode): if arch == 'none': @@ -1385,6 +1397,9 @@ def BuildOptions(): result.add_option('--repeat', help='Number of times to repeat given tests', default=1, type="int") + result.add_option('--abort-on-timeout', + help='Send SIGABRT instead of SIGTERM to kill processes that time out', + default=False, action="store_true", dest="abort_on_timeout") return result @@ -1566,7 +1581,8 @@ def 
Main(): processor, options.suppress_dialogs, options.store_unexpected_output, - options.repeat) + options.repeat, + options.abort_on_timeout) # Get status for tests sections = [ ]