diff --git a/tools/roslaunch/resources/timeouts.launch b/tools/roslaunch/resources/timeouts.launch new file mode 100644 index 0000000000..66c9859513 --- /dev/null +++ b/tools/roslaunch/resources/timeouts.launch @@ -0,0 +1,5 @@ + + + + + diff --git a/tools/roslaunch/scripts/roscore b/tools/roslaunch/scripts/roscore index e19205217c..79534752fc 100755 --- a/tools/roslaunch/scripts/roscore +++ b/tools/roslaunch/scripts/roscore @@ -34,6 +34,7 @@ import sys from optparse import OptionParser from rosmaster.master_api import NUM_WORKERS +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM NAME = 'roscore' @@ -59,6 +60,16 @@ def _get_optparse(): parser.add_option("--master-logger-level", dest="master_logger_level", default=False, type=str, help="set rosmaster.master logger level ('debug', 'info', 'warn', 'error', 'fatal')") + parser.add_option("--sigint-timeout", + dest="sigint_timeout", + default=DEFAULT_TIMEOUT_SIGINT, type=float, + help="the SIGINT timeout used when killing the core (in seconds).", + metavar="SIGINT_TIMEOUT") + parser.add_option("--sigterm-timeout", + dest="sigterm_timeout", + default=DEFAULT_TIMEOUT_SIGTERM, type=float, + help="the SIGTERM timeout used when killing core if SIGINT does not stop the core (in seconds).", + metavar="SIGTERM_TIMEOUT") return parser diff --git a/tools/roslaunch/src/roslaunch/__init__.py b/tools/roslaunch/src/roslaunch/__init__.py index 14f52ba4c0..eec89659b5 100644 --- a/tools/roslaunch/src/roslaunch/__init__.py +++ b/tools/roslaunch/src/roslaunch/__init__.py @@ -68,6 +68,7 @@ DEFAULT_MASTER_PORT = 11311 from rosmaster.master_api import NUM_WORKERS +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM NAME = 'roslaunch' @@ -193,6 +194,16 @@ def _get_optparse(): parser.add_option("--master-logger-level", dest="master_logger_level", default=False, type=str, help="set rosmaster.master logger level ('debug', 'info', 'warn', 'error', 'fatal')") + parser.add_option("--sigint-timeout", + dest="sigint_timeout", + default=DEFAULT_TIMEOUT_SIGINT, type=float, + help="the SIGINT timeout used when killing nodes (in seconds).", + metavar="SIGINT_TIMEOUT") + parser.add_option("--sigterm-timeout", + dest="sigterm_timeout", + default=DEFAULT_TIMEOUT_SIGTERM, type=float, + help="the SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds).", + metavar="SIGTERM_TIMEOUT") return parser @@ -296,7 +307,9 @@ def main(argv=sys.argv): # client spins up an XML-RPC server that waits for # commands and configuration from the server. from . import child as roslaunch_child - c = roslaunch_child.ROSLaunchChild(uuid, options.child_name, options.server_uri) + c = roslaunch_child.ROSLaunchChild(uuid, options.child_name, options.server_uri, + sigint_timeout=options.sigint_timeout, + sigterm_timeout=options.sigterm_timeout) c.run() else: logger.info('starting in server mode') @@ -326,7 +339,9 @@ def main(argv=sys.argv): num_workers=options.num_workers, timeout=options.timeout, master_logger_level=options.master_logger_level, show_summary=not options.no_summary, - force_required=options.force_required) + force_required=options.force_required, + sigint_timeout=options.sigint_timeout, + sigterm_timeout=options.sigterm_timeout) p.start() p.spin() diff --git a/tools/roslaunch/src/roslaunch/child.py b/tools/roslaunch/src/roslaunch/child.py index 8358225384..aa84e67bf2 100644 --- a/tools/roslaunch/src/roslaunch/child.py +++ b/tools/roslaunch/src/roslaunch/child.py @@ -49,6 +49,7 @@ import roslaunch.core import roslaunch.pmon import roslaunch.server +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM class ROSLaunchChild(object): """ @@ -57,7 +58,7 @@ class ROSLaunchChild(object): This must be called from the Python Main thread due to signal registration. """ - def __init__(self, run_id, name, server_uri): + def __init__(self, run_id, name, server_uri, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ Startup roslaunch remote client XML-RPC services. Blocks until shutdown @param run_id: UUID of roslaunch session @@ -66,6 +67,11 @@ def __init__(self, run_id, name, server_uri): @type name: str @param server_uri: XML-RPC URI of roslaunch server @type server_uri: str + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node ( + in seconds). + @type sigterm_timeout: float @return: XML-RPC URI @rtype: str """ @@ -77,6 +83,8 @@ def __init__(self, run_id, name, server_uri): self.server_uri = server_uri self.child_server = None self.pm = None + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout roslaunch.pmon._init_signal_handlers() @@ -102,7 +110,9 @@ def run(self): try: self.logger.info("starting roslaunch child process [%s], server URI is [%s]", self.name, self.server_uri) self._start_pm() - self.child_server = roslaunch.server.ROSLaunchChildNode(self.run_id, self.name, self.server_uri, self.pm) + self.child_server = roslaunch.server.ROSLaunchChildNode(self.run_id, self.name, self.server_uri, + self.pm, sigint_timeout=self.sigint_timeout, + sigterm_timeout=self.sigterm_timeout) self.logger.info("... creating XMLRPC server for child") self.child_server.start() self.logger.info("... started XMLRPC server for child") diff --git a/tools/roslaunch/src/roslaunch/launch.py b/tools/roslaunch/src/roslaunch/launch.py index efa3f03724..93860df55a 100644 --- a/tools/roslaunch/src/roslaunch/launch.py +++ b/tools/roslaunch/src/roslaunch/launch.py @@ -52,7 +52,7 @@ from roslaunch.core import * #from roslaunch.core import setup_env -from roslaunch.nodeprocess import create_master_process, create_node_process +from roslaunch.nodeprocess import create_master_process, create_node_process, DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM from roslaunch.pmon import start_process_monitor, ProcessListener from roslaunch.rlutil import update_terminal_name @@ -235,7 +235,8 @@ class ROSLaunchRunner(object): monitored. """ - def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, remote_runner=None, is_child=False, is_rostest=False, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False): + def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, remote_runner=None, is_child=False, is_rostest=False, num_workers=NUM_WORKERS, timeout=None, + master_logger_level=False, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ @param run_id: /run_id for this launch. If the core is not running, this value will be used to initialize /run_id. If @@ -266,9 +267,19 @@ def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, re @type timeout: Float or None @param master_logger_level: Specify roscore's rosmaster.master logger level, use default if it is False. @type master_logger_level: str or False + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + @type sigterm_timeout: float + @raise RLException: If sigint_timeout or sigterm_timeout are nonpositive. """ if run_id is None: raise RLException("run_id is None") + if sigint_timeout <= 0: + raise RLException("sigint_timeout must be a positive number, received %f" % sigint_timeout) + if sigterm_timeout <= 0: + raise RLException("sigterm_timeout must be a positive number, received %f" % sigterm_timeout) + self.run_id = run_id # In the future we should can separate the notion of a core @@ -286,6 +297,8 @@ def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, re self.master_logger_level = master_logger_level self.logger = logging.getLogger('roslaunch') self.pm = pmon or start_process_monitor() + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout # wire in ProcessMonitor events to our listeners # aggregator. We similarly wire in the remote events when we @@ -402,7 +415,8 @@ def _launch_master(self): printlog("auto-starting new master") p = create_master_process( self.run_id, m.type, get_ros_root(), m.get_port(), self.num_workers, - self.timeout, master_logger_level=self.master_logger_level) + self.timeout, master_logger_level=self.master_logger_level, + sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout) self.pm.register_core_proc(p) success = p.start() if not success: @@ -541,7 +555,7 @@ def launch_node(self, node, core=False): master = self.config.master import roslaunch.node_args try: - process = create_node_process(self.run_id, node, master.uri) + process = create_node_process(self.run_id, node, master.uri, sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout) except roslaunch.node_args.NodeParamsException as e: self.logger.error(e) printerrlog("ERROR: cannot launch node of type [%s/%s]: %s"%(node.package, node.type, str(e))) diff --git a/tools/roslaunch/src/roslaunch/nodeprocess.py b/tools/roslaunch/src/roslaunch/nodeprocess.py index 1a87b73f34..8daf7a1b6b 100644 --- a/tools/roslaunch/src/roslaunch/nodeprocess.py +++ b/tools/roslaunch/src/roslaunch/nodeprocess.py @@ -55,8 +55,8 @@ import logging _logger = logging.getLogger("roslaunch") -_TIMEOUT_SIGINT = 15.0 #seconds -_TIMEOUT_SIGTERM = 2.0 #seconds +DEFAULT_TIMEOUT_SIGINT = 15.0 #seconds +DEFAULT_TIMEOUT_SIGTERM = 2.0 #seconds _counter = 0 def _next_counter(): @@ -64,7 +64,7 @@ def _next_counter(): _counter += 1 return _counter -def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False): +def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ Launch a master @param type_: name of master executable (currently just Master.ZENMASTER) @@ -77,10 +77,14 @@ def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS @type num_workers: int @param timeout: socket timeout for connections. @type timeout: float - @raise RLException: if type_ or port is invalid @param master_logger_level: rosmaster.master logger debug level @type master_logger_level=: str or False - """ + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + @type sigterm_timeout: float + @raise RLException: if type_ or port is invalid or sigint_timeout or sigterm_timeout are nonpositive. + """ if port < 1 or port > 65535: raise RLException("invalid port assignment: %s"%port) @@ -100,9 +104,10 @@ def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS _logger.info("process[master]: launching with args [%s]"%args) log_output = False - return LocalProcess(run_id, package, 'master', args, os.environ, log_output, None, required=True) + return LocalProcess(run_id, package, 'master', args, os.environ, log_output, None, required=True, + sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout) -def create_node_process(run_id, node, master_uri): +def create_node_process(run_id, node, master_uri, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ Factory for generating processes for launching local ROS nodes. Also registers the process with the L{ProcessMonitor} so that @@ -114,9 +119,14 @@ def create_node_process(run_id, node, master_uri): @type node: L{Node} @param master_uri: API URI for master node @type master_uri: str + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + @type sigterm_timeout: float @return: local process instance @rtype: L{LocalProcess} @raise NodeParamsException: If the node's parameters are improperly specific + @raise RLException: If sigint_timeout or sigterm_timeout are nonpositive. """ _logger.info("create_node_process: package[%s] type[%s] machine[%s] master_uri[%s]", node.package, node.type, node.machine, master_uri) # check input args @@ -150,7 +160,8 @@ def create_node_process(run_id, node, master_uri): _logger.debug('process[%s]: returning LocalProcess wrapper') return LocalProcess(run_id, node.package, name, args, env, log_output, \ respawn=node.respawn, respawn_delay=node.respawn_delay, \ - required=node.required, cwd=node.cwd) + required=node.required, cwd=node.cwd, \ + sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout) class LocalProcess(Process): @@ -160,7 +171,7 @@ class LocalProcess(Process): def __init__(self, run_id, package, name, args, env, log_output, respawn=False, respawn_delay=0.0, required=False, cwd=None, - is_node=True): + is_node=True, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ @param run_id: unique run ID for this roslaunch. Used to generate log directory location. run_id may be None if this @@ -184,9 +195,20 @@ def __init__(self, run_id, package, name, args, env, log_output, @type cwd: str @param is_node: (optional) if True, process is ROS node and accepts ROS node command-line arguments. Default: True @type is_node: False + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + @type sigterm_timeout: float + @raise RLException: If sigint_timeout or sigterm_timeout are nonpositive. """ super(LocalProcess, self).__init__(package, name, args, env, respawn, respawn_delay, required) + + if sigint_timeout <= 0: + raise RLException("sigint_timeout must be a positive number, received %f" % sigint_timeout) + if sigterm_timeout <= 0: + raise RLException("sigterm_timeout must be a positive number, received %f" % sigterm_timeout) + self.run_id = run_id self.popen = None self.log_output = log_output @@ -196,6 +218,8 @@ def __init__(self, run_id, package, name, args, env, log_output, self.log_dir = None self.pid = -1 self.is_node = is_node + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout # NOTE: in the future, info() is going to have to be sufficient for relaunching a process def get_info(self): @@ -406,7 +430,7 @@ def _stop_unix(self, errors): _logger.info("[%s] sending SIGINT to pgid [%s]", self.name, pgid) os.killpg(pgid, signal.SIGINT) _logger.info("[%s] sent SIGINT to pgid [%s]", self.name, pgid) - timeout_t = time.time() + _TIMEOUT_SIGINT + timeout_t = time.time() + self.sigint_timeout retcode = self.popen.poll() while time.time() < timeout_t and retcode is None: time.sleep(0.1) @@ -414,7 +438,7 @@ def _stop_unix(self, errors): # Escalate non-responsive process if retcode is None: printerrlog("[%s] escalating to SIGTERM"%self.name) - timeout_t = time.time() + _TIMEOUT_SIGTERM + timeout_t = time.time() + self.sigterm_timeout os.killpg(pgid, signal.SIGTERM) _logger.info("[%s] sent SIGTERM to pgid [%s]"%(self.name, pgid)) retcode = self.popen.poll() @@ -474,7 +498,7 @@ def _stop_win32(self, errors): _logger.info("[%s] running taskkill pid tree [%s]", self.name, pid) subprocess.call(['taskkill', '/F', '/T', '/PID', str(pid)]) _logger.info("[%s] run taskkill pid tree [%s]", self.name, pid) - timeout_t = time.time() + _TIMEOUT_SIGINT + timeout_t = time.time() + self.sigint_timeout retcode = self.popen.poll() while time.time() < timeout_t and retcode is None: time.sleep(0.1) diff --git a/tools/roslaunch/src/roslaunch/parent.py b/tools/roslaunch/src/roslaunch/parent.py index 39ec7b2b46..58e9294900 100644 --- a/tools/roslaunch/src/roslaunch/parent.py +++ b/tools/roslaunch/src/roslaunch/parent.py @@ -55,6 +55,7 @@ import roslaunch.xmlloader from rosmaster.master_api import NUM_WORKERS +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM #TODO: probably move process listener infrastructure into here @@ -73,7 +74,8 @@ class ROSLaunchParent(object): """ def __init__(self, run_id, roslaunch_files, is_core=False, port=None, local_only=False, process_listeners=None, - verbose=False, force_screen=False, force_log=False, is_rostest=False, roslaunch_strs=None, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False, show_summary=True, force_required=False): + verbose=False, force_screen=False, force_log=False, is_rostest=False, roslaunch_strs=None, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False, show_summary=True, force_required=False, + sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ @param run_id: UUID of roslaunch session @type run_id: str @@ -109,7 +111,16 @@ def __init__(self, run_id, roslaunch_files, is_core=False, port=None, local_only @type master_logger_level: str or False @param force_required: (optional) whether to make all nodes required @type force_required: boolean + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + @type sigterm_timeout: float + @raise RLException: If sigint_timeout or sigterm_timeout are nonpositive. """ + if sigint_timeout <= 0: + raise RLException("sigint_timeout must be a positive number, received %f" % sigint_timeout) + if sigterm_timeout <= 0: + raise RLException("sigterm_timeout must be a positive number, received %f" % sigterm_timeout) self.logger = logging.getLogger('roslaunch.parent') self.run_id = run_id @@ -126,6 +137,8 @@ def __init__(self, run_id, roslaunch_files, is_core=False, port=None, local_only self.num_workers = num_workers self.timeout = timeout self.master_logger_level = master_logger_level + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout # I don't think we should have to pass in so many options from # the outside into the roslaunch parent. One possibility is to @@ -173,7 +186,8 @@ def _init_runner(self): raise RLException("pm is not initialized") if self.server is None: raise RLException("server is not initialized") - self.runner = roslaunch.launch.ROSLaunchRunner(self.run_id, self.config, server_uri=self.server.uri, pmon=self.pm, is_core=self.is_core, remote_runner=self.remote_runner, is_rostest=self.is_rostest, num_workers=self.num_workers, timeout=self.timeout, master_logger_level=self.master_logger_level) + self.runner = roslaunch.launch.ROSLaunchRunner(self.run_id, self.config, server_uri=self.server.uri, pmon=self.pm, is_core=self.is_core, remote_runner=self.remote_runner, is_rostest=self.is_rostest, num_workers=self.num_workers, timeout=self.timeout, master_logger_level=self.master_logger_level, + sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout) # print runner info to user, put errors last to make the more visible if self.is_core: @@ -215,7 +229,9 @@ def _init_remote(self): if not self.local_only and self.config.has_remote_nodes(): # keep the remote package lazy-imported import roslaunch.remote - self.remote_runner = roslaunch.remote.ROSRemoteRunner(self.run_id, self.config, self.pm, self.server) + self.remote_runner = roslaunch.remote.ROSRemoteRunner(self.run_id, self.config, self.pm, self.server, + sigint_timeout=self.sigint_timeout, + sigterm_timeout=self.sigterm_timeout) elif self.local_only: printlog_bold("LOCAL\nlocal only launch specified, will not launch remote nodes\nLOCAL\n") diff --git a/tools/roslaunch/src/roslaunch/remote.py b/tools/roslaunch/src/roslaunch/remote.py index 4d3a075da9..8e0266940a 100644 --- a/tools/roslaunch/src/roslaunch/remote.py +++ b/tools/roslaunch/src/roslaunch/remote.py @@ -48,6 +48,7 @@ import roslaunch.launch import roslaunch.server #ROSLaunchParentNode hidden dep from roslaunch.core import RLException, is_machine_local, printerrlog, printlog +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM _CHILD_REGISTER_TIMEOUT = 10.0 #seconds @@ -56,17 +57,23 @@ class ROSRemoteRunner(roslaunch.launch.ROSRemoteRunnerIF): Manages the running of remote roslaunch children """ - def __init__(self, run_id, rosconfig, pm, server): + def __init__(self, run_id, rosconfig, pm, server, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ :param run_id: roslaunch run_id of this runner, ``str`` :param config: launch configuration, ``ROSConfig`` :param pm process monitor, ``ProcessMonitor`` :param server: roslaunch parent server, ``ROSLaunchParentNode`` + :param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + :type sigint_timeout: float + :param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + :type sigterm_timeout: float """ self.run_id = run_id self.rosconfig = rosconfig self.server = server self.pm = pm + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout self.logger = logging.getLogger('roslaunch.remote') self.listeners = [] @@ -90,7 +97,8 @@ def _start_child(self, server_node_uri, machine, counter): self.logger.info("remote[%s] starting roslaunch", name) printlog("remote[%s] starting roslaunch"%name) - p = SSHChildROSLaunchProcess(self.run_id, name, server_node_uri, machine, self.rosconfig.master.uri) + p = SSHChildROSLaunchProcess(self.run_id, name, server_node_uri, machine, self.rosconfig.master.uri, + sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout) success = p.start() self.pm.register(p) if not success: #treat as fatal diff --git a/tools/roslaunch/src/roslaunch/remoteprocess.py b/tools/roslaunch/src/roslaunch/remoteprocess.py index 1f2aa6da91..6f44eff25f 100644 --- a/tools/roslaunch/src/roslaunch/remoteprocess.py +++ b/tools/roslaunch/src/roslaunch/remoteprocess.py @@ -48,6 +48,7 @@ from roslaunch.core import printlog, printerrlog import roslaunch.pmon import roslaunch.server +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM import logging _logger = logging.getLogger("roslaunch.remoteprocess") @@ -126,18 +127,25 @@ class SSHChildROSLaunchProcess(roslaunch.server.ChildROSLaunchProcess): """ Process wrapper for launching and monitoring a child roslaunch process over SSH """ - def __init__(self, run_id, name, server_uri, machine, master_uri=None): + def __init__(self, run_id, name, server_uri, machine, master_uri=None, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ :param machine: Machine instance. Must be fully configured. machine.env_loader is required to be set. + :param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + :type sigint_timeout: float + :param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds). + :type sigterm_timeout: float """ if not machine.env_loader: raise ValueError("machine.env_loader must have been assigned before creating ssh child instance") - args = [machine.env_loader, 'roslaunch', '-c', name, '-u', server_uri, '--run_id', run_id] + args = [machine.env_loader, 'roslaunch', '-c', name, '-u', server_uri, '--run_id', run_id, + '--sigint-timeout', str(sigint_timeout), '--sigterm-timeout', str(sigterm_timeout)] # env is always empty dict because we only use env_loader super(SSHChildROSLaunchProcess, self).__init__(name, args, {}) self.machine = machine self.master_uri = master_uri + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout self.ssh = self.sshin = self.sshout = self.ssherr = None self.started = False self.uri = None diff --git a/tools/roslaunch/src/roslaunch/server.py b/tools/roslaunch/src/roslaunch/server.py index c51dbaca25..0d7799dd7f 100644 --- a/tools/roslaunch/src/roslaunch/server.py +++ b/tools/roslaunch/src/roslaunch/server.py @@ -70,6 +70,7 @@ import roslaunch.config from roslaunch.pmon import ProcessListener, Process import roslaunch.xmlloader +from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM from roslaunch.launch import ROSLaunchRunner from roslaunch.core import RLException, \ @@ -243,12 +244,17 @@ class ROSLaunchChildHandler(ROSLaunchBaseHandler): it can track processes across requests """ - def __init__(self, run_id, name, server_uri, pm): + def __init__(self, run_id, name, server_uri, pm, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ @param server_uri: XML-RPC URI of server @type server_uri: str @param pm: process monitor to use @type pm: L{ProcessMonitor} + @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + @type sigint_timeout: float + @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node ( + in seconds). + @type sigterm_timeout: float @raise RLException: If parameters are invalid """ super(ROSLaunchChildHandler, self).__init__(pm) @@ -264,6 +270,8 @@ def __init__(self, run_id, name, server_uri, pm): self.name = name self.pm = pm self.server_uri = server_uri + self.sigint_timeout = sigint_timeout + self.sigterm_timeout = sigterm_timeout self.server = ServerProxy(server_uri) def _shutdown(self, reason): @@ -328,7 +336,8 @@ def launch(self, launch_xml): # mainly the responsibility of the roslaunch server to not give us any XML that might # cause conflict (e.g. master tags, param tags, etc...). self._log(Log.INFO, "launching nodes...") - runner = ROSLaunchRunner(self.run_id, rosconfig, server_uri=self.server_uri, pmon=self.pm) + runner = ROSLaunchRunner(self.run_id, rosconfig, server_uri=self.server_uri, pmon=self.pm, + sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout) succeeded, failed = runner.launch() self._log(Log.INFO, "... done launching nodes") # enable the process monitor to exit of all processes die @@ -475,13 +484,18 @@ class ROSLaunchChildNode(ROSLaunchNode): XML-RPC server for roslaunch child processes """ - def __init__(self, run_id, name, server_uri, pm): + def __init__(self, run_id, name, server_uri, pm, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM): """ ## Startup roslaunch remote client XML-RPC services. Blocks until shutdown ## @param name: name of remote client ## @type name: str ## @param server_uri: XML-RPC URI of roslaunch server ## @type server_uri: str + ## @param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds). + ## @type sigint_timeout: float + ## @param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node ( + ## in seconds). + ## @type sigterm_timeout: float ## @return: XML-RPC URI ## @rtype: str """ @@ -493,7 +507,8 @@ def __init__(self, run_id, name, server_uri, pm): if self.pm is None: raise RLException("cannot create child node: pm is not initialized") - handler = ROSLaunchChildHandler(self.run_id, self.name, self.server_uri, self.pm) + handler = ROSLaunchChildHandler(self.run_id, self.name, self.server_uri, self.pm, + sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout) super(ROSLaunchChildNode, self).__init__(handler) def _register_with_server(self): diff --git a/tools/roslaunch/test/manual-test-remote-timeouts.sh b/tools/roslaunch/test/manual-test-remote-timeouts.sh new file mode 100755 index 0000000000..20fbe34809 --- /dev/null +++ b/tools/roslaunch/test/manual-test-remote-timeouts.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +if [ $# -lt 5 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ]; then + echo "Usage: manual-test-remote-timeouts.sh sigint_timeout sigterm_timeout address env_loader user [roslaunch_timeout]" + echo "Run this script set up to connect to a remote machine (or your own one, if you have self-ssh enabled), and after a while, break the program with Ctrl-C." + echo "Observe, if SIGINT and SIGTERM are issued approximately after the time you have given in sigint_timeout and sigterm_timeout" + echo "Make sure the remote machine also has the same version of ros_comm as this one!" + exit 1 +fi + +sigint=$1 +sigterm=$2 +address=$3 +env_loader=$4 +user=$5 +if [ $# -gt 6 ]; then + timeout=$6 +else + timeout=10.0 +fi + +bold="\033[1m" +normal="\033[0m" +echo -e "${bold}A while after you see '... done launching nodes', break the program with Ctrl-C." +echo -e "Observe, if SIGINT and SIGTERM are issued approximately after ${sigint} and ${sigterm} seconds" +echo -e "Make sure the remote machine also has the same version of ros_comm as this one!${normal}" + +sleep 5 + +"${THIS_DIR}/../scripts/roslaunch" --sigint-timeout ${sigint} --sigterm-timeout ${sigterm} \ + "${THIS_DIR}/xml/manual-test-remote-timeouts.launch" \ + address:=${address} env_loader:=${env_loader} user:=${user} timeout:=${timeout} \ No newline at end of file diff --git a/tools/roslaunch/test/signal_logger.py b/tools/roslaunch/test/signal_logger.py new file mode 100755 index 0000000000..b9e81eace2 --- /dev/null +++ b/tools/roslaunch/test/signal_logger.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 + +import os +import signal +import tempfile +import time + +if __name__ == '__main__': + LOG_FILE = os.path.join(tempfile.gettempdir(), "signal.log") + log_stream = open(LOG_FILE, 'w') + + + def handler(signum, _): + log_stream.write("%i %s\n" % (signum, str(time.time()))) + log_stream.flush() + + if signum == signal.SIGTERM: + log_stream.close() + + + signal.signal(signal.SIGINT, handler) + signal.signal(signal.SIGTERM, handler) + + while True: + time.sleep(10) diff --git a/tools/roslaunch/test/unit/test_nodeprocess.py b/tools/roslaunch/test/unit/test_nodeprocess.py index 1a8c0c58a6..945759e915 100644 --- a/tools/roslaunch/test/unit/test_nodeprocess.py +++ b/tools/roslaunch/test/unit/test_nodeprocess.py @@ -78,6 +78,14 @@ def test_create_master_process(self): self.assertEquals(p.package, 'rosmaster') p = create_master_process(run_id, type, ros_root, port) + self.assertEquals(create_master_process(run_id, type, ros_root, port, sigint_timeout=3).sigint_timeout, 3) + self.assertEquals(create_master_process(run_id, type, ros_root, port, sigint_timeout=1).sigint_timeout, 1) + self.assertRaises(RLException, create_master_process, run_id, type, ros_root, port, sigint_timeout=0) + + self.assertEquals(create_master_process(run_id, type, ros_root, port, sigterm_timeout=3).sigterm_timeout, 3) + self.assertEquals(create_master_process(run_id, type, ros_root, port, sigterm_timeout=1).sigterm_timeout, 1) + self.assertRaises(RLException, create_master_process, run_id, type, ros_root, port, sigterm_timeout=0) + # TODO: have to think more as to the correct environment for the master process @@ -164,6 +172,16 @@ def test_create_node_process(self): n.cwd = 'node' self.assertEquals(create_node_process(run_id, n, master_uri).cwd, 'node') + # sigint timeout + self.assertEquals(create_node_process(run_id, n, master_uri).sigint_timeout, 15) + self.assertEquals(create_node_process(run_id, n, master_uri, sigint_timeout=1).sigint_timeout, 1) + self.assertRaises(RLException, create_node_process, run_id, n, master_uri, sigint_timeout=0) + + # sigterm timeout + self.assertEquals(create_node_process(run_id, n, master_uri).sigterm_timeout, 2) + self.assertEquals(create_node_process(run_id, n, master_uri, sigterm_timeout=1).sigterm_timeout, 1) + self.assertRaises(RLException, create_node_process, run_id, n, master_uri, sigterm_timeout=0) + # test args # - simplest test (no args) @@ -201,7 +219,69 @@ def test_create_node_process(self): self.failIf('SUB_TEST' in p.args) self.assert_('foo' in p.args) self.assert_('subtest' in p.args) - self.assert_('subtest2' in p.args) + self.assert_('subtest2' in p.args) + + def test_local_process_stop_timeouts(self): + from roslaunch.core import Node, Machine + + # have to use real ROS configuration for these tests + ros_root = os.environ['ROS_ROOT'] + rpp = os.environ.get('ROS_PACKAGE_PATH', None) + master_uri = 'http://masteruri:1234' + m = Machine('name1', ros_root, rpp, '1.2.3.4') + + run_id = 'id' + + n = Node('roslaunch', 'signal_logger.py') + n.name = 'logger' + n.machine = m + self.check_stop_timeouts(master_uri, n, run_id, 1.0, 1.0) + self.check_stop_timeouts(master_uri, n, run_id, 0.00001, 1.0) + # shorter sigterm times are risky in the test - the signal file might not get written; but in the wild, it's ok + self.check_stop_timeouts(master_uri, n, run_id, 1.0, 0.001) + self.check_stop_timeouts(master_uri, n, run_id, 2.0, 3.0) + + def check_stop_timeouts(self, master_uri, n, run_id, sigint_timeout, sigterm_timeout): + from roslaunch.nodeprocess import create_node_process, LocalProcess + + import time + import tempfile + import signal + + signal_log_file = os.path.join(tempfile.gettempdir(), "signal.log") + + try: + os.remove(signal_log_file) + except OSError: + pass + + p = create_node_process(run_id, n, master_uri, sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout) + self.assert_(isinstance(p, LocalProcess)) + + p.start() + time.sleep(3) # give it time to start + + before_stop_call_time = time.time() + p.stop() + after_stop_call_time = time.time() + + signals = dict() + + try: + with open(signal_log_file, 'r') as f: + lines = f.readlines() + for line in lines: + sig, timestamp = line.split(" ") + sig = int(sig) + timestamp = float(timestamp) + signals[sig] = timestamp + except IOError: + self.fail("Could not open %s" % signal_log_file) + + self.assertSetEqual({signal.SIGINT, signal.SIGTERM}, set(signals.keys())) + self.assertAlmostEqual(before_stop_call_time, signals[signal.SIGINT], delta=1) + self.assertAlmostEqual(before_stop_call_time, signals[signal.SIGTERM] - sigint_timeout, delta=1) + self.assertAlmostEqual(before_stop_call_time, after_stop_call_time - sigint_timeout - sigterm_timeout, delta=1) def test__cleanup_args(self): # #1595 diff --git a/tools/roslaunch/test/unit/test_roslaunch_parent.py b/tools/roslaunch/test/unit/test_roslaunch_parent.py index fae7a2225d..567663fecb 100644 --- a/tools/roslaunch/test/unit/test_roslaunch_parent.py +++ b/tools/roslaunch/test/unit/test_roslaunch_parent.py @@ -236,6 +236,89 @@ def ftrue(): p.shutdown() self.assert_(pmon.is_shutdown) + +## Test sigint_timeout and sigterm_timeout +# We need real ProcessMonitor here, and we need to spawn real threads +class TestRoslaunchTimeouts(unittest.TestCase): + def setUp(self): + from roslaunch.pmon import ProcessMonitor + self.pmon = ProcessMonitor() + + def test_roslaunchTimeouts(self): + try: + self._subtestTimeouts() + finally: + self.pmon.shutdown() + + def _subtestTimeouts(self): + from roslaunch.parent import ROSLaunchParent + from roslaunch.server import ROSLaunchParentNode + import signal + import tempfile + from threading import Thread + + pmon = self.pmon + pmon.start() + try: + # if there is a core up, we have to use its run id + run_id = get_param('/run_id') + except: + run_id = 'test-rl-parent-timeout-%s' % time.time() + + rl_dir = rospkg.RosPack().get_path('roslaunch') + rl_file = os.path.join(rl_dir, 'resources', 'timeouts.launch') + + sigint_timeout = 2 + sigterm_timeout = 3 + + p = ROSLaunchParent(run_id, [rl_file], is_core=False, port=11312, local_only=True, + sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout) + p._load_config() + p.pm = pmon + p.server = ROSLaunchParentNode(p.config, pmon) + + signal_log_file = os.path.join(tempfile.gettempdir(), "signal.log") + try: + os.remove(signal_log_file) + except OSError: + pass + + def kill_launch(times): + time.sleep(3) # give it time to start + + times.append(time.time()) + p.shutdown() + times.append(time.time()) + + p.start() + + times = [] + t = Thread(target=kill_launch, args=(times,)) + t.start() + + p.spin() + t.join() + + before_stop_call_time, after_stop_call_time = times + + signals = dict() + try: + with open(signal_log_file, 'r') as f: + lines = f.readlines() + for line in lines: + sig, timestamp = line.split(" ") + sig = int(sig) + timestamp = float(timestamp) + signals[sig] = timestamp + except IOError: + self.fail("Could not open %s" % signal_log_file) + + self.assertSetEqual({signal.SIGINT, signal.SIGTERM}, set(signals.keys())) + self.assertAlmostEqual(before_stop_call_time, signals[signal.SIGINT], delta=1.0) + self.assertAlmostEqual(before_stop_call_time, signals[signal.SIGTERM] - sigint_timeout, delta=1) + self.assertAlmostEqual(before_stop_call_time, after_stop_call_time - sigint_timeout - sigterm_timeout, delta=1) + + def kill_parent(p, delay=1.0): # delay execution so that whatever pmon method we're calling has time to enter time.sleep(delay) diff --git a/tools/roslaunch/test/xml/manual-test-remote-timeouts.launch b/tools/roslaunch/test/xml/manual-test-remote-timeouts.launch new file mode 100644 index 0000000000..58514aeda5 --- /dev/null +++ b/tools/roslaunch/test/xml/manual-test-remote-timeouts.launch @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +