diff --git a/rqd/deploy/rqd3_init.d b/rqd/deploy/rqd3_init.d index babc1e1f1..b146780f3 100644 --- a/rqd/deploy/rqd3_init.d +++ b/rqd/deploy/rqd3_init.d @@ -3,7 +3,7 @@ # RQD3: Start/stop rqd3 services # # chkconfig: 345 98 02 -# description: RQD for opencue +# description: Opencue RQD agent # # Source function library. @@ -18,7 +18,7 @@ RQD=${RQD_PATH}rqd.py start() { [ -f /usr/local/etc/sweatbox.csh ] && echo "Refusing to start RQD3 on a sweatbox" && exit 0 - echo -n $"Starting rqd3 services:" + echo -n $"Starting openrqd services:" cd ${RQD_PATH} daemon "${RQD}" -d echo "" @@ -26,20 +26,18 @@ start() idle_restart() { - echo -n "Requesting idle restart of rqd3 services:" + echo -n "Requesting idle restart of openrqd services:" cd ${RQD_PATH} - daemon "./cuerqd.py" -restart + daemon "rqd/cuerqd.py --restart &>/dev/null || :" echo "" } stop() { - echo -n "Stopping rqd3 services:" + echo -n "Stopping openrqd services:" cd ${RQD_PATH} - daemon "./cuerqd.py" -exit_now - sleep 2 - killproc ${RQD} >/dev/null 2>&1 || : - echo "" + daemon "rqd/cuerqd.py" --exit_now + echo "Stop Request completed" } case "$1" in diff --git a/rqd/rqd/cuerqd.py b/rqd/rqd/cuerqd.py index c80098579..f7aee88ef 100755 --- a/rqd/rqd/cuerqd.py +++ b/rqd/rqd/cuerqd.py @@ -100,7 +100,13 @@ def shutdownRqdIdle(self): def shutdownRqdNow(self): """Shuts down the host now.""" print(self.rqdHost, "Sending shutdownRqdNow command") - self.stub.ShutdownRqdNow(rqd.compiled_proto.rqd_pb2.RqdStaticShutdownNowRequest()) + try: + self.stub.ShutdownRqdNow(rqd.compiled_proto.rqd_pb2.RqdStaticShutdownNowRequest()) + # pylint: disable=broad-except + except Exception: + # Shutting down the service from inside means this request will receive + # a connection error response + pass def restartRqdIdle(self): """Restarts RQD on the host when idle.""" diff --git a/rqd/rqd/rqconstants.py b/rqd/rqd/rqconstants.py index b52d20afe..0d6968d27 100644 --- a/rqd/rqd/rqconstants.py +++ b/rqd/rqd/rqconstants.py @@ -123,7 +123,7 @@ OVERRIDE_PROCS = None # number of physical cpus. ex: None or 2 OVERRIDE_MEMORY = None # in Kb OVERRIDE_NIMBY = None # True to turn on, False to turn off -USE_NIMBY_PYNPUT = platform.system() == 'Windows' +USE_NIMBY_PYNPUT = True # True pynput, False select OVERRIDE_HOSTNAME = None # Force to use this hostname ALLOW_GPU = False LOAD_MODIFIER = 0 # amount to add/subtract from load diff --git a/rqd/rqd/rqcore.py b/rqd/rqd/rqcore.py index 06c26e80f..48ef7ccc9 100644 --- a/rqd/rqd/rqcore.py +++ b/rqd/rqd/rqcore.py @@ -817,15 +817,18 @@ def shutdown(self): log.warning("Rebooting machine by request") self.machine.reboot() else: - log.warning("Shutting down RQD by request") + log.warning("Shutting down RQD by request. pid(%s)", os.getpid()) + self.network.stopGrpc() + # Using sys.exit would raise SystemExit, giving exception handlers a chance + # to block this + # pylint: disable=protected-access + os._exit(0) def handleExit(self, signalnum, flag): """Shutdown threads and exit RQD.""" del signalnum del flag self.shutdown() - self.network.stopGrpc() - sys.exit() def launchFrame(self, runFrame): """This will setup for the launch the frame specified in the arguments. @@ -925,8 +928,12 @@ def reportStatus(self): def shutdownRqdNow(self): """Kill all running frames and shutdown RQD""" self.machine.state = rqd.compiled_proto.host_pb2.DOWN - self.lockAll() - self.killAllFrame("shutdownRqdNow Command") + try: + self.lockAll() + self.killAllFrame("shutdownRqdNow Command") + # pylint: disable=broad-except + except Exception: + log.exception("Failed to kill frames, stopping service anyways") if not self.__cache: self.shutdown() @@ -980,14 +987,12 @@ def rebootIdle(self): def nimbyOn(self): """Activates nimby, does not kill any running frames until next nimby event. Also does not unlock until sufficient idle time is reached.""" - if platform.system() != "Windows" and os.getuid() != 0: - log.warning("Not starting nimby, not running as root") - return - if not self.nimby.active: + if self.nimby and not self.nimby.active: try: self.nimby.run() - log.info("Nimby has been activated") - except: + log.warning("Nimby has been activated") + # pylint: disable=broad-except + except Exception: self.nimby.locked = False err = "Nimby is in the process of shutting down" log.exception(err) @@ -1007,7 +1012,7 @@ def onNimbyLock(self): self.sendStatusReport() def onNimbyUnlock(self, asOf=None): - """This is called by nimby when it unlocks the machine due to sufficent + """This is called by nimby when it unlocks the machine due to sufficient idle. A new report is sent to the cuebot. @param asOf: Time when idle state began, if known.""" del asOf diff --git a/rqd/rqd/rqmachine.py b/rqd/rqd/rqmachine.py index c712e5c41..bed78fdef 100644 --- a/rqd/rqd/rqmachine.py +++ b/rqd/rqd/rqmachine.py @@ -207,14 +207,15 @@ def __updateGpuAndLlu(self, frame): stat = os.stat(frame.runFrame.log_dir_file).st_mtime frame.lluTime = int(stat) - def _getFields(self, filePath): + def _getFields(self, pidFilePath): fields = [] try: - with open(filePath, "r") as statFile: + with open(pidFilePath, "r") as statFile: fields = statFile.read().split() - except rqd.rqexceptions.RqdException as e: - log.warning("Failed to read file: %s", e) + # pylint: disable=broad-except + except Exception: + log.warning("Not able to read pidFilePath: %s", pidFilePath) return fields diff --git a/rqd/rqd/rqnetwork.py b/rqd/rqd/rqnetwork.py index 39a65c750..be0208319 100644 --- a/rqd/rqd/rqnetwork.py +++ b/rqd/rqd/rqnetwork.py @@ -165,6 +165,7 @@ def kill(self, message=""): except OSError as e: log.warning( "kill() tried to kill a non-existant pid for: %s Error: %s", self.frameId, e) + # pylint: disable=broad-except except Exception as e: log.warning("kill() encountered an unknown error: %s", e) else: @@ -226,8 +227,8 @@ def serveForever(self): def shutdown(self): """Stops the gRPC server.""" - log.info('Stopping grpc server.') - self.server.stop(0) + log.warning('Stopping grpc server.') + self.server.stop(10) def stayAlive(self): """Runs forever until killed.""" @@ -255,6 +256,7 @@ def stopGrpc(self): """Stops the gRPC server.""" self.grpcServer.shutdown() del self.grpcServer + log.warning("Stopped grpc server") def closeChannel(self): """Closes the gRPC channel.""" diff --git a/rqd/rqd/rqnimby.py b/rqd/rqd/rqnimby.py index e57e39f23..e2cdbf214 100644 --- a/rqd/rqd/rqnimby.py +++ b/rqd/rqd/rqnimby.py @@ -20,43 +20,50 @@ from __future__ import print_function from __future__ import division -from abc import ABCMeta, abstractmethod +from abc import abstractmethod +import abc import os import select -import time import signal import threading +import time import logging -import platform import rqd.rqconstants import rqd.rqutil -log = logging.getLogger(__name__) - -if platform.system() == 'Windows': - pynputIsAvailable = False - try: - import pynput - pynputIsAvailable = True - except ImportError as e: - log.error(e) +log = logging.getLogger(__name__) # compatible with Python 2 and 3: -ABC = ABCMeta('ABC', (object,), {'__slots__': ()}) +ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()}) + class NimbyFactory(object): """ Factory to handle Linux/Windows platforms """ @staticmethod def getNimby(rqCore): """ assign platform dependent Nimby instance """ - nimbyInstance = None - if rqd.rqconstants.USE_NIMBY_PYNPUT and pynputIsAvailable: - nimbyInstance = NimbyPynput(rqCore) - else: - nimbyInstance = NimbySelect(rqCore) - return nimbyInstance + if rqd.rqconstants.USE_NIMBY_PYNPUT: + try: + # DISPLAY is required to import pynput internals + # and it's not automatically set depending on the + # environment rqd is running in + if "DISPLAY" not in os.environ: + os.environ['DISPLAY'] = ":0" + # pylint: disable=unused-import, import-error, unused-variable, import-outside-toplevel + import pynput + # pylint: disable=broad-except + except Exception: + # Ideally ImportError could be used here, but pynput + # can throw other kinds of exception while trying to + # access runpy components + log.exception("Failed to import pynput, falling back to Select module") + # Still enabling the application start as hosts can be manually locked + # using the API/GUI + return NimbyNop(rqCore) + return NimbyPynput(rqCore) + return NimbySelect(rqCore) class Nimby(threading.Thread, ABC): @@ -75,8 +82,6 @@ def __init__(self, rqCore): self.rqCore = rqCore self.locked = False self.active = False - log.warning("Locked state :%s", self.locked) - log.warning("Active state :%s", self.active) self.fileObjList = [] self.results = [[]] @@ -97,7 +102,7 @@ def lockNimby(self): """Activates the nimby lock, calls lockNimby() in rqcore""" if self.active and not self.locked: self.locked = True - log.info("Locked nimby") + log.warning("Locked nimby") self.rqCore.onNimbyLock() def unlockNimby(self, asOf=None): @@ -105,29 +110,15 @@ def unlockNimby(self, asOf=None): @param asOf: Time when idle state began, if known.""" if self.locked: self.locked = False - log.info("Unlocked nimby") + log.warning("Unlocked nimby") self.rqCore.onNimbyUnlock(asOf=asOf) def run(self): """Starts the Nimby thread""" - log.warning("Nimby Run") self.active = True - self.locked = True self.startListener() self.unlockedIdle() - rqd.rqutil.permissionsHigh() - try: - for device in os.listdir("/dev/input/"): - if device.startswith("event") or device.startswith("mice"): - try: - self.fileObjList.append(open("/dev/input/%s" % device, "rb")) - except IOError as e: - # Bad device found - log.warning("IOError: Failed to open %s, %s", "/dev/input/%s" % device, e) - finally: - rqd.rqutil.permissionsLow() - def stop(self): """Stops the Nimby thread""" log.warning("Stop Nimby") @@ -173,7 +164,6 @@ def startListener(self): """ start listening """ def stopListener(self): - """ stop listening """ self.closeEvents() def lockedInUse(self): @@ -206,7 +196,7 @@ def unlockedIdle(self): self.results = select.select(self.fileObjList, [], [], 5) # pylint: disable=broad-except except Exception: - pass + log.exception("failed to execute nimby check event") if not self.rqCore.machine.isNimbySafeToRunJobs(): log.warning("memory threshold has been exceeded, locking nimby") self.active = True @@ -244,7 +234,6 @@ def lockedIdle(self): def openEvents(self): """Opens the /dev/input/event* files so nimby can monitor them""" - log.warning("openEvents") self.closeEvents() rqd.rqutil.permissionsHigh() @@ -253,17 +242,15 @@ def openEvents(self): if device.startswith("event") or device.startswith("mice"): try: self.fileObjList.append(open("/dev/input/%s" % device, "rb")) - except IOError as e: + except IOError: # Bad device found - msg = ('IOError: Failed to open %s, %s' - % ("/dev/input/%s" % device, e)) - log.warning(msg) + log.exception("IOError: Failed to open /dev/input/%s", device) finally: rqd.rqutil.permissionsLow() def closeEvents(self): """Closes the /dev/input/event* files""" - log.warning("closeEvents") + log.info("closeEvents") if self.fileObjList: for fileObj in self.fileObjList: try: @@ -279,11 +266,14 @@ def isNimbyActive(self): """ return self.active and self.results[0] == [] + class NimbyPynput(Nimby): - """ Nimby Windows """ + """ Nimby using pynput """ def __init__(self, rqCore): Nimby.__init__(self, rqCore) + # pylint: disable=unused-import, import-error, import-outside-toplevel + import pynput self.mouse_listener = pynput.mouse.Listener( on_move=self.on_interaction, on_click=self.on_interaction, @@ -337,21 +327,19 @@ def unlockedIdle(self): self.lockNimby() self.thread = threading.Timer(rqd.rqconstants.CHECK_INTERVAL_LOCKED, self.lockedInUse) - log.warning("starting Thread") self.thread.start() def lockedIdle(self): """Nimby State: Machine is idle, waiting for sufficient idle time to unlock""" - log.warning("lockedIdle") - waitStartTime = time.time() + wait_start_time = time.time() time.sleep(rqd.rqconstants.MINIMUM_IDLE) if self.active and not self.interaction_detected and \ self.rqCore.machine.isNimbySafeToUnlock(): - log.warning("Start wait time: %s", waitStartTime) - self.unlockNimby(asOf=waitStartTime) + log.warning("Start wait time: %s", wait_start_time) + self.unlockNimby(asOf=wait_start_time) self.unlockedIdle() elif self.active: @@ -364,3 +352,33 @@ def isNimbyActive(self): :return: boolean if events are logged and Nimby is active """ return not self.active and self.interaction_detected + + +class NimbyNop(Nimby): + """Nimby option for when no option is available""" + def __init__(self, rqCore): + Nimby.__init__(self, rqCore) + self.warning_msg() + + @staticmethod + def warning_msg(): + """Just a helper to avoid duplication""" + log.warning("Using Nimby nop! Something went wrong on nimby's initialization.") + + def startListener(self): + self.warning_msg() + + def stopListener(self): + self.warning_msg() + + def lockedInUse(self): + self.warning_msg() + + def unlockedIdle(self): + self.warning_msg() + + def lockedIdle(self): + self.warning_msg() + + def isNimbyActive(self): + return False