diff --git a/.gitignore b/.gitignore index 7f4857d09..370dbd6b1 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ htmlcov/ /.env .envrc .vscode -.venv/ \ No newline at end of file +.venv/ +.eggs/* diff --git a/VERSION.in b/VERSION.in index 7d385d419..992221508 100644 --- a/VERSION.in +++ b/VERSION.in @@ -1 +1 @@ -0.25 +0.26 diff --git a/cuebot/src/main/java/com/imageworks/spcue/Source.java b/cuebot/src/main/java/com/imageworks/spcue/Source.java index a54207c92..8f6d78ee3 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/Source.java +++ b/cuebot/src/main/java/com/imageworks/spcue/Source.java @@ -25,6 +25,10 @@ public class Source { public String source = "unknown"; + public String username = ""; + public String pid = ""; + public String host_kill = ""; + public String reason = ""; public Source() {} @@ -32,8 +36,24 @@ public Source(String source) { this.source = source; } + public Source(String source, String username, String pid, String host_kill, String reason) { + this.source = source; + this.username = username; + this.pid = pid; + this.host_kill = host_kill; + this.reason = reason; + } + + public String getReason() { + return this.reason; + } + public String toString() { - return this.source; + return "User: " + this.username + + ", Pid: " + this.pid + + ", Hostname: " + this.host_kill + + ", Reason: " + this.reason + + "\n" + this.source; } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java index e3cfa5178..fc01365a6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageJob.java @@ -275,7 +275,9 @@ public void kill(JobKillRequest request, StreamObserver respons try { setupJobData(request.getJob()); manageQueue.execute(new DispatchJobComplete(job, - new Source(request.toString()), true, jobManagerSupport)); + new Source(request.toString(), request.getUsername(), request.getPid(), + request.getHostKill(), request.getReason()), + true, jobManagerSupport)); responseObserver.onNext(JobKillResponse.newBuilder().build()); responseObserver.onCompleted(); } @@ -486,7 +488,8 @@ public void killFrames(JobKillFramesRequest request, StreamObserver responseObserver) { updateLayer(request.getLayer()); - if (attemptChange(env, property, jobManager, layer, responseObserver)) { - manageQueue.execute(new DispatchKillFrames(frameSearch, - new Source(request.toString()), jobManagerSupport)); - responseObserver.onNext(LayerKillFramesResponse.newBuilder().build()); - responseObserver.onCompleted(); - } + manageQueue.execute(new DispatchKillFrames(frameSearch, + new Source(request.toString(), request.getUsername(), request.getPid(), + request.getHostKill(), request.getReason()), jobManagerSupport)); + responseObserver.onNext(LayerKillFramesResponse.newBuilder().build()); + responseObserver.onCompleted(); } @Override diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerSupport.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerSupport.java index b2db74d59..6f5789ee2 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerSupport.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerSupport.java @@ -75,76 +75,82 @@ public void queueShutdownJob(JobInterface job, Source source, boolean isManualKi public boolean shutdownJob(JobInterface job, Source source, boolean isManualKill) { - if (jobManager.shutdownJob(job)) { - - /* - * Satisfy any dependencies on just the - * job record, not layers or frames. - */ - satisfyWhatDependsOn(job); - - if (departmentManager.isManaged(job)) { - departmentManager.syncJobsWithTask(job); - } - - if (isManualKill) { + if (isManualKill && source.getReason().isEmpty()) { + logger.info(job.getName() + "/" + job.getId() + + " **Invalid Job Kill Request** for " + source.toString()); + } + else { + if (jobManager.shutdownJob(job)) { - logger.info(job.getName() + "/" + job.getId() + - " is being manually killed by " + source.toString()); + /* + * Satisfy any dependencies on just the + * job record, not layers or frames. + */ + satisfyWhatDependsOn(job); - /** - * Sleep a bit here in case any frames were - * dispatched during the job shutdown process. - */ - try { - Thread.sleep(3000); - } catch (InterruptedException e1) { - logger.info(job.getName() + "/" + job.getId() + - " shutdown thread was interrupted."); - Thread.currentThread().interrupt(); + if (departmentManager.isManaged(job)) { + departmentManager.syncJobsWithTask(job); } - FrameSearchInterface search = frameSearchFactory.create(job); - FrameSearchCriteria newCriteria = search.getCriteria(); - FrameStateSeq states = newCriteria.getStates().toBuilder() - .addFrameStates(FrameState.RUNNING) - .build(); - search.setCriteria(newCriteria.toBuilder().setStates(states).build()); + if (isManualKill) { - for (FrameInterface frame: jobManager.findFrames(search)) { + logger.info(job.getName() + "/" + job.getId() + + " is being manually killed by " + source.toString()); - VirtualProc proc = null; + /** + * Sleep a bit here in case any frames were + * dispatched during the job shutdown process. + */ try { - proc = hostManager.findVirtualProc(frame); - } - catch (DataAccessException e) { - logger.warn("Unable to find proc to kill frame " + frame + - " on job shutdown operation, " + e); + Thread.sleep(3000); + } catch (InterruptedException e1) { + logger.info(job.getName() + "/" + job.getId() + + " shutdown thread was interrupted."); + Thread.currentThread().interrupt(); } - if (manualStopFrame(frame, FrameState.WAITING)) { + FrameSearchInterface search = frameSearchFactory.create(job); + FrameSearchCriteria newCriteria = search.getCriteria(); + FrameStateSeq states = newCriteria.getStates().toBuilder() + .addFrameStates(FrameState.RUNNING) + .build(); + search.setCriteria(newCriteria.toBuilder().setStates(states).build()); + + for (FrameInterface frame: jobManager.findFrames(search)) { + + VirtualProc proc = null; try { - if (proc != null) { - kill(proc, source); - } - } catch (DataAccessException e) { - logger.warn("Failed to kill frame " + frame + + proc = hostManager.findVirtualProc(frame); + } + catch (DataAccessException e) { + logger.warn("Unable to find proc to kill frame " + frame + " on job shutdown operation, " + e); } - catch (Exception e) { - logger.warn("error killing frame: " + frame); + + if (manualStopFrame(frame, FrameState.WAITING)) { + try { + if (proc != null) { + kill(proc, source); + } + } catch (DataAccessException e) { + logger.warn("Failed to kill frame " + frame + + " on job shutdown operation, " + e); + } + catch (Exception e) { + logger.warn("error killing frame: " + frame); + } } } } - } - /* - * Send mail after all frames have been stopped or else the email - * will have inaccurate numbers. - */ - emailSupport.sendShutdownEmail(job); + /* + * Send mail after all frames have been stopped or else the email + * will have inaccurate numbers. + */ + emailSupport.sendShutdownEmail(job); - return true; + return true; + } } return false; diff --git a/cuegui/cuegui/MenuActions.py b/cuegui/cuegui/MenuActions.py index 6e1664671..7642eb383 100644 --- a/cuegui/cuegui/MenuActions.py +++ b/cuegui/cuegui/MenuActions.py @@ -24,6 +24,7 @@ from builtins import filter from builtins import str from builtins import object +import getpass import glob import subprocess import time @@ -65,7 +66,8 @@ TITLE = 0 TOOLTIP = 1 ICON = 2 - +DEFAULT_JOB_KILL_REASON = "Manual Job Kill Request in Cuegui by " + getpass.getuser() +DEFAULT_FRAME_KILL_REASON = "Manual Frame(s) Kill Request in Cuegui by " + getpass.getuser() # pylint: disable=missing-function-docstring,no-self-use,unused-argument @@ -368,7 +370,7 @@ def kill(self, rpcObjects=None): if cuegui.Utils.questionBoxYesNo(self._caller, "Kill jobs?", msg, [job.data.name for job in jobs]): for job in jobs: - job.kill() + job.kill(reason=DEFAULT_JOB_KILL_REASON) self.killDependents(jobs) self._update() @@ -384,7 +386,7 @@ def killDependents(self, jobs): sorted([dep.name() for dep in dependents])): for depJob in dependents: try: - depJob.kill() + depJob.kill(reason=DEFAULT_JOB_KILL_REASON) except opencue.exception.CueException as e: errMsg = "Failed to kill depending job: %s - %s" % (depJob.name(), e) logger.warning(errMsg) @@ -769,7 +771,7 @@ def kill(self, rpcObjects=None): "Kill ALL frames in selected layers?", [layer.data.name for layer in layers]): for layer in layers: - layer.kill() + layer.kill(reason=DEFAULT_FRAME_KILL_REASON) self._update() eat_info = ["&Eat", None, "eat"] @@ -1080,7 +1082,8 @@ def kill(self, rpcObjects=None): if cuegui.Utils.questionBoxYesNo(self._caller, "Confirm", "Kill selected frames?", names): - self._getSource().killFrames(name=names) + self._getSource().killFrames(reason=DEFAULT_FRAME_KILL_REASON, + name=names) self._update() markAsWaiting_info = ["Mark as &waiting", None, "configure"] diff --git a/cuegui/cuegui/plugins/StuckFramePlugin.py b/cuegui/cuegui/plugins/StuckFramePlugin.py index a44c90196..870dcaff4 100644 --- a/cuegui/cuegui/plugins/StuckFramePlugin.py +++ b/cuegui/cuegui/plugins/StuckFramePlugin.py @@ -23,12 +23,12 @@ from builtins import str from builtins import map import datetime -import re +import getpass import os -from datetime import datetime -import time -import socket +import re import signal +import socket +import time import yaml from qtpy import QtGui @@ -63,7 +63,7 @@ LLU_COLUMN = 3 RUNTIME_COLUMN = 4 LASTLINE_COLUMN = 7 - +DEFAULT_FRAME_KILL_REASON = "Manual Frame Kill Request in Cuegui by " + getpass.getuser() class StuckWidget(cuegui.AbstractDockWidget.AbstractDockWidget): """This builds what is displayed on the dock widget""" @@ -1345,7 +1345,7 @@ def logKill(self): if cuegui.Utils.questionBoxYesNo(self, "Confirm", "Kill selected frames?", names): self.log() for frame in self.selectedObjects(): - frame.kill() + frame.kill(reason=DEFAULT_FRAME_KILL_REASON) self.remove() def retryFrame(self): diff --git a/cuegui/tests/MenuActions_tests.py b/cuegui/tests/MenuActions_tests.py index f7a041e74..dea85612e 100644 --- a/cuegui/tests/MenuActions_tests.py +++ b/cuegui/tests/MenuActions_tests.py @@ -958,7 +958,9 @@ def test_kill(self, yesNoMock): self.frame_actions.kill(rpcObjects=[frame]) - self.job.killFrames.assert_called_with(name=[frame_name]) + self.job.killFrames.assert_called_with( + name=[frame_name], + reason="Manual Frame(s) Kill Request in Cuegui by root") @mock.patch('cuegui.Utils.questionBoxYesNo', return_value=True) def test_markAsWaiting(self, yesNoMock): diff --git a/proto/job.proto b/proto/job.proto index 79a0dc234..80b8cceec 100644 --- a/proto/job.proto +++ b/proto/job.proto @@ -885,6 +885,10 @@ message FrameGetWhatThisDependsOnResponse { // Kill message FrameKillRequest { Frame frame = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message FrameKillResponse {} // Empty @@ -1283,6 +1287,10 @@ message JobIsJobPendingResponse { // Kill message JobKillRequest { Job job = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message JobKillResponse {} // Empty @@ -1291,6 +1299,10 @@ message JobKillResponse {} // Empty message JobKillFramesRequest { Job job = 1; FrameSearchCriteria req = 2; + string username = 3; + string pid = 4; + string host_kill = 5; + string reason = 6; } message JobKillFramesResponse {} // Empty @@ -1613,6 +1625,10 @@ message LayerGetWhatThisDependsOnResponse { // KillFrames message LayerKillFramesRequest { Layer layer = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message LayerKillFramesResponse {} // Empty diff --git a/pycue/opencue/wrappers/frame.py b/pycue/opencue/wrappers/frame.py index 5aa528b5d..9c341d8a7 100644 --- a/pycue/opencue/wrappers/frame.py +++ b/pycue/opencue/wrappers/frame.py @@ -15,6 +15,7 @@ """Module for classes related to frames.""" import enum +import getpass import time import os @@ -59,10 +60,18 @@ def eat(self): if self.data.state != job_pb2.FrameState.Value('EATEN'): self.stub.Eat(job_pb2.FrameEatRequest(frame=self.data), timeout=Cuebot.Timeout) - def kill(self): + def kill(self, username=None, pid=None, host_kill=None, reason=None): """Kills the frame.""" + username = username if username else getpass.getuser() + pid = pid if pid else os.getpid() + host_kill = host_kill if host_kill else os.uname()[1] if self.data.state == job_pb2.FrameState.Value('RUNNING'): - self.stub.Kill(job_pb2.FrameKillRequest(frame=self.data), timeout=Cuebot.Timeout) + self.stub.Kill(job_pb2.FrameKillRequest(frame=self.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), + timeout=Cuebot.Timeout) def retry(self): """Retries the frame.""" diff --git a/pycue/opencue/wrappers/job.py b/pycue/opencue/wrappers/job.py index cbb1869d9..2a86714f9 100644 --- a/pycue/opencue/wrappers/job.py +++ b/pycue/opencue/wrappers/job.py @@ -15,6 +15,7 @@ """Module for classes related to jobs.""" import enum +import getpass import os import time @@ -44,9 +45,17 @@ def __init__(self, job=None): self.stub = Cuebot.getStub('job') self.__frameStateTotals = {} - def kill(self): + def kill(self, username=None, pid=None, host_kill=None, reason=None): """Kills the job.""" - self.stub.Kill(job_pb2.JobKillRequest(job=self.data), timeout=Cuebot.Timeout) + username = username if username else getpass.getuser() + pid = pid if pid else os.getpid() + host_kill = host_kill if host_kill else os.uname()[1] + self.stub.Kill(job_pb2.JobKillRequest(job=self.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), + timeout=Cuebot.Timeout) def pause(self): """Pauses the job.""" @@ -56,15 +65,23 @@ def resume(self): """Resumes the job.""" self.stub.Resume(job_pb2.JobResumeRequest(job=self.data), timeout=Cuebot.Timeout) - def killFrames(self, **request): + def killFrames(self, username=None, pid=None, host_kill=None, reason=None, **request): """Kills all frames that match the FrameSearch. :type request: Dict :param request: FrameSearch parameters """ + username = username if username else getpass.getuser() + pid = pid if pid else os.getpid() + host_kill = host_kill if host_kill else os.uname()[1] criteria = opencue.search.FrameSearch.criteriaFromOptions(**request) - self.stub.KillFrames(job_pb2.JobKillFramesRequest(job=self.data, req=criteria), - timeout=Cuebot.Timeout) + self.stub.KillFrames(job_pb2.JobKillFramesRequest(job=self.data, + req=criteria, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), + timeout=Cuebot.Timeout) def eatFrames(self, **request): """Eats all frames that match the FrameSearch. @@ -791,9 +808,9 @@ def children(self): """Returns all job children.""" return self.__children - def kill(self): + def kill(self, username=None, pid=None, host_kill=None, reason=None): """Kills the job.""" - self.asJob().kill() + self.asJob().kill(username, pid, host_kill, reason) def pause(self): """Pauses the job.""" @@ -803,13 +820,13 @@ def resume(self): """Resumes the job.""" self.asJob().resume() - def killFrames(self, **request): + def killFrames(self, username=None, pid=None, host_kill=None, reason=None, **request): """Kills all frames that match the FrameSearch. :type request: Dict :param request: FrameSearch parameters """ - self.asJob().killFrames(**request) + self.asJob().killFrames(username, pid, host_kill, reason, **request) def eatFrames(self, **request): """Eats all frames that match the FrameSearch. diff --git a/pycue/opencue/wrappers/layer.py b/pycue/opencue/wrappers/layer.py index 0e34985bf..7a9782e0e 100644 --- a/pycue/opencue/wrappers/layer.py +++ b/pycue/opencue/wrappers/layer.py @@ -15,6 +15,7 @@ """Module for classes related to job layers.""" import enum +import getpass import os import opencue.api @@ -46,9 +47,16 @@ def __init__(self, layer=None): self.data = layer self.stub = Cuebot.getStub('layer') - def kill(self): + def kill(self, username=None, pid=None, host_kill=None, reason=None): """Kills the entire layer.""" - return self.stub.KillFrames(job_pb2.LayerKillFramesRequest(layer=self.data), + username = username if username else getpass.getuser() + pid = pid if pid else os.getpid() + host_kill = host_kill if host_kill else os.uname()[1] + return self.stub.KillFrames(job_pb2.LayerKillFramesRequest(layer=self.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), timeout=Cuebot.Timeout) def eat(self): diff --git a/pycue/tests/wrappers/frame_test.py b/pycue/tests/wrappers/frame_test.py index d0e7c7ba6..82dcc11ae 100644 --- a/pycue/tests/wrappers/frame_test.py +++ b/pycue/tests/wrappers/frame_test.py @@ -19,6 +19,8 @@ from __future__ import print_function from __future__ import division from __future__ import absolute_import +import getpass +import os import time import unittest @@ -56,10 +58,18 @@ def testKill(self, getStubMock): frame = opencue.wrappers.frame.Frame( job_pb2.Frame(name=TEST_FRAME_NAME, state=job_pb2.RUNNING)) - frame.kill() + username = getpass.getuser() + pid = os.getpid() + host_kill = os.uname()[1] + reason = "Frames Kill Request" + frame.kill(username=username, pid=pid, host_kill=host_kill, reason=reason) stubMock.Kill.assert_called_with( - job_pb2.FrameKillRequest(frame=frame.data), timeout=mock.ANY) + job_pb2.FrameKillRequest(frame=frame.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), timeout=mock.ANY) def testRetry(self, getStubMock): stubMock = mock.Mock() diff --git a/pycue/tests/wrappers/job_test.py b/pycue/tests/wrappers/job_test.py index 0ad2504de..1b9d2d420 100644 --- a/pycue/tests/wrappers/job_test.py +++ b/pycue/tests/wrappers/job_test.py @@ -19,6 +19,7 @@ from __future__ import print_function from __future__ import division from __future__ import absolute_import +import getpass import os import unittest @@ -46,10 +47,18 @@ def testKill(self, getStubMock): job = opencue.wrappers.job.Job( job_pb2.Job(name=TEST_JOB_NAME)) - job.kill() + username = getpass.getuser() + pid = os.getpid() + host_kill = os.uname()[1] + reason = "Job Kill Request" + job.kill(username=username, pid=pid, host_kill=host_kill, reason=reason) stubMock.Kill.assert_called_with( - job_pb2.JobKillRequest(job=job.data), timeout=mock.ANY) + job_pb2.JobKillRequest(job=job.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), timeout=mock.ANY) def testPause(self, getStubMock): stubMock = mock.Mock() @@ -84,10 +93,23 @@ def testKillFrames(self, getStubMock): criteria = opencue.search.FrameSearch.criteriaFromOptions(range=frameRange) job = opencue.wrappers.job.Job( job_pb2.Job(name=TEST_JOB_NAME)) - job.killFrames(range=frameRange) + username = getpass.getuser() + pid = os.getpid() + host_kill = os.uname()[1] + reason = "Job Kill Request" + job.killFrames(range=frameRange, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason) stubMock.KillFrames.assert_called_with( - job_pb2.JobKillFramesRequest(job=job.data, req=criteria), timeout=mock.ANY) + job_pb2.JobKillFramesRequest(job=job.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason, + req=criteria), timeout=mock.ANY) def testEatFrames(self, getStubMock): stubMock = mock.Mock() diff --git a/pycue/tests/wrappers/layer_test.py b/pycue/tests/wrappers/layer_test.py index 4f5578681..8b71047a0 100644 --- a/pycue/tests/wrappers/layer_test.py +++ b/pycue/tests/wrappers/layer_test.py @@ -19,6 +19,8 @@ from __future__ import print_function from __future__ import division from __future__ import absolute_import +import getpass +import os import unittest import mock @@ -45,10 +47,18 @@ def testKill(self, getStubMock): layer = opencue.wrappers.layer.Layer( job_pb2.Layer(name=TEST_LAYER_NAME)) - layer.kill() + username = getpass.getuser() + pid = os.getpid() + host_kill = os.uname()[1] + reason = "Frames Kill Request" + layer.kill(username=username, pid=pid, host_kill=host_kill, reason=reason) stubMock.KillFrames.assert_called_with( - job_pb2.LayerKillFramesRequest(layer=layer.data), timeout=mock.ANY) + job_pb2.LayerKillFramesRequest(layer=layer.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), timeout=mock.ANY) def testEat(self, getStubMock): stubMock = mock.Mock()