From 7d4ec30f15af952cdcf03d3c28d9ce9608d861ab Mon Sep 17 00:00:00 2001 From: Daniel Neilson <53624638+ddneilson@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:06:26 -0500 Subject: [PATCH] fix: error due to out-of-range process exit code (#339) Problem: On Win32 systems we may receive process exit codes that are outside of the range of a 32-bit signed integer (as expected by the UpdateWorkerSchedule API), but in range of a 32-bit unsigned integer. This causes a validation error to be raised which the agent treats as an unrecoverable error and exits. Solution: Convert all process exit codes to be in range of a 32-bit signed integer by casting the bits to the type. Signed-off-by: Daniel Neilson <53624638+ddneilson@users.noreply.github.com> --- .../scheduler/scheduler.py | 10 ++++++- test/unit/scheduler/test_scheduler.py | 29 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/deadline_worker_agent/scheduler/scheduler.py b/src/deadline_worker_agent/scheduler/scheduler.py index 3be2b3e9..6967ee02 100644 --- a/src/deadline_worker_agent/scheduler/scheduler.py +++ b/src/deadline_worker_agent/scheduler/scheduler.py @@ -549,6 +549,12 @@ def _updated_action_to_boto( ) -> UpdatedSessionActionInfo: updated_action = UpdatedSessionActionInfo() + def _exit_code_to_32bit_signed(exitcode: int) -> int: + # Workaround to ensure that the process exit code is returned in range of + # a 32-bit signed integer as expected by the UpdateWorkerSchedule API. 
+ as_uint32_bytes = (exitcode & 0xFFFFFFFF).to_bytes(4, "big", signed=False) + return int.from_bytes(as_uint32_bytes, "big", signed=True) + # Optional fields if action_updated.start_time: updated_action["startedAt"] = action_updated.start_time @@ -558,7 +564,9 @@ def _updated_action_to_boto( updated_action["updatedAt"] = action_updated.update_time if action_updated.status: if action_updated.status.exit_code is not None: - updated_action["processExitCode"] = action_updated.status.exit_code + updated_action["processExitCode"] = _exit_code_to_32bit_signed( + action_updated.status.exit_code + ) if action_updated.completed_status: if action_updated.status.fail_message: updated_action["progressMessage"] = action_updated.status.fail_message diff --git a/test/unit/scheduler/test_scheduler.py b/test/unit/scheduler/test_scheduler.py index 30ff03e6..61e0e1f1 100644 --- a/test/unit/scheduler/test_scheduler.py +++ b/test/unit/scheduler/test_scheduler.py @@ -533,6 +533,35 @@ def test_return_sessionactions_from_stopped_session( assert action_status.end_time is not None assert action_status.start_time <= action_status.end_time + @pytest.mark.parametrize( + "exitcode, expected_result", + [ + pytest.param(None, None, id="None"), + pytest.param(0, 0, id="Zero"), + pytest.param(0x7FFFFFFF, 0x7FFFFFFF, id="maxint"), + pytest.param(-2147483648, -2147483648, id="minint_decimal"), + pytest.param(0x80000000, -2147483648, id="minint_hex"), + pytest.param(0xFFFD0000, -196608, id="out-of-range-32bit"), + pytest.param(0xFFFFFFFD0000, -196608, id="out-of-range-big"), + ], + ) + def test_updated_action_to_boto_exitcode( + self, scheduler: WorkerScheduler, exitcode: Optional[int], expected_result: Optional[int] + ) -> None: + # GIVEN + action_status = SessionActionStatus( + id="1234", status=ActionStatus(state=ActionState.FAILED, exit_code=exitcode) + ) + + # WHEN + status_as_boto = scheduler._updated_action_to_boto(action_status) + + # THEN + if expected_result is None: + assert 
status_as_boto.get("processExitCode", "ABSENT") == "ABSENT" + else: + assert status_as_boto.get("processExitCode", "FAIL") == expected_result + class TestCreateNewSessions: """Tests for WorkerScheduler._create_new_sessions"""