Skip to content

Commit

Permalink
fix: error due to out-of-range process exit code
Browse files Browse the repository at this point in the history
Problem:
On Win32 systems we may receive process exit codes that are outside of
the range of a 32-bit signed integer (as expected by the
UpdateWorkerSchedule API), but in range of a 32-bit unsigned integer.
This causes a validation error to be raised, which the agent treats as an
unrecoverable error and exits.

Solution:
Convert all process exit codes to be in range of a 32-bit signed integer
by keeping only the low 32 bits of the exit code and reinterpreting those
bits as a signed (two's-complement) value.

Signed-off-by: Daniel Neilson <53624638+ddneilson@users.noreply.github.com>
  • Loading branch information
ddneilson committed Jun 20, 2024
1 parent bcac41e commit 03dcde1
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/deadline_worker_agent/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,12 @@ def _updated_action_to_boto(
) -> UpdatedSessionActionInfo:
updated_action = UpdatedSessionActionInfo()

def _exit_code_to_32bit_signed(exitcode: int) -> int:
    """Fold a process exit code into the range of a 32-bit signed integer.

    Only the low 32 bits of ``exitcode`` are kept, and those bits are then
    read as a two's-complement value. This is a workaround so that the exit
    code is in the range expected by the UpdateWorkerSchedule API.

    Args:
        exitcode: The process exit code; may be negative or wider than 32 bits.

    Returns:
        The value of the low 32 bits of ``exitcode`` interpreted as a signed
        32-bit integer.
    """
    low_bits = exitcode & 0xFFFFFFFF
    # A set top bit means the 32-bit pattern is negative in two's complement,
    # so shift it down by 2**32 to get the signed value.
    if low_bits & 0x80000000:
        return low_bits - 0x100000000
    return low_bits

# Optional fields
if action_updated.start_time:
updated_action["startedAt"] = action_updated.start_time
Expand All @@ -558,7 +564,9 @@ def _updated_action_to_boto(
updated_action["updatedAt"] = action_updated.update_time
if action_updated.status:
if action_updated.status.exit_code is not None:
updated_action["processExitCode"] = action_updated.status.exit_code
updated_action["processExitCode"] = _exit_code_to_32bit_signed(
action_updated.status.exit_code
)
if action_updated.completed_status:
if action_updated.status.fail_message:
updated_action["progressMessage"] = action_updated.status.fail_message
Expand Down
29 changes: 29 additions & 0 deletions test/unit/scheduler/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,35 @@ def test_return_sessionactions_from_stopped_session(
assert action_status.end_time is not None
assert action_status.start_time <= action_status.end_time

@pytest.mark.parametrize(
    "exitcode, expected_result",
    [
        pytest.param(None, None, id="None"),
        pytest.param(0, 0, id="Zero"),
        pytest.param(0x7FFFFFFF, 0x7FFFFFFF, id="maxint"),
        pytest.param(-2147483648, -2147483648, id="minint_decimal"),
        pytest.param(0x80000000, -2147483648, id="minint_hex"),
        pytest.param(0xFFFD0000, -196608, id="out-of-range-32bit"),
        pytest.param(0xFFFFFFFD0000, -196608, id="out-of-range-big"),
    ],
)
def test_updated_action_to_boto_exitcode(
    self, scheduler: WorkerScheduler, exitcode: Optional[int], expected_result: Optional[int]
) -> None:
    """Check the "processExitCode" value produced by _updated_action_to_boto.

    When no exit code is given the key must be absent from the result;
    otherwise the reported value must equal the expected 32-bit signed
    integer for that exit code.
    """
    # GIVEN a failed action carrying the parametrized exit code
    failed_status = ActionStatus(state=ActionState.FAILED, exit_code=exitcode)
    action_status = SessionActionStatus(id="1234", status=failed_status)

    # WHEN
    boto_payload = scheduler._updated_action_to_boto(action_status)

    # THEN
    # The lookup default doubles as a sentinel: "ABSENT" is the value we want
    # to see when no exit code is expected, while "FAIL" can never equal an
    # expected integer and so fails the comparison if the key is missing.
    if expected_result is None:
        missing_marker = "ABSENT"
        wanted = "ABSENT"
    else:
        missing_marker = "FAIL"
        wanted = expected_result
    assert boto_payload.get("processExitCode", missing_marker) == wanted


class TestCreateNewSessions:
"""Tests for WorkerScheduler._create_new_sessions"""
Expand Down

0 comments on commit 03dcde1

Please sign in to comment.