Skip to content

Commit

Permalink
fix: pending error not repackaged (#993)
Browse files Browse the repository at this point in the history
* fix: replace `max_attempts` in logs by correct value (`BUILDER_KANIKO_STARTUP_MAX_ATTEMPTS`)

Signed-off-by: Guilhem Barthés <guilhem.barthes@owkin.com>

* feat: tests

Signed-off-by: Guilhem Barthés <guilhem.barthes@owkin.com>

* chore: remove useless ,

Signed-off-by: Guilhem Barthés <guilhem.barthes@owkin.com>

---------

Signed-off-by: Guilhem Barthés <guilhem.barthes@owkin.com>
  • Loading branch information
guilhem-barthes authored Sep 17, 2024
1 parent 71dc1f2 commit 9e03a1c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
6 changes: 2 additions & 4 deletions backend/builder/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ def watch_pod(k8s_client: kubernetes.client.CoreV1Api, name: str) -> None:
PodTimeoutError: this exception is raised if the pod does not reach the running state after some time
"""
attempt = 0
# with 60 attempts we wait max 2 min with a pending pod
max_attempts = 60

# This variable is used to track the current status through retries
previous_pod_status = None
Expand All @@ -104,7 +102,7 @@ def watch_pod(k8s_client: kubernetes.client.CoreV1Api, name: str) -> None:
reason=pod_state.reason,
message=pod_state.message,
attempt=attempt,
max_attempts=max_attempts,
max_attempts=BUILDER_KANIKO_STARTUP_MAX_ATTEMPTS,
)

if pod_state.status == ObjectState.COMPLETED:
Expand Down Expand Up @@ -132,7 +130,7 @@ def watch_pod(k8s_client: kubernetes.client.CoreV1Api, name: str) -> None:

time.sleep(0.2)

raise PodTimeoutError(f"Pod {name} didn't complete after {max_attempts} attempts")
raise PodTimeoutError(f"Pod {name} didn't complete after {BUILDER_KANIKO_STARTUP_MAX_ATTEMPTS} attempts")


def retrieve_pod_status(k8s_client: kubernetes.client.CoreV1Api, pod_name: str) -> kubernetes.client.V1PodStatus:
Expand Down
14 changes: 14 additions & 0 deletions backend/builder/tests/test_image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import orchestrator
from builder.exceptions import BuildError
from builder.exceptions import BuildRetryError
from builder.exceptions import PodTimeoutError
from builder.image_builder import image_builder
from substrapp.compute_tasks import utils

Expand Down Expand Up @@ -46,6 +48,18 @@ def test_build_image_if_missing_image_build_needed(mocker: MockerFixture, functi
assert m_build_function_image.call_args.args[1] == function


def test__build_container_image_podtimeouterror(mocker: MockerFixture) -> None:
mocker.patch("kubernetes.config.load_incluster_config")
mocker.patch("builder.image_builder.image_builder._assert_dockerfile_exist")
mocker.patch("builder.image_builder.image_builder.pod_exists", return_value=True)
watch_pod = mocker.patch("builder.image_builder.image_builder.watch_pod", side_effect=PodTimeoutError())

with pytest.raises(BuildRetryError):
image_builder._build_container_image("path", "name")

watch_pod.assert_called_once()


def test_get_entrypoint_from_dockerfile_valid_dockerfile(tmp_path: pathlib.Path):
dockerfile_path = tmp_path / "Dockerfile"
dockerfile_path.write_text(_VALID_DOCKERFILE)
Expand Down
19 changes: 19 additions & 0 deletions backend/builder/tests/test_kubernetes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
from unittest import mock

import kubernetes
import pytest
from pytest_mock import MockerFixture

from builder.exceptions import PodTimeoutError
from builder.kubernetes import ObjectState
from builder.kubernetes import PodState
from builder.kubernetes import get_pod_logs
from builder.kubernetes import watch_pod


def test_get_pod_logs(mocker):
Expand All @@ -26,3 +32,16 @@ def test_get_pod_logs_bad_request():
k8s_client = kubernetes.client.CoreV1Api()
logs = get_pod_logs(k8s_client, "pod_name", "container_name", ignore_pod_not_found=True)
assert "pod_name" in logs


def test_pod_pending_too_long(mocker: MockerFixture) -> None:
k8s_client = None
pod_name = "unused"
retrieve_pod_status = mocker.patch("builder.kubernetes.retrieve_pod_status")
mocker.patch("builder.kubernetes._get_pod_state", return_value=PodState(ObjectState.PENDING))
mocker.patch("builder.kubernetes.BUILDER_KANIKO_STARTUP_MAX_ATTEMPTS", new=1)

with pytest.raises(PodTimeoutError):
watch_pod(k8s_client, pod_name)

retrieve_pod_status.assert_called_once_with(k8s_client, pod_name)

0 comments on commit 9e03a1c

Please sign in to comment.