From f05096ac7d5c6e7a535772966ccbbb2e07a325ef Mon Sep 17 00:00:00 2001
From: Marco Donadoni
Date: Fri, 23 Feb 2024 14:05:19 +0100
Subject: [PATCH] feat(manager): increase termination period of run-batch pods (#572)

---
 reana_workflow_controller/config.py               | 8 ++++++++
 reana_workflow_controller/workflow_run_manager.py | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/reana_workflow_controller/config.py b/reana_workflow_controller/config.py
index e1905f91..122606fb 100644
--- a/reana_workflow_controller/config.py
+++ b/reana_workflow_controller/config.py
@@ -208,3 +208,11 @@ def _env_vars_dict_to_k8s_list(env_vars):
     RunStatus.pending,
 ]
 """Alive workflow statuses."""
+
+REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD = int(
+    os.getenv("REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD", "120")
+)
+"""Grace period before terminating the job controller and workflow engine pod.
+
+The job controller needs to clean up all the running jobs before the end of the grace period.
+"""
diff --git a/reana_workflow_controller/workflow_run_manager.py b/reana_workflow_controller/workflow_run_manager.py
index 7488d39c..4dd22751 100644
--- a/reana_workflow_controller/workflow_run_manager.py
+++ b/reana_workflow_controller/workflow_run_manager.py
@@ -63,6 +63,7 @@
     JOB_CONTROLLER_CONTAINER_PORT,
     JOB_CONTROLLER_ENV_VARS,
     JOB_CONTROLLER_SHUTDOWN_ENDPOINT,
+    REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD,
     REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_LIMIT,
     REANA_KUBERNETES_JOBS_MEMORY_LIMIT,
     REANA_KUBERNETES_JOBS_TIMEOUT_LIMIT,
@@ -662,6 +663,7 @@ def _create_job_spec(
             containers=containers,
             node_selector=REANA_RUNTIME_BATCH_KUBERNETES_NODE_LABEL,
             init_containers=[],
+            termination_grace_period_seconds=REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD,
         )
         spec.template.spec.service_account_name = (
             REANA_RUNTIME_KUBERNETES_SERVICEACCOUNT_NAME