From f4d3ea0b717837ae8aad2c6df8fe494a483e1887 Mon Sep 17 00:00:00 2001
From: Ang Gao
Date: Fri, 28 Aug 2020 19:17:25 +0100
Subject: [PATCH 1/4] Replace default retry in executor with an increased value retryer

---
 util/retry/retry.go           |  8 ++++++++
 workflow/executor/executor.go | 13 +++----------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/util/retry/retry.go b/util/retry/retry.go
index f46b1a97ef4c..6ec0a04aa354 100644
--- a/util/retry/retry.go
+++ b/util/retry/retry.go
@@ -20,6 +20,14 @@ var DefaultRetry = wait.Backoff{
 	Jitter:   0.1,
 }
 
+// ExecutorRetry is a retry backoff settings for WorkflowExecutor
+var ExecutorRetry = wait.Backoff{
+	Steps:    8,
+	Duration: 1 * time.Second,
+	Factor:   1.0,
+	Jitter:   0.1,
+}
+
 // IsRetryableKubeAPIError returns if the error is a retryable kubernetes error
 func IsRetryableKubeAPIError(err error) bool {
 	// get original error if it was wrapped
diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go
index 259e7e9ba193..54bc68baacc1 100644
--- a/workflow/executor/executor.go
+++ b/workflow/executor/executor.go
@@ -40,13 +40,6 @@ import (
 	os_specific "github.com/argoproj/argo/workflow/executor/os-specific"
 )
 
-var MainContainerStartRetry = wait.Backoff{
-	Steps:    8,
-	Duration: 1 * time.Second,
-	Factor:   1.0,
-	Jitter:   0.1,
-}
-
 const (
 	// This directory temporarily stores the tarballs of the artifacts before uploading
 	tempOutArtDir = "/tmp/argo/outputs/artifacts"
@@ -619,7 +612,7 @@ func (we *WorkflowExecutor) getPod() (*apiv1.Pod, error) {
 	podsIf := we.ClientSet.CoreV1().Pods(we.Namespace)
 	var pod *apiv1.Pod
 	var err error
-	_ = wait.ExponentialBackoff(retry.DefaultRetry, func() (bool, error) {
+	_ = wait.ExponentialBackoff(retry.ExecutorRetry, func() (bool, error) {
 		pod, err = podsIf.Get(we.PodName, metav1.GetOptions{})
 		if err != nil {
 			log.Warnf("Failed to get pod '%s': %v", we.PodName, err)
@@ -920,7 +913,7 @@ func (we *WorkflowExecutor) Wait() error {
 	annotationUpdatesCh := we.monitorAnnotations(ctx)
 	go we.monitorDeadline(ctx, annotationUpdatesCh)
 
-	_ = wait.ExponentialBackoff(retry.DefaultRetry, func() (bool, error) {
+	_ = wait.ExponentialBackoff(retry.ExecutorRetry, func() (bool, error) {
 		err = we.RuntimeExecutor.Wait(mainContainerID)
 		if err != nil {
 			log.Warnf("Failed to wait for container id '%s': %v", mainContainerID, err)
@@ -947,7 +940,7 @@ func (we *WorkflowExecutor) waitMainContainerStart() (string, error) {
 	var err error
 	var watchIf watch.Interface
 
-	err = wait.ExponentialBackoff(MainContainerStartRetry, func() (bool, error) {
+	err = wait.ExponentialBackoff(retry.ExecutorRetry, func() (bool, error) {
 		watchIf, err = podsIf.Watch(opts)
 		if err != nil {
 			log.Debugf("Failed to establish watch, retrying: %v", err)

From 533414b065bcc2b50d5b6eb72c8c87550218fc58 Mon Sep 17 00:00:00 2001
From: Ang Gao
Date: Fri, 28 Aug 2020 19:41:15 +0100
Subject: [PATCH 2/4] fix per comments from Alex

---
 util/retry/retry.go           | 8 --------
 workflow/executor/executor.go | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/util/retry/retry.go b/util/retry/retry.go
index 6ec0a04aa354..f46b1a97ef4c 100644
--- a/util/retry/retry.go
+++ b/util/retry/retry.go
@@ -20,14 +20,6 @@ var DefaultRetry = wait.Backoff{
 	Jitter:   0.1,
 }
 
-// ExecutorRetry is a retry backoff settings for WorkflowExecutor
-var ExecutorRetry = wait.Backoff{
-	Steps:    8,
-	Duration: 1 * time.Second,
-	Factor:   1.0,
-	Jitter:   0.1,
-}
-
 // IsRetryableKubeAPIError returns if the error is a retryable kubernetes error
 func IsRetryableKubeAPIError(err error) bool {
 	// get original error if it was wrapped
diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go
index 54bc68baacc1..ce3d5b32abee 100644
--- a/workflow/executor/executor.go
+++ b/workflow/executor/executor.go
@@ -45,6 +45,14 @@ const (
 	tempOutArtDir = "/tmp/argo/outputs/artifacts"
 )
 
+// ExecutorRetry is a retry backoff settings for WorkflowExecutor
+var ExecutorRetry = wait.Backoff{
+	Steps:    8,
+	Duration: 1 * time.Second,
+	Factor:   1.0,
+	Jitter:   0.1,
+}
+
 // WorkflowExecutor is program which runs as the init/wait container
 type WorkflowExecutor struct {
 	PodName string

From 2b411b59a3eeb0e6f00067fa6c62583b5c929a82 Mon Sep 17 00:00:00 2001
From: Ang Gao
Date: Fri, 28 Aug 2020 19:45:11 +0100
Subject: [PATCH 3/4] reoder codes for better diff

---
 workflow/executor/executor.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go
index ce3d5b32abee..5d4d8ae9d228 100644
--- a/workflow/executor/executor.go
+++ b/workflow/executor/executor.go
@@ -40,11 +40,6 @@ import (
 	os_specific "github.com/argoproj/argo/workflow/executor/os-specific"
 )
 
-const (
-	// This directory temporarily stores the tarballs of the artifacts before uploading
-	tempOutArtDir = "/tmp/argo/outputs/artifacts"
-)
-
 // ExecutorRetry is a retry backoff settings for WorkflowExecutor
 var ExecutorRetry = wait.Backoff{
 	Steps:    8,
@@ -53,6 +48,11 @@ var ExecutorRetry = wait.Backoff{
 	Jitter:   0.1,
 }
 
+const (
+	// This directory temporarily stores the tarballs of the artifacts before uploading
+	tempOutArtDir = "/tmp/argo/outputs/artifacts"
+)
+
 // WorkflowExecutor is program which runs as the init/wait container
 type WorkflowExecutor struct {
 	PodName string

From 05e9f8dcb0fa69cbfa0cc56a5fd9c3cbd9344e0b Mon Sep 17 00:00:00 2001
From: Ang Gao
Date: Fri, 28 Aug 2020 19:46:42 +0100
Subject: [PATCH 4/4] fix var reference

---
 workflow/executor/executor.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go
index 5d4d8ae9d228..833b7aefd86a 100644
--- a/workflow/executor/executor.go
+++ b/workflow/executor/executor.go
@@ -620,7 +620,7 @@ func (we *WorkflowExecutor) getPod() (*apiv1.Pod, error) {
 	podsIf := we.ClientSet.CoreV1().Pods(we.Namespace)
 	var pod *apiv1.Pod
 	var err error
-	_ = wait.ExponentialBackoff(retry.ExecutorRetry, func() (bool, error) {
+	_ = wait.ExponentialBackoff(ExecutorRetry, func() (bool, error) {
 		pod, err = podsIf.Get(we.PodName, metav1.GetOptions{})
 		if err != nil {
 			log.Warnf("Failed to get pod '%s': %v", we.PodName, err)
@@ -921,7 +921,7 @@ func (we *WorkflowExecutor) Wait() error {
 	annotationUpdatesCh := we.monitorAnnotations(ctx)
 	go we.monitorDeadline(ctx, annotationUpdatesCh)
 
-	_ = wait.ExponentialBackoff(retry.ExecutorRetry, func() (bool, error) {
+	_ = wait.ExponentialBackoff(ExecutorRetry, func() (bool, error) {
 		err = we.RuntimeExecutor.Wait(mainContainerID)
 		if err != nil {
 			log.Warnf("Failed to wait for container id '%s': %v", mainContainerID, err)
@@ -948,7 +948,7 @@ func (we *WorkflowExecutor) waitMainContainerStart() (string, error) {
 	var err error
 	var watchIf watch.Interface
 
-	err = wait.ExponentialBackoff(retry.ExecutorRetry, func() (bool, error) {
+	err = wait.ExponentialBackoff(ExecutorRetry, func() (bool, error) {
 		watchIf, err = podsIf.Watch(opts)
 		if err != nil {
 			log.Debugf("Failed to establish watch, retrying: %v", err)
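
With all four patches applied, the executor's getPod, Wait, and waitMainContainerStart paths retry Kubernetes API calls with the package-level ExecutorRetry backoff: up to 8 attempts roughly one second apart (Factor 1.0 keeps the delay constant, Jitter 0.1 adds up to 10% randomization), a larger window than the previous retry.DefaultRetry. Below is a minimal, self-contained Go sketch of how wait.ExponentialBackoff consumes such a backoff; it is not part of the patch, and flakyGet is a hypothetical stand-in for a call like podsIf.Get.

package main

import (
	"errors"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// Same values as the ExecutorRetry variable added in workflow/executor/executor.go.
var executorRetry = wait.Backoff{
	Steps:    8,               // at most 8 attempts
	Duration: 1 * time.Second, // base delay between attempts
	Factor:   1.0,             // 1.0 keeps the delay constant instead of growing it
	Jitter:   0.1,             // adds up to 10% random noise to each delay
}

func main() {
	attempts := 0

	// flakyGet is a hypothetical stand-in for a Kubernetes API call such as
	// podsIf.Get; here it fails twice before succeeding.
	flakyGet := func() error {
		attempts++
		if attempts < 3 {
			return errors.New("transient API error")
		}
		return nil
	}

	err := wait.ExponentialBackoff(executorRetry, func() (bool, error) {
		if getErr := flakyGet(); getErr != nil {
			fmt.Printf("attempt %d failed: %v\n", attempts, getErr)
			return false, nil // not done yet: sleep ~1s and try again
		}
		return true, nil // done: stop retrying
	})
	// err is nil on success, or wait.ErrWaitTimeout once all 8 steps are exhausted.
	fmt.Printf("finished after %d attempts, err=%v\n", attempts, err)
}

Returning false with a nil error from the condition tells ExponentialBackoff to sleep and retry, returning true stops the loop, and a non-nil error aborts it immediately.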