From 43d49df4126d2348d08764c23add10cbd67f3c96 Mon Sep 17 00:00:00 2001 From: Dawid Rusnak Date: Thu, 5 Sep 2024 17:27:52 +0200 Subject: [PATCH] fix: retry cloning Git repository on failure (#5825) * fix: retry cloning Git repository on failure * fix: retry executing Test Workflows in execute --- .../testworkflow-toolkit/commands/execute.go | 44 +++++++++++++++---- cmd/testworkflow-toolkit/commands/clone.go | 16 ++++--- cmd/testworkflow-toolkit/commands/tarball.go | 2 +- cmd/testworkflow-toolkit/commands/utils.go | 17 +++++++ 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/cmd/tcl/testworkflow-toolkit/commands/execute.go b/cmd/tcl/testworkflow-toolkit/commands/execute.go index 4f63c0fe85f..43331fc87bc 100644 --- a/cmd/tcl/testworkflow-toolkit/commands/execute.go +++ b/cmd/tcl/testworkflow-toolkit/commands/execute.go @@ -35,6 +35,14 @@ import ( "github.com/kubeshop/testkube/pkg/utils" ) +const ( + CreateExecutionRetryOnFailureMaxAttempts = 5 + CreateExecutionRetryOnFailureBaseDelay = 100 * time.Millisecond + + GetExecutionRetryOnFailureMaxAttempts = 10 + GetExecutionRetryOnFailureDelay = 300 * time.Millisecond +) + type testExecutionDetails struct { Id string `json:"id"` Name string `json:"name"` @@ -157,17 +165,28 @@ func buildWorkflowExecution(workflow testworkflowsv1.StepExecuteWorkflow, async ui.Errf("failed to decode tags: %s: %s", workflow.Name, err.Error()) } - exec, err := c.ExecuteTestWorkflow(workflow.Name, testkube.TestWorkflowExecutionRequest{ - Name: workflow.ExecutionName, - Config: testworkflows.MapConfigValueKubeToAPI(workflow.Config), - DisableWebhooks: env.ExecutionDisableWebhooks(), - Tags: tags, - }) - execName := exec.Name + var exec testkube.TestWorkflowExecution + for i := 0; i < CreateExecutionRetryOnFailureMaxAttempts; i++ { + exec, err = c.ExecuteTestWorkflow(workflow.Name, testkube.TestWorkflowExecutionRequest{ + Name: workflow.ExecutionName, + Config: testworkflows.MapConfigValueKubeToAPI(workflow.Config), + DisableWebhooks: env.ExecutionDisableWebhooks(), + Tags: tags, + }) + if err == nil { + break + } + if i+1 < CreateExecutionRetryOnFailureMaxAttempts { + nextDelay := time.Duration(i+1) * CreateExecutionRetryOnFailureBaseDelay + ui.Errf("failed to execute test workflow: retrying in %s (attempt %d/%d): %s: %s", nextDelay.String(), i+2, CreateExecutionRetryOnFailureMaxAttempts, workflow.Name, err.Error()) + time.Sleep(nextDelay) + } + } if err != nil { ui.Errf("failed to execute test workflow: %s: %s", workflow.Name, err.Error()) return } + execName := exec.Name instructions.PrintOutput(env.Ref(), "testworkflow-start", &testWorkflowExecutionDetails{ Id: exec.Id, @@ -189,7 +208,16 @@ func buildWorkflowExecution(workflow testworkflowsv1.StepExecuteWorkflow, async loop: for { time.Sleep(100 * time.Millisecond) - exec, err = c.GetTestWorkflowExecution(exec.Id) + for i := 0; i < GetExecutionRetryOnFailureMaxAttempts; i++ { + exec, err = c.GetTestWorkflowExecution(exec.Id) + if err == nil { + break + } + if i+1 < GetExecutionRetryOnFailureMaxAttempts { + ui.Errf("error while getting execution result: retrying in %s (attempt %d/%d): %s: %s", GetExecutionRetryOnFailureDelay.String(), i+2, GetExecutionRetryOnFailureMaxAttempts, ui.LightCyan(execName), err.Error()) + time.Sleep(GetExecutionRetryOnFailureDelay) + } + } if err != nil { ui.Errf("error while getting execution result: %s: %s", ui.LightCyan(execName), err.Error()) return diff --git a/cmd/testworkflow-toolkit/commands/clone.go b/cmd/testworkflow-toolkit/commands/clone.go index 0ec74249ad7..a143372af66 100644 --- a/cmd/testworkflow-toolkit/commands/clone.go +++ b/cmd/testworkflow-toolkit/commands/clone.go @@ -7,6 +7,7 @@ import ( "path/filepath" "regexp" "strings" + "time" "github.com/kballard/go-shellquote" "github.com/otiai10/copy" @@ -16,6 +17,11 @@ import ( "github.com/kubeshop/testkube/pkg/ui" ) +const ( + CloneRetryOnFailureMaxAttempts = 5 + CloneRetryOnFailureBaseDelay = 100 * time.Millisecond +) + var ( protocolRe = regexp.MustCompile(`^[^:]+://`) ) @@ -97,19 +103,19 @@ func NewCloneCmd() *cobra.Command { if len(paths) == 0 { ui.Debug("full checkout") if revision == "" { - err = Run("git", "clone", configArgs, authArgs, "--depth", 1, "--verbose", uri.String(), outputPath) + err = RunWithRetry(CloneRetryOnFailureMaxAttempts, CloneRetryOnFailureBaseDelay, "git", "clone", configArgs, authArgs, "--depth", 1, "--verbose", uri.String(), outputPath) } else { - err = Run("git", "clone", configArgs, authArgs, "--depth", 1, "--branch", revision, "--verbose", uri.String(), outputPath) + err = RunWithRetry(CloneRetryOnFailureMaxAttempts, CloneRetryOnFailureBaseDelay, "git", "clone", configArgs, authArgs, "--depth", 1, "--branch", revision, "--verbose", uri.String(), outputPath) } ui.ExitOnError("cloning repository", err) } else { ui.Debug("sparse checkout") - err = Run("git", "clone", configArgs, authArgs, "--filter=blob:none", "--no-checkout", "--sparse", "--depth", 1, "--verbose", uri.String(), outputPath) + err = RunWithRetry(CloneRetryOnFailureMaxAttempts, CloneRetryOnFailureBaseDelay, "git", "clone", configArgs, authArgs, "--filter=blob:none", "--no-checkout", "--sparse", "--depth", 1, "--verbose", uri.String(), outputPath) ui.ExitOnError("cloning repository", err) - err = Run("git", "-C", outputPath, configArgs, "sparse-checkout", "set", "--no-cone", paths) + err = RunWithRetry(CloneRetryOnFailureMaxAttempts, CloneRetryOnFailureBaseDelay, "git", "-C", outputPath, configArgs, "sparse-checkout", "set", "--no-cone", paths) ui.ExitOnError("sparse checkout repository", err) if revision != "" { - err = Run("git", "-C", outputPath, configArgs, "fetch", authArgs, "--depth", 1, "origin", revision) + err = RunWithRetry(CloneRetryOnFailureMaxAttempts, CloneRetryOnFailureBaseDelay, "git", "-C", outputPath, configArgs, "fetch", authArgs, "--depth", 1, "origin", revision) ui.ExitOnError("fetching revision", err) err = Run("git", "-C", outputPath, configArgs, "checkout", "FETCH_HEAD") ui.ExitOnError("checking out head", err) diff --git a/cmd/testworkflow-toolkit/commands/tarball.go b/cmd/testworkflow-toolkit/commands/tarball.go index f7c1e8f1db4..39d5b41dcf6 100644 --- a/cmd/testworkflow-toolkit/commands/tarball.go +++ b/cmd/testworkflow-toolkit/commands/tarball.go @@ -57,7 +57,7 @@ func NewTarballCmd() *cobra.Command { os.Exit(1) } attempt++ - fmt.Printf("Retrying - attempt %d/%d.\n", attempt, TarballRetryMaxAttempts) + fmt.Printf("retrying - attempt %d/%d.\n", attempt, TarballRetryMaxAttempts) } } }, diff --git a/cmd/testworkflow-toolkit/commands/utils.go b/cmd/testworkflow-toolkit/commands/utils.go index 5a3bd990357..37a77247d69 100644 --- a/cmd/testworkflow-toolkit/commands/utils.go +++ b/cmd/testworkflow-toolkit/commands/utils.go @@ -1,9 +1,11 @@ package commands import ( + "fmt" "os" "os/exec" "strconv" + "time" ) func concat(args ...interface{}) []string { @@ -33,3 +35,18 @@ func Run(c string, args ...interface{}) error { sub.Stderr = os.Stderr return sub.Run() } + +func RunWithRetry(retries int, delay time.Duration, c string, args ...interface{}) (err error) { + for i := 0; i < retries; i++ { + err = Run(c, args...) + if err == nil { + return nil + } + if i+1 < retries { + nextDelay := time.Duration(i+1) * delay + fmt.Printf("error, trying again in %s (attempt %d/%d): %s\n", nextDelay.String(), i+2, retries, err.Error()) + time.Sleep(nextDelay) + } + } + return err +}