Skip to content

Commit

Permalink
Merge pull request #2736 from buildkite/etxtbsy-retry-tweak
Browse files Browse the repository at this point in the history
Tweak ETXTBSY retry, and be helpful for ENOENT
  • Loading branch information
DrJosh9000 authored Apr 22, 2024
2 parents 6ebf6cb + 6420d0b commit 0b9072f
Showing 1 changed file with 46 additions and 8 deletions.
54 changes: 46 additions & 8 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"slices"
"strconv"
"strings"
"syscall"
"time"

"github.com/buildkite/agent/v3/agent/plugin"
Expand Down Expand Up @@ -421,6 +422,31 @@ func logOpenedHookInfo(l shell.Logger, debug bool, hookName, path string) {
}
}

func logMissingHookInfo(l shell.Logger, wrapperPath string) {
// It's unlikely, but possible, that the script wrapper was spontaneously
// deleted or corrupted (it's usually in /tmp, which is fair game).
// A common setup error is to try to run a Bash hook in a container or other
// environment without Bash (or Bash is not in the expected location).
shebang, err := shellscript.ShebangLine(wrapperPath)
if err != nil {
// It's reasonable to assume the script wrapper was spontaneously
// deleted, or had something equally horrible happen to it.
l.Errorf("The %s hook failed to run - perhaps the wrapper script %q was spontaneously deleted", wrapperPath)
return
}
interpreter := strings.TrimPrefix(shebang, "#!")
if interpreter == "" {
// Either the script never had a shebang, or the script was
// spontaneously corrupted.
// If it didn't have a shebang line, we defaulted to using Bash, and if
// that's not present we already logged a warning.
// If it was spontaneously corrupted, we should expect a different error
// than ENOENT.
return
}
l.Errorf("The %s hook failed to run - perhaps the script interpreter %q is missing", interpreter)
}

func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName string, hookCfg HookConfig) error {
defer e.redactors.Flush()

Expand Down Expand Up @@ -449,16 +475,20 @@ func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName strin
e.shell.Promptf("%s", process.FormatCommand(cleanHookPath, []string{}))
}

const maxHookRetry = 3
const maxHookRetry = 30

// Run the wrapper script
if err := roko.NewRetrier(
roko.WithStrategy(roko.Constant(time.Second)),
roko.WithStrategy(roko.Constant(100*time.Millisecond)),
roko.WithMaxAttempts(maxHookRetry),
).DoWithContext(ctx, func(r *roko.Retrier) error {
// Run the script and only retry on fork/exec errors
// Run the script and only retry on ETXTBSY.
// This error occurs because of an unavoidable race between forking
// (which acquires open file descriptors of the parent process) and
// writing an executable (the script wrapper).
// See https://github.com/golang/go/issues/22315.
err := e.shell.RunScript(ctx, script.Path(), hookCfg.Env)
if perr := new(os.PathError); errors.As(err, &perr) && perr.Op == "fork/exec" {
if errors.Is(err, syscall.ETXTBSY) {
return err
}
r.Break()
Expand All @@ -476,10 +506,18 @@ func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName strin
}
}

// If the error is from fork/exec, then inspect the file to see why it failed to be executed,
// even after the retry
if perr := new(os.PathError); errors.As(err, &perr) && perr.Op == "fork/exec" {
logOpenedHookInfo(e.shell.Logger, e.Debug, hookName, perr.Path)
switch {
case errors.Is(err, syscall.ETXTBSY):
// If the underlying error is _still_ ETXTBSY, then inspect the file
// to see what process had it open for write, to log something helpful
logOpenedHookInfo(e.shell.Logger, e.Debug, hookName, script.Path())

case errors.Is(err, syscall.ENOENT):
// Unfortunately the wrapping os.PathError's Path is always the
// program we tried to exec, even if the missing file/directory was
// actually the interpreter specified on the shebang line.
// Try to figure out which part is missing from the wrapper.
logMissingHookInfo(e.shell.Logger, script.Path())
}

return err
Expand Down

0 comments on commit 0b9072f

Please sign in to comment.