Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry fork/exec errors when running hook #2325

Merged
merged 9 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions hook/scriptwrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ func NewScriptWrapper(opts ...scriptWrapperOpt) (*ScriptWrapper, error) {
if err != nil {
return nil, err
}
wrap.beforeEnvFile.Close()
if err := wrap.beforeEnvFile.Close(); err != nil {
return nil, fmt.Errorf("failed to close before env file: %w", err)
}

// We'll then pump the ENV _after_ the hook into this temp file
wrap.afterEnvFile, err = shell.TempFileWithExtension(
Expand All @@ -192,7 +194,9 @@ func NewScriptWrapper(opts ...scriptWrapperOpt) (*ScriptWrapper, error) {
if err != nil {
return nil, err
}
wrap.afterEnvFile.Close()
if err := wrap.afterEnvFile.Close(); err != nil {
return nil, fmt.Errorf("failed to close after env file: %w", err)
}

absolutePathToHook, err := filepath.Abs(wrap.hookPath)
if err != nil {
Expand Down Expand Up @@ -229,9 +233,14 @@ func NewScriptWrapper(opts ...scriptWrapperOpt) (*ScriptWrapper, error) {
// Make script executable
err = utils.ChmodExecutable(wrap.scriptFile.Name())
if err != nil {
return wrap, err
return nil, err
}

// the defered close attempt will discard any errors,
// and attempting to execute a unclosed script will fail with ETXTBSY
if err := wrap.scriptFile.Close(); err != nil {
return nil, fmt.Errorf("failed to close script file: %w", err)
}
return wrap, nil
}

Expand Down
45 changes: 45 additions & 0 deletions internal/file/is_opened.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package file

import (
"fmt"
"os"
"strconv"

"github.com/buildkite/agent/v3/internal/job/shell"
)

// IsOpened returns true if the file at the given path is opened by the current process.
func IsOpened(l shell.Logger, debug bool, path string) (bool, error) {
fdEntries, err := os.ReadDir("/dev/fd")
if err != nil {
return false, fmt.Errorf("failed to read /dev/fd: %w", err)
}

for _, fdEntry := range fdEntries {
fd, err := strconv.ParseInt(fdEntry.Name(), 10, 64)
if err != nil {
if debug {
l.Warningf("Failed to parse fd %s: %s", fd, err)
}
continue
}

if fd <= stderrFd {
continue
}

fdPath, err := os.Readlink(fmt.Sprintf("/dev/fd/%d", fd))
if err != nil {
if debug {
l.Warningf("Failed to readlink /dev/fd/%d: %v", fd, err)
}
continue
}

if fdPath == path {
return true, nil
}
}

return false, nil
}
93 changes: 93 additions & 0 deletions internal/file/opened_by.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package file

import (
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"strconv"

"github.com/buildkite/agent/v3/internal/job/shell"
)

const stderrFd = 2

var (
ErrFileNotOpen = errors.New("file not open, or the procces that opened it can't be found")
numeric = regexp.MustCompile("^[0-9]+$")
)

// OpenedBy attempts to find the executable that opened the given file.
func OpenedBy(l shell.Logger, debug bool, path string) (string, error) {
pidEntries, err := os.ReadDir("/proc")
if err != nil {
return "", fmt.Errorf("failed to read /proc: %w", err)
}

absPath, err := filepath.Abs(path)
if err != nil {
return "", fmt.Errorf("failed to get absolute path: %w", err)
}

for _, p := range pidEntries {
pid := p.Name()

if !numeric.MatchString(pid) || !openedByPid(l, debug, absPath, pid) {
continue
}

// /proc/<pid>/exe is a symlink to the executable
exe, err := os.Readlink(fmt.Sprintf("/proc/%s/exe", pid))
if err != nil {
if debug {
l.Warningf("Failed to read executable for pid %s: %v", pid, err)
}
continue
}

return exe, nil
}

return "", ErrFileNotOpen
}

func openedByPid(l shell.Logger, debug bool, absPath string, pid string) bool {
dirEntries, err := os.ReadDir(fmt.Sprintf("/proc/%s/fd", pid))
if err != nil {
if debug {
l.Warningf("Failed to read /proc/%s/fd: %v", pid, err)
}
// the process has gone away, or we don't have permission to read it, ignore and move on
return false
}

for _, dirEntry := range dirEntries {
fd, err := strconv.ParseInt(dirEntry.Name(), 10, 64)
if err != nil {
if debug {
l.Warningf("Failed to parse fd %s: %s", fd, err)
}
continue
}

// 0 = stdin, 1 = stdout, 2 = stderr
if fd <= stderrFd {
continue
}

fPath, err := os.Readlink(fmt.Sprintf("/proc/%s/fd/%s", pid, dirEntry.Name()))
if err != nil {
if debug {
l.Warningf("Failed to read link for fd", "error", err)
}
continue
}

if fPath == absPath {
return true
}
}

return false
}
54 changes: 51 additions & 3 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/buildkite/agent/v3/env"
"github.com/buildkite/agent/v3/hook"
"github.com/buildkite/agent/v3/internal/experiments"
"github.com/buildkite/agent/v3/internal/file"
"github.com/buildkite/agent/v3/internal/job/shell"
"github.com/buildkite/agent/v3/internal/redact"
"github.com/buildkite/agent/v3/internal/replacer"
Expand Down Expand Up @@ -363,6 +364,32 @@ func (e *Executor) runUnwrappedHook(ctx context.Context, hookName string, hookCf
return e.shell.RunWithEnv(ctx, environ, hookCfg.Path)
}

func logOpenedHookInfo(l shell.Logger, debug bool, hookName, path string) {
switch {
case runtime.GOOS == "linux":
procPath, err := file.OpenedBy(l, debug, path)
if err != nil {
l.Errorf("The %s hook failed to run because it was already open. We couldn't find out what process had the hook open", hookName)

} else {
l.Errorf("The %s hook failed to run the %s process has the hook file open", hookName, procPath)
}
case utils.FileExists("/dev/fd"):
isOpened, err := file.IsOpened(l, debug, path)
if err == nil {
if isOpened {
l.Errorf("The %s hook failed to run because it was opened by this buildkite-agent")
} else {
l.Errorf("The %s hook failed to run because it was opened by another process")
}
break
}
fallthrough
default:
l.Errorf("The %s hook failed to run because it was opened", hookName)
}
}

func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName string, hookCfg HookConfig) error {
redactors := e.setupRedactors()
defer redactors.Flush()
Expand All @@ -386,24 +413,45 @@ func (e *Executor) runWrappedShellScriptHook(ctx context.Context, hookName strin

// Show the hook runner in debug, but the thing being run otherwise 💅🏻
if e.Debug {
e.shell.Commentf("A hook runner was written to \"%s\" with the following:", script.Path())
e.shell.Commentf("A hook runner was written to %q with the following:", script.Path())
e.shell.Promptf("%s", process.FormatCommand(script.Path(), nil))
} else {
e.shell.Promptf("%s", process.FormatCommand(cleanHookPath, []string{}))
}

const maxHookRetry = 3

// Run the wrapper script
if err = e.shell.RunScript(ctx, script.Path(), hookCfg.Env); err != nil {
if err := roko.NewRetrier(
roko.WithStrategy(roko.Constant(time.Second)),
roko.WithMaxAttempts(maxHookRetry),
).DoWithContext(ctx, func(r *roko.Retrier) error {
// Run the script and only retry on fork/exec errors
err := e.shell.RunScript(ctx, script.Path(), hookCfg.Env)
if perr := new(os.PathError); errors.As(err, &perr) && perr.Op == "fork/exec" {
return err
}
r.Break()
return err
}); err != nil {
exitCode := shell.GetExitCode(err)
e.shell.Env.Set("BUILDKITE_LAST_HOOK_EXIT_STATUS", fmt.Sprintf("%d", exitCode))

// Give a simpler error if it's just a shell exit error
// If the hook exited with a non-zero exit code, then we should pass that back to the executor
// so it may inform the Buildkite API
if shell.IsExitError(err) {
return &shell.ExitError{
Code: exitCode,
Message: fmt.Sprintf("The %s hook exited with status %d", hookName, exitCode),
}
}

// If the error is from fork/exec, then inspect the file to see why it failed to be executed,
// even after the retry
if perr := new(os.PathError); errors.As(err, &perr) && perr.Op == "fork/exec" {
logOpenedHookInfo(e.shell.Logger, e.Debug, hookName, perr.Path)
}

return err
}

Expand Down
2 changes: 1 addition & 1 deletion internal/job/shell/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ func (s *Shell) executeCommand(
s.cmdLock.Unlock()

if err := p.Run(ctx); err != nil {
return fmt.Errorf("Error running %q: %w", cmdStr, err)
return fmt.Errorf("error running %q: %w", cmdStr, err)
}

return p.WaitResult()
Expand Down