Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachtest: report timeout failures accordingly #96743

Merged
merged 1 commit into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 28 additions & 21 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,52 +288,47 @@ func collectErrors(args []interface{}) []error {
// ATTENTION: Since this calls panic(errTestFatal), it should only be called
// from a test's closure. The test runner itself should never call this.
func (t *testImpl) Fatal(args ...interface{}) {
t.addFailure("", args...)
t.addFailureAndCancel(1, "", args...)
panic(errTestFatal)
}

// Fatalf is like Fatal, but takes a format string.
func (t *testImpl) Fatalf(format string, args ...interface{}) {
t.addFailure(format, args...)
t.addFailureAndCancel(1, format, args...)
panic(errTestFatal)
}

// FailNow implements the TestingT interface.
func (t *testImpl) FailNow() {
t.addFailure("FailNow called")
t.addFailureAndCancel(1, "FailNow called")
panic(errTestFatal)
}

// Error implements the TestingT interface
func (t *testImpl) Error(args ...interface{}) {
t.addFailure("", args...)
t.addFailureAndCancel(1, "", args...)
}

// Errorf implements the TestingT interface.
func (t *testImpl) Errorf(format string, args ...interface{}) {
t.addFailure(format, args...)
t.addFailureAndCancel(1, format, args...)
}

// We take the first error from each failure which is the
// "squashed" error that contains all information of a failure
func formatFailure(b *strings.Builder, reportFailures ...failure) {
for i, failure := range reportFailures {
if i > 0 {
fmt.Fprintln(b)
}
file, line, fn, ok := errors.GetOneLineSource(failure.squashedErr)
if !ok {
file, line, fn = "<unknown>", 0, "unknown"
}
fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, failure.squashedErr)
func (t *testImpl) addFailureAndCancel(depth int, format string, args ...interface{}) {
t.addFailure(depth+1, format, args...)
if t.mu.cancel != nil {
t.mu.cancel()
}
}

func (t *testImpl) addFailure(format string, args ...interface{}) {
// addFailure depth indicates how many stack frames to skip when reporting the
// site of the failure in logs. `0` will report the caller of addFailure, `1` the
// caller of the caller of addFailure, etc.
func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
if format == "" {
format = strings.Repeat(" %v", len(args))[1:]
}
reportFailure := newFailure(errors.NewWithDepthf(2, format, args...), collectErrors(args))
reportFailure := newFailure(errors.NewWithDepthf(depth+1, format, args...), collectErrors(args))

t.mu.Lock()
defer t.mu.Unlock()
Expand Down Expand Up @@ -368,8 +363,20 @@ func (t *testImpl) addFailure(format string, args ...interface{}) {

t.mu.output = append(t.mu.output, msg...)
t.mu.output = append(t.mu.output, '\n')
if t.mu.cancel != nil {
t.mu.cancel()
}

// We take the first error from each failure which is the
// "squashed" error that contains all information of a failure
func formatFailure(b *strings.Builder, reportFailures ...failure) {
for i, failure := range reportFailures {
if i > 0 {
fmt.Fprintln(b)
}
file, line, fn, ok := errors.GetOneLineSource(failure.squashedErr)
if !ok {
file, line, fn = "<unknown>", 0, "unknown"
}
fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, failure.squashedErr)
}
}

Expand Down
16 changes: 8 additions & 8 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -984,10 +984,9 @@ func (r *testRunner) runTest(
}
t.L().Printf("tearing down after %s; see teardown.log", s)
case <-time.After(timeout):
// NB: we're intentionally not failing the test if it hasn't
// already. This will be done at the very end of this method,
// after we've collected artifacts.
t.L().Printf("test timed out after %s; check __stacks.log and CRDB logs for goroutine dumps", timeout)
// NB: We're adding the timeout failure intentionally without cancelling the context
// to capture as much state as possible during artifact collection.
t.addFailure(0, "test timed out (%s)", timeout)
timedOut = true
}

Expand Down Expand Up @@ -1104,10 +1103,11 @@ func (r *testRunner) teardownTest(
// around so someone can poke at it.
_ = c.StopE(ctx, t.L(), option.DefaultStopOpts(), c.All())

// The hung test may, against all odds, still not have reported an error.
// We delayed it to improve artifacts collection, and now we ensure the test
// is marked as failing.
t.Errorf("test timed out (%s)", t.Spec().(*registry.TestSpec).Timeout)
// We previously added a timeout failure without cancellation, so we cancel here.
if t.mu.cancel != nil {
t.mu.cancel()
}
t.L().Printf("test timed out; check __stacks.log and CRDB logs for goroutine dumps")
}
return nil
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/testutils/lint/passes/fmtsafe/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ var requireConstFmt = map[string]bool{
"(*github.com/cockroachdb/cockroach/pkg/cmd/roachtest.testImpl).addFailure": true,
"(*main.testImpl).addFailure": true,

"(*github.com/cockroachdb/cockroach/pkg/cmd/roachtest.testImpl).addFailureAndCancel": true,
"(*main.testImpl).addFailureAndCancel": true,

"(*main.testImpl).Fatalf": true,
"(*github.com/cockroachdb/cockroach/pkg/cmd/roachtest.testImpl).Fatalf": true,

Expand Down