Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachtest: use structured errors #93328

1 change: 1 addition & 0 deletions pkg/cmd/roachtest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ go_library(
"//pkg/internal/team",
"//pkg/roachprod",
"//pkg/roachprod/config",
"//pkg/roachprod/errors",
"//pkg/roachprod/install",
"//pkg/roachprod/logger",
"//pkg/roachprod/prometheus",
Expand Down
58 changes: 47 additions & 11 deletions pkg/cmd/roachtest/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/internal/team"
rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
)
Expand All @@ -32,6 +33,14 @@ type githubIssues struct {
teamLoader func() (team.Map, error)
}

type issueCategory int

const (
otherErr issueCategory = iota
clusterCreationErr
sshErr
)

func newGithubIssues(
disable bool, c *clusterImpl, vmCreateOpts *vm.CreateOpts, l *logger.Logger,
) *githubIssues {
Expand Down Expand Up @@ -59,10 +68,32 @@ func (g *githubIssues) shouldPost(t test.Test) bool {
t.Spec().(*registry.TestSpec).Cluster.NodeCount > 0
}

func (g *githubIssues) createPostRequest(t test.Test, message string) issues.PostRequest {
func (g *githubIssues) createPostRequest(
t test.Test, cat issueCategory, message string,
) issues.PostRequest {
var mention []string
var projColID int

issueOwner := t.Spec().(*registry.TestSpec).Owner
issueName := t.Name()

messagePrefix := ""
// Overrides to shield eng teams from potential flakes
if cat == clusterCreationErr {
issueOwner = registry.OwnerDevInf
issueName = "cluster_creation"
messagePrefix = fmt.Sprintf("test %s was skipped due to ", t.Name())
} else if cat == sshErr {
issueOwner = registry.OwnerTestEng
issueName = "ssh_problem"
messagePrefix = fmt.Sprintf("test %s failed due to ", t.Name())
}

teams, err := g.teamLoader()
if err != nil {
t.Fatalf("could not load teams: %v", err)
}

// Issues posted from roachtest are identifiable as such and
// they are also release blockers (this label may be removed
// by a human upon closer investigation).
Expand All @@ -72,12 +103,7 @@ func (g *githubIssues) createPostRequest(t test.Test, message string) issues.Pos
labels = append(labels, "release-blocker")
}

teams, err := g.teamLoader()
if err != nil {
t.Fatalf("could not load teams: %v", err)
}

if sl, ok := teams.GetAliasesForPurpose(ownerToAlias(t.Spec().(*registry.TestSpec).Owner), team.PurposeRoachtest); ok {
if sl, ok := teams.GetAliasesForPurpose(ownerToAlias(issueOwner), team.PurposeRoachtest); ok {
for _, alias := range sl {
mention = append(mention, "@"+string(alias))
if label := teams[alias].Label; label != "" {
Expand Down Expand Up @@ -115,8 +141,8 @@ func (g *githubIssues) createPostRequest(t test.Test, message string) issues.Pos
MentionOnCreate: mention,
ProjectColumnID: projColID,
PackageName: "roachtest",
TestName: t.Name(),
Message: message,
TestName: issueName,
Message: messagePrefix + message,
Artifacts: artifacts,
ExtraLabels: labels,
ExtraParams: clusterParams,
Expand All @@ -133,14 +159,24 @@ func (g *githubIssues) createPostRequest(t test.Test, message string) issues.Pos
}
}

func (g *githubIssues) MaybePost(t test.Test, message string) error {
func (g *githubIssues) MaybePost(t *testImpl, message string) error {
if !g.shouldPost(t) {
return nil
}

cat := otherErr

// Overrides to shield eng teams from potential flakes
firstFailure := t.firstFailure()
if failureContainsError(firstFailure, errClusterProvisioningFailed) {
cat = clusterCreationErr
} else if failureContainsError(firstFailure, rperrors.ErrSSH255) {
cat = sshErr
}

return g.issuePoster(
context.Background(),
issues.UnitTestFormatter,
g.createPostRequest(t, message),
g.createPostRequest(t, cat, message),
)
}
47 changes: 37 additions & 10 deletions pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ var (
aliases:
cockroachdb/rfc-prs: other
triage_column_id: 0
label: T-testeng`
cockroachdb/test-eng:
label: T-testeng
triage_column_id: 14041337
cockroachdb/dev-inf:
triage_column_id: 10210759`

validTeamsFn = func() (team.Map, error) { return loadYamlTeams(teamsYaml) }
invalidTeamsFn = func() (team.Map, error) { return loadYamlTeams("invalid yaml") }
Expand Down Expand Up @@ -58,7 +62,7 @@ func TestShouldPost(t *testing.T) {
envTcBuildBranch string
expected bool
}{
/* Cases 1 - 4 verify that issues are not posted if any of on the relevant criteria checks fail */
/* Cases 1 - 4 verify that issues are not posted if any of the relevant criteria checks fail */
// disable
{true, 1, "token", "master", false},
// nodeCount
Expand Down Expand Up @@ -103,10 +107,11 @@ func TestCreatePostRequest(t *testing.T) {
clusterCreationFailed bool
loadTeamsFailed bool
localSSD bool
category issueCategory
expectedPost bool
expectedParams map[string]string
}{
{true, false, false, false, true,
{true, false, false, false, otherErr, true,
prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
Expand All @@ -116,7 +121,7 @@ func TestCreatePostRequest(t *testing.T) {
"localSSD": "false",
}),
},
{true, false, false, true, true,
{true, false, false, true, clusterCreationErr, true,
prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
Expand All @@ -129,15 +134,15 @@ func TestCreatePostRequest(t *testing.T) {
// Assert that release-blocker label exists when !nonReleaseBlocker
// Also ensure that in the event of a failed cluster creation,
// nil `vmOptions` and `clusterImpl` are not dereferenced
{false, true, false, false, true,
{false, true, false, false, sshErr, true,
prefixAll(map[string]string{
"cloud": "gce",
"ssd": "0",
"cpu": "4",
}),
},
//Simulate failure loading TEAMS.yaml
{true, false, true, false, false, nil},
{true, false, true, false, otherErr, false, nil},
}

reg, _ := makeTestRegistry(spec.GCE, "", "", false)
Expand All @@ -146,7 +151,7 @@ func TestCreatePostRequest(t *testing.T) {
clusterSpec := reg.MakeClusterSpec(1)

testSpec := &registry.TestSpec{
Name: "githubPost",
Name: "github_test",
Owner: OwnerUnitTest,
Cluster: clusterSpec,
NonReleaseBlocker: c.nonReleaseBlocker,
Expand Down Expand Up @@ -184,9 +189,9 @@ func TestCreatePostRequest(t *testing.T) {

if c.loadTeamsFailed {
// Assert that if TEAMS.yaml cannot be loaded then function panics.
assert.Panics(t, func() { github.createPostRequest(ti, "message") })
assert.Panics(t, func() { github.createPostRequest(ti, c.category, "message") })
} else {
req := github.createPostRequest(ti, "message")
req := github.createPostRequest(ti, c.category, "message")

if c.expectedParams != nil {
require.Equal(t, c.expectedParams, req.ExtraParams)
Expand All @@ -197,7 +202,29 @@ func TestCreatePostRequest(t *testing.T) {
if !c.nonReleaseBlocker {
require.True(t, contains(req.ExtraLabels, nil, "release-blocker"))
}
require.Contains(t, req.ExtraLabels, "T-testeng")

expectedTeam := "@cockroachdb/unowned"
expectedName := "github_test"
expectedLabel := ""
expectedMessagePrefix := ""

if c.category == clusterCreationErr {
expectedTeam = "@cockroachdb/dev-inf"
expectedName = "cluster_creation"
expectedMessagePrefix = "test github_test was skipped due to "
} else if c.category == sshErr {
expectedTeam = "@cockroachdb/test-eng"
expectedName = "ssh_problem"
expectedLabel = "T-testeng"
expectedMessagePrefix = "test github_test failed due to "
}

require.Contains(t, req.MentionOnCreate, expectedTeam)
require.Equal(t, expectedName, req.TestName)
require.True(t, strings.HasPrefix(req.Message, expectedMessagePrefix), req.Message)
if expectedLabel != "" {
require.Contains(t, req.ExtraLabels, expectedLabel)
}
}
}
}
11 changes: 9 additions & 2 deletions pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ runner itself.
if errors.Is(err, errTestsFailed) {
code = ExitCodeTestsFailed
}
if errors.Is(err, errClusterProvisioningFailed) {
if errors.Is(err, errSomeClusterProvisioningFailed) {
code = ExitCodeClusterProvisioningFailed
}
// Cobra has already printed the error message.
Expand Down Expand Up @@ -376,8 +376,15 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {

filter := registry.NewTestFilter(cfg.args)
clusterType := roachprodCluster
bindTo := ""
if local {
clusterType = localCluster

// This will suppress the annoying "Allow incoming network connections" popup from
// OSX when running a roachtest
bindTo = "localhost"

fmt.Printf("--local specified. Binding http listener to localhost only")
if cfg.parallelism != 1 {
fmt.Printf("--local specified. Overriding --parallelism to 1.\n")
cfg.parallelism = 1
Expand All @@ -392,7 +399,7 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {
keepClustersOnTestFailure: cfg.debugEnabled,
clusterID: cfg.clusterID,
}
if err := runner.runHTTPServer(cfg.httpPort, os.Stdout); err != nil {
if err := runner.runHTTPServer(cfg.httpPort, os.Stdout, bindTo); err != nil {
return err
}

Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type Test interface {
VersionsBinaryOverride() map[string]string
Skip(args ...interface{})
Skipf(format string, args ...interface{})
Error(args ...interface{})
Errorf(string, ...interface{})
FailNow()
Fatal(args ...interface{})
Expand Down
Loading