Skip to content

Commit

Permalink
roachtest: add roachtest.Operation and run-operation command
Browse files Browse the repository at this point in the history
This change adds the ability to write roachtest Operations that,
unlike a full-on roachtest.Test, do not require an ephemeral cluster
to be spun up or down. These operations are expected to have as few
logical side effects as possible and can be run on a cluster with
running workloads. Running an Operation using `roachtest run-operation`
also guarantees that the Cockroach/Workload binaries on the cluster's
nodes will not be swapped with local ones, and that the cluster won't be
wiped unintentionally at the end or in case of an error.

This change also adds add-index and add-column as two example operations
that operate in SQL land and demonstrate the purpose of an Operation.

Release note: None.

Epic: none
  • Loading branch information
itsbilal committed Feb 26, 2024
1 parent 903e2fc commit fd1f248
Show file tree
Hide file tree
Showing 24 changed files with 840 additions and 44 deletions.
2 changes: 2 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,8 @@ GO_TARGETS = [
"//pkg/cmd/roachtest/clusterstats:clusterstats",
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/grafana:grafana",
"//pkg/cmd/roachtest/operation:operation",
"//pkg/cmd/roachtest/operations:operations",
"//pkg/cmd/roachtest/option:option",
"//pkg/cmd/roachtest/option:option_test",
"//pkg/cmd/roachtest/registry:registry",
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ go_library(
"//pkg/build",
"//pkg/cmd/internal/issues",
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/operations",
"//pkg/cmd/roachtest/option",
"//pkg/cmd/roachtest/registry",
"//pkg/cmd/roachtest/roachtestflags",
Expand Down
38 changes: 25 additions & 13 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,11 +316,15 @@ func initBinariesAndLibraries() {
cockroachPath := roachtestflags.CockroachPath
cockroachEAPath := roachtestflags.CockroachEAPath
workloadPath := roachtestflags.WorkloadPath
cockroach[defaultArch], _ = resolveBinary("cockroach", cockroachPath, defaultArch, true, false)
workload[defaultArch], _ = resolveBinary("workload", workloadPath, defaultArch, true, false)
cockroachEA[defaultArch], err = resolveBinary("cockroach-ea", cockroachEAPath, defaultArch, false, true)
if err != nil {
fmt.Fprintf(os.Stderr, "WARN: unable to find %q for %q: %s\n", "cockroach-ea", defaultArch, err)
// If a remote cockroach binary has been specified, we don't need to resolve
// cockroach/workload binaries.
if roachtestflags.CockroachBinaryPath == "" {
cockroach[defaultArch], _ = resolveBinary("cockroach", cockroachPath, defaultArch, true, false)
workload[defaultArch], _ = resolveBinary("workload", workloadPath, defaultArch, true, false)
cockroachEA[defaultArch], err = resolveBinary("cockroach-ea", cockroachEAPath, defaultArch, false, true)
if err != nil {
fmt.Fprintf(os.Stderr, "WARN: unable to find %q for %q: %s\n", "cockroach-ea", defaultArch, err)
}
}

if roachtestflags.ARM64Probability > 0 && defaultArch != vm.ArchARM64 {
Expand Down Expand Up @@ -973,8 +977,9 @@ func (f *clusterFactory) newCluster(
type attachOpt struct {
skipValidation bool
// Implies skipWipe.
skipStop bool
skipWipe bool
skipStop bool
skipWipe bool
tolerateRegistrationErrors bool
}

// attachToExistingCluster creates a cluster object based on machines that have
Expand Down Expand Up @@ -1011,7 +1016,7 @@ func attachToExistingCluster(
}
}

if err := r.registerCluster(c); err != nil {
if err := r.registerCluster(c); err != nil && !opt.tolerateRegistrationErrors {
return nil, err
}

Expand Down Expand Up @@ -1066,7 +1071,7 @@ func (c *clusterImpl) StopCockroachGracefullyOnNode(

// Save marks the cluster as "saved" so that it doesn't get destroyed.
func (c *clusterImpl) Save(ctx context.Context, msg string, l *logger.Logger) {
l.PrintfCtx(ctx, "saving cluster %s for debugging (--debug specified)", c)
l.PrintfCtx(ctx, "saving cluster %s (--debug specified or running operation)", c)
c.r.markClusterAsSaved(c, msg)
c.destroyState.mu.Lock()
c.destroyState.mu.saved = true
Expand Down Expand Up @@ -1820,17 +1825,18 @@ func (c *clusterImpl) PutE(
// nodes in the cluster. By default, we randomly upload a binary with or without
// runtime assertions enabled. Note that we upload to all nodes even if they
// don't use the binary, so that the test runner can always fetch logs.
func (c *clusterImpl) PutCockroach(ctx context.Context, l *logger.Logger, t *testImpl) error {
switch t.spec.CockroachBinary {
func (c *clusterImpl) PutCockroach(ctx context.Context, l *logger.Logger, t test.Test) error {
binaryType := t.(*testImpl).spec.CockroachBinary
switch binaryType {
case registry.RandomizedCockroach:
if tests.UsingRuntimeAssertions(t) {
t.l.Printf("To reproduce the same set of metamorphic constants, run this test with %s=%d", test.EnvAssertionsEnabledSeed, c.cockroachRandomSeed())
t.L().Printf("To reproduce the same set of metamorphic constants, run this test with %s=%d", test.EnvAssertionsEnabledSeed, c.cockroachRandomSeed())
}
return c.PutE(ctx, l, t.Cockroach(), test.DefaultCockroachPath, c.All())
case registry.StandardCockroach:
return c.PutE(ctx, l, t.StandardCockroach(), test.DefaultCockroachPath, c.All())
case registry.RuntimeAssertionsCockroach:
t.l.Printf("To reproduce the same set of metamorphic constants, run this test with %s=%d", test.EnvAssertionsEnabledSeed, c.cockroachRandomSeed())
t.L().Printf("To reproduce the same set of metamorphic constants, run this test with %s=%d", test.EnvAssertionsEnabledSeed, c.cockroachRandomSeed())
return c.PutE(ctx, l, t.RuntimeAssertionsCockroach(), test.DefaultCockroachPath, c.All())
default:
return errors.Errorf("Specified cockroach binary does not exist.")
Expand Down Expand Up @@ -2861,6 +2867,12 @@ func (c *clusterImpl) MaybeExtendCluster(
ctx context.Context, l *logger.Logger, testSpec *registry.TestSpec,
) error {
timeout := testTimeout(testSpec)
return c.MaybeExtendClusterForTimeout(ctx, l, timeout)
}

func (c *clusterImpl) MaybeExtendClusterForTimeout(
ctx context.Context, l *logger.Logger, timeout time.Duration,
) error {
minExp := timeutil.Now().Add(timeout + time.Hour)
if c.expiration.Before(minExp) {
extend := minExp.Sub(c.expiration)
Expand Down
111 changes: 111 additions & 0 deletions pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"strings"

"github.com/cockroachdb/cockroach/pkg/build"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operations"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestflags"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/tests"
Expand Down Expand Up @@ -179,9 +180,31 @@ the cluster nodes on start.
}
roachtestflags.AddRunFlags(benchCmd.Flags())

// runOperationCmd is the `run-operation` subcommand. It initializes the run
// flags, binaries and libraries, builds a filter from the positional regex
// arguments with makeTestFilter, and then hands operations.RegisterOperations
// and that filter to runOperations.
var runOperationCmd = &cobra.Command{
// Don't display usage when operations fail.
SilenceUsage: true,
Use: "run-operation [regex...]",
Short: "run operations on cockroach cluster",
Long: `Run automated operations on existing clusters.`,
RunE: func(cmd *cobra.Command, args []string) error {
if err := initRunFlagsBinariesAndLibraries(cmd); err != nil {
return err
}
filter, err := makeTestFilter(args)
if err != nil {
return err
}
fmt.Printf("\nRunning %s.\n\n", filter.String())
// NOTE(review): SilenceUsage is already set to true in the command
// literal above, so this assignment looks redundant - confirm before
// removing.
cmd.SilenceUsage = true
return runOperations(operations.RegisterOperations, filter)
},
}
roachtestflags.AddRunOpsFlags(runOperationCmd.Flags())

rootCmd.AddCommand(listCmd)
rootCmd.AddCommand(runCmd)
rootCmd.AddCommand(benchCmd)
rootCmd.AddCommand(runOperationCmd)

var err error
config.OSUser, err = user.Current()
Expand Down Expand Up @@ -263,6 +286,94 @@ func testsToRun(
return selectSpecs(notSkipped, selectProbability, true, print), nil
}

// opsToRun returns the operation specs registered in r that should be run.
//
// Specs are first narrowed down with filter; an error is returned if nothing
// matches. Specs with a non-empty Skip reason are dropped unless runSkipped is
// set. The remaining specs are passed through selectOpSpecs together with
// selectProbability.
//
// When print is true, every spec that is left out (because it is marked as
// skipped, or because it matches the filter's name pattern but not the full
// filter) is reported on stdout, plus a TeamCity testIgnored message when
// roachtestflags.TeamCity is set.
func opsToRun(
	r testRegistryImpl,
	filter *registry.TestFilter,
	runSkipped bool,
	selectProbability float64,
	print bool,
) ([]registry.OperationSpec, error) {
	matched := filter.FilterOps(r.AllOperations())
	if len(matched) == 0 {
		return nil, errors.New("no matching operations to run")
	}

	var runnable []registry.OperationSpec
	for _, spec := range matched {
		if runSkipped || spec.Skip == "" {
			runnable = append(runnable, spec)
			continue
		}
		// The spec is marked as skipped; report it if requested.
		if !print {
			continue
		}
		if roachtestflags.TeamCity {
			fmt.Fprintf(os.Stdout, "##teamcity[testIgnored name='%s' message='%s']\n",
				spec.Name, TeamCityEscape(spec.Skip))
		}
		fmt.Fprintf(os.Stdout, "--- SKIP: %s (%s)\n\t%s\n", spec.Name, "0.00s", spec.Skip)
	}

	if print {
		// Also surface the operations that match the name pattern(s) alone but
		// were rejected by the full filter for some other reason.
		nameOnlyFilter := registry.TestFilter{
			Name: filter.Name,
		}
		for _, spec := range nameOnlyFilter.FilterOps(r.AllOperations()) {
			matches, failReason := filter.MatchesOp(&spec)
			if matches {
				continue
			}
			// This operation matches the name-only filter but not the original one.
			reason := filter.MatchFailReasonString(failReason)
			if roachtestflags.TeamCity {
				fmt.Fprintf(os.Stdout, "##teamcity[testIgnored name='%s' message='%s']\n", spec.Name, reason)
			}
			fmt.Fprintf(os.Stdout, "--- SKIP: %s (%s)\n\t%s\n", spec.Name, "0.00s", reason)
		}
	}

	return selectOpSpecs(runnable, selectProbability, print), nil
}

// selectOpSpecs returns a random sample of the given operation specs.
//
// Each spec is included independently when rand.Float64() < samplePct. A
// samplePct of exactly 1, or an empty specs slice, returns specs unchanged
// without sampling. The relative order of specs is preserved in the result.
//
// When print is true, every spec that was not sampled is reported on stdout
// as skipped, plus a TeamCity testIgnored message when roachtestflags.TeamCity
// is set. This includes specs that come after the last sampled one (and all
// specs when nothing was sampled), which previously went unreported.
func selectOpSpecs(
	specs []registry.OperationSpec, samplePct float64, print bool,
) []registry.OperationSpec {
	if samplePct == 1 || len(specs) == 0 {
		return specs
	}

	var sampled []registry.OperationSpec
	for _, s := range specs {
		if rand.Float64() < samplePct {
			sampled = append(sampled, s)
			continue
		}

		// The spec was excluded by sampling; report it if requested.
		if !print {
			continue
		}
		if roachtestflags.TeamCity {
			fmt.Fprintf(os.Stdout, "##teamcity[testIgnored name='%s' message='excluded via sampling']\n",
				s.Name)
		}
		fmt.Fprintf(os.Stdout, "--- SKIP: %s (%s)\n\texcluded via sampling\n", s.Name, "0.00s")
	}

	return sampled
}

// selectSpecs returns a random sample of the given test specs.
// If atLeastOnePerPrefix is true, it guarantees that at least one test is
// selected for each prefix (e.g. kv0/, acceptance/).
Expand Down
9 changes: 9 additions & 0 deletions pkg/cmd/roachtest/operation/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

# Go library target for the roachtest `operation` package. Its only source is
# operation_interface.go and its only dependency is the roachtest `test`
# package.
go_library(
name = "operation",
srcs = ["operation_interface.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation",
visibility = ["//visibility:public"],
deps = ["//pkg/cmd/roachtest/test"],
)
22 changes: 22 additions & 0 deletions pkg/cmd/roachtest/operation/operation_interface.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package operation

import "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"

// Operation is the handle passed to an operation's Run and Cleanup functions.
// It embeds test.Test and adds a string key/value store for cleanup state.
type Operation interface {
	// TODO(bilal): Instead of encapsulating test.Test, copy over the small
	// set of relevant methods, ideally moving them to a shared interface.
	test.Test

	// GetCleanupState returns the cleanup state value stored under key.
	GetCleanupState(key string) string
	// SetCleanupState stores value as the cleanup state for key.
	SetCleanupState(key, value string)
}
22 changes: 22 additions & 0 deletions pkg/cmd/roachtest/operations/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

# Go library target for the roachtest `operations` package, which holds the
# add_column and add_index operations along with register.go and utils.go.
go_library(
name = "operations",
srcs = [
"add_column.go",
"add_index.go",
"register.go",
"utils.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operations",
visibility = ["//visibility:public"],
deps = [
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/operation",
"//pkg/cmd/roachtest/option",
"//pkg/cmd/roachtest/registry",
"//pkg/cmd/roachtest/roachtestflags",
"//pkg/cmd/roachtest/test",
"//pkg/util/randutil",
],
)
74 changes: 74 additions & 0 deletions pkg/cmd/roachtest/operations/add_column.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package operations

import (
"context"
"fmt"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestflags"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

// runAddColumn adds a new VARCHAR column (with default value 'default') to a
// table chosen by pickRandomDB and pickRandomTable. The column name is
// "add_column_op_" followed by a random uint32.
//
// The database and table names are recorded as cleanup state before the
// ALTER TABLE statement runs; the column name is recorded only after the
// statement succeeds. If the statement fails, the error is passed to o.Fatal.
func runAddColumn(ctx context.Context, o operation.Operation, c cluster.Cluster) {
	// NOTE(review): the literal 1 is presumably the node to connect to -
	// confirm against cluster.Cluster.Conn.
	db := c.Conn(ctx, o.L(), 1, option.TenantName(roachtestflags.VirtualCluster))
	defer db.Close()

	// The second return value of NewPseudoRand is not needed here.
	rng, _ := randutil.NewPseudoRand()
	database := pickRandomDB(ctx, o, db)
	table := pickRandomTable(ctx, o, db, database)
	o.SetCleanupState("db", database)
	o.SetCleanupState("table", table)

	column := fmt.Sprintf("add_column_op_%d", rng.Uint32())
	o.Status(fmt.Sprintf("adding column %s to table %s.%s", column, database, table))
	stmt := fmt.Sprintf("ALTER TABLE %s.%s ADD COLUMN %s VARCHAR DEFAULT 'default'", database, table, column)
	if _, err := db.ExecContext(ctx, stmt); err != nil {
		o.Fatal(err)
	}
	o.SetCleanupState("column", column)

	o.Status(fmt.Sprintf("column %s created", column))
}

// cleanupAddColumn drops the column recorded in the operation's cleanup state
// under the "db", "table" and "column" keys, using DROP COLUMN ... CASCADE.
// If the statement fails, the error is passed to o.Fatal.
func cleanupAddColumn(ctx context.Context, o operation.Operation, c cluster.Cluster) {
	db := c.Conn(ctx, o.L(), 1, option.TenantName(roachtestflags.VirtualCluster))
	defer db.Close()

	// Read back the names stored under the cleanup state keys.
	database := o.GetCleanupState("db")
	table := o.GetCleanupState("table")
	column := o.GetCleanupState("column")

	o.Status(fmt.Sprintf("dropping column %s", column))
	stmt := fmt.Sprintf("ALTER TABLE %s.%s DROP COLUMN %s CASCADE", database, table, column)
	if _, err := db.ExecContext(ctx, stmt); err != nil {
		o.Fatal(err)
	}
}

// registerAddColumn registers the "add-column" operation with r, wiring
// runAddColumn as its Run function and cleanupAddColumn as its Cleanup
// function.
func registerAddColumn(r registry.Registry) {
	spec := registry.OperationSpec{
		Name:             "add-column",
		Owner:            registry.OwnerSQLFoundations,
		Timeout:          24 * time.Hour,
		CompatibleClouds: registry.AllClouds,
		Dependency:       registry.OperationRequiresDatabaseSchema,
		Run:              runAddColumn,
		CleanupWaitTime:  5 * time.Minute,
		Cleanup:          cleanupAddColumn,
	}
	r.AddOperation(spec)
}
Loading

0 comments on commit fd1f248

Please sign in to comment.