diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 4411811138c1..158d5f986116 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -1155,6 +1155,7 @@ GO_TARGETS = [ "//pkg/cmd/roachtest/clusterstats:clusterstats", "//pkg/cmd/roachtest/clusterstats:clusterstats_test", "//pkg/cmd/roachtest/grafana:grafana", + "//pkg/cmd/roachtest/operation:operation", "//pkg/cmd/roachtest/option:option", "//pkg/cmd/roachtest/option:option_test", "//pkg/cmd/roachtest/registry:registry", diff --git a/pkg/cmd/roachtest/operation/BUILD.bazel b/pkg/cmd/roachtest/operation/BUILD.bazel new file mode 100644 index 000000000000..6a709598b84f --- /dev/null +++ b/pkg/cmd/roachtest/operation/BUILD.bazel @@ -0,0 +1,9 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "operation", + srcs = ["operation_interface.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation", + visibility = ["//visibility:public"], + deps = ["//pkg/roachprod/logger"], +) diff --git a/pkg/cmd/roachtest/operation/operation_interface.go b/pkg/cmd/roachtest/operation/operation_interface.go new file mode 100644 index 000000000000..b10138330e17 --- /dev/null +++ b/pkg/cmd/roachtest/operation/operation_interface.go @@ -0,0 +1,29 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package operation + +import "github.com/cockroachdb/cockroach/pkg/roachprod/logger" + +type Operation interface { + // ClusterCockroach returns the path to the Cockroach binary on the target + // cluster. + ClusterCockroach() string + Name() string + Error(args ...interface{}) + Errorf(string, ...interface{}) + FailNow() + Fatal(args ...interface{}) + Fatalf(format string, args ...interface{}) + Failed() bool + + L() *logger.Logger + Status(args ...interface{}) +} diff --git a/pkg/cmd/roachtest/registry/BUILD.bazel b/pkg/cmd/roachtest/registry/BUILD.bazel index a627c5089017..73f948a2665e 100644 --- a/pkg/cmd/roachtest/registry/BUILD.bazel +++ b/pkg/cmd/roachtest/registry/BUILD.bazel @@ -6,6 +6,7 @@ go_library( "encryption.go", "errors.go", "filter.go", + "operation_spec.go", "owners.go", "registry_interface.go", "tag.go", @@ -15,6 +16,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/cmd/roachtest/cluster", + "//pkg/cmd/roachtest/operation", "//pkg/cmd/roachtest/spec", "//pkg/cmd/roachtest/test", "//pkg/internal/team", diff --git a/pkg/cmd/roachtest/registry/operation_spec.go b/pkg/cmd/roachtest/registry/operation_spec.go new file mode 100644 index 000000000000..8b02965a392a --- /dev/null +++ b/pkg/cmd/roachtest/registry/operation_spec.go @@ -0,0 +1,80 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package registry + +import ( + "context" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation" +) + +// OperationDependency specifies what an operation requires from a cluster to +// be able to run. The zero value is the simplest dependency (i.e. an existant +// cluster with nodes) and is always implied even if unspecified. All non-zero +// values could require additional pre-Run checks from any runner/scheduler +// to verify if this operation can be run. +type OperationDependency int + +const ( + OperationRequiresNodes OperationDependency = iota + OperationRequiresPopulatedDatabase + OperationRequiresZeroUnavailableRanges + OperationRequiresZeroLaggingRanges +) + +// OperationSpec is a spec for a roachtest operation. +type OperationSpec struct { + Skip string // if non-empty, operation will be skipped + + Name string + // Owner is the name of the team responsible for signing off on failures of + // this operation that happen in the release process. This must be one of a limited + // set of values (the keys in the roachtestTeams map). + Owner Owner + // The maximum duration the operation is allowed to run before it is considered + // failed. This timeout applies _individually_ to Run() and Cleanup() each, but + // does not include any scheduling waits for either method. + Timeout time.Duration + + // CompatibleClouds is the set of clouds this operation can run on (e.g. AllClouds, + // OnlyGCE, etc). Must be set. + CompatibleClouds CloudSet + + // Dependencies specify the types of resources required for this roachtest + // operation to work. This will be used in filtering eligible operations to + // run. Multiple dependencies could be specified, and any schedulers will take + // care of ensuring those dependencies are met before running Run(). + // + // TODO(bilal): Unused. + Dependencies []OperationDependency + + // CanRunConcurrently specifies whether this operation is safe to run + // concurrently with other operations that have CanRunConcurrently = true. For + // instance, a random-index addition is safe to run concurrently with most + // other operations like node kills, while a drop would need to run on its own + // and will have CanRunConcurrently = false. + // + // TODO(bilal): Unused. + CanRunConcurrently bool + + // Run is the operation function. + Run func(ctx context.Context, o operation.Operation, c cluster.Cluster) + + // CleanupWaitTime is the min time to wait before running the Cleanup method. + // Note that schedulers/runners are free to wait longer than this amount of + // time. Any intermediary wait of this sort does not count towards Timeout. + CleanupWaitTime time.Duration + + // Cleanup is the operation cleanup function, if defined. + Cleanup func(ctx context.Context, o operation.Operation, c cluster.Cluster) +} diff --git a/pkg/cmd/roachtest/registry/registry_interface.go b/pkg/cmd/roachtest/registry/registry_interface.go index 06c59c02d0cf..7b9d522f4408 100644 --- a/pkg/cmd/roachtest/registry/registry_interface.go +++ b/pkg/cmd/roachtest/registry/registry_interface.go @@ -20,5 +20,6 @@ import ( type Registry interface { MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec Add(TestSpec) + AddOperation(OperationSpec) PromFactory() promauto.Factory } diff --git a/pkg/cmd/roachtest/test_registry.go b/pkg/cmd/roachtest/test_registry.go index 9501dadeacdd..3ffece141a12 100644 --- a/pkg/cmd/roachtest/test_registry.go +++ b/pkg/cmd/roachtest/test_registry.go @@ -26,6 +26,7 @@ import ( type testRegistryImpl struct { m map[string]*registry.TestSpec + ops map[string]*registry.OperationSpec snapshotPrefixes map[string]struct{} promRegistry *prometheus.Registry @@ -37,6 +38,7 @@ var _ registry.Registry = (*testRegistryImpl)(nil) func makeTestRegistry() testRegistryImpl { return testRegistryImpl{ m: make(map[string]*registry.TestSpec), + ops: make(map[string]*registry.OperationSpec), snapshotPrefixes: make(map[string]struct{}), promRegistry: prometheus.NewRegistry(), } @@ -65,6 +67,19 @@ func (r *testRegistryImpl) Add(spec registry.TestSpec) { r.m[spec.Name] = &spec } +// AddOperation adds an operation to the registry. +func (r *testRegistryImpl) AddOperation(spec registry.OperationSpec) { + if _, ok := r.ops[spec.Name]; ok { + fmt.Fprintf(os.Stderr, "operation %s already registered\n", spec.Name) + os.Exit(1) + } + if err := r.prepareOpSpec(&spec); err != nil { + fmt.Fprintf(os.Stderr, "%+v\n", err) + os.Exit(1) + } + r.ops[spec.Name] = &spec +} + // MakeClusterSpec makes a cluster spec. It should be used over `spec.MakeClusterSpec` // because this method also adds options baked into the registry. func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec { @@ -73,6 +88,35 @@ func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) s const testNameRE = "^[a-zA-Z0-9-_=/,]+$" +// prepareOpSpec validates a spec and does minor massaging of its fields. +func (r *testRegistryImpl) prepareOpSpec(spec *registry.OperationSpec) error { + // Operations follow the same naming conventions as tests and have the same + // alphabet of allowable characters in names. + if matched, err := regexp.MatchString(testNameRE, spec.Name); err != nil || !matched { + return fmt.Errorf("%s: Name must match this regexp: %s", spec.Name, testNameRE) + } + + spec.CompatibleClouds.AssertInitialized() + + if spec.Run == nil { + return fmt.Errorf("%s: must specify Run", spec.Name) + } + if spec.Cleanup == nil && spec.CleanupWaitTime != 0 { + return fmt.Errorf("%s: must specify Cleanup if cleanup time is specified", spec.Name) + } + + // All operations must have an owner so the release team knows who signs off on + // failures and so the github issue poster knows who to assign it to. + if spec.Owner == `` { + return fmt.Errorf(`%s: unspecified owner`, spec.Name) + } + if !spec.Owner.IsValid() { + return fmt.Errorf(`%s: unknown owner %q`, spec.Name, spec.Owner) + } + + return nil +} + // prepareSpec validates a spec and does minor massaging of its fields. func (r *testRegistryImpl) prepareSpec(spec *registry.TestSpec) error { if matched, err := regexp.MatchString(testNameRE, spec.Name); err != nil || !matched { @@ -130,3 +174,15 @@ func (r testRegistryImpl) AllTests() []registry.TestSpec { }) return tests } + +// AllOperations returns all the operation specs, sorted by name. +func (r testRegistryImpl) AllOperations() []registry.OperationSpec { + var ops []registry.OperationSpec + for _, t := range r.ops { + ops = append(ops, *t) + } + sort.Slice(ops, func(i, j int) bool { + return ops[i].Name < ops[j].Name + }) + return ops +} diff --git a/pkg/cmd/roachtest/tests/restore_test.go b/pkg/cmd/roachtest/tests/restore_test.go index 94f539e28d44..7a9de275dad5 100644 --- a/pkg/cmd/roachtest/tests/restore_test.go +++ b/pkg/cmd/roachtest/tests/restore_test.go @@ -36,6 +36,10 @@ func (m *mockRegistry) Add(spec registry.TestSpec) { m.testNames = append(m.testNames, spec.Name) } +func (m *mockRegistry) AddOperation(spec registry.OperationSpec) { + // No-op. +} + func (m *mockRegistry) PromFactory() promauto.Factory { return promauto.With(nil) }