Skip to content

Commit

Permalink
cmd/roachtest: add registry.OperationSpec
Browse files Browse the repository at this point in the history
This change introduces a new type in the roachtest registry for
Operations; smaller subunits of tests that work strictly on existing
clusters and are meant to have zero logical side-effects to a running
cluster (eg. should not delete data in an irrecoverable way), but can
induce chaos events like crashing nodes. This new spec type,
OperationSpec, specifies one operation and is registered with
a registry.Registry.

Split from cockroachdb#118364.

Epic: none

Release note: None
  • Loading branch information
itsbilal committed Mar 11, 2024
1 parent 1741d97 commit a9f1d5b
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,7 @@ GO_TARGETS = [
"//pkg/cmd/roachtest/clusterstats:clusterstats",
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/grafana:grafana",
"//pkg/cmd/roachtest/operation:operation",
"//pkg/cmd/roachtest/option:option",
"//pkg/cmd/roachtest/option:option_test",
"//pkg/cmd/roachtest/registry:registry",
Expand Down
9 changes: 9 additions & 0 deletions pkg/cmd/roachtest/operation/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
name = "operation",
srcs = ["operation_interface.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation",
visibility = ["//visibility:public"],
deps = ["//pkg/roachprod/logger"],
)
29 changes: 29 additions & 0 deletions pkg/cmd/roachtest/operation/operation_interface.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package operation

import "github.com/cockroachdb/cockroach/pkg/roachprod/logger"

type Operation interface {
// ClusterCockroach returns the path to the Cockroach binary on the target
// cluster.
ClusterCockroach() string
Name() string
Error(args ...interface{})
Errorf(string, ...interface{})
FailNow()
Fatal(args ...interface{})
Fatalf(format string, args ...interface{})
Failed() bool

L() *logger.Logger
Status(args ...interface{})
}
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/registry/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"encryption.go",
"errors.go",
"filter.go",
"operation_spec.go",
"owners.go",
"registry_interface.go",
"tag.go",
Expand All @@ -15,6 +16,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/operation",
"//pkg/cmd/roachtest/spec",
"//pkg/cmd/roachtest/test",
"//pkg/internal/team",
Expand Down
80 changes: 80 additions & 0 deletions pkg/cmd/roachtest/registry/operation_spec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package registry

import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation"
)

// OperationDependency specifies what an operation requires from a cluster to
// be able to run. The zero value is the simplest dependency (i.e. an existant
// cluster with nodes) and is always implied even if unspecified. All non-zero
// values could require additional pre-Run checks from any runner/scheduler
// to verify if this operation can be run.
type OperationDependency int

const (
OperationRequiresNodes OperationDependency = iota
OperationRequiresPopulatedDatabase
OperationRequiresZeroUnavailableRanges
OperationRequiresZeroLaggingRanges
)

// OperationSpec is a spec for a roachtest operation.
type OperationSpec struct {
Skip string // if non-empty, operation will be skipped

Name string
// Owner is the name of the team responsible for signing off on failures of
// this operation that happen in the release process. This must be one of a limited
// set of values (the keys in the roachtestTeams map).
Owner Owner
// The maximum duration the operation is allowed to run before it is considered
// failed. This timeout applies _individually_ to Run() and Cleanup() each, but
// does not include any scheduling waits for either method.
Timeout time.Duration

// CompatibleClouds is the set of clouds this operation can run on (e.g. AllClouds,
// OnlyGCE, etc). Must be set.
CompatibleClouds CloudSet

// Dependencies specify the types of resources required for this roachtest
// operation to work. This will be used in filtering eligible operations to
// run. Multiple dependencies could be specified, and any schedulers will take
// care of ensuring those dependencies are met before running Run().
//
// TODO(bilal): Unused.
Dependencies []OperationDependency

// CanRunConcurrently specifies whether this operation is safe to run
// concurrently with other operations that have CanRunConcurrently = true. For
// instance, a random-index addition is safe to run concurrently with most
// other operations like node kills, while a drop would need to run on its own
// and will have CanRunConcurrently = false.
//
// TODO(bilal): Unused.
CanRunConcurrently bool

// Run is the operation function.
Run func(ctx context.Context, o operation.Operation, c cluster.Cluster)

// CleanupWaitTime is the min time to wait before running the Cleanup method.
// Note that schedulers/runners are free to wait longer than this amount of
// time. Any intermediary wait of this sort does not count towards Timeout.
CleanupWaitTime time.Duration

// Cleanup is the operation cleanup function, if defined.
Cleanup func(ctx context.Context, o operation.Operation, c cluster.Cluster)
}
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/registry/registry_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ import (
type Registry interface {
MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec
Add(TestSpec)
AddOperation(OperationSpec)
PromFactory() promauto.Factory
}
56 changes: 56 additions & 0 deletions pkg/cmd/roachtest/test_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

type testRegistryImpl struct {
m map[string]*registry.TestSpec
ops map[string]*registry.OperationSpec
snapshotPrefixes map[string]struct{}

promRegistry *prometheus.Registry
Expand All @@ -37,6 +38,7 @@ var _ registry.Registry = (*testRegistryImpl)(nil)
func makeTestRegistry() testRegistryImpl {
return testRegistryImpl{
m: make(map[string]*registry.TestSpec),
ops: make(map[string]*registry.OperationSpec),
snapshotPrefixes: make(map[string]struct{}),
promRegistry: prometheus.NewRegistry(),
}
Expand Down Expand Up @@ -65,6 +67,19 @@ func (r *testRegistryImpl) Add(spec registry.TestSpec) {
r.m[spec.Name] = &spec
}

// AddOperation adds an operation to the registry.
func (r *testRegistryImpl) AddOperation(spec registry.OperationSpec) {
if _, ok := r.ops[spec.Name]; ok {
fmt.Fprintf(os.Stderr, "operation %s already registered\n", spec.Name)
os.Exit(1)
}
if err := r.prepareOpSpec(&spec); err != nil {
fmt.Fprintf(os.Stderr, "%+v\n", err)
os.Exit(1)
}
r.ops[spec.Name] = &spec
}

// MakeClusterSpec makes a cluster spec. It should be used over `spec.MakeClusterSpec`
// because this method also adds options baked into the registry.
func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec {
Expand All @@ -73,6 +88,35 @@ func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) s

const testNameRE = "^[a-zA-Z0-9-_=/,]+$"

// prepareOpSpec validates a spec and does minor massaging of its fields.
func (r *testRegistryImpl) prepareOpSpec(spec *registry.OperationSpec) error {
// Operations follow the same naming conventions as tests and have the same
// alphabet of allowable characters in names.
if matched, err := regexp.MatchString(testNameRE, spec.Name); err != nil || !matched {
return fmt.Errorf("%s: Name must match this regexp: %s", spec.Name, testNameRE)
}

spec.CompatibleClouds.AssertInitialized()

if spec.Run == nil {
return fmt.Errorf("%s: must specify Run", spec.Name)
}
if spec.Cleanup == nil && spec.CleanupWaitTime != 0 {
return fmt.Errorf("%s: must specify Cleanup if cleanup time is specified", spec.Name)
}

// All operations must have an owner so the release team knows who signs off on
// failures and so the github issue poster knows who to assign it to.
if spec.Owner == `` {
return fmt.Errorf(`%s: unspecified owner`, spec.Name)
}
if !spec.Owner.IsValid() {
return fmt.Errorf(`%s: unknown owner %q`, spec.Name, spec.Owner)
}

return nil
}

// prepareSpec validates a spec and does minor massaging of its fields.
func (r *testRegistryImpl) prepareSpec(spec *registry.TestSpec) error {
if matched, err := regexp.MatchString(testNameRE, spec.Name); err != nil || !matched {
Expand Down Expand Up @@ -130,3 +174,15 @@ func (r testRegistryImpl) AllTests() []registry.TestSpec {
})
return tests
}

// AllOperations returns all the operation specs, sorted by name.
func (r testRegistryImpl) AllOperations() []registry.OperationSpec {
var ops []registry.OperationSpec
for _, t := range r.ops {
ops = append(ops, *t)
}
sort.Slice(ops, func(i, j int) bool {
return ops[i].Name < ops[j].Name
})
return ops
}
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/tests/restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ func (m *mockRegistry) Add(spec registry.TestSpec) {
m.testNames = append(m.testNames, spec.Name)
}

func (m *mockRegistry) AddOperation(spec registry.OperationSpec) {
// No-op.
}

func (m *mockRegistry) PromFactory() promauto.Factory {
return promauto.With(nil)
}
Expand Down

0 comments on commit a9f1d5b

Please sign in to comment.