Skip to content

Commit

Permalink
Merge pull request #15044 from serathius/linearizability-watch
Browse files Browse the repository at this point in the history
Watch events during linearizability test and compare history
  • Loading branch information
serathius authored Jan 10, 2023
2 parents 2965817 + 8a9f848 commit ff89864
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 31 deletions.
90 changes: 59 additions & 31 deletions tests/linearizability/linearizability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package linearizability

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
Expand All @@ -25,6 +24,8 @@ import (
"time"

"github.com/anishathalye/porcupine"
"github.com/google/go-cmp/cmp"
"golang.org/x/sync/errgroup"
"golang.org/x/time/rate"

"go.etcd.io/etcd/tests/v3/framework/e2e"
Expand Down Expand Up @@ -84,52 +85,62 @@ func TestLinearizability(t *testing.T) {
}
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
failpoint := FailpointConfig{
ctx := context.Background()
clus, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&tc.config))
if err != nil {
t.Fatal(err)
}
defer clus.Close()
operations, events := testLinearizability(ctx, t, clus, FailpointConfig{
failpoint: tc.failpoint,
count: 1,
retries: 3,
waitBetweenTriggers: waitBetweenFailpointTriggers,
}
traffic := trafficConfig{
}, trafficConfig{
minimalQPS: minimalQPS,
maximalQPS: maximalQPS,
clientCount: 8,
traffic: DefaultTraffic,
}
testLinearizability(context.Background(), t, tc.config, failpoint, traffic)
})
validateEventsMatch(t, events)
checkOperationsAndPersistResults(t, operations, clus)
})
}
}

func testLinearizability(ctx context.Context, t *testing.T, config e2e.EtcdProcessClusterConfig, failpoint FailpointConfig, traffic trafficConfig) {
clus, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&config))
if err != nil {
t.Fatal(err)
}
defer clus.Close()
ctx, cancel := context.WithCancel(ctx)
go func() {
defer cancel()
err := triggerFailpoints(ctx, t, clus, failpoint)
if err != nil {
t.Error(err)
}
}()
operations := simulateTraffic(ctx, t, clus, traffic)
err = clus.Stop()
if err != nil {
t.Error(err)
}
checkOperationsAndPersistResults(t, operations, clus)
// testLinearizability runs the test components (failpoint injection, client
// traffic, and per-member watch collection) in parallel against clus and
// returns the recorded client operations and per-member watch event histories.
// Stop is propagated through a chain of canceling contexts: failpoints finish
// first, then traffic is stopped, then watch collection is stopped.
func testLinearizability(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, failpoint FailpointConfig, traffic trafficConfig) (operations []porcupine.Operation, events [][]watchEvent) {
	// Run multiple test components (traffic, failpoints, etc) in parallel and use canceling context to propagate stop signal.
	g := errgroup.Group{}
	trafficCtx, trafficCancel := context.WithCancel(ctx)
	g.Go(func() error {
		triggerFailpoints(ctx, t, clus, failpoint)
		// Let traffic run for one more second after the last failpoint so
		// recovery is also exercised before the traffic is stopped.
		time.Sleep(time.Second)
		trafficCancel()
		return nil
	})
	watchCtx, watchCancel := context.WithCancel(ctx)
	g.Go(func() error {
		operations = simulateTraffic(trafficCtx, t, clus, traffic)
		// Keep watches open one more second after traffic stops so trailing
		// events are still collected.
		time.Sleep(time.Second)
		watchCancel()
		return nil
	})
	g.Go(func() error {
		events = collectClusterWatchEvents(watchCtx, t, clus)
		return nil
	})
	// All goroutines currently return nil, but never silently drop an error
	// if one is introduced later.
	if err := g.Wait(); err != nil {
		t.Error(err)
	}
	return operations, events
}

func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) error {
func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config FailpointConfig) {
var err error
successes := 0
failures := 0
for _, proc := range clus.Procs {
if !config.failpoint.Available(proc) {
return fmt.Errorf("failpoint %q not available on %s", config.failpoint.Name(), proc.Config().Name)
t.Errorf("Failpoint %q not available on %s", config.failpoint.Name(), proc.Config().Name)
return
}
}
for successes < config.count && failures < config.retries {
Expand All @@ -143,10 +154,8 @@ func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessC
successes++
}
if successes < config.count || failures >= config.retries {
return fmt.Errorf("failed to trigger failpoints enough times, err: %v", err)
t.Errorf("failed to trigger failpoints enough times, err: %v", err)
}
time.Sleep(config.waitBetweenTriggers)
return nil
}

type FailpointConfig struct {
Expand Down Expand Up @@ -203,6 +212,25 @@ type trafficConfig struct {
traffic Traffic
}

// validateEventsMatch checks that every member's watch history is a prefix of
// the longest collected history, i.e. all members observed the same events in
// the same order. Shorter histories are expected, as event collection from a
// node is best-effort.
func validateEventsMatch(t *testing.T, ops [][]watchEvent) {
	// Guard against an empty input; indexing ops[0] below would panic.
	if len(ops) == 0 {
		return
	}
	// Move longest history to ops[0]
	maxLength := len(ops[0])
	for i := 1; i < len(ops); i++ {
		if len(ops[i]) > maxLength {
			maxLength = len(ops[i])
			ops[0], ops[i] = ops[i], ops[0]
		}
	}

	for i := 1; i < len(ops); i++ {
		length := len(ops[i])
		// We compare prefix of watch events, as we are not guaranteed to collect all events from each node.
		if diff := cmp.Diff(ops[0][:length], ops[i][:length]); diff != "" {
			t.Errorf("Events in watches do not match, %s", diff)
		}
	}
}

func checkOperationsAndPersistResults(t *testing.T, operations []porcupine.Operation, clus *e2e.EtcdProcessCluster) {
path, err := testResultsDirectory(t)
if err != nil {
Expand Down
97 changes: 97 additions & 0 deletions tests/linearizability/watch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package linearizability

import (
"context"
"sync"
"testing"
"time"

"go.uber.org/zap"

"go.etcd.io/etcd/api/v3/mvccpb"
clientv3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/tests/v3/framework/e2e"
)

// collectClusterWatchEvents opens one client per cluster member and gathers
// each member's watch events concurrently until ctx is canceled. The returned
// slice is indexed by member position in clus.Procs.
func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) [][]watchEvent {
	var (
		lock sync.Mutex
		wg   sync.WaitGroup
	)
	memberEvents := make([][]watchEvent, len(clus.Procs))
	for i, member := range clus.Procs {
		// Aggressive keepalive settings so a dead member is detected quickly.
		cfg := clientv3.Config{
			Endpoints:            member.EndpointsV3(),
			Logger:               zap.NewNop(),
			DialKeepAliveTime:    1 * time.Millisecond,
			DialKeepAliveTimeout: 5 * time.Millisecond,
		}
		c, err := clientv3.New(cfg)
		if err != nil {
			t.Fatal(err)
		}

		wg.Add(1)
		go func(idx int, client *clientv3.Client) {
			defer wg.Done()
			defer client.Close()
			collected := collectMemberWatchEvents(ctx, t, client)
			lock.Lock()
			memberEvents[idx] = collected
			lock.Unlock()
		}(i, c)
	}
	wg.Wait()
	return memberEvents
}

// collectMemberWatchEvents streams watch events from a single member until ctx
// is canceled, re-opening the watch whenever the server closes the stream.
// Returns all events observed, in the order received.
func collectMemberWatchEvents(ctx context.Context, t *testing.T, c *clientv3.Client) []watchEvent {
	events := []watchEvent{}
	// Start from revision 1 to observe the keyspace history from the beginning.
	var lastRevision int64 = 1
	for {
		// Stop retrying once the context is canceled; otherwise fall through
		// and (re)open the watch.
		select {
		case <-ctx.Done():
			return events
		default:
		}
		// Watch the whole keyspace. If the channel closes (e.g. member
		// restarted by a failpoint), the outer loop resumes from the last
		// observed header revision.
		// NOTE(review): resuming with WithRev(lastRevision) rather than
		// lastRevision+1 may re-deliver events at that revision — confirm
		// downstream comparison tolerates duplicates.
		for resp := range c.Watch(ctx, "", clientv3.WithPrefix(), clientv3.WithRev(lastRevision)) {
			lastRevision = resp.Header.Revision
			for _, event := range resp.Events {
				var op OperationType
				switch event.Type {
				case mvccpb.PUT:
					op = Put
				case mvccpb.DELETE:
					op = Delete
				}
				events = append(events, watchEvent{
					Op:       op,
					Key:      string(event.Kv.Key),
					Value:    string(event.Kv.Value),
					Revision: event.Kv.ModRevision,
				})
			}
			// Watch errors are logged but not fatal; collection is best-effort.
			if resp.Err() != nil {
				t.Logf("Watch error: %v", resp.Err())
			}
		}
	}
}

// watchEvent is a single key mutation observed through a member's watch
// stream, recorded so per-member histories can be compared for consistency.
type watchEvent struct {
	Op       OperationType // Put or Delete, mapped from the mvccpb event type
	Key      string
	Value    string
	Revision int64 // ModRevision of the key at the time of the event
}

0 comments on commit ff89864

Please sign in to comment.