Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reproduce #17529 in robustness tests #17680

Merged
merged 2 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ require (
github.com/spf13/pflag v1.0.5 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
go.etcd.io/gofail v0.1.0 // indirect
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 // indirect
go.opentelemetry.io/otel v1.27.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4=
go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA=
go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns=
go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck=
Expand Down
3 changes: 1 addition & 2 deletions tests/framework/e2e/etcd_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@ func (f *BinaryFailpoints) DeactivateHTTP(ctx context.Context, failpoint string)
return err
}
httpClient := http.Client{
// TODO: Decrease after deactivate is not blocked by sleep https://github.com/etcd-io/gofail/issues/64
Timeout: 2 * time.Second,
Timeout: time.Second,
}
if f.clientTimeout != 0 {
httpClient.Timeout = f.clientTimeout
Expand Down
2 changes: 1 addition & 1 deletion tests/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ require (
go.etcd.io/etcd/etcdutl/v3 v3.6.0-alpha.0
go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0
go.etcd.io/etcd/server/v3 v3.6.0-alpha.0
go.etcd.io/gofail v0.1.0
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390
go.etcd.io/raft/v3 v3.6.0-alpha.0
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0
go.opentelemetry.io/otel v1.27.0
Expand Down
4 changes: 2 additions & 2 deletions tests/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4=
go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA=
go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns=
go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck=
Expand Down
1 change: 1 addition & 0 deletions tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var (
RaftBeforeSaveSleep,
RaftAfterSaveSleep,
ApplyBeforeOpenSnapshot,
SleepBeforeSendWatchResponse,
}
)

Expand Down
4 changes: 1 addition & 3 deletions tests/robustness/failpoint/gofail.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var (
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
SleepBeforeSendWatchResponse Failpoint = gofailSleepAndDeactivate{"beforeSendWatchResponse", time.Second}
)

type goPanicFailpoint struct {
Expand Down Expand Up @@ -238,9 +239,6 @@ func (f gofailSleepAndDeactivate) Name() string {
}

func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
if config.ClusterSize == 1 {
return false
}
memberFailpoints := member.Failpoints()
if memberFailpoints == nil {
return false
Expand Down
20 changes: 20 additions & 0 deletions tests/robustness/makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ test-robustness-issue15271: /tmp/etcd-v3.5.7-failpoints/bin
GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue15271 --count 100 --failfast --bin-dir=/tmp/etcd-v3.5.7-failpoints/bin' make test-robustness && \
echo "Failed to reproduce" || echo "Successful reproduction"

.PHONY: test-robustness-issue17529
test-robustness-issue17529: /tmp/etcd-v3.5.12-beforeSendWatchResponse/bin
GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue17529 --count 100 --failfast --bin-dir=/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin' make test-robustness && \
echo "Failed to reproduce" || echo "Successful reproduction"

# Failpoints

GOPATH = $(shell go env GOPATH)
Expand Down Expand Up @@ -98,6 +103,21 @@ $(GOPATH)/bin/gofail: tools/mod/go.mod tools/mod/go.sum
(cd etcdutl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
FAILPOINTS=true ./build;

/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin: $(GOPATH)/bin/gofail
rm -rf /tmp/etcd-v3.5.12-beforeSendWatchResponse/
mkdir -p /tmp/etcd-v3.5.12-beforeSendWatchResponse/
git clone --depth 1 --branch v3.5.12 https://github.com/etcd-io/etcd.git /tmp/etcd-v3.5.12-beforeSendWatchResponse/
cp -r ./tests/robustness/patches/beforeSendWatchResponse /tmp/etcd-v3.5.12-beforeSendWatchResponse/
cd /tmp/etcd-v3.5.12-beforeSendWatchResponse/; \
patch -l server/etcdserver/api/v3rpc/watch.go ./beforeSendWatchResponse/watch.patch; \
patch -l build.sh ./beforeSendWatchResponse/build.patch; \
go get go.etcd.io/gofail@${GOFAIL_VERSION}; \
(cd server; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
(cd etcdctl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
(cd etcdutl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
(cd tools/mod; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
FAILPOINTS=true ./build;

/tmp/etcd-release-3.5-failpoints/bin: $(GOPATH)/bin/gofail
rm -rf /tmp/etcd-release-3.5-failpoints/
mkdir -p /tmp/etcd-release-3.5-failpoints/
Expand Down
9 changes: 9 additions & 0 deletions tests/robustness/patches/beforeSendWatchResponse/build.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@@ -25,7 +26,7 @@ GOFAIL_VERSION=$(cd tools/mod && go list -m -f {{.Version}} go.etcd.io/gofail)
toggle_failpoints() {
mode="$1"
if command -v gofail >/dev/null 2>&1; then
- run gofail "$mode" server/etcdserver/ server/mvcc/ server/wal/ server/mvcc/backend/
+ run gofail "$mode" server/etcdserver/ server/mvcc/ server/wal/ server/mvcc/backend/ server/etcdserver/api/v3rpc/
if [[ "$mode" == "enable" ]]; then
go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
12 changes: 12 additions & 0 deletions tests/robustness/patches/beforeSendWatchResponse/watch.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/server/etcdserver/api/v3rpc/watch.go b/server/etcdserver/api/v3rpc/watch.go
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this idea~

index cd834aa..e6aaf2b 100644
--- a/server/etcdserver/api/v3rpc/watch.go
+++ b/server/etcdserver/api/v3rpc/watch.go
@@ -460,6 +460,7 @@ func (sws *serverWatchStream) sendLoop() {
sws.mu.RUnlock()

var serr error
+ // gofail: var beforeSendWatchResponse struct{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the failpoint also be backported to 3.5 to ensure the issue is fixed for the latest 3.5 release?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, there is even a issue for that #17817

if !fragmented && !ok {
serr = sws.gRPCStream.Send(wr)
} else {
11 changes: 11 additions & 0 deletions tests/robustness/scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ func regressionScenarios(t *testing.T) []testScenario {
e2e.WithClusterSize(1),
),
})
scenarios = append(scenarios, testScenario{
name: "Issue17529",
profile: traffic.HighTrafficProfile,
traffic: traffic.Kubernetes,
failpoint: failpoint.SleepBeforeSendWatchResponse,
cluster: *e2e.NewConfig(
e2e.WithClusterSize(1),
e2e.WithGoFailEnabled(true),
options.WithSnapshotCount(100),
),
})
if v.Compare(version.V3_5) >= 0 {
opts := []e2e.EPClusterOption{
e2e.WithSnapshotCount(100),
Expand Down
2 changes: 1 addition & 1 deletion tests/robustness/traffic/traffic.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
var (
DefaultLeaseTTL int64 = 7200
RequestTimeout = 200 * time.Millisecond
WatchTimeout = 400 * time.Millisecond
WatchTimeout = time.Second
MultiOpTxnOpCount = 4

LowTraffic = Profile{
Expand Down
2 changes: 1 addition & 1 deletion tools/mod/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/google/addlicense v1.1.1
github.com/google/yamlfmt v0.11.0
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0
go.etcd.io/gofail v0.1.0
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390
go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116
go.etcd.io/raft/v3 v3.6.0-alpha.0
gotest.tools/gotestsum v1.11.0
Expand Down
2 changes: 2 additions & 0 deletions tools/mod/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,8 @@ go-simpler.org/sloglint v0.7.0 h1:rMZRxD9MbaGoRFobIOicMxZzum7AXNFDlez6xxJs5V4=
go-simpler.org/sloglint v0.7.0/go.mod h1:g9SXiSWY0JJh4LS39/Q0GxzP/QX2cVcbTOYhDpXrJEs=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA=
go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116 h1:QQiUXlqz+d96jyNG71NE+IGTgOK6Xlhdx+PzvfbLHlQ=
go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116/go.mod h1:F9kog+iVAuvPJucb1dkYcDcbV0g4uyGEHllTP5NrXiw=
go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns=
Expand Down
Loading