From 9a1b977366e55243fff875fc2d8fb36ef19a136f Mon Sep 17 00:00:00 2001 From: Bogdan Kanivets Date: Fri, 8 Jul 2022 02:08:10 -0700 Subject: [PATCH] clientv3/balance: fixed flaky balancer tests - added verification step to indirectly verify that all peers are in balancer subconn list Signed-off-by: Bogdan Kanivets --- clientv3/balancer/balancer_test.go | 96 +++++++++++++++++------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/clientv3/balancer/balancer_test.go b/clientv3/balancer/balancer_test.go index c31ddbe45a11..9ca13e55ec5f 100644 --- a/clientv3/balancer/balancer_test.go +++ b/clientv3/balancer/balancer_test.go @@ -19,7 +19,6 @@ import ( "fmt" "strings" "testing" - "time" "go.etcd.io/etcd/clientv3/balancer/picker" "go.etcd.io/etcd/clientv3/balancer/resolver/endpoint" @@ -92,24 +91,32 @@ func TestRoundRobinBalancedResolvableNoFailover(t *testing.T) { return picked, err } - prev, switches := "", 0 + var picked string + available := make(map[string]struct{}) + // cycle through all peers to indirectly verify that balancer subconn list is fully loaded + // otherwise we can't reliably count switches in the next step + for len(available) < tc.serverCount { + picked, err = reqFunc(context.Background()) + if err != nil { + t.Fatalf("Unexpected failure %v", err) + } + available[picked] = struct{}{} + } + + // verify that we round robin + prev, switches := picked, 0 for i := 0; i < tc.reqN; i++ { - picked, err := reqFunc(context.Background()) + picked, err = reqFunc(context.Background()) if err != nil { t.Fatalf("#%d: unexpected failure %v", i, err) } - if prev == "" { - prev = picked - continue - } if prev != picked { switches++ } prev = picked } - if tc.serverCount > 1 && switches < tc.reqN-3 { // -3 for initial resolutions - // TODO: FIX ME - t.Skipf("expected balanced loads for %d requests, got switches %d", tc.reqN, switches) + if tc.serverCount > 1 && switches != tc.reqN { + t.Fatalf("expected balanced loads for %d requests, got switches %d", tc.reqN, switches) } }) } @@ -162,25 +169,25 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) { // stop first server, loads should be redistributed // stopped server should never be picked ms.StopAt(0) + var picked string available := make(map[string]struct{}) - for i := 1; i < serverCount; i++ { - available[eps[i]] = struct{}{} + // cycle through all peers (except 1) to indirectly verify that balancer subconn list is fully loaded + // otherwise we can't reliably count switches in the next step + for len(available) < serverCount - 1 { + picked, err = reqFunc(context.Background()) + if err != nil { + t.Fatalf("Unexpected failure %v", err) + } + available[picked] = struct{}{} } reqN := 10 - prev, switches := "", 0 + prev, switches := picked, 0 for i := 0; i < reqN; i++ { picked, err := reqFunc(context.Background()) if err != nil && strings.Contains(err.Error(), "transport is closing") { continue } - if prev == "" { // first failover - if eps[0] == picked { - t.Fatalf("expected failover from %q, picked %q", eps[0], picked) - } - prev = picked - continue - } if _, ok := available[picked]; !ok { t.Fatalf("picked unavailable address %q (available %v)", picked, available) } @@ -189,18 +196,24 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) { } prev = picked } - if switches < reqN-3 { // -3 for initial resolutions + failover - // TODO: FIX ME! - t.Skipf("expected balanced loads for %d requests, got switches %d", reqN, switches) + if switches != reqN { + t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches) } // now failed server comes back ms.StartAt(0) - // enough time for reconnecting to recovered server - time.Sleep(time.Second) + // cycle through all peers to indirectly verify that balancer subconn list is fully loaded + // otherwise we can't reliably count switches in the next step + for len(available) < serverCount { + picked, err = reqFunc(context.Background()) + if err != nil { + t.Fatalf("Unexpected failure %v", err) + } + available[picked] = struct{}{} + } - prev, switches = "", 0 + prev, switches = picked, 0 recoveredAddr, recovered := eps[0], 0 available[recoveredAddr] = struct{}{} @@ -209,10 +222,6 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) { if err != nil { t.Fatalf("#%d: unexpected failure %v", i, err) } - if prev == "" { - prev = picked - continue - } if _, ok := available[picked]; !ok { t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available) } @@ -224,10 +233,10 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) { } prev = picked } - if switches < reqN-3 { // -3 for initial resolutions + if switches != 2*reqN { t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches) } - if recovered < reqN/serverCount { + if recovered != 2*reqN/serverCount { t.Fatalf("recovered server %q got only %d requests", recoveredAddr, recovered) } } @@ -242,11 +251,10 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) { } defer ms.Stop() var eps []string - available := make(map[string]struct{}) for _, svr := range ms.Servers { eps = append(eps, svr.ResolverAddress().Addr) - available[svr.Address] = struct{}{} } + rsv, err := endpoint.NewResolverGroup("requestfail") if err != nil { t.Fatal(err) @@ -277,6 +285,18 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) { return picked, err } + var picked string + available := make(map[string]struct{}) + // cycle through all peers to indirectly verify that balancer subconn list is fully loaded + // otherwise we can't reliably count switches in the next step + for len(available) < serverCount { + picked, err = reqFunc(context.Background()) + if err != nil { + t.Fatalf("Unexpected failure %v", err) + } + available[picked] = struct{}{} + } + reqN := 20 prev, switches := "", 0 for i := 0; i < reqN; i++ { @@ -287,15 +307,11 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) { } picked, err := reqFunc(ctx) if i%2 == 0 { - if s, ok := status.FromError(err); ok && s.Code() != codes.Canceled || picked != "" { + if s, ok := status.FromError(err); ok && s.Code() != codes.Canceled { t.Fatalf("#%d: expected %v, got %v", i, context.Canceled, err) } continue } - if prev == "" && picked != "" { - prev = picked - continue - } if _, ok := available[picked]; !ok { t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available) } @@ -304,7 +320,7 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) { } prev = picked } - if switches < reqN/2-3 { // -3 for initial resolutions + failover + if switches != reqN/2 { t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches) } }