Skip to content

Commit

Permalink
clientv3/balance: fixed flaky balancer tests
Browse files Browse the repository at this point in the history
- added verification step to indirectly verify that all peers are in balancer subconn list

Signed-off-by: Bogdan Kanivets <bkanivets@apple.com>
  • Loading branch information
Bogdan Kanivets committed Jul 9, 2022
1 parent 852ac37 commit 78c72ec
Showing 1 changed file with 56 additions and 40 deletions.
96 changes: 56 additions & 40 deletions clientv3/balancer/balancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"fmt"
"strings"
"testing"
"time"

"go.etcd.io/etcd/clientv3/balancer/picker"
"go.etcd.io/etcd/clientv3/balancer/resolver/endpoint"
Expand Down Expand Up @@ -92,24 +91,32 @@ func TestRoundRobinBalancedResolvableNoFailover(t *testing.T) {
return picked, err
}

prev, switches := "", 0
var picked string
available := make(map[string]struct{})
// cycle through all peers to indirectly verify that balancer subconn list is fully loaded
// otherwise we can't reliably count switches in the next step
for len(available) < tc.serverCount {
picked, err = reqFunc(context.Background())
if err != nil {
t.Fatalf("Unexpected failure %v", err)
}
available[picked] = struct{}{}
}

// verify that we round robin
prev, switches := picked, 0
for i := 0; i < tc.reqN; i++ {
picked, err := reqFunc(context.Background())
picked, err = reqFunc(context.Background())
if err != nil {
t.Fatalf("#%d: unexpected failure %v", i, err)
}
if prev == "" {
prev = picked
continue
}
if prev != picked {
switches++
}
prev = picked
}
if tc.serverCount > 1 && switches < tc.reqN-3 { // -3 for initial resolutions
// TODO: FIX ME
t.Skipf("expected balanced loads for %d requests, got switches %d", tc.reqN, switches)
if tc.serverCount > 1 && switches != tc.reqN {
t.Fatalf("expected balanced loads for %d requests, got switches %d", tc.reqN, switches)
}
})
}
Expand Down Expand Up @@ -162,25 +169,25 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) {
// stop first server, loads should be redistributed
// stopped server should never be picked
ms.StopAt(0)
var picked string
available := make(map[string]struct{})
for i := 1; i < serverCount; i++ {
available[eps[i]] = struct{}{}
// cycle through all peers (except 1) to indirectly verify that balancer subconn list is fully loaded
// otherwise we can't reliably count switches in the next step
for len(available) < serverCount-1 {
picked, err = reqFunc(context.Background())
if err != nil {
t.Fatalf("Unexpected failure %v", err)
}
available[picked] = struct{}{}
}

reqN := 10
prev, switches := "", 0
prev, switches := picked, 0
for i := 0; i < reqN; i++ {
picked, err := reqFunc(context.Background())
if err != nil && strings.Contains(err.Error(), "transport is closing") {
continue
}
if prev == "" { // first failover
if eps[0] == picked {
t.Fatalf("expected failover from %q, picked %q", eps[0], picked)
}
prev = picked
continue
}
if _, ok := available[picked]; !ok {
t.Fatalf("picked unavailable address %q (available %v)", picked, available)
}
Expand All @@ -189,18 +196,24 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) {
}
prev = picked
}
if switches < reqN-3 { // -3 for initial resolutions + failover
// TODO: FIX ME!
t.Skipf("expected balanced loads for %d requests, got switches %d", reqN, switches)
if switches != reqN {
t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
}

// now failed server comes back
ms.StartAt(0)

// enough time for reconnecting to recovered server
time.Sleep(time.Second)
// cycle through all peers to indirectly verify that balancer subconn list is fully loaded
// otherwise we can't reliably count switches in the next step
for len(available) < serverCount {
picked, err = reqFunc(context.Background())
if err != nil {
t.Fatalf("Unexpected failure %v", err)
}
available[picked] = struct{}{}
}

prev, switches = "", 0
prev, switches = picked, 0
recoveredAddr, recovered := eps[0], 0
available[recoveredAddr] = struct{}{}

Expand All @@ -209,10 +222,6 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) {
if err != nil {
t.Fatalf("#%d: unexpected failure %v", i, err)
}
if prev == "" {
prev = picked
continue
}
if _, ok := available[picked]; !ok {
t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available)
}
Expand All @@ -224,10 +233,10 @@ func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) {
}
prev = picked
}
if switches < reqN-3 { // -3 for initial resolutions
if switches != 2*reqN {
t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
}
if recovered < reqN/serverCount {
if recovered != 2*reqN/serverCount {
t.Fatalf("recovered server %q got only %d requests", recoveredAddr, recovered)
}
}
Expand All @@ -242,11 +251,10 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) {
}
defer ms.Stop()
var eps []string
available := make(map[string]struct{})
for _, svr := range ms.Servers {
eps = append(eps, svr.ResolverAddress().Addr)
available[svr.Address] = struct{}{}
}

rsv, err := endpoint.NewResolverGroup("requestfail")
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -277,6 +285,18 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) {
return picked, err
}

var picked string
available := make(map[string]struct{})
// cycle through all peers to indirectly verify that balancer subconn list is fully loaded
// otherwise we can't reliably count switches in the next step
for len(available) < serverCount {
picked, err = reqFunc(context.Background())
if err != nil {
t.Fatalf("Unexpected failure %v", err)
}
available[picked] = struct{}{}
}

reqN := 20
prev, switches := "", 0
for i := 0; i < reqN; i++ {
Expand All @@ -287,15 +307,11 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) {
}
picked, err := reqFunc(ctx)
if i%2 == 0 {
if s, ok := status.FromError(err); ok && s.Code() != codes.Canceled || picked != "" {
if s, ok := status.FromError(err); ok && s.Code() != codes.Canceled {
t.Fatalf("#%d: expected %v, got %v", i, context.Canceled, err)
}
continue
}
if prev == "" && picked != "" {
prev = picked
continue
}
if _, ok := available[picked]; !ok {
t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available)
}
Expand All @@ -304,7 +320,7 @@ func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) {
}
prev = picked
}
if switches < reqN/2-3 { // -3 for initial resolutions + failover
if switches != reqN/2 {
t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
}
}

0 comments on commit 78c72ec

Please sign in to comment.