Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

operator: make an infinity retry for connecting to store (#52177) #52561

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions br/pkg/backup/prepare_snap/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,8 @@ type RetryAndSplitRequestEnv struct {
}

func (r RetryAndSplitRequestEnv) ConnectToStore(ctx context.Context, storeID uint64) (PrepareClient, error) {
// Retry for about 2 minutes.
rs := utils.InitialRetryState(12, 10*time.Second, 10*time.Second)
bo := utils.Backoffer(&rs)
rs := utils.ConstantBackoff(10 * time.Second)
bo := utils.Backoffer(rs)
if r.GetBackoffer != nil {
bo = r.GetBackoffer()
}
Expand Down
2 changes: 1 addition & 1 deletion br/pkg/utils/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ go_test(
],
embed = [":utils"],
flaky = True,
shard_count = 36,
shard_count = 33,
deps = [
"//br/pkg/errors",
"//br/pkg/metautil",
Expand Down
15 changes: 15 additions & 0 deletions br/pkg/utils/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"database/sql"
"io"
"math"
"strings"
"time"

Expand Down Expand Up @@ -62,6 +63,20 @@ func isGRPCCancel(err error) bool {
return false
}

// ConstantBackoff is a backoffer that waits the same fixed duration before
// every retry. Its Attempt value never decreases, so it never reports that
// the attempts have run out.
type ConstantBackoff time.Duration

// NextBackoff returns the fixed duration to wait before the next retry.
// The error that triggered the retry is not inspected.
func (b ConstantBackoff) NextBackoff(_ error) time.Duration {
	return time.Duration(b)
}

// Attempt returns the number of remaining attempts, which is always the same
// large constant.
func (b ConstantBackoff) Attempt() int {
	// Large enough to act as "unlimited", yet small enough that arithmetic
	// on the result is unlikely to overflow an int.
	const effectivelyUnlimited = math.MaxInt16
	return effectivelyUnlimited
}

// RetryState is the mutable state needed for retrying.
// It is like the `utils.Backoffer`, but more fundamental:
// this only controls the backoff time and knows nothing about what error happens.
Expand Down
42 changes: 42 additions & 0 deletions br/pkg/utils/backoff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ package utils_test

import (
"context"
"fmt"
"io"
"math"
"testing"
"time"

Expand Down Expand Up @@ -197,3 +199,43 @@ func TestNewBackupSSTBackofferWithCancel(t *testing.T) {
context.Canceled,
}, multierr.Errors(err))
}

// TestConstantBackoff exercises utils.ConstantBackoff through utils.WithRetryV2
// in two scenarios: one checking that a wait happens between attempts, and one
// checking that a very large number of consecutive failures is tolerated.
func TestConstantBackoff(t *testing.T) {
	t.Run("backedOff", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		bo := utils.ConstantBackoff(10 * time.Millisecond)
		attempts := 0
		done := make(chan error)

		// Retry an always-failing operation in the background until the
		// context is cancelled, then hand the final error back.
		go func() {
			_, err := utils.WithRetryV2(ctx, bo, func(context.Context) (struct{}, error) {
				attempts++
				return struct{}{}, fmt.Errorf("%d times, no meaning", attempts)
			})
			done <- err
		}()

		time.Sleep(100 * time.Millisecond)
		cancel()
		// Receiving from the channel orders the counter read below after
		// every write made by the goroutine.
		require.Error(t, <-done)
		// A 10ms wait per attempt within a 100ms window must keep the call
		// count well under 20; a missing backoff would blow past that.
		require.Less(t, attempts, 20)
	})

	t.Run("infRetry", func(t *testing.T) {
		// Fail until the counter reaches zero, i.e. succeed only on call
		// number math.MaxInt16; a zero interval keeps the run fast.
		remaining := math.MaxInt16

		_, err := utils.WithRetryV2(context.Background(), utils.ConstantBackoff(0), func(context.Context) (struct{}, error) {
			remaining--
			if remaining != 0 {
				return struct{}{}, fmt.Errorf("try %d more times", remaining)
			}
			return struct{}{}, nil
		})
		require.NoError(t, err)
	})
}
1 change: 1 addition & 0 deletions br/pkg/utils/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ func WithRetryV2[T any](
allErrors = multierr.Append(allErrors, err)
select {
case <-ctx.Done():
// allErrors must not be `nil` here, so ignore the context error.
return *new(T), allErrors
case <-time.After(backoffer.NextBackoff(err)):
}
Expand Down
Loading