Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client_batch: add test for auto re-connect idle connection when wait connection ready and fix ci #835

Merged
merged 8 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions internal/client/client_batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,10 +541,13 @@ func (c *batchCommandsClient) failPendingRequests(err error) {
}

func (c *batchCommandsClient) waitConnReady() (err error) {
if c.conn.GetState() == connectivity.Ready {
state := c.conn.GetState()
if state == connectivity.Ready {
return
}
if c.conn.GetState() == connectivity.Idle {
// Trigger the idle connection to reconnect.
// Do this outside the loop to avoid unnecessary reconnecting.
if state == connectivity.Idle {
c.conn.Connect()
}
start := time.Now()
Expand Down
83 changes: 83 additions & 0 deletions internal/client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ package client
import (
"context"
"fmt"
"math/rand"
"runtime"
"strconv"
"strings"
"sync"
Expand All @@ -53,7 +55,9 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/tikv/client-go/v2/config"
"github.com/tikv/client-go/v2/internal/logutil"
"github.com/tikv/client-go/v2/tikvrpc"
"go.uber.org/zap"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/metadata"
)
Expand Down Expand Up @@ -639,3 +643,82 @@ func TestTraceExecDetails(t *testing.T) {
})
}
}

// TestBatchClientRecoverAfterServerRestart exercises the batch client across a
// stop/restart cycle of the mock TiKV server on the same address:
//
//  1. requests succeed while the server is up,
//  2. requests fail while the server is stopped,
//  3. after the server is restarted, the test waits (up to 5 seconds) until one
//     of the batch-commands clients can be locked for sending,
//  4. requests succeed again.
func TestBatchClientRecoverAfterServerRestart(t *testing.T) {
	// NOTE(review): the function returned by UpdateGlobal is invoked
	// immediately. If it restores the previous configuration, the override is
	// undone right away and `defer` was probably intended — confirm against
	// config.UpdateGlobal.
	config.UpdateGlobal(func(conf *config.Config) {
		conf.TiKVClient.MaxBatchSize = 128
	})()

	server, port := startMockTikvService()
	require.True(t, port > 0)
	require.True(t, server.IsRunning())
	addr := server.addr
	client := NewRPCClient()
	// Deferred calls run in LIFO order: the client is closed first and the
	// server is stopped afterwards, even when the Close assertion aborts the
	// test.
	defer server.Stop()
	defer func() {
		require.NoError(t, client.Close())
	}()

	req := &tikvpb.BatchCommandsRequest_Request{Cmd: &tikvpb.BatchCommandsRequest_Request_Coprocessor{Coprocessor: &coprocessor.Request{}}}
	conn, err := client.getConnArray(addr, true)
	// conn is dereferenced below, so a failure here must stop the test.
	require.NoError(t, err)

	// While the server is up, every request should succeed.
	for i := 0; i < 100; i++ {
		_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20)
		require.NoError(t, err)
	}

	logutil.BgLogger().Info("stop mock tikv server")
	server.Stop()
	require.False(t, server.IsRunning())

	// While the server is down, every request should fail.
	for i := 0; i < 200; i++ {
		_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20)
		require.Error(t, err)
		time.Sleep(time.Millisecond * time.Duration(rand.Intn(300)))
		grpcConn := conn.Get()
		require.NotNil(t, grpcConn)
		logutil.BgLogger().Info("conn state",
			zap.String("state", grpcConn.GetState().String()),
			zap.Int("idx", i),
			zap.Int("goroutine-count", runtime.NumGoroutine()))
	}

	logutil.BgLogger().Info("restart mock tikv server")
	server.Start(addr)
	require.True(t, server.IsRunning())
	require.Equal(t, addr, server.addr)

	// Wait for the batch client to reconnect on its own.
	start := time.Now()
	for {
		grpcConn := conn.Get()
		require.NotNil(t, grpcConn)
		var cli *batchCommandsClient
		for i := range conn.batchConn.batchCommandsClients {
			if conn.batchConn.batchCommandsClients[i].tryLockForSend() {
				cli = conn.batchConn.batchCommandsClients[i]
				break
			}
		}
		// A client that could be locked for sending ends the wait; release the
		// lock before moving on.
		if cli != nil {
			cli.unlockForSend()
			break
		}
		if time.Since(start) > time.Second*5 {
			// It shouldn't take too long for batch_client to reconnect.
			require.Fail(t, "wait batch client reconnect timeout")
		}
		logutil.BgLogger().Info("goroutine count", zap.Int("count", runtime.NumGoroutine()))
		time.Sleep(time.Millisecond * 50)
	}

	// After the reconnect, requests should succeed again.
	for i := 0; i < 100; i++ {
		_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20)
		require.NoError(t, err)
	}
}
6 changes: 5 additions & 1 deletion internal/client/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,9 @@ import (

// TestMain enables failpoints and then runs the package tests under goleak's
// goroutine-leak verification, ignoring goroutines whose top function is one
// of the two names listed in opts.
func TestMain(m *testing.M) {
	util.EnableFailpoints()
	opts := []goleak.Option{
		goleak.IgnoreTopFunction("google.golang.org/grpc.(*ClientConn).WaitForStateChange"),
		goleak.IgnoreTopFunction("github.com/tikv/client-go/v2/internal/retry.newBackoffFn.func1"),
	}
	// VerifyTestMain runs the tests and exits the process, so it is called
	// exactly once, with the ignore options applied; an earlier option-less
	// call would make this one unreachable.
	goleak.VerifyTestMain(m, opts...)
}
37 changes: 28 additions & 9 deletions internal/client/mock_tikv_service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"fmt"
"net"
"sync"
"sync/atomic"
"time"

"github.com/pingcap/kvproto/pkg/coprocessor"
Expand All @@ -38,6 +39,8 @@ import (
type server struct {
tikvpb.TikvServer
grpcServer *grpc.Server
addr string
running int64 // 0: not running, 1: running
// metaChecker check the metadata of each request. Now only requests
// which need redirection set it.
metaChecker struct {
Expand Down Expand Up @@ -106,32 +109,48 @@ func (s *server) checkMetadata(ctx context.Context) error {
return nil
}

// IsRunning reports whether the running flag is currently set to 1. The flag
// is read atomically.
func (s *server) IsRunning() bool {
	flag := atomic.LoadInt64(&s.running)
	return flag == 1
}

// Stop shuts down the gRPC server, if one has been created, and clears the
// running flag.
//
// Start returns early without assigning grpcServer when it cannot listen, and
// startMockTikvService still hands such a server back to the caller, so Stop
// tolerates a nil grpcServer instead of panicking.
func (s *server) Stop() {
	if s.grpcServer != nil {
		s.grpcServer.Stop()
	}
	atomic.StoreInt64(&s.running, 0)
}

// Try to start a gRPC server and return the server instance and bound port.
func startMockTikvService() (*server, int) {
func (s *server) Start(addr string) int {
if addr == "" {
addr = fmt.Sprintf("%s:%d", "127.0.0.1", 0)
}
port := -1
lis, err := net.Listen("tcp", fmt.Sprintf("%s:%d", "127.0.0.1", 0))
lis, err := net.Listen("tcp", addr)
if err != nil {
logutil.BgLogger().Error("can't listen", zap.Error(err))
logutil.BgLogger().Error("can't start mock tikv service because no available ports")
return nil, port
return port
}
port = lis.Addr().(*net.TCPAddr).Port

server := &server{}
s := grpc.NewServer(grpc.ConnectionTimeout(time.Minute))
tikvpb.RegisterTikvServer(s, server)
server.grpcServer = s
grpcServer := grpc.NewServer(grpc.ConnectionTimeout(time.Minute))
tikvpb.RegisterTikvServer(grpcServer, s)
s.grpcServer = grpcServer
go func() {
if err = s.Serve(lis); err != nil {
if err = grpcServer.Serve(lis); err != nil {
logutil.BgLogger().Error(
"can't serve gRPC requests",
zap.Error(err),
)
}
}()
atomic.StoreInt64(&s.running, 1)
s.addr = fmt.Sprintf("%s:%d", "127.0.0.1", port)
logutil.BgLogger().Info("mock server started", zap.String("addr", s.addr))
return port
}

// startMockTikvService creates a fresh mock server and starts it with an empty
// address. It returns the server instance together with the port reported by
// Start.
func startMockTikvService() (*server, int) {
	srv := new(server)
	boundPort := srv.Start("")
	return srv, boundPort
}