diff --git a/internal/locate/region_request.go b/internal/locate/region_request.go index f10580bd1..af0832de9 100644 --- a/internal/locate/region_request.go +++ b/internal/locate/region_request.go @@ -1194,6 +1194,12 @@ func (s *RegionRequestSender) SendReqCtx( } } + if retryTimes > 0 && s.replicaSelector != nil && s.replicaSelector.regionStore != nil && + s.replicaSelector.targetIdx == s.replicaSelector.regionStore.workTiKVIdx { + // a retry on the leader should not use stale read, to avoid a possible DataIsNotReady error; the leader can always serve any read + req.StaleRead = false + } + var retry bool resp, retry, err = s.sendReqToRegion(bo, rpcCtx, req, timeout) if err != nil { diff --git a/internal/locate/region_request3_test.go b/internal/locate/region_request3_test.go index 3a3287e05..31974985d 100644 --- a/internal/locate/region_request3_test.go +++ b/internal/locate/region_request3_test.go @@ -909,8 +909,8 @@ func (s *testRegionRequestToThreeStoresSuite) TestSendReqWithReplicaSelector() { req = tikvrpc.NewRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("key")}) req.ReadReplicaScope = oracle.GlobalTxnScope req.TxnScope = oracle.GlobalTxnScope - req.EnableStaleRead() for i := 0; i < 5; i++ { + req.EnableStaleRead() // The request may be sent to the leader directly. We have to distinguish it. failureOnFollower := false s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) { @@ -929,6 +929,13 @@ func (s *testRegionRequestToThreeStoresSuite) TestSendReqWithReplicaSelector() { totalAttempts += replica.attempts if idx == int(state.leaderIdx) { s.Equal(1, replica.attempts) + if failureOnFollower { + // retry always goes to the leader as an ordinary read, not a stale one + s.True(!req.StaleRead) + } else { + // if the first request goes directly to the leader, it keeps the stale read flag + s.True(req.StaleRead) + } } else { s.True(replica.attempts <= 1) }