From 91efbdadd9c136d3a28d847454df44297dcde155 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 25 May 2023 01:25:38 +0800 Subject: [PATCH] server: fix the leader cannot election after pd leader lost while etcd leader intact (#6447) (#6460) close tikv/pd#6403, ref tikv/pd#6447 server: fix the leader cannot election after pd leader lost while etcd leader intact Signed-off-by: ti-chi-bot Signed-off-by: nolouch Co-authored-by: ShuNing Co-authored-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- server/member/member.go | 13 ++++++++++ server/server.go | 40 ++++++++++++++++++++++++++++++ tests/server/member/member_test.go | 24 ++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/server/member/member.go b/server/member/member.go index 470ee6330b1..1297fb641ee 100644 --- a/server/member/member.go +++ b/server/member/member.go @@ -59,6 +59,8 @@ type Member struct { // etcd leader key when the PD node is successfully elected as the PD leader // of the cluster. Every write will use it to check PD leadership. memberValue string + // lastLeaderUpdatedTime is the last time when the leader is updated. + lastLeaderUpdatedTime atomic.Value } // NewMember create a new Member. @@ -121,11 +123,13 @@ func (m *Member) GetLeader() *pdpb.Member { // setLeader sets the member's PD leader. func (m *Member) setLeader(member *pdpb.Member) { m.leader.Store(member) + m.lastLeaderUpdatedTime.Store(time.Now()) } // unsetLeader unsets the member's PD leader. func (m *Member) unsetLeader() { m.leader.Store(&pdpb.Member{}) + m.lastLeaderUpdatedTime.Store(time.Now()) } // EnableLeader sets the member itself to a PD leader. @@ -143,6 +147,15 @@ func (m *Member) GetLeadership() *election.Leadership { return m.leadership } +// GetLastLeaderUpdatedTime returns the last time when the leader is updated. +func (m *Member) GetLastLeaderUpdatedTime() time.Time { + lastLeaderUpdatedTime := m.lastLeaderUpdatedTime.Load() + if lastLeaderUpdatedTime == nil { + return time.Time{} + } + return lastLeaderUpdatedTime.(time.Time) +} + // CampaignLeader is used to campaign a PD member's leadership // and make it become a PD leader. func (m *Member) CampaignLeader(leaseTimeout int64) error { diff --git a/server/server.go b/server/server.go index e0bea9ffa79..8836410f2ea 100644 --- a/server/server.go +++ b/server/server.go @@ -88,6 +88,9 @@ const ( idAllocLabel = "idalloc" recoveringMarkPath = "cluster/markers/snapshot-recovering" + + lostPDLeaderMaxTimeoutSecs = 10 + lostPDLeaderReElectionFactor = 10 ) // EtcdStartTimeout the timeout of the startup etcd. @@ -1388,6 +1391,14 @@ func (s *Server) leaderLoop() { } leader, rev, checkAgain := s.member.CheckLeader() + // add failpoint to test leader check go to stuck. + failpoint.Inject("leaderLoopCheckAgain", func(val failpoint.Value) { + memberString := val.(string) + memberID, _ := strconv.ParseUint(memberString, 10, 64) + if s.member.ID() == memberID { + checkAgain = true + } + }) if checkAgain { continue } @@ -1413,6 +1424,25 @@ func (s *Server) leaderLoop() { // To make sure the etcd leader and PD leader are on the same server. etcdLeader := s.member.GetEtcdLeader() if etcdLeader != s.member.ID() { + if s.member.GetLeader() == nil { + lastUpdated := s.member.GetLastLeaderUpdatedTime() + // use random timeout to avoid leader campaigning storm. + randomTimeout := time.Duration(rand.Intn(int(lostPDLeaderMaxTimeoutSecs)))*time.Second + lostPDLeaderMaxTimeoutSecs*time.Second + lostPDLeaderReElectionFactor*s.cfg.ElectionInterval.Duration + // add failpoint to test the campaign leader logic. + failpoint.Inject("timeoutWaitPDLeader", func() { + log.Info("timeoutWaitPDLeader is injected, skip wait other etcd leader be etcd leader") + randomTimeout = time.Duration(rand.Intn(10))*time.Millisecond + 100*time.Millisecond + }) + if lastUpdated.Add(randomTimeout).Before(time.Now()) && !lastUpdated.IsZero() && etcdLeader != 0 { + log.Info("the pd leader is lost for a long time, try to re-campaign a pd leader with resign etcd leader", + zap.Duration("timeout", randomTimeout), + zap.Time("last-updated", lastUpdated), + zap.String("current-leader-member-id", types.ID(etcdLeader).String()), + zap.String("transferee-member-id", types.ID(s.member.ID()).String()), + ) + s.member.MoveEtcdLeader(s.ctx, etcdLeader, s.member.ID()) + } + } log.Info("skip campaigning of pd leader and check later", zap.String("server-name", s.Name()), zap.Uint64("etcd-leader-id", etcdLeader), @@ -1521,6 +1551,16 @@ func (s *Server) campaignLeader() { log.Info("no longer a leader because lease has expired, pd leader will step down") return } + // add failpoint to test exit leader, failpoint judge the member is the give value, then break + failpoint.Inject("exitCampaignLeader", func(val failpoint.Value) { + memberString := val.(string) + memberID, _ := strconv.ParseUint(memberString, 10, 64) + if s.member.ID() == memberID { + log.Info("exit PD leader") + failpoint.Return() + } + }) + etcdLeader := s.member.GetEtcdLeader() if etcdLeader != s.member.ID() { log.Info("etcd leader changed, resigns pd leadership", zap.String("old-pd-leader-name", s.Name())) diff --git a/tests/server/member/member_test.go b/tests/server/member/member_test.go index 5d2b0bfdd5a..dd62c90c75a 100644 --- a/tests/server/member/member_test.go +++ b/tests/server/member/member_test.go @@ -248,6 +248,30 @@ func TestLeaderResignWithBlock(t *testing.T) { re.NoError(failpoint.Disable("github.com/tikv/pd/server/raftclusterIsBusy")) } +func TestPDLeaderLostWhileEtcdLeaderIntact(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 2) + defer cluster.Destroy() + re.NoError(err) + + err = cluster.RunInitialServers() + re.NoError(err) + + leader1 := cluster.WaitLeader() + memberID := cluster.GetServer(leader1).GetLeader().GetMemberId() + + re.NoError(failpoint.Enable("github.com/tikv/pd/server/leaderLoopCheckAgain", fmt.Sprintf("return(\"%d\")", memberID))) + re.NoError(failpoint.Enable("github.com/tikv/pd/server/exitCampaignLeader", fmt.Sprintf("return(\"%d\")", memberID))) + re.NoError(failpoint.Enable("github.com/tikv/pd/server/timeoutWaitPDLeader", `return(true)`)) + leader2 := waitLeaderChange(re, cluster, leader1) + re.NotEqual(leader1, leader2) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/leaderLoopCheckAgain")) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/exitCampaignLeader")) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/timeoutWaitPDLeader")) +} + func waitLeaderChange(re *require.Assertions, cluster *tests.TestCluster, old string) string { var leader string testutil.Eventually(re, func() bool {