Skip to content

Commit

Permalink
checker: fix too many orphan peers that cannot be removed (#6574) (#6575)
Browse files Browse the repository at this point in the history
close #6573, ref #6574

rule-checker: fix too many orphan peers that cannot be removed
- allow a healthy orphan peer to be removed once redundant healthy orphan peers exist

Signed-off-by: nolouch <nolouch@gmail.com>

Co-authored-by: nolouch <nolouch@gmail.com>
  • Loading branch information
ti-chi-bot and nolouch authored Jun 9, 2023
1 parent 9d320ef commit 898dde2
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
7 changes: 7 additions & 0 deletions server/schedule/checker/rule_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,11 +427,18 @@ loopFits:
// If hasUnhealthyFit is true, try to remove unhealthy orphan peers only if number of OrphanPeers is >= 2.
// Ref https://github.com/tikv/pd/issues/4045
if len(fit.OrphanPeers) >= 2 {
hasHealthPeer := false
for _, orphanPeer := range fit.OrphanPeers {
if isUnhealthyPeer(orphanPeer.GetId()) {
checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId)
}
if hasHealthPeer {
// there already exists a healthy orphan peer, so we can remove other orphan Peers.
checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId)
}
hasHealthPeer = true
}
}
checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
Expand Down
47 changes: 41 additions & 6 deletions server/schedule/checker/rule_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,39 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeers() {
suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore)
}

// TestFixToManyOrphanPeers verifies that the rule checker still produces a
// "remove-orphan-peer" operator when a region carries several orphan peers
// while the peer on store 3 is reported down.
//
// Region 1 is created with its leader on store 1, followers on stores 2 and 3,
// and learners on stores 4, 5 and 6; the learners are the orphan peers.
func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() {
	for storeID := uint64(1); storeID <= 6; storeID++ {
		suite.cluster.AddLeaderStore(storeID, 1)
	}
	suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4, 5, 6})

	// expectOrphanRemovedFrom runs the checker against the region currently
	// stored in the cluster and asserts that it asks to remove the orphan peer
	// located on wantStore.
	expectOrphanRemovedFrom := func(wantStore uint64) {
		op := suite.rc.Check(suite.cluster.GetRegion(1))
		suite.NotNil(op)
		suite.Equal("remove-orphan-peer", op.Desc())
		suite.Equal(wantStore, op.Step(0).(operator.RemovePeer).FromStore)
	}

	// Case 1:
	// The peers on stores 4, 5 and 6 are orphans, and the peer on store 3 is
	// both down and pending. None of the orphans is marked down or pending, so
	// the checker is expected to pick the orphan on store 5.
	current := suite.cluster.GetRegion(1)
	downPeers := []*pdpb.PeerStats{{Peer: current.GetStorePeer(3), DownSeconds: 60000}}
	pendingPeers := []*metapb.Peer{current.GetStorePeer(3)}
	current = current.Clone(core.WithDownPeers(downPeers), core.WithPendingPeers(pendingPeers))
	suite.cluster.PutRegion(current)
	expectOrphanRemovedFrom(5)

	// Case 2:
	// The peers on stores 4, 5 and 6 are orphans, and the peer on store 3 is
	// down. The orphans on stores 4 and 5 are now pending, so the checker is
	// expected to pick the orphan on store 4.
	current = suite.cluster.GetRegion(1)
	downPeers = []*pdpb.PeerStats{{Peer: current.GetStorePeer(3), DownSeconds: 60000}}
	pendingPeers = []*metapb.Peer{current.GetStorePeer(4), current.GetStorePeer(5)}
	current = current.Clone(core.WithDownPeers(downPeers), core.WithPendingPeers(pendingPeers))
	suite.cluster.PutRegion(current)
	expectOrphanRemovedFrom(4)
}

func (suite *ruleCheckerTestSuite) TestFixOrphanPeers2() {
// check orphan peers can only be handled when all rules are satisfied.
suite.cluster.AddLabelsStore(1, 1, map[string]string{"foo": "bar"})
Expand Down Expand Up @@ -311,7 +344,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() {
suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"})
suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "follower"})
suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"})
suite.cluster.AddLeaderRegion(1, 1, 2)
suite.cluster.AddLeaderRegion(1, 1)

suite.ruleManager.SetRule(&placement.Rule{
GroupID: "pd",
Expand All @@ -328,6 +361,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() {
op := suite.rc.Check(suite.cluster.GetRegion(1))
suite.NotNil(op)
suite.Equal("add-rule-peer", op.Desc())
fmt.Println(op)
suite.Equal(uint64(3), op.Step(0).(operator.AddLearner).ToStore)
suite.True(op.Step(0).(operator.AddLearner).IsWitness)
}
Expand All @@ -336,24 +370,25 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness2() {
suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"})
suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"})
suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"})
suite.cluster.AddLeaderRegion(1, 1, 2, 3)
suite.cluster.AddLabelsStore(4, 1, map[string]string{"D": "voter"})
suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4)

suite.ruleManager.SetRule(&placement.Rule{
GroupID: "pd",
ID: "r1",
Index: 100,
Override: true,
Override: false,
Role: placement.Voter,
Count: 1,
IsWitness: true,
LabelConstraints: []placement.LabelConstraint{
{Key: "C", Op: "in", Values: []string{"voter"}},
{Key: "D", Op: "in", Values: []string{"voter"}},
},
})
op := suite.rc.Check(suite.cluster.GetRegion(1))
suite.NotNil(op)
suite.Equal("fix-witness-peer", op.Desc())
suite.Equal(uint64(3), op.Step(0).(operator.BecomeWitness).StoreID)
suite.Equal(uint64(4), op.Step(0).(operator.BecomeWitness).StoreID)
}

func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() {
Expand All @@ -365,7 +400,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() {
r := suite.cluster.GetRegion(1)
// set peer3 to witness
r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)}))

suite.cluster.PutRegion(r)
op := suite.rc.Check(r)
suite.NotNil(op)
suite.Equal("fix-non-witness-peer", op.Desc())
Expand Down

0 comments on commit 898dde2

Please sign in to comment.