Skip to content

Commit

Permalink
Merge pull request #10998 from tbg/learners-vote
Browse files Browse the repository at this point in the history
raft: let learners vote
  • Loading branch information
tbg authored Aug 8, 2019
2 parents 03bd100 + c30c2e3 commit a41bd30
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 10 deletions.
24 changes: 18 additions & 6 deletions raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,12 +920,6 @@ func (r *raft) Step(m pb.Message) error {
}

case pb.MsgVote, pb.MsgPreVote:
if r.isLearner {
// TODO: learner may need to vote, in case of node down when confchange.
r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] ignored %s from %x [logterm: %d, index: %d] at term %d: learner can not vote",
r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term)
return nil
}
// We can vote if this is a repeat of a vote we've already cast...
canVote := r.Vote == m.From ||
// ...we haven't voted and we don't think there's a leader yet in this term...
Expand All @@ -934,6 +928,24 @@ func (r *raft) Step(m pb.Message) error {
(m.Type == pb.MsgPreVote && m.Term > r.Term)
// ...and we believe the candidate is up to date.
if canVote && r.raftLog.isUpToDate(m.Index, m.LogTerm) {
// Note: it turns out that that learners must be allowed to cast votes.
// This seems counter- intuitive but is necessary in the situation in which
// a learner has been promoted (i.e. is now a voter) but has not learned
// about this yet.
// For example, consider a group in which id=1 is a learner and id=2 and
// id=3 are voters. A configuration change promoting 1 can be committed on
// the quorum `{2,3}` without the config change being appended to the
// learner's log. If the leader (say 2) fails, there are de facto two
// voters remaining. Only 3 can win an election (due to its log containing
// all committed entries), but to do so it will need 1 to vote. But 1
// considers itself a learner and will continue to do so until 3 has
// stepped up as leader, replicates the conf change to 1, and 1 applies it.
// Ultimately, by receiving a request to vote, the learner realizes that
// the candidate believes it to be a voter, and that it should act
// accordingly. The candidate's config may be stale, too; but in that case
// it won't win the election, at least in the absence of the bug discussed
// in:
// https://github.com/etcd-io/etcd/issues/7625#issuecomment-488798263.
r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] cast %s for %x [logterm: %d, index: %d] at term %d",
r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term)
// When responding to Msg{Pre,}Vote messages we include the term
Expand Down
13 changes: 9 additions & 4 deletions raft/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,16 +378,21 @@ func TestLearnerPromotion(t *testing.T) {
}
}

// TestLearnerCannotVote checks that a learner can't vote even it receives a valid Vote request.
func TestLearnerCannotVote(t *testing.T) {
// TestLearnerCanVote checks that a learner can vote when it receives a valid Vote request.
// See (*raft).Step for why this is necessary and correct behavior.
func TestLearnerCanVote(t *testing.T) {
n2 := newTestLearnerRaft(2, []uint64{1}, []uint64{2}, 10, 1, NewMemoryStorage())

n2.becomeFollower(1, None)

n2.Step(pb.Message{From: 1, To: 2, Term: 2, Type: pb.MsgVote, LogTerm: 11, Index: 11})

if len(n2.msgs) != 0 {
t.Errorf("expect learner not to vote, but received %v messages", n2.msgs)
if len(n2.msgs) != 1 {
t.Fatalf("expected exactly one message, not %+v", n2.msgs)
}
msg := n2.msgs[0]
if msg.Type != pb.MsgVoteResp && !msg.Reject {
t.Fatal("expected learner to not reject vote")
}
}

Expand Down

0 comments on commit a41bd30

Please sign in to comment.