Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ForgetLeader #78

Merged
merged 3 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,32 @@ type Node interface {
// TransferLeadership attempts to transfer leadership to the given transferee.
TransferLeadership(ctx context.Context, lead, transferee uint64)

// ForgetLeader forgets a follower's current leader, changing it to None. It
// remains a leaderless follower in the current term, without campaigning.
//
// This is useful with PreVote+CheckQuorum, where followers will normally not
// grant pre-votes if they've heard from the leader in the past election
// timeout interval. Leaderless followers can grant pre-votes immediately, so
// if a quorum of followers have strong reason to believe the leader is dead
// (for example via a side-channel or external failure detector) and forget it
// then they can elect a new leader immediately, without waiting out the
// election timeout. They will also revert to normal followers if they hear
// from the leader again, or transition to candidates on an election timeout.
//
// For example, consider a three-node cluster where 1 is the leader and 2+3
// have just received a heartbeat from it. If 2 and 3 believe the leader has
// now died (maybe they know that an orchestration system shut down 1's VM),
// we can instruct 2 to forget the leader and 3 to campaign. 2 will then be
// able to grant 3's pre-vote and elect 3 as leader immediately (normally 2
// would reject the vote until an election timeout passes because it has heard
// from the leader recently). However, 3 cannot campaign unilaterally: a
// quorum has to agree that the leader is dead, which avoids disrupting the
// leader if individual nodes are wrong about it being dead.
//
// This does nothing with ReadOnlyLeaseBased, since it would allow a new
// leader to be elected without the old leader knowing.
ForgetLeader(ctx context.Context) error

// ReadIndex requests a read state. The read state will be set in the ready.
// Read state has a read index. Once the application advances further than the read
// index, any linearizable read requests issued before the read request can be
Expand Down Expand Up @@ -575,6 +601,10 @@ func (n *node) TransferLeadership(ctx context.Context, lead, transferee uint64)
}
}

// ForgetLeader builds a message of type pb.MsgForgetLeader with no other
// fields set and hands it to n.step, returning whatever error step reports.
func (n *node) ForgetLeader(ctx context.Context) error {
	msg := pb.Message{Type: pb.MsgForgetLeader}
	return n.step(ctx, msg)
}

// ReadIndex wraps rctx as the Data of a single entry in a pb.MsgReadIndex
// message and hands that message to n.step, returning step's error.
func (n *node) ReadIndex(ctx context.Context, rctx []byte) error {
	entries := []pb.Entry{{Data: rctx}}
	msg := pb.Message{Type: pb.MsgReadIndex, Entries: entries}
	return n.step(ctx, msg)
}
11 changes: 11 additions & 0 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,8 @@ func stepLeader(r *raft, m pb.Message) error {
sendMsgReadIndexResponse(r, m)

return nil
case pb.MsgForgetLeader:
return nil // noop on leader
}

// All other message types require a progress for m.From (pr).
Expand Down Expand Up @@ -1661,6 +1663,15 @@ func stepFollower(r *raft, m pb.Message) error {
}
m.To = r.lead
r.send(m)
case pb.MsgForgetLeader:
if r.readOnly.option == ReadOnlyLeaseBased {
r.logger.Error("ignoring MsgForgetLeader due to ReadOnlyLeaseBased")
return nil
}
if r.lead != None {
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
}
case pb.MsgTimeoutNow:
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From)
// Leadership transfers never use pre-vote even if r.preVote is true; we
Expand Down
141 changes: 72 additions & 69 deletions raftpb/raft.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions raftpb/raft.proto
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum MessageType {
MsgStorageAppendResp = 20;
MsgStorageApply = 21;
MsgStorageApplyResp = 22;
MsgForgetLeader = 23;
// NOTE: when adding new message types, remember to update the isLocalMsg and
// isResponseMsg arrays in raft/util.go and update the corresponding tests in
// raft/util_test.go.
Expand Down
22 changes: 13 additions & 9 deletions rafttest/interaction_env_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string {
//
// transfer-leadership from=1 to=4
err = env.handleTransferLeadership(t, d)
case "forget-leader":
// Forgets the current leader of the given node.
//
// Example:
//
// forget-leader 1
err = env.handleForgetLeader(t, d)
case "propose":
// Propose an entry.
//
Expand Down Expand Up @@ -182,20 +189,17 @@ func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string {
default:
err = fmt.Errorf("unknown command")
}
if err != nil {
env.Output.WriteString(err.Error())
}
// NB: the highest log level suppresses all output, including that of the
// handlers. This comes in useful during setup which can be chatty.
// However, errors are always logged.
if env.Output.Len() == 0 {
return "ok"
}
if env.Output.Lvl == len(lvlNames)-1 {
if err != nil {
if err != nil {
if env.Output.Quiet() {
return err.Error()
}
return "ok (quiet)"
env.Output.WriteString(err.Error())
}
if env.Output.Len() == 0 {
return "ok"
}
return env.Output.String()
}
Expand Down
9 changes: 9 additions & 0 deletions rafttest/interaction_env_handler_add_nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ func (env *InteractionEnv) handleAddNodes(t *testing.T, d datadriven.TestData) e
arg.Scan(t, i, &cfg.PreVote)
case "checkquorum":
arg.Scan(t, i, &cfg.CheckQuorum)
case "read-only":
switch arg.Vals[i] {
case "safe":
cfg.ReadOnlyOption = raft.ReadOnlySafe
case "lease-based":
cfg.ReadOnlyOption = raft.ReadOnlyLeaseBased
default:
return fmt.Errorf("invalid read-only option %q", arg.Vals[i])
}
}
}
}
Expand Down
Loading