etcdserver: adjust election timeout on restart #9364
Changes from all commits: 4f67bea, 58f5219, a97b9e2, 6371853, 1d215d8, 3257823
@@ -179,6 +179,12 @@ type EtcdServer struct {
    consistIndex consistentIndex // must use atomic operations to access; keep 64-bit aligned.
    r            raftNode        // uses 64-bit atomics; keep 64-bit aligned.

    // advanceRaftTicks advances ticks of Raft node.
    // This can be used for fast-forwarding election
    // ticks in multi data-center deployments, thus
    // speeding up election process.
    advanceRaftTicks func(ticks int)

    readych chan struct{}
    Cfg     ServerConfig
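For context on what advancing ticks does at the raft layer: each call to Tick() increments the node's internal election-elapsed counter, so advancing ElectionTick-1 ticks leaves a follower exactly one tick away from starting a campaign if it hears from no leader. Below is a minimal sketch against the standalone raft package; the import path and config values are illustrative assumptions, not part of this diff, and a real caller would also drain the node's Ready() channel.

package main

import "github.com/coreos/etcd/raft"

func main() {
    c := &raft.Config{
        ID:              0x01,
        ElectionTick:    10, // e.g. election-timeout / heartbeat-interval
        HeartbeatTick:   1,
        Storage:         raft.NewMemoryStorage(),
        MaxSizePerMsg:   1024 * 1024,
        MaxInflightMsgs: 256,
    }
    n := raft.StartNode(c, []raft.Peer{{ID: 0x01}})
    defer n.Stop()

    // Fast-forward: leave the node one tick short of its election timeout,
    // so it campaigns almost immediately if no leader contacts it.
    for i := 0; i < c.ElectionTick-1; i++ {
        n.Tick()
    }
}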
@@ -445,6 +451,12 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
        hostWhitelist: cfg.HostWhitelist,
    }

    srv.advanceRaftTicks = func(ticks int) {
        for i := 0; i < ticks; i++ {
            srv.r.tick()
        }
    }

    srv.applyV2 = &applierV2store{store: srv.v2store, cluster: srv.cluster}

    srv.be = be
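The number of ticks available to fast-forward comes from the server configuration: ElectionTicks is the election timeout expressed in heartbeat-interval ticks. A quick, self-contained illustration of that arithmetic, assuming etcd's default --heartbeat-interval=100 and --election-timeout=1000 (milliseconds); the values are examples, not part of the diff.

package main

import "fmt"

func main() {
    // Assumed example values matching etcd's default flags
    // --heartbeat-interval=100 and --election-timeout=1000 (milliseconds).
    heartbeatMs := 100
    electionMs := 1000
    electionTicks := electionMs / heartbeatMs // 10 ticks to reach the election timeout

    // A fresh-start node advances electionTicks-1 ticks, so it waits only
    // about one tick (~100ms) with no leader before campaigning, instead of ~1s.
    fmt.Printf("election ticks: %d, fast-forwarded on fresh start: %d\n",
        electionTicks, electionTicks-1)
}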
@@ -527,6 +539,62 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
    }
    srv.r.transport = tr

    // fresh start
    if !haveWAL {
Reviewer: why do we need to care about restart vs fresh start? see #9364 (comment).
Author: Just easier, so that fresh start does not need to synchronize with peer connection reports. But as you suggested, let me simplify the logic (#9364 (comment)).
        ticks := cfg.ElectionTicks - 1
        plog.Infof("%s started anew; fast-forwarding %d ticks (election ticks %d) with %d found member(s)", srv.ID(), ticks, cfg.ElectionTicks, len(cl.Members()))
        srv.advanceRaftTicks(ticks)
        return srv, nil
    }

    srv.goAttach(func() {
        select {
        case <-cl.InitialAddNotify():
Reviewer: this is pretty complicated. let us just get the peer list from the existing snapshot. we do not need to ensure all the configuration in the wal file are executed.
Reviewer: the reason for that is reconfiguration is infrequent, and moving from a one-node to an N-node cluster is even more infrequent. the snapshot will contain the correct information 99% of the time.
Author: I was trying to cover all cases where there's no snapshot (which needs to populate member lists from WAL). But, agree that this should be simplified by loading members from snapshot. Will rework on this.
(A rough sketch of the snapshot-based approach appears after the diff.)
            singleNode := len(cl.Members()) == 1
            if !singleNode {
                break // multi-node
            }

            // more member-add commands may be applied,
            // in which case this is not a single-node cluster
            select {
            case <-time.After(srv.Cfg.electionTimeout()):
                singleNode = len(cl.Members()) == 1
            case <-tr.InitialPeerNotify():
                singleNode = false
            }

            // restarted single-node
            if singleNode {
                if !srv.isLeader() { // and leader has not been elected
                    ticks := cfg.ElectionTicks - 1
                    plog.Infof("%s as 1-node cluster; fast-forwarding %d ticks (election ticks %d)", srv.ID(), ticks, cfg.ElectionTicks)
                    srv.advanceRaftTicks(ticks)
                } else {
                    plog.Infof("%s started as leader to 1-node cluster", srv.ID())
                }
                return
            }

        case <-time.After(rafthttp.ConnReadTimeout):
            // slow raft config change apply
            plog.Infof("%s waited %s for member add apply but timed out", srv.ID(), rafthttp.ConnReadTimeout)
            return
        }

        // multi-node, wait for peer connection reports
        select {
        case <-tr.InitialPeerNotify():
            // adjust ticks in case of slow leader message delivery
            ticks := cfg.ElectionTicks - 3
            plog.Infof("%s initialized peer connection; fast-forwarding %d ticks (election ticks %d) with %d found member(s)", srv.ID(), ticks, cfg.ElectionTicks, len(cl.Members()))
            srv.advanceRaftTicks(ticks)

        case <-time.After(rafthttp.ConnReadTimeout):
            // connection failed, or no active peers
            plog.Infof("%s waited %s but no active peer found (or restarted 1-node cluster)", srv.ID(), rafthttp.ConnReadTimeout)
        }
    })
    return srv, nil
}
Reviewer: we still should advanceTicks for a newly started node. is there a reason not to do so?
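The snapshot-based simplification suggested in the review thread above could look roughly like the sketch below: read the member set recorded in the latest raft snapshot's ConfState and infer the member count from it, instead of waiting for configuration entries in the WAL to be applied. The helper name and the fallback for a missing snapshot are assumptions for illustration, not code from this PR.

package main

import (
    "fmt"

    "github.com/coreos/etcd/raft"
    "github.com/coreos/etcd/raft/raftpb"
)

// peerCountFromSnapshot is a hypothetical helper: it reports how many
// members the latest raft snapshot records, which distinguishes a
// restarted single-node cluster from a multi-node one most of the time.
func peerCountFromSnapshot(snap *raftpb.Snapshot) int {
    if snap == nil || raft.IsEmptySnap(*snap) {
        // No snapshot yet; a caller would fall back to the WAL or
        // conservatively treat the member as potentially single-node.
        return 1
    }
    return len(snap.Metadata.ConfState.Nodes)
}

func main() {
    snap := &raftpb.Snapshot{
        Metadata: raftpb.SnapshotMetadata{
            Index:     5,
            Term:      1,
            ConfState: raftpb.ConfState{Nodes: []uint64{0x1, 0x2, 0x3}},
        },
    }
    fmt.Println("members recorded in snapshot:", peerCountFromSnapshot(snap))
}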