Skip to content

Commit

Permalink
fix: ensure keep alive is completed in time (#4349)
Browse files Browse the repository at this point in the history
* fix: ensure keep alive is completed in time

* chore: apply suggestions from CR

* chore: use write runtime

* refactor: set META_LEASE_SECS to 5

* chore: set etcd replicas to 1

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* fix: set `MissedTickBehavior::Delay`

* chore: apply suggestions from CR
  • Loading branch information
WenyXu authored Jul 17, 2024
1 parent c595a56 commit e39f49f
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .github/actions/setup-etcd-cluster/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Setup Etcd cluster
description: Deploy Etcd cluster on Kubernetes
inputs:
etcd-replicas:
default: 3
default: 1
description: "Etcd replicas"
namespace:
default: "etcd-cluster"
Expand Down
2 changes: 1 addition & 1 deletion src/common/meta/src/distributed_time_constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub const DATANODE_LEASE_SECS: u64 = REGION_LEASE_SECS;
pub const FLOWNODE_LEASE_SECS: u64 = DATANODE_LEASE_SECS;

/// The lease seconds of metasrv leader.
pub const META_LEASE_SECS: u64 = 3;
pub const META_LEASE_SECS: u64 = 5;

/// In a lease, there are two opportunities for renewal.
pub const META_KEEP_ALIVE_INTERVAL_SECS: u64 = META_LEASE_SECS / 2;
Expand Down
105 changes: 71 additions & 34 deletions src/meta-srv/src/election/etcd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ use std::time::Duration;

use common_meta::distributed_time_constants::{META_KEEP_ALIVE_INTERVAL_SECS, META_LEASE_SECS};
use common_telemetry::{error, info, warn};
use etcd_client::{Client, GetOptions, PutOptions};
use snafu::{OptionExt, ResultExt};
use etcd_client::{Client, GetOptions, LeaderKey, LeaseKeepAliveStream, LeaseKeeper, PutOptions};
use snafu::{ensure, OptionExt, ResultExt};
use tokio::sync::broadcast;
use tokio::sync::broadcast::error::RecvError;
use tokio::sync::broadcast::Receiver;
use tokio::time::{timeout, MissedTickBehavior};

use crate::election::{Election, LeaderChangeMessage, CANDIDATES_ROOT, ELECTION_KEY};
use crate::error;
Expand Down Expand Up @@ -231,44 +232,43 @@ impl Election for EtcdElection {
.await
.context(error::EtcdFailedSnafu)?;

let mut keep_alive_interval =
tokio::time::interval(Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS));
let keep_lease_duration = Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS);
let mut keep_alive_interval = tokio::time::interval(keep_lease_duration);
keep_alive_interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
loop {
let _ = keep_alive_interval.tick().await;
keeper.keep_alive().await.context(error::EtcdFailedSnafu)?;

if let Some(res) = receiver.message().await.context(error::EtcdFailedSnafu)? {
if res.ttl() > 0 {
// Only after a successful `keep_alive` is the leader considered official.
if self
.is_leader
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.is_ok()
{
self.infancy.store(true, Ordering::Relaxed);

if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::Elected(Arc::new(leader.clone())))
{
error!(e; "Failed to send leader change message");
}
}
} else {
if self.is_leader.load(Ordering::Relaxed) {
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::StepDown(Arc::new(leader.clone())))
{
error!("Failed to send leader change message, error: {e}");
}
}
// The keep alive operation MUST be done in `META_KEEP_ALIVE_INTERVAL_SECS`.
match timeout(
keep_lease_duration,
self.keep_alive(&mut keeper, &mut receiver, leader),
)
.await
{
Ok(Ok(())) => {
let _ = keep_alive_interval.tick().await;
}
Ok(Err(err)) => {
error!(err; "Failed to keep alive");
break;
}
Err(_) => {
error!("Refresh lease timeout");
break;
}
}
}

self.is_leader.store(false, Ordering::Relaxed);
if self
.is_leader
.compare_exchange(true, false, Ordering::Relaxed, Ordering::Relaxed)
.is_ok()
{
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::StepDown(Arc::new(leader.clone())))
{
error!(e; "Failed to send leader change message");
}
}
}

Ok(())
Expand Down Expand Up @@ -297,3 +297,40 @@ impl Election for EtcdElection {
self.leader_watcher.subscribe()
}
}

impl EtcdElection {
async fn keep_alive(
&self,
keeper: &mut LeaseKeeper,
receiver: &mut LeaseKeepAliveStream,
leader: &LeaderKey,
) -> Result<()> {
keeper.keep_alive().await.context(error::EtcdFailedSnafu)?;
if let Some(res) = receiver.message().await.context(error::EtcdFailedSnafu)? {
ensure!(
res.ttl() > 0,
error::UnexpectedSnafu {
violated: "Failed to refresh the lease",
}
);

// Only after a successful `keep_alive` is the leader considered official.
if self
.is_leader
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.is_ok()
{
self.infancy.store(true, Ordering::Relaxed);

if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::Elected(Arc::new(leader.clone())))
{
error!(e; "Failed to send leader change message");
}
}
}

Ok(())
}
}
2 changes: 1 addition & 1 deletion src/meta-srv/src/metasrv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ impl Metasrv {
{
let election = election.clone();
let started = self.started.clone();
let _handle = common_runtime::spawn_bg(async move {
let _handle = common_runtime::spawn_write(async move {
while started.load(Ordering::Relaxed) {
let res = election.campaign().await;
if let Err(e) = res {
Expand Down

0 comments on commit e39f49f

Please sign in to comment.