Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add metrics around success rates #530

Merged
merged 5 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions node/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,15 @@ pub enum Cli {
#[arg(long, env("MPC_RECOVERY_MIN_TRIPLES"), default_value("20"))]
min_triples: usize,
/// At maximum, how many triples to stockpile on this node.
#[arg(long, env("MPC_RECOVERY_MAX_TRIPLES"), default_value("560"))]
#[arg(long, env("MPC_RECOVERY_MAX_TRIPLES"), default_value("640"))]
max_triples: usize,

/// At maximum, how many triple protocols can this current node introduce
/// at the same time. This should be something like `max_concurrent_gen / num_nodes`
#[arg(
long,
env("MPC_RECOVERY_MAX_CONCURRENT_INTRODUCTION"),
default_value("4")
default_value("2")
)]
max_concurrent_introduction: usize,

Expand All @@ -78,7 +78,7 @@ pub enum Cli {
#[arg(
long,
env("MPC_RECOVERY_MAX_CONCURRENT_GENERATION"),
default_value("32")
default_value("16")
)]
max_concurrent_generation: usize,

Expand All @@ -87,7 +87,7 @@ pub enum Cli {
min_presignatures: usize,

/// At maximum, how many presignatures to stockpile on the network.
#[arg(long, env("MPC_RECOVERY_MAX_PRESIGNATURES"), default_value("280"))]
#[arg(long, env("MPC_RECOVERY_MAX_PRESIGNATURES"), default_value("320"))]
max_presignatures: usize,
},
}
Expand Down
78 changes: 78 additions & 0 deletions node/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,84 @@ pub(crate) static MPC_CONTRACT_VERSION: Lazy<IntGaugeVec> = Lazy::new(|| {
.unwrap()
});

/// Gauge vector, labelled by `node_account_id`, tracking how many triple
/// generators have ever been created on this node.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS: Lazy<IntGaugeVec> = Lazy::new(|| {
    let label_names = ["node_account_id"];
    let gauge = try_create_int_gauge_vec(
        "multichain_num_total_historical_triple_generators",
        "number of all triple generators historically on the node",
        &label_names,
    );
    gauge.unwrap()
});

/// Gauge vector, labelled by `node_account_id`, tracking how many triple
/// generators have ever completed successfully on this node.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let label_names = ["node_account_id"];
        let gauge = try_create_int_gauge_vec(
            "multichain_num_total_historical_triple_generators_success",
            "number of all successful triple generators historically on the node",
            &label_names,
        );
        gauge.unwrap()
    });

/// Gauge vector, labelled by `node_account_id`, tracking how many
/// successfully generated triples were assigned to this node ("mine").
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let label_names = ["node_account_id"];
        let gauge = try_create_int_gauge_vec(
            "multichain_num_total_historical_triple_generations_mine_success",
            "number of successful triple generators that was mine historically on the node",
            &label_names,
        );
        gauge.unwrap()
    });

/// Gauge vector, labelled by `node_account_id`, tracking how many
/// presignature generators have ever been created on this node.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let label_names = ["node_account_id"];
        let gauge = try_create_int_gauge_vec(
            "multichain_num_total_historical_presignature_generators",
            "number of all presignature generators historically on the node",
            &label_names,
        );
        gauge.unwrap()
    });

/// Gauge vector, labelled by `node_account_id`, tracking how many
/// presignature generators have ever completed successfully on this node.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let label_names = ["node_account_id"];
        let gauge = try_create_int_gauge_vec(
            "multichain_num_total_historical_presignature_generators_success",
            "number of all successful presignature generators historically on the node",
            &label_names,
        );
        gauge.unwrap()
    });

/// Gauge vector, labelled by `node_account_id`, tracking how many
/// presignature generators owned by this node ("mine") have ever been created.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let label_names = ["node_account_id"];
        let gauge = try_create_int_gauge_vec(
            "multichain_num_total_historical_presignature_generators_mine",
            "number of mine presignature generators historically on the node",
            &label_names,
        );
        gauge.unwrap()
    });

/// Gauge vector, labelled by `node_account_id`, tracking how many
/// presignature generators owned by this node ("mine") have ever completed
/// successfully.
///
/// The help text says "successful" so this metric can be told apart from
/// `multichain_num_total_historical_presignature_generators_mine`, which
/// counts every owned generator regardless of outcome.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        try_create_int_gauge_vec(
            "multichain_num_total_historical_presignature_generators_mine_success",
            "number of successful mine presignature generators historically on the node",
            &["node_account_id"],
        )
        .unwrap()
    });

/// Gauge vector, labelled by `node_account_id`, tracking how many sign
/// requests were published successfully within 30 seconds.
///
/// # Panics
///
/// The initializer panics if `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_SIGN_SUCCESS_30S: Lazy<IntGaugeVec> = Lazy::new(|| {
    let label_names = ["node_account_id"];
    let gauge = try_create_int_gauge_vec(
        "multichain_sign_requests_success_30s",
        "number of successful multichain sign requests that finished within 30s, marked by publish()",
        &label_names,
    );
    gauge.unwrap()
});

pub fn try_create_int_gauge_vec(name: &str, help: &str, labels: &[&str]) -> Result<IntGaugeVec> {
check_metric_multichain_prefix(name)?;
let opts = Opts::new(name, help);
Expand Down
10 changes: 7 additions & 3 deletions node/src/protocol/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,11 @@ impl ConsensusProtocol for StartedState {
}
Ordering::Less => Err(ConsensusError::EpochRollback),
Ordering::Equal => {
let account_id = ctx.my_account_id();
let sign_queue = ctx.sign_queue();
match contract_state.participants.find_participant(account_id) {
match contract_state
.participants
.find_participant(&ctx.my_account_id().clone())
{
Some(me) => {
tracing::info!(
"started: contract state is running and we are already a participant"
Expand All @@ -130,7 +132,7 @@ impl ConsensusProtocol for StartedState {
me,
contract_state.threshold,
epoch,
account_id.clone(),
ctx.my_account_id().clone(),
ctx.cfg(),
);
let triple_manager = TripleManager::new(
Expand All @@ -140,6 +142,7 @@ impl ConsensusProtocol for StartedState {
ctx.cfg(),
self.triple_data,
ctx.triple_storage(),
ctx.my_account_id().clone(),
);
Ok(NodeState::Running(RunningState {
epoch,
Expand Down Expand Up @@ -352,6 +355,7 @@ impl ConsensusProtocol for WaitingForConsensusState {
ctx.cfg(),
vec![],
ctx.triple_storage(),
ctx.my_account_id().clone(),
);

Ok(NodeState::Running(RunningState {
Expand Down
16 changes: 15 additions & 1 deletion node/src/protocol/presignature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ impl PresignatureManager {
)?;
self.generators.insert(id, generator);
self.introduced.insert(id);
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(())
}

Expand Down Expand Up @@ -321,6 +327,9 @@ impl PresignatureManager {
false,
)?;
let generator = entry.insert(generator);
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(&mut generator.protocol)
}
Entry::Occupied(entry) => Ok(&mut entry.into_mut().protocol),
Expand Down Expand Up @@ -410,13 +419,18 @@ impl PresignatureManager {
if generator.mine {
tracing::info!(id, "assigning presignature to myself");
self.mine.push_back(*id);
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
}
self.introduced.remove(id);

crate::metrics::PRESIGNATURE_LATENCY
.with_label_values(&[&self.my_account_id.as_ref()])
.observe(generator.timestamp.elapsed().as_secs_f64());

crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
// Do not retain the protocol
return false;
}
Expand Down
5 changes: 5 additions & 0 deletions node/src/protocol/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,11 @@ impl SignatureManager {
crate::metrics::SIGN_LATENCY
.with_label_values(&[my_account_id])
.observe(time_added.elapsed().as_secs_f64());
if time_added.elapsed().as_secs() <= 30 {
crate::metrics::NUM_SIGN_SUCCESS_30S
.with_label_values(&[my_account_id])
.inc();
}
tracing::info!(%receipt_id, big_r = signature.big_r.to_base58(), s = ?signature.s, status = ?response.status, "published signature response");
}
Ok(())
Expand Down
22 changes: 18 additions & 4 deletions node/src/protocol/triple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use cait_sith::triples::{TripleGenerationOutput, TriplePub, TripleShare};
use highway::{HighwayHash, HighwayHasher};
use k256::elliptic_curve::group::GroupEncoding;
use k256::Secp256k1;
use near_lake_primitives::AccountId;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet, VecDeque};
Expand Down Expand Up @@ -109,6 +110,7 @@ pub struct TripleManager {
pub triple_storage: LockTripleNodeStorageBox,
/// triple generation protocols that failed.
pub failed_triples: HashMap<TripleId, Instant>,
pub my_account_id: AccountId,
}

impl TripleManager {
Expand All @@ -119,6 +121,7 @@ impl TripleManager {
cfg: Config,
triple_data: Vec<TripleData>,
triple_storage: LockTripleNodeStorageBox,
my_account_id: AccountId,
) -> Self {
let mut mine: VecDeque<TripleId> = VecDeque::new();
let mut all_triples = HashMap::new();
Expand All @@ -143,6 +146,7 @@ impl TripleManager {
triple_cfg: cfg.triple_cfg,
triple_storage,
failed_triples: HashMap::new(),
my_account_id,
}
}

Expand Down Expand Up @@ -187,6 +191,9 @@ impl TripleManager {
.insert(id, TripleGenerator::new(id, participants, protocol));
self.queued.push_back(id);
self.introduced.insert(id);
crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(())
}

Expand Down Expand Up @@ -339,6 +346,9 @@ impl TripleManager {
)?);
let generator = e.insert(TripleGenerator::new(id, participants, protocol));
self.queued.push_back(id);
crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(Some(&mut generator.protocol))
}
Entry::Occupied(e) => Ok(Some(&mut e.into_mut().protocol)),
Expand All @@ -362,9 +372,6 @@ impl TripleManager {
let mut messages = Vec::new();
let mut result = Ok(());
let mut triples_to_insert = Vec::new();
let triple_storage_read_lock = self.triple_storage.read().await;
let my_account_id = triple_storage_read_lock.account_id();
drop(triple_storage_read_lock);
self.generators.retain(|id, generator| {
if !self.ongoing.contains(id) {
// If the protocol is not ongoing, we should retain it for the next time
Expand Down Expand Up @@ -429,10 +436,14 @@ impl TripleManager {

if let Some(start_time) = generator.timestamp {
crate::metrics::TRIPLE_LATENCY
.with_label_values(&[&my_account_id])
.with_label_values(&[&self.my_account_id.as_ref()])
.observe(start_time.elapsed().as_secs_f64());
}

crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();

let triple = Triple {
id: *id,
share: output.0,
Expand Down Expand Up @@ -460,6 +471,9 @@ impl TripleManager {

if triple_is_mine {
self.mine.push_back(*id);
crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
}

self.triples.insert(*id, triple.clone());
Expand Down
1 change: 1 addition & 0 deletions node/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ impl TestTripleManagers {
DEFAULT_TEST_CONFIG,
vec![],
triple_storage,
num.to_string().parse().unwrap(),
)
})
.collect();
Expand Down
Loading