Skip to content

Commit

Permalink
[NHC] Add Fetchers, overhaul Evaluator inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
banool committed Dec 8, 2022
1 parent bbdfd32 commit 63a2913
Show file tree
Hide file tree
Showing 38 changed files with 1,416 additions and 1,102 deletions.
7 changes: 5 additions & 2 deletions aptos-move/framework/aptos-framework/doc/voting.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ Extra metadata (e.g. description, code url) can be part of the ProposalType stru
Currently, we have three attributes that are used by the voting flow.
1. RESOLVABLE_TIME_METADATA_KEY: this is used to record the resolvable time to ensure that resolution has to be done non-atomically.
2. IS_MULTI_STEP_PROPOSAL_KEY: this is used to track if a proposal is single-step or multi-step.
3. IS_MULTI_STEP_PROPOSAL_IN_EXECUTION_KEY: this attribute only exists for and applies to multi-step proposals. The value is used to
indicate if a multi-step proposal is in execution. If yes, we will disable further voting for this multi-step proposal.
3. IS_MULTI_STEP_PROPOSAL_IN_EXECUTION_KEY: this attribute only applies to multi-step proposals. A single-step proposal will not have
this field in its metadata map. The value is used to indicate if a multi-step proposal is in execution. If yes, we will disable further
voting for this multi-step proposal.
</dd>
<dt>
<code>creation_time_secs: u64</code>
Expand Down Expand Up @@ -889,6 +890,7 @@ This guarantees that voting eligibility and voting power are controlled by the r

## Function `is_proposal_resolvable`

Common checks on if a proposal is resolvable, regardless if the proposal is single-step or multi-step.


<pre><code><b>fun</b> <a href="voting.md#0x1_voting_is_proposal_resolvable">is_proposal_resolvable</a>&lt;ProposalType: store&gt;(voting_forum_address: <b>address</b>, proposal_id: u64)
Expand Down Expand Up @@ -1019,6 +1021,7 @@ there are more yes votes than no. If either of these conditions is not met, this
<b>let</b> voting_forum = <b>borrow_global_mut</b>&lt;<a href="voting.md#0x1_voting_VotingForum">VotingForum</a>&lt;ProposalType&gt;&gt;(voting_forum_address);
<b>let</b> proposal = <a href="../../aptos-stdlib/doc/table.md#0x1_table_borrow_mut">table::borrow_mut</a>(&<b>mut</b> voting_forum.proposals, proposal_id);

// Update the <a href="voting.md#0x1_voting_IS_MULTI_STEP_PROPOSAL_IN_EXECUTION_KEY">IS_MULTI_STEP_PROPOSAL_IN_EXECUTION_KEY</a> key <b>to</b> indicate that the multi-step proposal is in execution.
<b>let</b> multi_step_in_execution_key = utf8(<a href="voting.md#0x1_voting_IS_MULTI_STEP_PROPOSAL_IN_EXECUTION_KEY">IS_MULTI_STEP_PROPOSAL_IN_EXECUTION_KEY</a>);
<b>if</b> (<a href="../../aptos-stdlib/doc/simple_map.md#0x1_simple_map_contains_key">simple_map::contains_key</a>(&proposal.metadata, &multi_step_in_execution_key)) {
<b>let</b> is_multi_step_proposal_in_execution_value = <a href="../../aptos-stdlib/doc/simple_map.md#0x1_simple_map_borrow_mut">simple_map::borrow_mut</a>(&<b>mut</b> proposal.metadata, &multi_step_in_execution_key);
Expand Down
213 changes: 213 additions & 0 deletions ecosystem/node-checker/src/checker/minimum_peers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Copyright (c) Aptos
// SPDX-License-Identifier: Apache-2.0

//! These checkers are only valuable in certain contexts. For example, this is
//! not a useful checker for node registration for the AITs, since each node
//! is running in its own isolated network, where no consensus is occurring.
//! This is useful for the AIT itself though, where the nodes are participating
//! in a real network.
use crate::{
checker::{CheckResult, Checker},
get_provider,
provider::{
metrics::{get_metric, GetMetricResult, Label, MetricsProvider},
Provider, ProviderCollection,
},
};
use anyhow::Result;
use once_cell::sync::Lazy;
use prometheus_parse::Scrape;
use serde::{Deserialize, Serialize};

use super::traits::CheckerError;

/// Configuration for the minimum peers checker.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct MinimumPeersCheckerConfig {
/// Forwarded to `get_provider!` when the checker looks up the target metrics
/// provider. Falls back to `bool::default()` (`false`) when absent from the
/// config, because of `#[serde(default)]`.
#[serde(default)]
pub required: bool,

/// The minimum number of inbound connections required to be able to pass.
/// For fullnodes, it only matters that this is greater than zero if the
/// node operator wants to seed data to other nodes. Defaults to 0.
#[serde(default = "MinimumPeersCheckerConfig::default_minimum_peers_inbound")]
pub minimum_peers_inbound: u64,

/// The minimum number of outbound connections required to be able to pass.
/// This must be greater than zero for the node to be able to synchronize.
/// Defaults to 1.
#[serde(default = "MinimumPeersCheckerConfig::default_minimum_peers_outbound")]
pub minimum_peers_outbound: u64,
}

impl MinimumPeersCheckerConfig {
    /// Serde default for `minimum_peers_inbound`: no inbound connections are
    /// required.
    pub fn default_minimum_peers_inbound() -> u64 {
        const NO_INBOUND_REQUIRED: u64 = 0;
        NO_INBOUND_REQUIRED
    }

    /// Serde default for `minimum_peers_outbound`: a single outbound
    /// connection is required.
    pub fn default_minimum_peers_outbound() -> u64 {
        const ONE_OUTBOUND_REQUIRED: u64 = 1;
        ONE_OUTBOUND_REQUIRED
    }
}

/// Checker that compares the target node's inbound and outbound connection
/// counts against the minimums held in its config.
#[derive(Debug)]
pub struct MinimumPeersChecker {
config: MinimumPeersCheckerConfig,
}

impl MinimumPeersChecker {
    /// Creates a checker that enforces the minimums in `config`.
    pub fn new(config: MinimumPeersCheckerConfig) -> Self {
        Self { config }
    }

    /// Builds the `CheckResult` for one connection direction.
    ///
    /// * `connections` - the number of connections reported by the node.
    /// * `minimum` - the configured minimum for this direction.
    /// * `connection_type` - which direction is being evaluated; it supplies
    ///   the words used in the messages.
    ///
    /// When `connections >= minimum` the result is built with the value 100,
    /// otherwise with 50 plus a hint to set explicit peers and a link to the
    /// fullnode troubleshooting guide.
    fn build_evaluation(
        &self,
        connections: u64,
        minimum: u64,
        connection_type: &ConnectionType,
    ) -> CheckResult {
        let name = connection_type.get_name();
        let particle = connection_type.get_particle();
        let opposite_particle = connection_type.get_opposite_particle();
        let explanation = format!(
            "There are {} {} connections {} other nodes {} the target node (the minimum is {}).",
            connections, name, particle, opposite_particle, minimum
        );
        // The headline names only the target node, so it needs the particle
        // that relates the connection to the target ("inbound ... to the
        // target node", "outbound ... from the target node"), i.e. the
        // opposite particle, matching the explanation above.
        if connections >= minimum {
            CheckResult::new(
                format!(
                    "There are sufficient {} connections {} the target node",
                    name, opposite_particle
                ),
                100,
                explanation,
            )
        } else {
            CheckResult::new(
                format!(
                    "There are not enough {} connections {} the target node",
                    name, opposite_particle
                ),
                50,
                format!("{} Try setting explicit peers.", explanation),
            )
            .links(vec![
                "https://aptos.dev/nodes/full-node/troubleshooting-fullnode-setup".to_string(),
            ])
        }
    }
}

#[async_trait::async_trait]
impl Checker for MinimumPeersChecker {
    /// Scrapes the target node's metrics and evaluates the inbound and
    /// outbound connection counts against the configured minimums.
    ///
    /// If either connection metric is missing, the results describing the
    /// missing metrics are returned instead of the two evaluations.
    async fn check(&self, input: &ProviderCollection) -> Result<Vec<CheckResult>, CheckerError> {
        let metrics_provider = get_provider!(
            input.target_metrics_provider,
            self.config.required,
            MetricsProvider
        );
        let scrape = metrics_provider.provide().await?;

        // A failure here is reported to the caller as check results, not as a
        // CheckerError.
        let (inbound_count, outbound_count) = match get_metrics(&scrape) {
            Ok(counts) => counts,
            Err(missing_metric_results) => return Ok(missing_metric_results),
        };

        let inbound_result = self.build_evaluation(
            inbound_count,
            self.config.minimum_peers_inbound,
            &ConnectionType::Inbound,
        );
        let outbound_result = self.build_evaluation(
            outbound_count,
            self.config.minimum_peers_outbound,
            &ConnectionType::Outbound,
        );
        Ok(vec![inbound_result, outbound_result])
    }
}

//////////////////////////////////////////////////////////////////////////////
// Helpers.
//////////////////////////////////////////////////////////////////////////////

/// Name of the metric that `get_metrics` reads the connection counts from.
const METRIC: &str = "aptos_connections";

/// Label (`direction` = `inbound`) passed to `get_metric` to select the
/// inbound connection count.
static INBOUND_LABEL: Lazy<Label> = Lazy::new(|| Label {
key: "direction",
value: "inbound",
});
/// Label (`direction` = `outbound`) passed to `get_metric` to select the
/// outbound connection count.
static OUTBOUND_LABEL: Lazy<Label> = Lazy::new(|| Label {
key: "direction",
value: "outbound",
});

/// Direction of a connection relative to the target node. Used to pick the
/// words that go into the check result messages.
enum ConnectionType {
    Inbound,
    Outbound,
}

impl ConnectionType {
    /// The adjective naming this direction.
    fn get_name(&self) -> &'static str {
        match *self {
            Self::Inbound => "inbound",
            Self::Outbound => "outbound",
        }
    }

    /// The particle relating the connection to the *other* nodes
    /// ("from" other nodes for inbound, "to" other nodes for outbound).
    fn get_particle(&self) -> &'static str {
        match *self {
            Self::Inbound => "from",
            Self::Outbound => "to",
        }
    }

    /// The counterpart of `get_particle`: "to" for inbound, "from" for
    /// outbound.
    fn get_opposite_particle(&self) -> &'static str {
        match *self {
            Self::Inbound => "to",
            Self::Outbound => "from",
        }
    }
}

/// Extracts the inbound and outbound connection counts from a `Scrape`.
///
/// Returns `Ok((inbound, outbound))` when both values are present. Otherwise
/// returns `Err` holding one "Missing metric" `CheckResult` for each value
/// that was reported missing, inbound first.
fn get_metrics(metrics: &Scrape) -> Result<(u64, u64), Vec<CheckResult>> {
    // Builds the result handed back by `get_metric` when a value is absent.
    let missing_metric_result = || {
        CheckResult::new(
            "Missing metric".to_string(),
            0,
            format!(
                "The metrics from the node are missing the metric: {}",
                METRIC
            ),
        )
    };
    let inbound = get_metric(metrics, METRIC, Some(&INBOUND_LABEL), missing_metric_result);
    let outbound = get_metric(metrics, METRIC, Some(&OUTBOUND_LABEL), missing_metric_result);

    match (inbound, outbound) {
        (GetMetricResult::Present(inbound_count), GetMetricResult::Present(outbound_count)) => {
            Ok((inbound_count, outbound_count))
        }
        (inbound, outbound) => {
            // At least one value was not present: collect whatever results
            // accompany the missing ones.
            let mut missing_results = Vec::new();
            if let GetMetricResult::Missing(result) = inbound {
                missing_results.push(result);
            }
            if let GetMetricResult::Missing(result) = outbound {
                missing_results.push(result);
            }
            Err(missing_results)
        }
    }
}
40 changes: 40 additions & 0 deletions ecosystem/node-checker/src/checker/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (c) Aptos
// SPDX-License-Identifier: Apache-2.0

mod minimum_peers;
mod traits;
mod types;

use serde::{Deserialize, Serialize};
pub use traits::{Checker, CheckerError};
pub use types::{CheckResult, CheckSummary};

use self::minimum_peers::{MinimumPeersChecker, MinimumPeersCheckerConfig};

/// This enum lets us represent all the different Checkers in a config.
/// This should only be used at config reading time. The variant is selected
/// by the `type` field of the serialized config (`#[serde(tag = "type")]`).
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub enum CheckerConfig {
MinimumPeers(MinimumPeersCheckerConfig),
}

impl CheckerConfig {
    /// Consumes this config and constructs the checker it describes, boxed as
    /// a `dyn Checker`.
    pub fn try_into_boxed_checker(self) -> Result<Box<dyn Checker>, anyhow::Error> {
        let checker: Box<dyn Checker> = match self {
            Self::MinimumPeers(config) => Box::new(MinimumPeersChecker::new(config)),
        };
        Ok(checker)
    }
}

pub fn build_checkers(checkers: &[CheckerConfig]) -> Result<Vec<Box<dyn Checker>>, anyhow::Error> {
checkers
.iter()
.map(|checker| checker.try_into_boxed_checker())
.collect()
}

/// NOTE(review): presumably arguments shared by every checker; this type is
/// not referenced anywhere else in the visible source, so confirm its usage.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CommonCheckerArgs {
/// No serde default is declared, so this field must be present when
/// deserializing.
pub required: bool,
}
48 changes: 48 additions & 0 deletions ecosystem/node-checker/src/checker/traits.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) Aptos
// SPDX-License-Identifier: Apache-2.0

use std::fmt::Debug;
use thiserror::Error;

use crate::provider::{ProviderCollection, ProviderError};

use super::CheckResult;

/// A Checker is a component of NHC that is responsible for checking a
/// particular aspect of the node under investigation, be that metrics,
/// system information, API checks, load tests, etc.
#[async_trait::async_trait]
pub trait Checker: Debug + Sync + Send {
    /// This function is expected to take in a ProviderCollection
    /// and return a vec of check results. It should only return
    /// errors when there is something wrong with NHC itself or the
    /// baseline node. If something is unexpected with the target,
    /// we expect this function to return a CheckResult indicating
    /// as such.
    async fn check(
        &self,
        input: &ProviderCollection,
    ) -> Result<Vec<CheckResult>, CheckerError>;
}

/// Errors that a `Checker` can return from `check`.
#[derive(Error, Debug)]
pub enum CheckerError {
/// A provider failed. Wraps the underlying `ProviderError`, which converts
/// into this variant automatically (`#[from]`).
#[error("Provider failed to return data: {0:#}")]
ProviderError(#[from] ProviderError),

/// A request to an endpoint failed; `is_retryable` reports `true` for this
/// variant. The first field is printed as the endpoint in the message, the
/// second is the underlying error.
#[error("Something went wrong hitting endpoint {0}: {1:#}")]
RetryableEndpointError(&'static str, #[source] anyhow::Error),

/// A request to an endpoint failed; `is_retryable` reports `false` for this
/// variant. Same fields and message format as `RetryableEndpointError`.
#[error("Something went wrong hitting endpoint {0}: {1:#}")]
NonRetryableEndpointError(&'static str, #[source] anyhow::Error),
}

impl CheckerError {
    /// Reports whether this error is classified as retryable.
    ///
    /// Provider errors defer to `ProviderError::is_retryable`; the two
    /// endpoint variants are retryable and non-retryable respectively, as
    /// their names say.
    pub fn is_retryable(&self) -> bool {
        match self {
            CheckerError::ProviderError(error) => error.is_retryable(),
            // `..` ignores the payload without binding anything.
            CheckerError::RetryableEndpointError(..) => true,
            CheckerError::NonRetryableEndpointError(..) => false,
        }
    }
}
Loading

0 comments on commit 63a2913

Please sign in to comment.