Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Add more metrics to prometheus #5034

Merged
merged 12 commits into from
Mar 3, 2020
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions bin/node-template/node/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ pub fn new_full(config: Configuration<GenesisConfig>)
on_exit: service.on_exit(),
telemetry_on_connect: Some(service.telemetry_on_connect_stream()),
voting_rule: grandpa::VotingRulesBuilder::default().build(),
prometheus_registry: service.prometheus_registry()
};

// the GRANDPA voter task is considered infallible, i.e.
Expand Down
1 change: 1 addition & 0 deletions bin/node/cli/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ macro_rules! new_full {
on_exit: service.on_exit(),
telemetry_on_connect: Some(service.telemetry_on_connect_stream()),
voting_rule: grandpa::VotingRulesBuilder::default().build(),
prometheus_registry: service.prometheus_registry(),
};

// the GRANDPA voter task is considered infallible, i.e.
Expand Down
1 change: 1 addition & 0 deletions client/finality-grandpa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ sc-network = { version = "0.8.0-alpha.2", path = "../network" }
sc-network-gossip = { version = "0.8.0-alpha.2", path = "../network-gossip" }
sp-finality-tracker = { version = "2.0.0-alpha.2", path = "../../primitives/finality-tracker" }
sp-finality-grandpa = { version = "2.0.0-alpha.2", path = "../../primitives/finality-grandpa" }
prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.8.0-alpha.2" }
sc-block-builder = { version = "0.8.0-alpha.2", path = "../block-builder" }
finality-grandpa = { version = "0.11.1", features = ["derive-codec"] }
pin-project = "0.4.6"
Expand Down
31 changes: 31 additions & 0 deletions client/finality-grandpa/src/environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ use crate::justification::GrandpaJustification;
use crate::until_imported::UntilVoteTargetImported;
use crate::voting_rule::VotingRule;
use sp_finality_grandpa::{AuthorityId, AuthoritySignature, SetId, RoundNumber};
use prometheus_endpoint::{Gauge, U64, register, PrometheusError};

type HistoricalVotes<Block> = finality_grandpa::HistoricalVotes<
<Block as BlockT>::Hash,
Expand Down Expand Up @@ -372,6 +373,24 @@ impl<Block: BlockT> SharedVoterSetState<Block> {
}
}

/// Prometheus metrics for GRANDPA.
#[derive(Clone)]
pub(crate) struct Metrics {
finality_grandpa_round: Gauge<U64>,
}

impl Metrics {
pub(crate) fn register(registry: &prometheus_endpoint::Registry) -> Result<Self, PrometheusError> {
Ok(Self {
finality_grandpa_round: register(
Gauge::new("finality_grandpa_round", "Highest completed GRANDPA round.")?,
registry
)?,
})
}
}


/// The environment we run GRANDPA in.
pub(crate) struct Environment<Backend, Block: BlockT, C, N: NetworkT<Block>, SC, VR> {
pub(crate) client: Arc<C>,
Expand All @@ -384,6 +403,7 @@ pub(crate) struct Environment<Backend, Block: BlockT, C, N: NetworkT<Block>, SC,
pub(crate) set_id: SetId,
pub(crate) voter_set_state: SharedVoterSetState<Block>,
pub(crate) voting_rule: VR,
pub(crate) metrics: Option<Metrics>,
pub(crate) _phantom: PhantomData<Backend>,
}

Expand All @@ -397,6 +417,17 @@ impl<Backend, Block: BlockT, C, N: NetworkT<Block>, SC, VR> Environment<Backend,
self.voter_set_state.with(|voter_set_state| {
if let Some(set_state) = f(&voter_set_state)? {
*voter_set_state = set_state;

if let Some(metrics) = self.metrics.as_ref() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using environment.rs as a source of truth for the current round was a guess from my side. @andresilva could you approve that that is a good idea?

if let VoterSetState::Live { completed_rounds, .. } = voter_set_state {
let highest = completed_rounds.rounds.iter()
.map(|round| round.number)
.max()
.expect("There is always one completed round (genesis); qed");

metrics.finality_grandpa_round.set(highest);
}
}
}
Ok(())
})
Expand Down
12 changes: 11 additions & 1 deletion client/finality-grandpa/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ pub use voting_rule::{
};

use aux_schema::PersistentData;
use environment::{Environment, VoterSetState};
use environment::{Environment, VoterSetState, Metrics};
use import::GrandpaBlockImport;
use until_imported::UntilGlobalMessageBlocksImported;
use communication::{NetworkBridge, Network as NetworkT};
Expand Down Expand Up @@ -551,6 +551,8 @@ pub struct GrandpaParams<Block: BlockT, C, N, SC, VR, X> {
pub telemetry_on_connect: Option<futures::channel::mpsc::UnboundedReceiver<()>>,
/// A voting rule used to potentially restrict target votes.
pub voting_rule: VR,
/// The prometheus metrics registry.
pub prometheus_registry: Option<prometheus_endpoint::Registry>,
}

/// Run a GRANDPA voter as a task. Provide configuration and a link to a
Expand All @@ -576,6 +578,7 @@ pub fn run_grandpa_voter<Block: BlockT, BE: 'static, C, N, SC, VR, X>(
on_exit,
telemetry_on_connect,
voting_rule,
prometheus_registry,
} = grandpa_params;

// NOTE: we have recently removed `run_grandpa_observer` from the public
Expand Down Expand Up @@ -634,6 +637,7 @@ pub fn run_grandpa_voter<Block: BlockT, BE: 'static, C, N, SC, VR, X>(
voting_rule,
persistent_data,
voter_commands_rx,
prometheus_registry,
);

let voter_work = voter_work
Expand Down Expand Up @@ -673,6 +677,7 @@ where
voting_rule: VR,
persistent_data: PersistentData<Block>,
voter_commands_rx: mpsc::UnboundedReceiver<VoterCommand<Block::Hash, NumberFor<Block>>>,
prometheus_registry: Option<prometheus_endpoint::Registry>,
) -> Self {

let voters = persistent_data.authority_set.current_authorities();
Expand All @@ -687,6 +692,10 @@ where
authority_set: persistent_data.authority_set.clone(),
consensus_changes: persistent_data.consensus_changes.clone(),
voter_set_state: persistent_data.set_state.clone(),
metrics: prometheus_registry.map(|registry| {
Metrics::register(&registry)
.expect("Other metrics would have failed to register before these; qed")
}),
_phantom: PhantomData,
});

Expand Down Expand Up @@ -807,6 +816,7 @@ where
consensus_changes: self.env.consensus_changes.clone(),
network: self.env.network.clone(),
voting_rule: self.env.voting_rule.clone(),
metrics: self.env.metrics.clone(),
_phantom: PhantomData,
});

Expand Down
6 changes: 6 additions & 0 deletions client/finality-grandpa/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ fn run_to_completion_with<F>(
on_exit: Exit,
telemetry_on_connect: None,
voting_rule: (),
prometheus_registry: None,
};
let voter = run_grandpa_voter(grandpa_params).expect("all in order with client and network");

Expand Down Expand Up @@ -578,6 +579,7 @@ fn finalize_3_voters_1_full_observer() {
on_exit: Exit,
telemetry_on_connect: None,
voting_rule: (),
prometheus_registry: None,
};

voters.push(run_grandpa_voter(grandpa_params).expect("all in order with client and network"));
Expand Down Expand Up @@ -741,6 +743,7 @@ fn transition_3_voters_twice_1_full_observer() {
on_exit: Exit,
telemetry_on_connect: None,
voting_rule: (),
prometheus_registry: None,
};
let voter = run_grandpa_voter(grandpa_params).expect("all in order with client and network");

Expand Down Expand Up @@ -1166,6 +1169,7 @@ fn voter_persists_its_votes() {
on_exit: Exit,
telemetry_on_connect: None,
voting_rule: VotingRulesBuilder::default().build(),
prometheus_registry: None,
};

let voter = run_grandpa_voter(grandpa_params)
Expand Down Expand Up @@ -1511,6 +1515,7 @@ fn voter_catches_up_to_latest_round_when_behind() {
on_exit: Exit,
telemetry_on_connect: None,
voting_rule: (),
prometheus_registry: None,
};

Box::pin(run_grandpa_voter(grandpa_params).expect("all in order with client and network"))
Expand Down Expand Up @@ -1642,6 +1647,7 @@ fn grandpa_environment_respects_voting_rules() {
voters: Arc::new(authority_set.current_authorities()),
network,
voting_rule,
metrics: None,
_phantom: PhantomData,
}
};
Expand Down
1 change: 1 addition & 0 deletions client/network/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ sp-consensus = { version = "0.8.0-alpha.2", path = "../../primitives/consensus/c
sp-consensus-babe = { version = "0.8.0-alpha.2", path = "../../primitives/consensus/babe" }
sp-core = { version = "2.0.0-alpha.2", path = "../../primitives/core" }
sp-runtime = { version = "2.0.0-alpha.2", path = "../../primitives/runtime" }
prometheus-endpoint = { package = "substrate-prometheus-endpoint", version = "0.8.0-alpha.2", path = "../../utils/prometheus" }
thiserror = "1"
unsigned-varint = { version = "0.3.1", features = ["futures", "futures-codec"] }
void = "1.0.2"
Expand Down
4 changes: 4 additions & 0 deletions client/network/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use core::{fmt, iter};
use std::{future::Future, pin::Pin};
use std::{error::Error, fs, io::{self, Write}, net::Ipv4Addr, path::{Path, PathBuf}, sync::Arc};
use zeroize::Zeroize;
use prometheus_endpoint::Registry;

/// Network initialization parameters.
pub struct Params<B: BlockT, H: ExHashT> {
Expand Down Expand Up @@ -90,6 +91,9 @@ pub struct Params<B: BlockT, H: ExHashT> {

/// Type to check incoming block announcements.
pub block_announce_validator: Box<dyn BlockAnnounceValidator<B> + Send>,

/// Registry for recording prometheus metrics to.
pub metrics_registry: Option<Registry>,
}

bitflags! {
Expand Down
3 changes: 3 additions & 0 deletions client/network/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ pub enum Error {
/// The second peer id that was found for the bootnode.
second_id: PeerId,
},
/// Prometheus metrics error.
Prometheus(prometheus_endpoint::PrometheusError)
}

// Make `Debug` use the `Display` implementation.
Expand All @@ -60,6 +62,7 @@ impl std::error::Error for Error {
Error::Io(ref err) => Some(err),
Error::Client(ref err) => Some(err),
Error::DuplicateBootnode { .. } => None,
Error::Prometheus(ref err) => Some(err),
}
}
}
41 changes: 38 additions & 3 deletions client/network/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use libp2p::swarm::{NetworkBehaviour, SwarmBuilder, SwarmEvent};
use parking_lot::Mutex;
use sc_peerset::PeersetHandle;
use sp_runtime::{traits::{Block as BlockT, NumberFor}, ConsensusEngineId};
use prometheus_endpoint::{Registry, Gauge, U64, register, PrometheusError};

use crate::{behaviour::{Behaviour, BehaviourOut}, config::{parse_str_addr, parse_addr}};
use crate::{transport, config::NonReservedPeerMode, ReputationChange};
Expand Down Expand Up @@ -294,6 +295,10 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
from_worker,
light_client_rqs: params.on_demand.and_then(|od| od.extract_receiver()),
event_streams: Vec::new(),
metrics: match params.metrics_registry {
Some(registry) => Some(Metrics::register(&registry)?),
None => None
}
})
}

Expand Down Expand Up @@ -727,6 +732,26 @@ pub struct NetworkWorker<B: BlockT + 'static, H: ExHashT> {
light_client_rqs: Option<mpsc::UnboundedReceiver<RequestData<B>>>,
/// Senders for events that happen on the network.
event_streams: Vec<mpsc::UnboundedSender<Event>>,
/// Prometheus network metrics.
metrics: Option<Metrics>
}

struct Metrics {
is_major_syncing: Gauge<U64>,
peers_count: Gauge<U64>,
Copy link
Contributor

@mxinden mxinden Feb 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving this to client/network looks a lot cleaner. 👍

}

impl Metrics {
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
Ok(Self {
is_major_syncing: register(Gauge::new(
"is_major_syncing", "Whether the node is performing a major sync or not.",
)?, registry)?,
peers_count: register(Gauge::new(
"peers_count", "Number of network gossip peers",
)?, registry)?,
})
}
}

impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
Expand Down Expand Up @@ -818,16 +843,26 @@ impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
};
}

let num_connected_peers = this.network_service.user_protocol_mut().num_connected_peers();

// Update the variables shared with the `NetworkService`.
this.num_connected.store(this.network_service.user_protocol_mut().num_connected_peers(), Ordering::Relaxed);
this.num_connected.store(num_connected_peers, Ordering::Relaxed);
{
let external_addresses = Swarm::<B, H>::external_addresses(&this.network_service).cloned().collect();
*this.external_addresses.lock() = external_addresses;
}
this.is_major_syncing.store(match this.network_service.user_protocol_mut().sync_state() {

let is_major_syncing = match this.network_service.user_protocol_mut().sync_state() {
SyncState::Idle => false,
SyncState::Downloading => true,
}, Ordering::Relaxed);
};

this.is_major_syncing.store(is_major_syncing, Ordering::Relaxed);

if let Some(metrics) = this.metrics.as_ref() {
metrics.is_major_syncing.set(is_major_syncing as u64);
metrics.peers_count.set(num_connected_peers as u64);
}

Poll::Pending
}
Expand Down
6 changes: 4 additions & 2 deletions client/network/test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,8 @@ pub trait TestNetFactory: Sized {
transaction_pool: Arc::new(EmptyTransactionPool),
protocol_id: ProtocolId::from(&b"test-protocol-name"[..]),
import_queue,
block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone()))
block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone())),
metrics_registry: None,
}).unwrap();

self.mut_peers(|peers| {
Expand Down Expand Up @@ -713,7 +714,8 @@ pub trait TestNetFactory: Sized {
transaction_pool: Arc::new(EmptyTransactionPool),
protocol_id: ProtocolId::from(&b"test-protocol-name"[..]),
import_queue,
block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone()))
block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone())),
metrics_registry: None,
}).unwrap();

self.mut_peers(|peers| {
Expand Down
2 changes: 1 addition & 1 deletion client/service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ sc-rpc = { version = "2.0.0-alpha.2", path = "../rpc" }
sc-telemetry = { version = "2.0.0-alpha.2", path = "../telemetry" }
sc-offchain = { version = "2.0.0-alpha.2", path = "../offchain" }
parity-multiaddr = { package = "parity-multiaddr", version = "0.5.0" }
substrate-prometheus-endpoint = { path = "../../utils/prometheus" , version = "0.8.0-alpha.2"}
prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus" , version = "0.8.0-alpha.2"}
sc-tracing = { version = "2.0.0-alpha.2", path = "../tracing" }
tracing = "0.1.10"
parity-util-mem = { version = "0.5.1", default-features = false, features = ["primitive-types"] }
Expand Down
Loading