From 7bd6b9d9c0cba471f0937d6117c955d360c6593c Mon Sep 17 00:00:00 2001 From: Shanin Roman Date: Thu, 21 Mar 2024 12:37:02 +0300 Subject: [PATCH] [fix]: Report correct view change index for current round Signed-off-by: Shanin Roman --- cli/src/lib.rs | 7 +++++-- core/src/metrics.rs | 4 ---- core/src/sumeragi/main_loop.rs | 4 ++++ core/src/sumeragi/mod.rs | 25 +++++++++++++++++++------ telemetry/src/metrics.rs | 11 ++++++++--- 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 037f69fe061..1d674b77f23 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -26,7 +26,7 @@ use iroha_core::{ try_read_snapshot, SnapshotMaker, SnapshotMakerHandle, TryReadError as TryReadSnapshotError, }, state::{State, StateReadOnly, World}, - sumeragi::{SumeragiHandle, SumeragiStartArgs}, + sumeragi::{SumeragiHandle, SumeragiMetrics, SumeragiStartArgs}, IrohaNetwork, }; use iroha_data_model::prelude::*; @@ -275,7 +275,10 @@ impl Iroha { network: network.clone(), genesis_network: genesis, block_count, - dropped_messages: metrics_reporter.metrics().dropped_messages.clone(), + sumeragi_metrics: SumeragiMetrics { + dropped_messages: metrics_reporter.metrics().dropped_messages.clone(), + view_changes: metrics_reporter.metrics().view_changes.clone(), + }, }; // Starting Sumeragi requires no async context enabled let sumeragi = tokio::task::spawn_blocking(move || SumeragiHandle::start(start_args)) diff --git a/core/src/metrics.rs b/core/src/metrics.rs index 7de5b4f42bd..f5adbb9f6ed 100644 --- a/core/src/metrics.rs +++ b/core/src/metrics.rs @@ -139,10 +139,6 @@ impl MetricsReporter { .set(domain.accounts.len() as u64); } - self.metrics - .view_changes - .set(state_view.latest_block_view_change_index()); - self.metrics.queue_size.set(self.queue.tx_len() as u64); Ok(()) diff --git a/core/src/sumeragi/main_loop.rs b/core/src/sumeragi/main_loop.rs index 12fcde7c4ad..ee913e18611 100644 --- a/core/src/sumeragi/main_loop.rs +++ b/core/src/sumeragi/main_loop.rs @@ -47,6 +47,8 @@ pub struct Sumeragi { /// sumeragi is more dependent on the code that is internal to the /// subsystem. pub transaction_cache: Vec, + /// Metrics for reporting number of view changes in current round + pub view_changes_metric: iroha_telemetry::metrics::ViewChangesGauge, } #[allow(clippy::missing_fields_in_debug)] @@ -896,6 +898,7 @@ pub(crate) fn run( &mut last_view_change_time, &mut view_change_time, ); + sumeragi.view_changes_metric.set(old_view_change_index); if let Some(message) = { let (msg, sleep) = @@ -975,6 +978,7 @@ pub(crate) fn run( &mut last_view_change_time, &mut view_change_time, ); + sumeragi.view_changes_metric.set(old_view_change_index); sumeragi.process_message_independent( &state, diff --git a/core/src/sumeragi/mod.rs b/core/src/sumeragi/mod.rs index be6c3d1435b..f457138283d 100644 --- a/core/src/sumeragi/mod.rs +++ b/core/src/sumeragi/mod.rs @@ -34,7 +34,7 @@ use crate::{kura::Kura, prelude::*, queue::Queue, EventsSender, IrohaNetwork, Ne #[derive(Clone)] pub struct SumeragiHandle { /// Counter for amount of dropped messages by sumeragi - dropped_messages: iroha_telemetry::metrics::IntCounter, + dropped_messages_metric: iroha_telemetry::metrics::DroppedMessagesCounter, _thread_handle: Arc, // Should be dropped after `_thread_handle` to prevent sumeargi thread from panicking control_message_sender: mpsc::SyncSender, @@ -45,7 +45,7 @@ impl SumeragiHandle { /// Deposit a sumeragi control flow network message. pub fn incoming_control_flow_message(&self, msg: ControlFlowMessage) { if let Err(error) = self.control_message_sender.try_send(msg) { - self.dropped_messages.inc(); + self.dropped_messages_metric.inc(); error!( ?error, "This peer is faulty. \ @@ -57,7 +57,7 @@ impl SumeragiHandle { /// Deposit a sumeragi network message. pub fn incoming_block_message(&self, msg: BlockMessage) { if let Err(error) = self.message_sender.try_send(msg) { - self.dropped_messages.inc(); + self.dropped_messages_metric.inc(); error!( ?error, "This peer is faulty. \ @@ -121,7 +121,11 @@ impl SumeragiHandle { network, genesis_network, block_count: BlockCount(block_count), - dropped_messages, + sumeragi_metrics: + SumeragiMetrics { + view_changes, + dropped_messages, + }, }: SumeragiStartArgs, ) -> SumeragiHandle { let (control_message_sender, control_message_receiver) = mpsc::sync_channel(100); @@ -196,6 +200,7 @@ impl SumeragiHandle { debug_force_soft_fork, current_topology, transaction_cache: Vec::new(), + view_changes_metric: view_changes, }; // Oneshot channel to allow forcefully stopping the thread. @@ -219,7 +224,7 @@ impl SumeragiHandle { let thread_handle = ThreadHandler::new(Box::new(shutdown), thread_handle); SumeragiHandle { - dropped_messages, + dropped_messages_metric: dropped_messages, control_message_sender, message_sender, _thread_handle: Arc::new(thread_handle), @@ -287,5 +292,13 @@ pub struct SumeragiStartArgs { pub network: IrohaNetwork, pub genesis_network: Option, pub block_count: BlockCount, - pub dropped_messages: iroha_telemetry::metrics::IntCounter, + pub sumeragi_metrics: SumeragiMetrics, +} + +/// Relevant sumeragi metrics +pub struct SumeragiMetrics { + /// Number of view changes in current round + pub view_changes: iroha_telemetry::metrics::ViewChangesGauge, + /// Amount of dropped messages by sumeragi + pub dropped_messages: iroha_telemetry::metrics::DroppedMessagesCounter, } diff --git a/telemetry/src/metrics.rs b/telemetry/src/metrics.rs index 9c7c30d8659..5146b6016d2 100644 --- a/telemetry/src/metrics.rs +++ b/telemetry/src/metrics.rs @@ -3,12 +3,17 @@ use std::{ops::Deref, time::Duration}; use parity_scale_codec::{Compact, Decode, Encode}; -pub use prometheus::{ +use prometheus::{ core::{AtomicU64, GenericGauge, GenericGaugeVec}, Encoder, Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, Opts, Registry, }; use serde::{Deserialize, Serialize}; +/// Type for reporting amount of dropped messages for sumeragi +pub type DroppedMessagesCounter = IntCounter; +/// Type for reporting view change index of current round +pub type ViewChangesGauge = GenericGauge; + /// Thin wrapper around duration that `impl`s [`Default`] #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct Uptime(pub Duration); @@ -102,11 +107,11 @@ pub struct Metrics { /// Query handle time Histogram pub isi_times: HistogramVec, /// Number of view changes in the current round - pub view_changes: GenericGauge, + pub view_changes: ViewChangesGauge, /// Number of transactions in the queue pub queue_size: GenericGauge, /// Number of sumeragi dropped messages - pub dropped_messages: IntCounter, + pub dropped_messages: DroppedMessagesCounter, /// Internal use only. Needed for generating the response. registry: Registry, }