From de0e209a291c0475f2b1f285fe4f17ed79c39313 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Mon, 29 Jul 2024 11:16:32 +0200 Subject: [PATCH 1/6] Fix ConsensusReset Loop --- substrate/client/consensus/beefy/src/lib.rs | 4 ++-- .../client/consensus/beefy/src/worker.rs | 19 +++++++++++-------- substrate/client/state-db/src/lib.rs | 4 +++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/substrate/client/consensus/beefy/src/lib.rs b/substrate/client/consensus/beefy/src/lib.rs index f7c999aad591..501a9794e40b 100644 --- a/substrate/client/consensus/beefy/src/lib.rs +++ b/substrate/client/consensus/beefy/src/lib.rs @@ -32,7 +32,7 @@ use crate::{ metrics::register_metrics, }; use futures::{stream::Fuse, FutureExt, StreamExt}; -use log::{debug, error, info, warn}; +use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; use prometheus_endpoint::Registry; use sc_client_api::{Backend, BlockBackend, BlockchainEvents, FinalityNotification, Finalizer}; @@ -451,7 +451,7 @@ where state.set_best_grandpa(best_grandpa.clone()); // Overwrite persisted data with newly provided `min_block_delta`. state.set_min_block_delta(min_block_delta); - debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db: {:?}.", state); + debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db."); // Make sure that all the headers that we need have been synced. let mut new_sessions = vec![]; diff --git a/substrate/client/consensus/beefy/src/worker.rs b/substrate/client/consensus/beefy/src/worker.rs index ca714ffadd77..3c66dbfd0919 100644 --- a/substrate/client/consensus/beefy/src/worker.rs +++ b/substrate/client/consensus/beefy/src/worker.rs @@ -38,7 +38,7 @@ use futures::{stream::Fuse, FutureExt, StreamExt}; use log::{debug, error, info, trace, warn}; use sc_client_api::{Backend, HeaderBackend}; use sc_utils::notification::NotificationReceiver; -use sp_api::ProvideRuntimeApi; +use sp_api::{ApiError, ProvideRuntimeApi}; use sp_arithmetic::traits::{AtLeast32Bit, Saturating}; use sp_consensus::SyncOracle; use sp_consensus_beefy::{ @@ -458,13 +458,16 @@ where notification.tree_route, ); - self.runtime - .runtime_api() - .beefy_genesis(notification.hash) - .ok() - .flatten() - .filter(|genesis| *genesis == self.persisted_state.pallet_genesis) - .ok_or(Error::ConsensusReset)?; + let result = self.runtime.runtime_api().beefy_genesis(notification.hash); + + match result { + Ok(Some(genesis)) if genesis != self.persisted_state.pallet_genesis => + return Err(Error::ConsensusReset), + Ok(_) => {}, + Err(api_error) => { + warn!(target: LOG_TARGET, "API runtime call failed: {}", api_error); + }, + } let mut new_session_added = false; if *header.number() > self.best_grandpa_block() { diff --git a/substrate/client/state-db/src/lib.rs b/substrate/client/state-db/src/lib.rs index 6c37c87a611b..c2573590f51b 100644 --- a/substrate/client/state-db/src/lib.rs +++ b/substrate/client/state-db/src/lib.rs @@ -47,7 +47,7 @@ mod pruning; mod test; use codec::Codec; -use log::trace; +use log::{info, trace}; use noncanonical::NonCanonicalOverlay; use parking_lot::RwLock; use pruning::{HaveBlock, RefWindow}; @@ -355,6 +355,8 @@ impl StateDbSync { return Ok(commit) } let number = self.non_canonical.canonicalize(hash, &mut commit)?; + + info!(target: LOG_TARGET, "Canonicalizing: #{}", number); if self.mode == PruningMode::ArchiveCanonical { commit.data.deleted.clear(); } From 699803765e5f2c9a48303a62448b60133c086e40 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Mon, 29 Jul 2024 18:05:02 +0200 Subject: [PATCH 2/6] Remove unwanted changes --- substrate/client/consensus/beefy/src/lib.rs | 4 ++-- substrate/client/consensus/beefy/src/worker.rs | 8 ++++---- substrate/client/state-db/src/lib.rs | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/substrate/client/consensus/beefy/src/lib.rs b/substrate/client/consensus/beefy/src/lib.rs index 501a9794e40b..c07504c2197e 100644 --- a/substrate/client/consensus/beefy/src/lib.rs +++ b/substrate/client/consensus/beefy/src/lib.rs @@ -32,7 +32,7 @@ use crate::{ metrics::register_metrics, }; use futures::{stream::Fuse, FutureExt, StreamExt}; -use log::{debug, error, info, trace, warn}; +use log::{debug, error, info, log_enabled, warn, Level}; use parking_lot::Mutex; use prometheus_endpoint::Registry; use sc_client_api::{Backend, BlockBackend, BlockchainEvents, FinalityNotification, Finalizer}; @@ -451,7 +451,7 @@ where state.set_best_grandpa(best_grandpa.clone()); // Overwrite persisted data with newly provided `min_block_delta`. state.set_min_block_delta(min_block_delta); - debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db."); + debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db: {:?}.", state); // Make sure that all the headers that we need have been synced. let mut new_sessions = vec![]; diff --git a/substrate/client/consensus/beefy/src/worker.rs b/substrate/client/consensus/beefy/src/worker.rs index 3c66dbfd0919..9d8a2708143d 100644 --- a/substrate/client/consensus/beefy/src/worker.rs +++ b/substrate/client/consensus/beefy/src/worker.rs @@ -458,14 +458,14 @@ where notification.tree_route, ); - let result = self.runtime.runtime_api().beefy_genesis(notification.hash); - - match result { + match self.runtime.runtime_api().beefy_genesis(notification.hash) { Ok(Some(genesis)) if genesis != self.persisted_state.pallet_genesis => return Err(Error::ConsensusReset), Ok(_) => {}, Err(api_error) => { - warn!(target: LOG_TARGET, "API runtime call failed: {}", api_error); + // This can happen in case the block was already pruned. + // Mostly after warp sync when finality notifications are piled up. + debug!(target: LOG_TARGET, "🥩 Unable to check beefy genesis: {}", api_error); }, } diff --git a/substrate/client/state-db/src/lib.rs b/substrate/client/state-db/src/lib.rs index c2573590f51b..6c37c87a611b 100644 --- a/substrate/client/state-db/src/lib.rs +++ b/substrate/client/state-db/src/lib.rs @@ -47,7 +47,7 @@ mod pruning; mod test; use codec::Codec; -use log::{info, trace}; +use log::trace; use noncanonical::NonCanonicalOverlay; use parking_lot::RwLock; use pruning::{HaveBlock, RefWindow}; @@ -355,8 +355,6 @@ impl StateDbSync { return Ok(commit) } let number = self.non_canonical.canonicalize(hash, &mut commit)?; - - info!(target: LOG_TARGET, "Canonicalizing: #{}", number); if self.mode == PruningMode::ArchiveCanonical { commit.data.deleted.clear(); } From f06cce48609f08f25eba21911cfeaccb3d6544e2 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 31 Jul 2024 10:44:33 +0200 Subject: [PATCH 3/6] Improve logging --- substrate/client/consensus/beefy/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/substrate/client/consensus/beefy/src/lib.rs b/substrate/client/consensus/beefy/src/lib.rs index c07504c2197e..ffe04ba7d784 100644 --- a/substrate/client/consensus/beefy/src/lib.rs +++ b/substrate/client/consensus/beefy/src/lib.rs @@ -32,7 +32,7 @@ use crate::{ metrics::register_metrics, }; use futures::{stream::Fuse, FutureExt, StreamExt}; -use log::{debug, error, info, log_enabled, warn, Level}; +use log::{debug, error, info, log_enabled, trace, warn, Level}; use parking_lot::Mutex; use prometheus_endpoint::Registry; use sc_client_api::{Backend, BlockBackend, BlockchainEvents, FinalityNotification, Finalizer}; @@ -451,7 +451,8 @@ where state.set_best_grandpa(best_grandpa.clone()); // Overwrite persisted data with newly provided `min_block_delta`. state.set_min_block_delta(min_block_delta); - debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db: {:?}.", state); + debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db."); + trace!(target: LOG_TARGET, "🥩 Loaded state: {:?}.", state); // Make sure that all the headers that we need have been synced. let mut new_sessions = vec![]; From 02aa612f32bd8e37f1f155777c488f946c755521 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 31 Jul 2024 12:23:48 +0200 Subject: [PATCH 4/6] prdoc --- prdoc/pr_5197.prdoc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 prdoc/pr_5197.prdoc diff --git a/prdoc/pr_5197.prdoc b/prdoc/pr_5197.prdoc new file mode 100644 index 000000000000..40e25cf70dd1 --- /dev/null +++ b/prdoc/pr_5197.prdoc @@ -0,0 +1,16 @@ +# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0 +# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json + +title: Prevent `ConsensusReset` by tolerating runtime API errors in BEEFY + +doc: + - audience: Node Operator + description: | + After warp sync, the BEEFY worker was trying to execute runtime calls on + blocks which had their state already pruned. This led to an error and restarting + of the beefy subsystem in a loop. After this PR, the worker tolerates call errors and therefore prevents this + worker restart loop. + +crates: + - name: sc-consensus-beefy + bump: minor From 568ba1bca9b059b7c4f329e65e58686c99f85ece Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Thu, 1 Aug 2024 11:49:11 +0200 Subject: [PATCH 5/6] Some more logging, remove unwanted imports --- substrate/client/consensus/beefy/src/lib.rs | 2 +- substrate/client/consensus/beefy/src/worker.rs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/substrate/client/consensus/beefy/src/lib.rs b/substrate/client/consensus/beefy/src/lib.rs index ffe04ba7d784..30cdd494905f 100644 --- a/substrate/client/consensus/beefy/src/lib.rs +++ b/substrate/client/consensus/beefy/src/lib.rs @@ -32,7 +32,7 @@ use crate::{ metrics::register_metrics, }; use futures::{stream::Fuse, FutureExt, StreamExt}; -use log::{debug, error, info, log_enabled, trace, warn, Level}; +use log::{debug, error, info, trace, warn}; use parking_lot::Mutex; use prometheus_endpoint::Registry; use sc_client_api::{Backend, BlockBackend, BlockchainEvents, FinalityNotification, Finalizer}; diff --git a/substrate/client/consensus/beefy/src/worker.rs b/substrate/client/consensus/beefy/src/worker.rs index 9d8a2708143d..09eace935b58 100644 --- a/substrate/client/consensus/beefy/src/worker.rs +++ b/substrate/client/consensus/beefy/src/worker.rs @@ -459,8 +459,10 @@ where ); match self.runtime.runtime_api().beefy_genesis(notification.hash) { - Ok(Some(genesis)) if genesis != self.persisted_state.pallet_genesis => - return Err(Error::ConsensusReset), + Ok(Some(genesis)) if genesis != self.persisted_state.pallet_genesis => { + debug!(target: LOG_TARGET, "🥩 ConsensusReset detected. Expected genesis: {}, found genesis: {}", self.persisted_state.pallet_genesis, genesis); + return Err(Error::ConsensusReset) + }, Ok(_) => {}, Err(api_error) => { // This can happen in case the block was already pruned. From 98f8a98debbd8ab0023508061ebb5a78ca46c166 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Mon, 5 Aug 2024 09:10:51 +0200 Subject: [PATCH 6/6] Import --- substrate/client/consensus/beefy/src/worker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substrate/client/consensus/beefy/src/worker.rs b/substrate/client/consensus/beefy/src/worker.rs index 09eace935b58..3c7f3b1b6efa 100644 --- a/substrate/client/consensus/beefy/src/worker.rs +++ b/substrate/client/consensus/beefy/src/worker.rs @@ -38,7 +38,7 @@ use futures::{stream::Fuse, FutureExt, StreamExt}; use log::{debug, error, info, trace, warn}; use sc_client_api::{Backend, HeaderBackend}; use sc_utils::notification::NotificationReceiver; -use sp_api::{ApiError, ProvideRuntimeApi}; +use sp_api::ProvideRuntimeApi; use sp_arithmetic::traits::{AtLeast32Bit, Saturating}; use sp_consensus::SyncOracle; use sp_consensus_beefy::{