diff --git a/prdoc/pr_5197.prdoc b/prdoc/pr_5197.prdoc
new file mode 100644
index 000000000000..40e25cf70dd1
--- /dev/null
+++ b/prdoc/pr_5197.prdoc
@@ -0,0 +1,16 @@
+# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
+# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json
+
+title: Prevent `ConsensusReset` by tolerating runtime API errors in BEEFY
+
+doc:
+  - audience: Node Operator
+    description: |
+      After warp sync, the BEEFY worker was trying to execute runtime calls on
+      blocks whose state had already been pruned. This led to an error and the
+      BEEFY subsystem restarting in a loop. After this PR, the worker tolerates
+      such call errors and therefore avoids the restart loop.
+
+crates:
+  - name: sc-consensus-beefy
+    bump: minor
diff --git a/substrate/client/consensus/beefy/src/lib.rs b/substrate/client/consensus/beefy/src/lib.rs
index f7c999aad591..30cdd494905f 100644
--- a/substrate/client/consensus/beefy/src/lib.rs
+++ b/substrate/client/consensus/beefy/src/lib.rs
@@ -32,7 +32,7 @@ use crate::{
 	metrics::register_metrics,
 };
 use futures::{stream::Fuse, FutureExt, StreamExt};
-use log::{debug, error, info, warn};
+use log::{debug, error, info, trace, warn};
 use parking_lot::Mutex;
 use prometheus_endpoint::Registry;
 use sc_client_api::{Backend, BlockBackend, BlockchainEvents, FinalityNotification, Finalizer};
@@ -451,7 +451,8 @@ where
 		state.set_best_grandpa(best_grandpa.clone());
 		// Overwrite persisted data with newly provided `min_block_delta`.
 		state.set_min_block_delta(min_block_delta);
-		debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db: {:?}.", state);
+		debug!(target: LOG_TARGET, "🥩 Loading BEEFY voter state from db.");
+		trace!(target: LOG_TARGET, "🥩 Loaded state: {:?}.", state);
 
 		// Make sure that all the headers that we need have been synced.
 		let mut new_sessions = vec![];
diff --git a/substrate/client/consensus/beefy/src/worker.rs b/substrate/client/consensus/beefy/src/worker.rs
index ca714ffadd77..3c7f3b1b6efa 100644
--- a/substrate/client/consensus/beefy/src/worker.rs
+++ b/substrate/client/consensus/beefy/src/worker.rs
@@ -458,13 +458,18 @@ where
 			notification.tree_route,
 		);
 
-		self.runtime
-			.runtime_api()
-			.beefy_genesis(notification.hash)
-			.ok()
-			.flatten()
-			.filter(|genesis| *genesis == self.persisted_state.pallet_genesis)
-			.ok_or(Error::ConsensusReset)?;
+		match self.runtime.runtime_api().beefy_genesis(notification.hash) {
+			Ok(Some(genesis)) if genesis != self.persisted_state.pallet_genesis => {
+				debug!(target: LOG_TARGET, "🥩 ConsensusReset detected. Expected genesis: {}, found genesis: {}", self.persisted_state.pallet_genesis, genesis);
+				return Err(Error::ConsensusReset)
+			},
+			Ok(_) => {},
+			Err(api_error) => {
+				// This can happen in case the block was already pruned.
+				// Mostly after warp sync when finality notifications are piled up.
+				debug!(target: LOG_TARGET, "🥩 Unable to check beefy genesis: {}", api_error);
+			},
+		}
 
 		let mut new_session_added = false;
 		if *header.number() > self.best_grandpa_block() {
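
For readers skimming the patch, the `worker.rs` change reduces to a small decision rule. The sketch below is a hypothetical, self-contained illustration, not code from the crate: `u32` stands in for the runtime's block number type, `String` for the runtime API error type, and `check_beefy_genesis` is an illustrative helper name. Only a genesis mismatch maps to `ConsensusReset`; a failed runtime call is tolerated.

```rust
// Hypothetical sketch of the new tolerance rule in the BEEFY worker.
// Types and the helper name are placeholders, not the real crate API.

#[derive(Debug, PartialEq)]
enum Decision {
	/// Keep the worker running.
	Continue,
	/// The BEEFY pallet genesis changed: reset consensus.
	ConsensusReset,
}

/// Map the result of a `beefy_genesis` runtime call onto a worker decision,
/// given the pallet genesis recorded in the worker's persisted state.
fn check_beefy_genesis(call_result: Result<Option<u32>, String>, persisted: u32) -> Decision {
	match call_result {
		// A genesis that differs from the persisted one is a real consensus reset.
		Ok(Some(genesis)) if genesis != persisted => Decision::ConsensusReset,
		// Matching (or absent) genesis: nothing to do.
		Ok(_) => Decision::Continue,
		// The call itself failed, e.g. because the block's state was already
		// pruned after warp sync: tolerate it instead of resetting.
		Err(_) => Decision::Continue,
	}
}

fn main() {
	assert_eq!(check_beefy_genesis(Ok(Some(10)), 10), Decision::Continue);
	assert_eq!(check_beefy_genesis(Ok(Some(42)), 10), Decision::ConsensusReset);
	assert_eq!(check_beefy_genesis(Err("state pruned".into()), 10), Decision::Continue);
}
```

Before this change, the equivalent of the `Err` arm also ended in `ConsensusReset`: the old chain collapsed a failed call to `None` via `.ok().flatten()`, which then failed the `.filter` and hit `.ok_or(Error::ConsensusReset)?`. That is what produced the restart loop after warp sync.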