From 8dd0c4ecfc0add665577da51cc20bf220fcee756 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Tue, 13 Feb 2024 13:54:45 +0000 Subject: [PATCH 01/34] Remove witness retrying logic on missing block Sometimes when a ChunkStateWitness arrives to be processed, the block required to process it isn't available yet, we have to wait for it. https://github.com/near/nearcore/pull/10535 implemented a hacky way to do it by retrying the processing every 500ms until the required block arrives. This PR will implement a proper solution, so let's remove the hacky workaround. --- chain/client/src/adapter.rs | 6 +---- chain/client/src/client_actor.rs | 27 +++---------------- .../stateless_validation/chunk_validator.rs | 13 +++------ 3 files changed, 8 insertions(+), 38 deletions(-) diff --git a/chain/client/src/adapter.rs b/chain/client/src/adapter.rs index ad1254c1290..40f1d6eb102 100644 --- a/chain/client/src/adapter.rs +++ b/chain/client/src/adapter.rs @@ -141,7 +141,6 @@ pub enum ProcessTxResponse { pub struct ChunkStateWitnessMessage { pub witness: ChunkStateWitness, pub peer_id: PeerId, - pub attempts_remaining: usize, } #[derive(actix::Message, Debug)] @@ -350,10 +349,7 @@ impl near_network::client::Client for Adapter { async fn chunk_state_witness(&self, witness: ChunkStateWitness, peer_id: PeerId) { match self .client_addr - .send( - ChunkStateWitnessMessage { witness, peer_id, attempts_remaining: 5 } - .with_span_context(), - ) + .send(ChunkStateWitnessMessage { witness, peer_id }.with_span_context()) .await { Ok(()) => {} diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 684e60a076e..40503b2dead 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -2006,32 +2006,11 @@ impl Handler> for ClientActor { fn handle( &mut self, msg: WithSpanContext, - ctx: &mut Context, + _ctx: &mut Context, ) -> Self::Result { let (_span, msg) = handler_debug_span!(target: "client", msg); - let peer_id = msg.peer_id.clone(); 
- let attempts_remaining = msg.attempts_remaining; - match self.client.process_chunk_state_witness(msg.witness, msg.peer_id, None) { - Err(err) => { - tracing::error!(target: "client", ?err, "Error processing chunk state witness"); - } - Ok(Some(witness)) => { - if attempts_remaining > 0 { - ctx.run_later(Duration::from_millis(100), move |_, ctx| { - ctx.address().do_send( - ChunkStateWitnessMessage { - witness, - peer_id, - attempts_remaining: attempts_remaining - 1, - } - .with_span_context(), - ); - }); - } else { - tracing::error!(target: "client", "Failed to process chunk state witness even after 5 tries due to missing parent block"); - } - } - Ok(None) => {} + if let Err(err) = self.client.process_chunk_state_witness(msg.witness, msg.peer_id, None) { + tracing::error!(target: "client", ?err, "Error processing chunk state witness"); } } } diff --git a/chain/client/src/stateless_validation/chunk_validator.rs b/chain/client/src/stateless_validation/chunk_validator.rs index 46b184558f0..21e6ffb2351 100644 --- a/chain/client/src/stateless_validation/chunk_validator.rs +++ b/chain/client/src/stateless_validation/chunk_validator.rs @@ -588,18 +588,13 @@ impl Client { witness: ChunkStateWitness, peer_id: PeerId, processing_done_tracker: Option, - ) -> Result, Error> { + ) -> Result<(), Error> { // TODO(#10502): Handle production of state witness for first chunk after genesis. // Properly handle case for chunk right after genesis. // Context: We are currently unable to handle production of the state witness for the // first chunk after genesis as it's not possible to run the genesis chunk in runtime. 
let prev_block_hash = witness.inner.chunk_header.prev_block_hash(); - let prev_block = match self.chain.get_block(prev_block_hash) { - Ok(block) => block, - Err(_) => { - return Ok(Some(witness)); - } - }; + let prev_block = self.chain.get_block(prev_block_hash)?; let prev_chunk_header = Chain::get_prev_chunk_header( self.epoch_manager.as_ref(), &prev_block, @@ -616,7 +611,7 @@ impl Client { &self.chunk_validator.network_sender, self.chunk_endorsement_tracker.as_ref(), ); - return Ok(None); + return Ok(()); } // TODO(#10265): If the previous block does not exist, we should @@ -635,6 +630,6 @@ impl Client { }, )); } - result.map(|_| None) + result } } From 456a821b6d59fe28370952c0565283525405e4cd Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 14 Feb 2024 14:20:46 +0000 Subject: [PATCH 02/34] Add verify_chunk_state_witness_signature_in_epoch The current function to verify state witness signatures takes the epoch_id from the previous block. For orphaned state witnesses the previous block isn't available, so it's impossible to use this function. Let's add another function that takes the epoch_id as an argument instead of fetching it from a block. It will allow us to verify signatures of orphaned chunk state witnesses. 
--- chain/chain/src/test_utils/kv_runtime.rs | 8 ++++++++ chain/epoch-manager/src/adapter.rs | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/chain/chain/src/test_utils/kv_runtime.rs b/chain/chain/src/test_utils/kv_runtime.rs index 861132f9b99..746612e4b74 100644 --- a/chain/chain/src/test_utils/kv_runtime.rs +++ b/chain/chain/src/test_utils/kv_runtime.rs @@ -934,6 +934,14 @@ impl EpochManagerAdapter for MockEpochManager { Ok(true) } + fn verify_chunk_state_witness_signature_in_epoch( + &self, + _state_witness: &ChunkStateWitness, + _epoch_id: &EpochId, + ) -> Result { + Ok(true) + } + fn cares_about_shard_from_prev_block( &self, parent_hash: &CryptoHash, diff --git a/chain/epoch-manager/src/adapter.rs b/chain/epoch-manager/src/adapter.rs index fe9080b35db..161925f1aaf 100644 --- a/chain/epoch-manager/src/adapter.rs +++ b/chain/epoch-manager/src/adapter.rs @@ -393,6 +393,12 @@ pub trait EpochManagerAdapter: Send + Sync { state_witness: &ChunkStateWitness, ) -> Result; + fn verify_chunk_state_witness_signature_in_epoch( + &self, + state_witness: &ChunkStateWitness, + epoch_id: &EpochId, + ) -> Result; + fn cares_about_shard_from_prev_block( &self, parent_hash: &CryptoHash, @@ -1004,6 +1010,16 @@ impl EpochManagerAdapter for EpochManagerHandle { let chunk_header = &state_witness.inner.chunk_header; let epoch_id = epoch_manager.get_epoch_id_from_prev_block(chunk_header.prev_block_hash())?; + self.verify_chunk_state_witness_signature_in_epoch(state_witness, &epoch_id) + } + + fn verify_chunk_state_witness_signature_in_epoch( + &self, + state_witness: &ChunkStateWitness, + epoch_id: &EpochId, + ) -> Result { + let epoch_manager = self.read(); + let chunk_header = &state_witness.inner.chunk_header; let chunk_producer = epoch_manager.get_chunk_producer_info( &epoch_id, chunk_header.height_created(), From 16828b37ae9cea15d0331d27a72240f59e2d756c Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 14 Feb 2024 14:27:06 +0000 Subject: [PATCH 03/34] Add 
OrphanStateWitnessPool Add a struct which keeps a cache of orphaned ChunkStateWitnesses. It provides two methods that make it easy to add a ChunkStateWitness to the pool, and then when a block arrives, another method makes it easy to fetch all orphaned ChunkStateWitnesses that were waiting for this block. --- .../mod.rs} | 5 + .../chunk_validator/orphan_witness_pool.rs | 105 ++++++++++++++++++ 2 files changed, 110 insertions(+) rename chain/client/src/stateless_validation/{chunk_validator.rs => chunk_validator/mod.rs} (99%) create mode 100644 chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs diff --git a/chain/client/src/stateless_validation/chunk_validator.rs b/chain/client/src/stateless_validation/chunk_validator/mod.rs similarity index 99% rename from chain/client/src/stateless_validation/chunk_validator.rs rename to chain/client/src/stateless_validation/chunk_validator/mod.rs index 21e6ffb2351..a49a64255c7 100644 --- a/chain/client/src/stateless_validation/chunk_validator.rs +++ b/chain/client/src/stateless_validation/chunk_validator/mod.rs @@ -1,3 +1,5 @@ +pub mod orphan_witness_pool; + use super::processing_tracker::ProcessingDoneTracker; use crate::stateless_validation::chunk_endorsement_tracker::ChunkEndorsementTracker; use crate::{metrics, Client}; @@ -31,6 +33,7 @@ use near_primitives::types::chunk_extra::ChunkExtra; use near_primitives::types::ShardId; use near_primitives::validator_signer::ValidatorSigner; use near_store::PartialStorage; +use orphan_witness_pool::OrphanStateWitnessPool; use std::collections::HashMap; use std::sync::Arc; @@ -53,6 +56,7 @@ pub struct ChunkValidator { network_sender: Sender, runtime_adapter: Arc, chunk_endorsement_tracker: Arc, + orphan_witness_pool: OrphanStateWitnessPool, } impl ChunkValidator { @@ -69,6 +73,7 @@ impl ChunkValidator { network_sender, runtime_adapter, chunk_endorsement_tracker, + orphan_witness_pool: OrphanStateWitnessPool::default(), } } diff --git 
a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs new file mode 100644 index 00000000000..e26000d6c4b --- /dev/null +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -0,0 +1,105 @@ +use std::collections::{HashMap, HashSet}; + +use lru::LruCache; +use near_primitives::hash::CryptoHash; +use near_primitives::stateless_validation::ChunkStateWitness; +use near_primitives::types::{BlockHeight, ShardId}; + +/// `OrphanStateWitnessPool` is used to keep orphaned ChunkStateWitnesses until it's possible to process them. +/// To process a ChunkStateWitness we need to have the previous block, but it might happen that a ChunkStateWitness +/// shows up before the block is available. In such cases the witness is put in `OrphanStateWitnessPool` until the +/// required block arrives and the witness can be processed. +pub struct OrphanStateWitnessPool { + witness_cache: LruCache<(ShardId, BlockHeight), ChunkStateWitness>, + /// List of orphaned witnesses that wait for this block to appear. + /// Maps block hash to entries in `witness_cache`. + /// Must be kept in sync with `witness_cache`. + waiting_for_block: HashMap>, +} + +impl OrphanStateWitnessPool { + /// Create a new `OrphanStateWitnessPool` with a capacity of `cache_capacity` witnesses. + /// The `Default` trait implementation provides reasonable defaults. + pub fn new(cache_capacity: usize) -> Self { + OrphanStateWitnessPool { + witness_cache: LruCache::new(cache_capacity), + waiting_for_block: HashMap::new(), + } + } + + /// Add an orphaned chunk state witness to the pool. The witness will be put in a cache and it'll + /// wait there for the block that's required to process it. + /// It's expected that this `ChunkStateWitness` has gone through basic validation - including signature, + /// shard_id, size and distance from the tip. 
The pool would still work without it, but without validation + /// it'd be possible to fill the whole cache with spam. + pub fn add_orphan_state_witness(&mut self, witness: ChunkStateWitness) { + if self.witness_cache.cap() == 0 { + // A cache with 0 capacity doesn't keep anything. + return; + } + + // Insert the new ChunkStateWitness into the cache + let chunk_header = &witness.inner.chunk_header; + let cache_key = (chunk_header.shard_id(), chunk_header.height_created()); + let prev_block_hash = *chunk_header.prev_block_hash(); + if let Some((_, ejected_witness)) = self.witness_cache.push(cache_key, witness) { + // If another witness has been ejected from the cache due to capacity limit, + // then remove the ejected witness from `waiting_for_block` to keep them in sync + let header = &ejected_witness.inner.chunk_header; + tracing::debug!( + target: "client", + witness_height = header.height_created(), + witness_shard = header.shard_id(), + witness_chunk = ?header.chunk_hash(), + witness_prev_block = ?header.prev_block_hash(), + "Ejecting an orphaned ChunkStateWitness from the cache due to capacity limit. It will not be processed." + ); + self.remove_from_waiting_for_block(&ejected_witness) + } + + // Add the new orphaned state witness to `waiting_for_block` + self.waiting_for_block + .entry(prev_block_hash) + .or_insert_with(|| HashSet::new()) + .insert(cache_key); + } + + fn remove_from_waiting_for_block(&mut self, witness: &ChunkStateWitness) { + let chunk_header = &witness.inner.chunk_header; + let waiting_set = self + .waiting_for_block + .get_mut(chunk_header.prev_block_hash()) + .expect("Every ejected witness must have a corresponding entry in waiting_for_block."); + waiting_set.remove(&(chunk_header.shard_id(), chunk_header.height_created())); + if waiting_set.is_empty() { + self.waiting_for_block.remove(chunk_header.prev_block_hash()); + } + } + + /// Find all orphaned witnesses that were waiting for this block and remove them from the pool. 
+ /// The block has arrived, so they can be now processed, they're no longer orphans. + pub fn take_state_witnesses_waiting_for_block( + &mut self, + prev_block: &CryptoHash, + ) -> Vec { + let Some(waiting) = self.waiting_for_block.remove(prev_block) else { + return Vec::new(); + }; + let mut result = Vec::new(); + for (shard_id, height) in waiting { + // Remove this witness from `witness_cache` to keep them in sync + let witness = self.witness_cache.pop(&(shard_id, height)).expect( + "Every entry in waiting_for_block must have a corresponding witness in the cache", + ); + + result.push(witness); + } + result + } +} + +impl Default for OrphanStateWitnessPool { + fn default() -> OrphanStateWitnessPool { + OrphanStateWitnessPool::new(12) + } +} From d3dc4009b0078d89e3c4c37573a6bb3eb4cfd527 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 14 Feb 2024 14:34:00 +0000 Subject: [PATCH 04/34] Split process_chunk_state_witness into two parts Let's split this function into two parts - one part which tries to find the previous block for the given ChunkStateWitness and another which processes the witness when the previous block is available. It will make handling orphaned witnesses easier - once the block arrives, we can just call the second function to process it. In the future it might also make it easier to process witnesses before the previous block is saved to the database, which could reduce latency. --- .../chunk_validator/mod.rs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/chain/client/src/stateless_validation/chunk_validator/mod.rs b/chain/client/src/stateless_validation/chunk_validator/mod.rs index a49a64255c7..2266f0902a0 100644 --- a/chain/client/src/stateless_validation/chunk_validator/mod.rs +++ b/chain/client/src/stateless_validation/chunk_validator/mod.rs @@ -600,6 +600,28 @@ impl Client { // first chunk after genesis as it's not possible to run the genesis chunk in runtime. 
let prev_block_hash = witness.inner.chunk_header.prev_block_hash(); let prev_block = self.chain.get_block(prev_block_hash)?; + self.process_chunk_state_witness_with_prev_block( + witness, + peer_id, + &prev_block, + processing_done_tracker, + ) + } + + pub fn process_chunk_state_witness_with_prev_block( + &mut self, + witness: ChunkStateWitness, + peer_id: PeerId, + prev_block: &Block, + processing_done_tracker: Option, + ) -> Result<(), Error> { + if witness.inner.chunk_header.prev_block_hash() != prev_block.hash() { + return Err(Error::Other( + "process_chunk_state_witness_with_prev_block - prev_block doesn't match" + .to_string(), + )); + } + let prev_chunk_header = Chain::get_prev_chunk_header( self.epoch_manager.as_ref(), &prev_block, From 5da1fea0f063abc3b6b863bceb05d0f3774da5a0 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 21 Feb 2024 02:32:33 +0000 Subject: [PATCH 05/34] Add possible_epochs_of_height_around_tip Add a function which tries to determine in which epoch the specified height is located. It looks at the previous, current and next epoch around the chain Tip and tries to figure out to which one of them this height belongs. It's not always possible to determine in which epoch a given height will be, as the start height of the next epoch isn't known in the current epoch, so the function returns a set of possible epochs where the height might be. For example if the tip is in the middle of the current epoch, and the height is about epoch_length blocks higher than the tip, this function would return [current_epoch_id, next_epoch_id], as this height could be in either one of those epochs. It will later be used to verify the signature of orphaned witnesses. To verify the signature we need to know what is the chunk producer who should produce a witness at some height, and for that we need to know what's the epoch_id at this height. 
--- chain/chain/src/test_utils/kv_runtime.rs | 9 + chain/epoch-manager/src/adapter.rs | 24 ++ chain/epoch-manager/src/lib.rs | 76 ++++++ chain/epoch-manager/src/tests/mod.rs | 331 +++++++++++++++++++++++ 4 files changed, 440 insertions(+) diff --git a/chain/chain/src/test_utils/kv_runtime.rs b/chain/chain/src/test_utils/kv_runtime.rs index 746612e4b74..773b4b8e8ab 100644 --- a/chain/chain/src/test_utils/kv_runtime.rs +++ b/chain/chain/src/test_utils/kv_runtime.rs @@ -13,6 +13,7 @@ use near_epoch_manager::types::BlockHeaderInfo; use near_epoch_manager::{EpochManagerAdapter, RngSeed}; use near_pool::types::TransactionGroupIterator; use near_primitives::account::{AccessKey, Account}; +use near_primitives::block::Tip; use near_primitives::block_header::{Approval, ApprovalInner}; use near_primitives::epoch_manager::block_info::BlockInfo; use near_primitives::epoch_manager::epoch_info::EpochInfo; @@ -990,6 +991,14 @@ impl EpochManagerAdapter for MockEpochManager { Ok(shard_layout != next_shard_layout) } + fn possible_epochs_of_height_around_tip( + &self, + _tip: &Tip, + _height: BlockHeight, + ) -> Result, EpochError> { + unimplemented!(); + } + #[cfg(feature = "new_epoch_sync")] fn get_all_epoch_hashes( &self, diff --git a/chain/epoch-manager/src/adapter.rs b/chain/epoch-manager/src/adapter.rs index 161925f1aaf..3bc6c548d78 100644 --- a/chain/epoch-manager/src/adapter.rs +++ b/chain/epoch-manager/src/adapter.rs @@ -4,6 +4,7 @@ use crate::EpochInfoAggregator; use crate::EpochManagerHandle; use near_chain_primitives::Error; use near_crypto::Signature; +use near_primitives::block::Tip; use near_primitives::block_header::{Approval, ApprovalInner, BlockHeader}; use near_primitives::epoch_manager::block_info::BlockInfo; use near_primitives::epoch_manager::epoch_info::EpochInfo; @@ -415,6 +416,20 @@ pub trait EpochManagerAdapter: Send + Sync { fn will_shard_layout_change(&self, parent_hash: &CryptoHash) -> Result; + /// Tries to estimate in which epoch the given height would 
reside. + /// Looks at the previous, current and next epoch around the tip + /// and adds them to the result if the height might be inside the epoch. + /// It returns a list of possible epochs instead of a single value + /// because sometimes it's impossible to determine the exact epoch + /// in which the height will be. The exact starting height of the + /// next epoch isn't known until it actually starts, so it's impossible + /// to determine the exact epoch for heights which are ahead of the tip. + fn possible_epochs_of_height_around_tip( + &self, + tip: &Tip, + height: BlockHeight, + ) -> Result, EpochError>; + /// Returns a vector of all hashes in the epoch ending with `last_block_info`. /// Only return blocks on chain of `last_block_info`. /// Hashes are returned in the order from the last block to the first block. @@ -1059,6 +1074,15 @@ impl EpochManagerAdapter for EpochManagerHandle { epoch_manager.will_shard_layout_change(parent_hash) } + fn possible_epochs_of_height_around_tip( + &self, + tip: &Tip, + height: BlockHeight, + ) -> Result, EpochError> { + let epoch_manager = self.read(); + epoch_manager.possible_epochs_of_height_around_tip(tip, height) + } + #[cfg(feature = "new_epoch_sync")] fn get_all_epoch_hashes( &self, diff --git a/chain/epoch-manager/src/lib.rs b/chain/epoch-manager/src/lib.rs index 8404abd48b1..ed3b830870a 100644 --- a/chain/epoch-manager/src/lib.rs +++ b/chain/epoch-manager/src/lib.rs @@ -2,6 +2,7 @@ use crate::proposals::proposals_to_epoch_info; use crate::types::EpochInfoAggregator; use near_cache::SyncLruCache; use near_chain_configs::GenesisConfig; +use near_primitives::block::Tip; use near_primitives::checked_feature; use near_primitives::epoch_manager::block_info::BlockInfo; use near_primitives::epoch_manager::epoch_info::{EpochInfo, EpochSummary}; @@ -1900,6 +1901,81 @@ impl EpochManager { } } + pub fn possible_epochs_of_height_around_tip( + &self, + tip: &Tip, + height: BlockHeight, + ) -> Result, EpochError> { + // If the 
tip is at the genesis block, it has to be handled in a special way. + // For genesis block, epoch_first_block() is the dummy block (11111...) + // with height 0, which could cause issues with estimating the epoch end + // if the genesis height is nonzero. It's easier to handle it manually. + if tip.prev_block_hash == CryptoHash::default() { + if tip.height == height { + return Ok(vec![tip.epoch_id.clone()]); + } + + if height > tip.height { + return Ok(vec![tip.next_epoch_id.clone()]); + } + + return Ok(vec![]); + } + + // See if the height is in the current epoch + let current_epoch_first_block_hash = + *self.get_block_info(&tip.last_block_hash)?.epoch_first_block(); + let current_epoch_first_block_info = + self.get_block_info(¤t_epoch_first_block_hash)?; + + let current_epoch_start = current_epoch_first_block_info.height(); + let current_epoch_length = self.get_epoch_config(&tip.epoch_id)?.epoch_length; + let current_epoch_estimated_end = current_epoch_start.saturating_add(current_epoch_length); + + // All blocks with height lower than the estimated end are guaranteed to reside in the current epoch. + // The situation is clear here. + if (current_epoch_start..current_epoch_estimated_end).contains(&height) { + return Ok(vec![tip.epoch_id.clone()]); + } + + // If the height is higher than the current epoch's estimated end, then it's + // not clear in which epoch it'll be. Under normal circumstances it would be + // in the next epoch, but with missing blocks the current epoch could stretch out + // past its estimated end, so the height might end up being in the current epoch, + // even though its height is higher than the estimated end. + if height >= current_epoch_estimated_end { + return Ok(vec![tip.epoch_id.clone(), tip.next_epoch_id.clone()]); + } + + // Finally try the previous epoch. + // First and last blocks of the previous epoch are already known, so the situation is clear. 
+ let prev_epoch_last_block_hash = current_epoch_first_block_info.prev_hash(); + let prev_epoch_last_block_info = self.get_block_info(prev_epoch_last_block_hash)?; + let prev_epoch_first_block_info = + self.get_block_info(prev_epoch_last_block_info.epoch_first_block())?; + + // If the current epoch is the epoch after genesis, then the previous + // epoch contains only the genesis block. This case has to be handled separately + // because epoch_first_block() points to the dummy block (1111..), which has height 0. + if tip.epoch_id == EpochId(CryptoHash::default()) { + let genesis_block_info = prev_epoch_last_block_info; + if height == genesis_block_info.height() { + return Ok(vec![genesis_block_info.epoch_id().clone()]); + } else { + return Ok(vec![]); + } + } + + if (prev_epoch_first_block_info.height()..=prev_epoch_last_block_info.height()) + .contains(&height) + { + return Ok(vec![prev_epoch_last_block_info.epoch_id().clone()]); + } + + // The height doesn't belong to any of the epochs around the tip, return an empty Vec. 
+ Ok(vec![]) + } + #[cfg(feature = "new_epoch_sync")] pub fn get_all_epoch_hashes_from_db( &self, diff --git a/chain/epoch-manager/src/tests/mod.rs b/chain/epoch-manager/src/tests/mod.rs index 2083295c3ae..1e72602e0f6 100644 --- a/chain/epoch-manager/src/tests/mod.rs +++ b/chain/epoch-manager/src/tests/mod.rs @@ -10,6 +10,7 @@ use crate::test_utils::{ DEFAULT_TOTAL_SUPPLY, }; use near_primitives::account::id::AccountIdRef; +use near_primitives::block::Tip; use near_primitives::challenge::SlashedValidator; use near_primitives::epoch_manager::EpochConfig; use near_primitives::hash::hash; @@ -2808,3 +2809,333 @@ fn test_verify_chunk_state_witness() { bad_signer.sign_chunk_state_witness(&chunk_state_witness.inner).0; assert!(!epoch_manager.verify_chunk_state_witness_signature(&chunk_state_witness).unwrap()); } + +/// Simulate the blockchain over a few epochs and verify that possible_epochs_of_height_around_tip() +/// gives the correct results at each step. +/// Some of the blocks are missing to make the test more interesting. 
+/// The blocks present in each epoch are: +/// Epoch(111): genesis +/// Epoch(111): 1, 2, 3, 4, 5 +/// epoch1: 6, 7, 8, 9, 10 +/// epoch2: 12, 14, 16, 18, 20, 22, 24, 25, 26 +/// epoch3: 27, 28, 29, 30, 31 +/// epoch4: 32+ +#[test] +fn test_possible_epochs_of_height_around_tip() { + use std::str::FromStr; + + let amount_staked = 1_000_000; + let account_id = AccountId::from_str("test1").unwrap(); + let validators = vec![(account_id, amount_staked)]; + let h = hash_range(50); + + let genesis_epoch = EpochId(CryptoHash::default()); + + let epoch_length = 5; + let mut epoch_manager = setup_default_epoch_manager(validators, epoch_length, 1, 2, 2, 90, 60); + + // Add the genesis block with height 1000 + let genesis_height = 1000; + record_block(&mut epoch_manager, CryptoHash::default(), h[0], genesis_height, vec![]); + + let genesis_tip = Tip { + height: genesis_height, + last_block_hash: h[0], + prev_block_hash: CryptoHash::default(), + epoch_id: genesis_epoch.clone(), + next_epoch_id: genesis_epoch.clone(), + }; + + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&genesis_tip, 0).unwrap(), + vec![] + ); + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&genesis_tip, genesis_height - 1) + .unwrap(), + vec![] + ); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&genesis_tip, genesis_height).unwrap(), + vec![genesis_epoch.clone()] + ); + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&genesis_tip, genesis_height + 1) + .unwrap(), + vec![genesis_epoch.clone()] + ); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&genesis_tip, 10000000).unwrap(), + vec![genesis_epoch.clone()] + ); + + let epoch1 = EpochId(h[0]); + dbg!(&epoch1); + + // Add blocks with heights 1..5, a standard epoch with no surprises + for i in 1..=5 { + let height = genesis_height + i as BlockHeight; + dbg!(height); + record_block(&mut epoch_manager, h[i - 1], h[i], height, vec![]); + let tip = Tip { + height, 
+ last_block_hash: h[i], + prev_block_hash: h[i - 1], + epoch_id: genesis_epoch.clone(), + next_epoch_id: epoch1.clone(), + }; + assert_eq!(epoch_manager.possible_epochs_of_height_around_tip(&tip, 0).unwrap(), vec![]); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height).unwrap(), + vec![genesis_epoch.clone()] + ); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height + 1).unwrap(), + vec![genesis_epoch.clone()] + ); + for h in 1..=5 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h as BlockHeight) + .unwrap(), + vec![genesis_epoch.clone()] + ); + } + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height + 6).unwrap(), + vec![genesis_epoch.clone(), epoch1.clone()] + ); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, 1000000).unwrap(), + vec![genesis_epoch.clone(), epoch1.clone()] + ); + } + + let epoch2 = EpochId(h[5]); + dbg!(&epoch2); + + // Add blocks with heights 6..10, also a standard epoch with no surprises + for i in 6..=10 { + let height = genesis_height + i as BlockHeight; + dbg!(height); + record_block(&mut epoch_manager, h[i - 1], h[i], height, vec![]); + let tip = Tip { + height, + last_block_hash: h[i], + prev_block_hash: h[i - 1], + epoch_id: epoch1.clone(), + next_epoch_id: epoch2.clone(), + }; + assert_eq!(epoch_manager.possible_epochs_of_height_around_tip(&tip, 0).unwrap(), vec![]); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height).unwrap(), + vec![] + ); + for h in 1..=5 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![genesis_epoch.clone()] + ); + } + for h in 6..=10 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h as BlockHeight) + .unwrap(), + vec![epoch1.clone()] + ); + } + assert_eq!( + 
epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height + 11).unwrap(), + vec![epoch1.clone(), epoch2.clone()] + ); + assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, 1000000).unwrap(), + vec![epoch1.clone(), epoch2.clone()] + ); + } + + let epoch3 = EpochId(h[10]); + dbg!(&epoch3); + + // Now there is a very long epoch with no final blocks (all odd blocks are missing) + // For all the blocks inside this for the last final block will be block #8, as it has #9 and #10 + // on top of it. + let last_final_block_hash = h[8]; + let last_finalized_height = genesis_height + 8; + for i in (12..=24).filter(|i| i % 2 == 0) { + let height = genesis_height + i as BlockHeight; + dbg!(height); + let block_info = block_info( + h[i], + height, + last_finalized_height, + last_final_block_hash, + h[i - 2], + h[12], + vec![], + DEFAULT_TOTAL_SUPPLY, + ); + epoch_manager.record_block_info(block_info, [0; 32]).unwrap().commit().unwrap(); + let tip = Tip { + height, + last_block_hash: h[i], + prev_block_hash: h[i - 2], + epoch_id: epoch2.clone(), + next_epoch_id: epoch3.clone(), + }; + for h in 0..=5 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![] + ); + } + for h in 6..=10 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch1.clone()] + ); + } + // Block 11 isn't in any epoch. Block 10 was the last of the previous epoch and block 12 + // is the first one of the new epoch. Block 11 was skipped and doesn't belong to any epoch. 
+ assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height + 11).unwrap(), + vec![] + ); + for h in 12..17 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch2.clone()] + ); + } + for h in 17..=24 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch2.clone(), epoch3.clone()] + ); + } + } + + // Finally there are two consecutive blocks on top of block 24 which + // make block 24 final and finalize epoch2. + for i in [25, 26] { + let height = genesis_height + i as BlockHeight; + dbg!(height); + let block_info = block_info( + h[i], + height, + if i == 26 { genesis_height + 24 } else { last_finalized_height }, + if i == 26 { h[24] } else { last_final_block_hash }, + h[i - 1], + h[12], + vec![], + DEFAULT_TOTAL_SUPPLY, + ); + epoch_manager.record_block_info(block_info, [0; 32]).unwrap().commit().unwrap(); + let tip = Tip { + height, + last_block_hash: h[i], + prev_block_hash: h[i - 1], + epoch_id: epoch2.clone(), + next_epoch_id: epoch3.clone(), + }; + for h in 0..=5 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![] + ); + } + for h in 6..=10 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch1.clone()] + ); + } + // Block 11 isn't in any epoch. Block 10 was the last of the previous epoch and block 12 + // is the first one of the new epoch. Block 11 was skipped and doesn't belong to any epoch. 
+ assert_eq!( + epoch_manager.possible_epochs_of_height_around_tip(&tip, genesis_height + 11).unwrap(), + vec![] + ); + for h in 12..17 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch2.clone()] + ); + } + for h in 17..=26 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch2.clone(), epoch3.clone()] + ); + } + } + + // One more epoch without surprises to make sure that the previous weird epoch is handled correctly + let epoch4 = EpochId(h[12]); + for i in 27..=31 { + let height = genesis_height + i as BlockHeight; + dbg!(height); + record_block(&mut epoch_manager, h[i - 1], h[i], height, vec![]); + let tip = Tip { + height, + last_block_hash: h[i], + prev_block_hash: h[i - 1], + epoch_id: epoch3.clone(), + next_epoch_id: epoch4.clone(), + }; + assert_eq!(epoch_manager.possible_epochs_of_height_around_tip(&tip, 0).unwrap(), vec![]); + for h in 0..=11 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![] + ); + } + for h in 12..=26 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch2.clone()] + ); + } + for h in 27..=31 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch3.clone()] + ); + } + for h in 32..40 { + assert_eq!( + epoch_manager + .possible_epochs_of_height_around_tip(&tip, genesis_height + h) + .unwrap(), + vec![epoch3.clone(), epoch4.clone()] + ); + } + } +} From 9cb7df9ab3f9c583dbb48ad72d6fc31e461cc649 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 14 Feb 2024 14:36:45 +0000 Subject: [PATCH 06/34] Add orphan state witnesses to the orphan pool When the previous block isn't available, we can't process the ChunkStateWitness and it becomes an orphaned state witness, waiting for the required block to 
arrive. In such situations the witness is partialy validated and then put in the `OrphanStateWitnessPool`. It's impossible to fully validate the witness, as the previous block isn't available, but we can do some basic checks to curb abusive behavior - check the shard_id, signature, size, etc. --- .../chunk_validator/mod.rs | 11 +- .../orphan_witness_handling.rs | 165 ++++++++++++++++++ 2 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs diff --git a/chain/client/src/stateless_validation/chunk_validator/mod.rs b/chain/client/src/stateless_validation/chunk_validator/mod.rs index 2266f0902a0..1d163ca86f3 100644 --- a/chain/client/src/stateless_validation/chunk_validator/mod.rs +++ b/chain/client/src/stateless_validation/chunk_validator/mod.rs @@ -1,3 +1,4 @@ +pub mod orphan_witness_handling; pub mod orphan_witness_pool; use super::processing_tracker::ProcessingDoneTracker; @@ -599,7 +600,15 @@ impl Client { // Context: We are currently unable to handle production of the state witness for the // first chunk after genesis as it's not possible to run the genesis chunk in runtime. let prev_block_hash = witness.inner.chunk_header.prev_block_hash(); - let prev_block = self.chain.get_block(prev_block_hash)?; + let prev_block = match self.chain.get_block(prev_block_hash) { + Ok(block) => block, + Err(Error::DBNotFoundErr(_)) => { + // Previous block isn't available at the moment, add this witness to the orphan pool. 
+ self.handle_orphan_state_witness(witness)?; + return Ok(()); + } + Err(err) => return Err(err), + }; self.process_chunk_state_witness_with_prev_block( witness, peer_id, diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs new file mode 100644 index 00000000000..dbcf4947373 --- /dev/null +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs @@ -0,0 +1,165 @@ +//! This module contains the logic of handling orphaned chunk state witnesses. +//! To process a ChunkStateWitness we need its previous block, but sometimes +//! the witness shows up before the previous block is available, so it can't be +//! processed immediately. In such cases the witness becomes an orphaned witness +//! and it's kept in the pool until the required block arrives. Once the block +//! arrives, all witnesses that were waiting for it can be processed. + +use crate::Client; +use itertools::Itertools; +use near_chain_primitives::Error; +use near_primitives::stateless_validation::ChunkStateWitness; +use near_primitives::types::{BlockHeight, EpochId}; +use std::ops::Range; + +/// We keep only orphan witnesses that are within this distance of +/// the current chain head. This helps to reduce the size of +/// OrphanStateWitnessPool and protects against spam attacks. +/// The range starts at 2 because a witness at height of head+1 would +/// have the previous block available (the chain head), so it wouldn't be an orphan. +pub const ALLOWED_ORPHAN_WITNESS_DISTANCE_FROM_HEAD: Range = 2..6; + +/// We keep only orphan witnesses which are smaller than this size. +/// This limits the maximum memory usage of OrphanStateWitnessPool. 
+pub const MAX_ORPHAN_WITNESS_SIZE: usize = 40_000_000; + +impl Client { + pub fn handle_orphan_state_witness( + &mut self, + witness: ChunkStateWitness, + ) -> Result { + let chunk_header = &witness.inner.chunk_header; + let witness_height = chunk_header.height_created(); + let witness_shard = chunk_header.shard_id(); + + // Don't save orphaned state witnesses which are far away from the current chain head. + let chain_head = &self.chain.head()?; + let head_distance = witness_height.saturating_sub(chain_head.height); + if !ALLOWED_ORPHAN_WITNESS_DISTANCE_FROM_HEAD.contains(&head_distance) { + tracing::debug!( + target: "client", + head_height = chain_head.height, + witness_height, + witness_shard, + witness_chunk = ?chunk_header.chunk_hash(), + witness_prev_block = ?chunk_header.prev_block_hash(), + "Not saving an orphaned ChunkStateWitness because its height isn't within the allowed height range"); + return Ok(HandleOrphanWitnessOutcome::TooFarFromHead { + witness_height, + head_height: chain_head.height, + }); + } + + // Don't save orphaned state witnesses which are bigger than the allowed limit. + let witness_size = borsh::to_vec(&witness)?.len(); + if witness_size > MAX_ORPHAN_WITNESS_SIZE { + tracing::warn!( + target: "client", + witness_height, + witness_shard, + witness_chunk = ?chunk_header.chunk_hash(), + witness_prev_block = ?chunk_header.prev_block_hash(), + witness_size, + "Not saving an orphaned ChunkStateWitness because it's too big. This is unexpected."); + return Ok(HandleOrphanWitnessOutcome::TooBig(witness_size)); + } + + // Try to find the EpochId to which this witness will belong based on its height. + // It's not always possible to determine the exact epoch_id because the exact + // starting height of the next epoch isn't known until it actually starts, + // so things can get unclear around epoch boundaries. + // Let's collect the epoch_ids in which the witness might possibly be. 
+ let possible_epochs = + self.epoch_manager.possible_epochs_of_height_around_tip(&chain_head, witness_height)?; + + // Try to validate the witness assuming that it resides in one of the possible epochs. + // The witness must pass validation in one of these epochs before it can be admitted to the pool. + let mut epoch_validation_result: Option> = None; + for epoch_id in possible_epochs { + match self.partially_validate_orphan_witness_in_epoch(&witness, &epoch_id) { + Ok(()) => { + epoch_validation_result = Some(Ok(())); + break; + } + Err(err) => epoch_validation_result = Some(Err(err)), + } + } + match epoch_validation_result { + Some(Ok(())) => {} // Validation passed in one of the possible epochs, witness can be added to the pool. + Some(Err(err)) => { + // Validation failed in all possible epochs, reject the witness + return Err(err); + } + None => { + // possible_epochs was empty. This shouldn't happen as all epochs around the chain head are known. + return Err(Error::Other(format!( + "Couldn't find any matching EpochId for orphan chunk state witness with height {}", + witness_height + ))); + } + } + + // Orphan witness is OK, save it to the pool + tracing::debug!( + target: "client", + witness_height, + witness_shard, + witness_chunk = ?chunk_header.chunk_hash(), + witness_prev_block = ?chunk_header.prev_block_hash(), + "Saving an orphaned ChunkStateWitness to orphan pool"); + self.chunk_validator.orphan_witness_pool.add_orphan_state_witness(witness); + Ok(HandleOrphanWitnessOutcome::SavedTooPool) + } + + fn partially_validate_orphan_witness_in_epoch( + &self, + witness: &ChunkStateWitness, + epoch_id: &EpochId, + ) -> Result<(), Error> { + let chunk_header = &witness.inner.chunk_header; + let witness_height = chunk_header.height_created(); + let witness_shard = chunk_header.shard_id(); + + // Validate shard_id + if !self.epoch_manager.get_shard_layout(&epoch_id)?.shard_ids().contains(&witness_shard) { + return Err(Error::InvalidChunkStateWitness(format!( + 
"Invalid shard_id in ChunkStateWitness: {}", + witness_shard + ))); + } + + // Reject witnesses for chunks for which which this node isn't a validator. + // It's an error, as the sender shouldn't send the witness to a non-validator node. + let Some(my_signer) = self.chunk_validator.my_signer.as_ref() else { + return Err(Error::NotAValidator); + }; + let chunk_validator_assignments = self.epoch_manager.get_chunk_validator_assignments( + &epoch_id, + witness_shard, + witness_height, + )?; + if !chunk_validator_assignments.contains(my_signer.validator_id()) { + return Err(Error::NotAChunkValidator); + } + + // Verify signature + if !self.epoch_manager.verify_chunk_state_witness_signature_in_epoch(&witness, &epoch_id)? { + return Err(Error::InvalidChunkStateWitness("Invalid signature".to_string())); + } + + Ok(()) + } +} + +/// Outcome of processing an orphaned witness. +/// If the witness is clearly invalid, it's rejected and the handler function produces an error. +/// Sometimes the witness might not be strictly invalid, but we still don't want to save it because +/// of other reasons. In such cases the handler function returns Ok(outcome) to let the caller +/// know what happened with the witness. +/// It's useful in tests. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HandleOrphanWitnessOutcome { + SavedTooPool, + TooBig(usize), + TooFarFromHead { head_height: BlockHeight, witness_height: BlockHeight }, +} From 5a76c7c0da95fb53054df95ffd2b964937758525 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 14 Feb 2024 14:38:52 +0000 Subject: [PATCH 07/34] Process orphaned witnesses when a new block is accepted When a new block is accepted we can process all orphaned state witnesses that were waiting for this block. Witnesses which became ready are taken out of the pool and processed one by one. 
--- chain/client/src/client.rs | 2 ++ .../orphan_witness_handling.rs | 28 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 6b856861988..0c7575ba86a 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -1643,6 +1643,8 @@ impl Client { self.shards_manager_adapter .send(ShardsManagerRequestFromClient::CheckIncompleteChunks(*block.hash())); + + self.process_ready_orphan_chunk_state_witnesses(&block); } /// Reconcile the transaction pool after processing a block. diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs index dbcf4947373..2e0aee3c953 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs @@ -7,7 +7,9 @@ use crate::Client; use itertools::Itertools; +use near_chain::Block; use near_chain_primitives::Error; +use near_primitives::network::PeerId; use near_primitives::stateless_validation::ChunkStateWitness; use near_primitives::types::{BlockHeight, EpochId}; use std::ops::Range; @@ -149,6 +151,32 @@ impl Client { Ok(()) } + + pub fn process_ready_orphan_chunk_state_witnesses(&mut self, accepted_block: &Block) { + let ready_witnesses = self + .chunk_validator + .orphan_witness_pool + .take_state_witnesses_waiting_for_block(accepted_block.hash()); + for witness in ready_witnesses { + let header = &witness.inner.chunk_header; + tracing::debug!( + target: "client", + witness_height = header.height_created(), + witness_shard = header.shard_id(), + witness_chunk = ?header.chunk_hash(), + witness_prev_block = ?header.prev_block_hash(), + "Processing an orphaned ChunkStateWitness, its previous block has arrived." 
+ ); + if let Err(err) = self.process_chunk_state_witness_with_prev_block( + witness, + PeerId::random(), // TODO: Should peer_id even be here? https://github.com/near/stakewars-iv/issues/17 + accepted_block, + None, + ) { + tracing::error!(target: "client", ?err, "Error processing orphan chunk state witness"); + } + } + } } /// Outcome of processing an orphaned witness. From 97a0482d6062343d7000df6b0ca21d7bf7e6850b Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 16 Feb 2024 16:32:29 +0000 Subject: [PATCH 08/34] Allow to configure orphan pool size from config.js There is no ideal size of the witness pool, node operators might want to adjust it to achieve optimal performance. Let's make this possible by adding a configuration option which allows to control the pool size. --- chain/client/src/client.rs | 1 + .../chunk_validator/mod.rs | 3 ++- .../chunk_validator/orphan_witness_pool.rs | 3 ++- core/chain-configs/src/client_config.rs | 10 +++++++++ core/chain-configs/src/lib.rs | 19 ++++++++-------- nearcore/src/config.rs | 22 ++++++++++++------- 6 files changed, 39 insertions(+), 19 deletions(-) diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 0c7575ba86a..dd179fa417c 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -353,6 +353,7 @@ impl Client { network_adapter.clone().into_sender(), runtime_adapter.clone(), chunk_endorsement_tracker.clone(), + config.orphan_state_witness_pool_size, ); let chunk_distribution_network = ChunkDistributionNetwork::from_config(&config); Ok(Self { diff --git a/chain/client/src/stateless_validation/chunk_validator/mod.rs b/chain/client/src/stateless_validation/chunk_validator/mod.rs index 1d163ca86f3..e7cacc9f0cd 100644 --- a/chain/client/src/stateless_validation/chunk_validator/mod.rs +++ b/chain/client/src/stateless_validation/chunk_validator/mod.rs @@ -67,6 +67,7 @@ impl ChunkValidator { network_sender: Sender, runtime_adapter: Arc, chunk_endorsement_tracker: Arc, + 
orphan_witness_pool_size: usize, ) -> Self { Self { my_signer, @@ -74,7 +75,7 @@ impl ChunkValidator { network_sender, runtime_adapter, chunk_endorsement_tracker, - orphan_witness_pool: OrphanStateWitnessPool::default(), + orphan_witness_pool: OrphanStateWitnessPool::new(orphan_witness_pool_size), } } diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index e26000d6c4b..9a1e533e54d 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -1,6 +1,7 @@ use std::collections::{HashMap, HashSet}; use lru::LruCache; +use near_chain_configs::default_orphan_state_witness_pool_size; use near_primitives::hash::CryptoHash; use near_primitives::stateless_validation::ChunkStateWitness; use near_primitives::types::{BlockHeight, ShardId}; @@ -100,6 +101,6 @@ impl OrphanStateWitnessPool { impl Default for OrphanStateWitnessPool { fn default() -> OrphanStateWitnessPool { - OrphanStateWitnessPool::new(12) + OrphanStateWitnessPool::new(default_orphan_state_witness_pool_size()) } } diff --git a/core/chain-configs/src/client_config.rs b/core/chain-configs/src/client_config.rs index b26110128f7..0edf07722e7 100644 --- a/core/chain-configs/src/client_config.rs +++ b/core/chain-configs/src/client_config.rs @@ -301,6 +301,11 @@ pub fn default_produce_chunk_add_transactions_time_limit() -> Option { Some(Duration::from_millis(200)) } +pub fn default_orphan_state_witness_pool_size() -> usize { + // With 4 shards, a capacity of 12 witnesses allows to store 3 orphan witnesses per shard. + 12 +} + /// Config for the Chunk Distribution Network feature. /// This allows nodes to push and pull chunks from a central stream. 
/// The two benefits of this approach are: (1) less request/response traffic @@ -443,6 +448,10 @@ pub struct ClientConfig { /// Nodes not participating will still function fine, but possibly with higher /// latency due to the need of requesting chunks over the peer-to-peer network. pub chunk_distribution_network: Option, + /// OrphanStateWitnessPool keeps instances of ChunkStateWitness which can't be processed + /// because the previous block isn't available. The witnesses wait in the pool untl the + /// required block appears. This variable controls how many witnesses can be stored in the pool. + pub orphan_state_witness_pool_size: usize, } impl ClientConfig { @@ -527,6 +536,7 @@ impl ClientConfig { "produce_chunk_add_transactions_time_limit", ), chunk_distribution_network: None, + orphan_state_witness_pool_size: default_orphan_state_witness_pool_size(), } } } diff --git a/core/chain-configs/src/lib.rs b/core/chain-configs/src/lib.rs index 304edf608d0..8790ac5421b 100644 --- a/core/chain-configs/src/lib.rs +++ b/core/chain-configs/src/lib.rs @@ -10,15 +10,16 @@ pub use client_config::{ default_enable_multiline_logging, default_epoch_sync_enabled, default_header_sync_expected_height_per_second, default_header_sync_initial_timeout, default_header_sync_progress_timeout, default_header_sync_stall_ban_timeout, - default_log_summary_period, default_produce_chunk_add_transactions_time_limit, - default_state_sync, default_state_sync_enabled, default_state_sync_timeout, - default_sync_check_period, default_sync_height_threshold, default_sync_step_period, - default_transaction_pool_size_limit, default_trie_viewer_state_size_limit, - default_tx_routing_height_horizon, default_view_client_threads, - default_view_client_throttle_period, ChunkDistributionNetworkConfig, ChunkDistributionUris, - ClientConfig, DumpConfig, ExternalStorageConfig, ExternalStorageLocation, GCConfig, - LogSummaryStyle, ReshardingConfig, ReshardingHandle, StateSyncConfig, SyncConfig, - 
DEFAULT_GC_NUM_EPOCHS_TO_KEEP, DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL, + default_log_summary_period, default_orphan_state_witness_pool_size, + default_produce_chunk_add_transactions_time_limit, default_state_sync, + default_state_sync_enabled, default_state_sync_timeout, default_sync_check_period, + default_sync_height_threshold, default_sync_step_period, default_transaction_pool_size_limit, + default_trie_viewer_state_size_limit, default_tx_routing_height_horizon, + default_view_client_threads, default_view_client_throttle_period, + ChunkDistributionNetworkConfig, ChunkDistributionUris, ClientConfig, DumpConfig, + ExternalStorageConfig, ExternalStorageLocation, GCConfig, LogSummaryStyle, ReshardingConfig, + ReshardingHandle, StateSyncConfig, SyncConfig, DEFAULT_GC_NUM_EPOCHS_TO_KEEP, + DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL, DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL, MIN_GC_NUM_EPOCHS_TO_KEEP, TEST_STATE_SYNC_TIMEOUT, }; diff --git a/nearcore/src/config.rs b/nearcore/src/config.rs index 1137f3f3e5a..7b525b5b46a 100644 --- a/nearcore/src/config.rs +++ b/nearcore/src/config.rs @@ -5,14 +5,14 @@ use near_chain_configs::{ default_enable_multiline_logging, default_epoch_sync_enabled, default_header_sync_expected_height_per_second, default_header_sync_initial_timeout, default_header_sync_progress_timeout, default_header_sync_stall_ban_timeout, - default_log_summary_period, default_produce_chunk_add_transactions_time_limit, - default_state_sync, default_state_sync_enabled, default_state_sync_timeout, - default_sync_check_period, default_sync_height_threshold, default_sync_step_period, - default_transaction_pool_size_limit, default_trie_viewer_state_size_limit, - default_tx_routing_height_horizon, default_view_client_threads, - default_view_client_throttle_period, get_initial_supply, ChunkDistributionNetworkConfig, - ClientConfig, GCConfig, Genesis, GenesisConfig, GenesisValidationMode, LogSummaryStyle, - MutableConfigValue, 
ReshardingConfig, StateSyncConfig, + default_log_summary_period, default_orphan_state_witness_pool_size, + default_produce_chunk_add_transactions_time_limit, default_state_sync, + default_state_sync_enabled, default_state_sync_timeout, default_sync_check_period, + default_sync_height_threshold, default_sync_step_period, default_transaction_pool_size_limit, + default_trie_viewer_state_size_limit, default_tx_routing_height_horizon, + default_view_client_threads, default_view_client_throttle_period, get_initial_supply, + ChunkDistributionNetworkConfig, ClientConfig, GCConfig, Genesis, GenesisConfig, + GenesisValidationMode, LogSummaryStyle, MutableConfigValue, ReshardingConfig, StateSyncConfig, }; use near_config_utils::{ValidationError, ValidationErrors}; use near_crypto::{InMemorySigner, KeyFile, KeyType, PublicKey, Signer}; @@ -322,6 +322,10 @@ pub struct Config { /// latency due to the need of requesting chunks over the peer-to-peer network. #[serde(skip_serializing_if = "Option::is_none")] pub chunk_distribution_network: Option, + /// OrphanStateWitnessPool keeps instances of ChunkStateWitness which can't be processed + /// because the previous block isn't available. The witnesses wait in the pool untl the + /// required block appears. This variable controls how many witnesses can be stored in the pool. 
+ pub orphan_state_witness_pool_size: usize, } fn is_false(value: &bool) -> bool { @@ -367,6 +371,7 @@ impl Default for Config { produce_chunk_add_transactions_time_limit: default_produce_chunk_add_transactions_time_limit(), chunk_distribution_network: None, + orphan_state_witness_pool_size: default_orphan_state_witness_pool_size(), } } } @@ -681,6 +686,7 @@ impl NearConfig { "produce_chunk_add_transactions_time_limit", ), chunk_distribution_network: config.chunk_distribution_network, + orphan_state_witness_pool_size: config.orphan_state_witness_pool_size, }, network_config: NetworkConfig::new( config.network, From f9b9c520afa021c76454801058308df35de637c2 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 16 Feb 2024 17:56:32 +0000 Subject: [PATCH 09/34] Add metrics to improve observability of OrphanStateWitnessPool Add metrics which provide information about witnesses kept in the pool. Metrics are increased and decreased using a RAII struct to ensure that they're always correct, no matter what happens with the pool. 
--- chain/client/src/metrics.rs | 28 +++++++ .../orphan_witness_handling.rs | 2 +- .../chunk_validator/orphan_witness_pool.rs | 83 +++++++++++++++++-- 3 files changed, 104 insertions(+), 9 deletions(-) diff --git a/chain/client/src/metrics.rs b/chain/client/src/metrics.rs index b153e3cbc99..a9bf2324448 100644 --- a/chain/client/src/metrics.rs +++ b/chain/client/src/metrics.rs @@ -583,6 +583,34 @@ pub(crate) static CHUNK_STATE_WITNESS_TOTAL_SIZE: Lazy = Lazy::new .unwrap() }); +pub(crate) static ORPHAN_CHUNK_STATE_WITNESSES_TOTAL_COUNT: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_orphan_chunk_state_witness_total_count", + "Total number of orphaned chunk state witnesses that were saved for later processing", + &["shard_id"], + ) + .unwrap() +}); + +pub(crate) static ORPHAN_CHUNK_STATE_WITNESS_POOL_SIZE: Lazy = Lazy::new(|| { + try_create_int_gauge_vec( + "near_orphan_chunk_state_witness_pool_size", + "Number of orphaned witnesses kept in OrphanStateWitnessPool (by shard_id)", + &["shard_id"], + ) + .unwrap() +}); + +pub(crate) static ORPHAN_CHUNK_STATE_WITNESS_POOL_MEMORY_USED: Lazy = + Lazy::new(|| { + try_create_int_gauge_vec( + "near_orphan_chunk_state_witness_pool_memory_used", + "Memory in bytes consumed by the OrphanStateWitnessPool (by shard_id)", + &["shard_id"], + ) + .unwrap() + }); + pub(crate) static BLOCK_PRODUCER_ENDORSED_STAKE_RATIO: Lazy = Lazy::new(|| { try_create_histogram_vec( "near_block_producer_endorsed_stake_ratio", diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs index 2e0aee3c953..3a4bc1461b1 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs @@ -109,7 +109,7 @@ impl Client { witness_chunk = ?chunk_header.chunk_hash(), witness_prev_block = ?chunk_header.prev_block_hash(), 
"Saving an orphaned ChunkStateWitness to orphan pool"); - self.chunk_validator.orphan_witness_pool.add_orphan_state_witness(witness); + self.chunk_validator.orphan_witness_pool.add_orphan_state_witness(witness, witness_size); Ok(HandleOrphanWitnessOutcome::SavedTooPool) } diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index 9a1e533e54d..7383827e846 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -6,18 +6,28 @@ use near_primitives::hash::CryptoHash; use near_primitives::stateless_validation::ChunkStateWitness; use near_primitives::types::{BlockHeight, ShardId}; +use metrics_tracker::OrphanWitnessMetricsTracker; + /// `OrphanStateWitnessPool` is used to keep orphaned ChunkStateWitnesses until it's possible to process them. /// To process a ChunkStateWitness we need to have the previous block, but it might happen that a ChunkStateWitness /// shows up before the block is available. In such cases the witness is put in `OrphanStateWitnessPool` until the /// required block arrives and the witness can be processed. pub struct OrphanStateWitnessPool { - witness_cache: LruCache<(ShardId, BlockHeight), ChunkStateWitness>, + witness_cache: LruCache<(ShardId, BlockHeight), CacheEntry>, /// List of orphaned witnesses that wait for this block to appear. /// Maps block hash to entries in `witness_cache`. /// Must be kept in sync with `witness_cache`. waiting_for_block: HashMap>, } +struct CacheEntry { + witness: ChunkStateWitness, + // cargo complains that metrics_tracker is never read, but it's ok, + // as metrics_tracker does all of its work during initalization and destruction. 
+ #[allow(dead_code)] + metrics_tracker: OrphanWitnessMetricsTracker, +} + impl OrphanStateWitnessPool { /// Create a new `OrphanStateWitnessPool` with a capacity of `cache_capacity` witnesses. /// The `Default` trait implementation provides reasonable defaults. @@ -33,7 +43,8 @@ impl OrphanStateWitnessPool { /// It's expected that this `ChunkStateWitness` has gone through basic validation - including signature, /// shard_id, size and distance from the tip. The pool would still work without it, but without validation /// it'd be possible to fill the whole cache with spam. - pub fn add_orphan_state_witness(&mut self, witness: ChunkStateWitness) { + /// `witness_size` is only used for metrics, it's okay to pass 0 if you don't care about the metrics. + pub fn add_orphan_state_witness(&mut self, witness: ChunkStateWitness, witness_size: usize) { if self.witness_cache.cap() == 0 { // A cache with 0 capacity doesn't keep anything. return; @@ -41,12 +52,14 @@ impl OrphanStateWitnessPool { // Insert the new ChunkStateWitness into the cache let chunk_header = &witness.inner.chunk_header; - let cache_key = (chunk_header.shard_id(), chunk_header.height_created()); let prev_block_hash = *chunk_header.prev_block_hash(); - if let Some((_, ejected_witness)) = self.witness_cache.push(cache_key, witness) { + let cache_key = (chunk_header.shard_id(), chunk_header.height_created()); + let metrics_tracker = OrphanWitnessMetricsTracker::new(&witness, witness_size); + let cache_entry = CacheEntry { witness, metrics_tracker }; + if let Some((_, ejected_entry)) = self.witness_cache.push(cache_key, cache_entry) { // If another witness has been ejected from the cache due to capacity limit, // then remove the ejected witness from `waiting_for_block` to keep them in sync - let header = &ejected_witness.inner.chunk_header; + let header = &ejected_entry.witness.inner.chunk_header; tracing::debug!( target: "client", witness_height = header.height_created(), @@ -55,7 +68,7 @@ impl 
OrphanStateWitnessPool { witness_prev_block = ?header.prev_block_hash(), "Ejecting an orphaned ChunkStateWitness from the cache due to capacity limit. It will not be processed." ); - self.remove_from_waiting_for_block(&ejected_witness) + self.remove_from_waiting_for_block(&ejected_entry.witness); } // Add the new orphaned state witness to `waiting_for_block` @@ -89,11 +102,11 @@ impl OrphanStateWitnessPool { let mut result = Vec::new(); for (shard_id, height) in waiting { // Remove this witness from `witness_cache` to keep them in sync - let witness = self.witness_cache.pop(&(shard_id, height)).expect( + let entry = self.witness_cache.pop(&(shard_id, height)).expect( "Every entry in waiting_for_block must have a corresponding witness in the cache", ); - result.push(witness); + result.push(entry.witness); } result } @@ -104,3 +117,57 @@ impl Default for OrphanStateWitnessPool { OrphanStateWitnessPool::new(default_orphan_state_witness_pool_size()) } } + +mod metrics_tracker { + use near_primitives::stateless_validation::ChunkStateWitness; + + use crate::metrics; + + /// OrphanWitnessMetricsTracker is a helper struct which leverages RAII to update + /// the metrics about witnesses in the orphan pool when they're added and removed. + /// Its constructor adds the witness to the metrics, and later its destructor + /// removes the witness from metrics. + /// Using this struct is much less error-prone than adjusting the metrics by hand. 
+ pub struct OrphanWitnessMetricsTracker { + shard_id: String, + witness_size: usize, + } + + impl OrphanWitnessMetricsTracker { + pub fn new( + witness: &ChunkStateWitness, + witness_size: usize, + ) -> OrphanWitnessMetricsTracker { + let shard_id = witness.inner.chunk_header.shard_id().to_string(); + metrics::ORPHAN_CHUNK_STATE_WITNESSES_TOTAL_COUNT + .with_label_values(&[shard_id.as_str()]) + .inc(); + metrics::ORPHAN_CHUNK_STATE_WITNESS_POOL_SIZE + .with_label_values(&[shard_id.as_str()]) + .inc(); + metrics::ORPHAN_CHUNK_STATE_WITNESS_POOL_MEMORY_USED + .with_label_values(&[shard_id.as_str()]) + .add(witness_size_to_i64(witness_size)); + + OrphanWitnessMetricsTracker { shard_id, witness_size } + } + } + + impl Drop for OrphanWitnessMetricsTracker { + fn drop(&mut self) { + metrics::ORPHAN_CHUNK_STATE_WITNESS_POOL_SIZE + .with_label_values(&[self.shard_id.as_str()]) + .dec(); + metrics::ORPHAN_CHUNK_STATE_WITNESS_POOL_MEMORY_USED + .with_label_values(&[self.shard_id.as_str()]) + .sub(witness_size_to_i64(self.witness_size)); + } + } + + fn witness_size_to_i64(witness_size: usize) -> i64 { + witness_size.try_into().expect( + "Orphaned ChunkStateWitness size can't be converted to i64. 
\ + This should be impossible, is it over one exabyte in size?", + ) + } +} From 42af070fc997db2b9424747b8f2face326e526e4 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 16 Feb 2024 20:39:32 +0000 Subject: [PATCH 10/34] Add unit tests for OrphanStateWitnessPool --- .../chunk_validator/orphan_witness_pool.rs | 284 ++++++++++++++++++ core/primitives/src/stateless_validation.rs | 31 +- 2 files changed, 312 insertions(+), 3 deletions(-) diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index 7383827e846..ebe54aaf863 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -171,3 +171,287 @@ mod metrics_tracker { ) } } + +#[cfg(test)] +mod tests { + use near_primitives::hash::{hash, CryptoHash}; + use near_primitives::sharding::{ShardChunkHeader, ShardChunkHeaderInner}; + use near_primitives::stateless_validation::ChunkStateWitness; + use near_primitives::types::{BlockHeight, ShardId}; + + use super::OrphanStateWitnessPool; + + /// Make a dummy witness for testing + /// encoded_length is used to differentiate between witnesses with the same main parameters. + fn make_witness( + height: BlockHeight, + shard_id: ShardId, + prev_block_hash: CryptoHash, + encoded_length: u64, + ) -> ChunkStateWitness { + let mut witness = ChunkStateWitness::new_dummy(height, shard_id, prev_block_hash); + match &mut witness.inner.chunk_header { + ShardChunkHeader::V3(header) => match &mut header.inner { + ShardChunkHeaderInner::V2(inner) => inner.encoded_length = encoded_length, + _ => panic!(), + }, + _ => panic!(), + } + witness + } + + /// Generate fake block hash based on height + fn block(height: BlockHeight) -> CryptoHash { + hash(&height.to_be_bytes()) + } + + /// Assert that both Vecs are equal after sorting. 
It's order-independent, unlike the standard assert_eq! + fn assert_same(mut observed: Vec, mut expected: Vec) { + let sort_comparator = |witness1: &ChunkStateWitness, witness2: &ChunkStateWitness| { + let bytes1 = borsh::to_vec(witness1).unwrap(); + let bytes2 = borsh::to_vec(witness2).unwrap(); + bytes1.cmp(&bytes2) + }; + observed.sort_by(sort_comparator); + expected.sort_by(sort_comparator); + if observed != expected { + let print_witness_info = |witness: &ChunkStateWitness| { + let header = &witness.inner.chunk_header; + eprintln!( + "- height = {}, shard_id = {}, encoded_length: {} prev_block: {}", + header.height_created(), + header.shard_id(), + header.encoded_length(), + header.prev_block_hash() + ); + }; + eprintln!("Mismatch!"); + eprintln!("Expected {} witnesses:", expected.len()); + for witness in expected { + print_witness_info(&witness); + } + eprintln!("Observed {} witnesse:", observed.len()); + for witness in observed { + print_witness_info(&witness); + } + eprintln!("=================="); + panic!("assert_same failed"); + } + } + + // Check that the pool is empty, all witnesses have been removed from both fields + fn assert_empty(pool: &OrphanStateWitnessPool) { + assert!(pool.witness_cache.is_empty()); + assert!(pool.waiting_for_block.is_empty()); + } + + /// Basic functionality - inserting witnesses and fetching them works as expected + #[test] + fn basic() { + let mut pool = OrphanStateWitnessPool::new(10); + + let witness1 = make_witness(100, 1, block(99), 0); + let witness2 = make_witness(100, 2, block(99), 0); + let witness3 = make_witness(101, 1, block(100), 0); + let witness4 = make_witness(101, 2, block(100), 0); + + pool.add_orphan_state_witness(witness1.clone(), 0); + pool.add_orphan_state_witness(witness2.clone(), 0); + pool.add_orphan_state_witness(witness3.clone(), 0); + pool.add_orphan_state_witness(witness4.clone(), 0); + + let waiting_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); + assert_same(waiting_for_99, 
vec![witness1, witness2]); + + let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); + assert_same(waiting_for_100, vec![witness3, witness4]); + + assert_empty(&pool); + } + + /// When a new witness is inserted with the same (shard_id, height) as an existing witness, the new witness + /// should replace the old one. The old one should be ejected from the pool. + #[test] + fn replacing() { + let mut pool = OrphanStateWitnessPool::new(10); + + // The old witness is replaced when the awaited block is the same + { + let witness1 = make_witness(100, 1, block(99), 0); + let witness2 = make_witness(100, 1, block(99), 1); + pool.add_orphan_state_witness(witness1, 0); + pool.add_orphan_state_witness(witness2.clone(), 0); + + let waiting_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); + assert_same(waiting_for_99, vec![witness2]); + } + + // The old witness is replaced when the awaited block is different, waiting_for_block is cleaned as expected + { + let witness3 = make_witness(102, 1, block(100), 0); + let witness4 = make_witness(102, 1, block(101), 0); + pool.add_orphan_state_witness(witness3, 0); + pool.add_orphan_state_witness(witness4.clone(), 0); + + let waiting_for_101 = pool.take_state_witnesses_waiting_for_block(&block(101)); + assert_same(waiting_for_101, vec![witness4]); + + let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); + assert_same(waiting_for_100, vec![]); + } + + assert_empty(&pool); + } + + /// The pool has limited capacity. Once it hits the capacity, the least-recently used witness will be ejected. 
+ #[test] + fn limited_capacity() { + let mut pool = OrphanStateWitnessPool::new(2); + + let witness1 = make_witness(102, 1, block(101), 0); + let witness2 = make_witness(101, 1, block(100), 0); + let witness3 = make_witness(101, 2, block(100), 0); + + pool.add_orphan_state_witness(witness1, 0); + pool.add_orphan_state_witness(witness2.clone(), 0); + + // Inserting the third witness causes the pool to go over capacity, so witness1 should be ejected. + pool.add_orphan_state_witness(witness3.clone(), 0); + + let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); + assert_same(waiting_for_100, vec![witness2, witness3]); + + // witness1 should be ejected, no one is waiting for block 101 + let waiting_for_101 = pool.take_state_witnesses_waiting_for_block(&block(101)); + assert_same(waiting_for_101, vec![]); + + assert_empty(&pool); + } + + /// When a witness is ejected from the cache, it should also be removed from the corresponding waiting_for_block set. + /// But it's not enough to remove it from the set, if the set has become empty we must also remove the set itself + /// from `waiting_for_block`. Otherwise `waiting_for_block` would have a constantly increasing amount of empty sets, + /// which would cause a memory leak. + #[test] + fn empty_waiting_sets_cleared_on_ejection() { + let mut pool = OrphanStateWitnessPool::new(1); + + let witness1 = make_witness(100, 1, block(99), 0); + pool.add_orphan_state_witness(witness1, 0); + + // When witness2 is added to the pool witness1 gets ejected from the cache (capacity is set to 1). + // When this happens the set of witness waiting for block 99 will become empty. Then this empty set should + // be removed from `waiting_for_block`, otherwise there'd be a memory leak. + let witness2 = make_witness(100, 2, block(100), 0); + pool.add_orphan_state_witness(witness2.clone(), 0); + + // waiting_for_block contains only one entry, list of witnesses waiting for block 100. 
+ assert_eq!(pool.waiting_for_block.len(), 1); + + let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); + assert_same(waiting_for_100, vec![witness2]); + + assert_empty(&pool); + } + + /// OrphanStateWitnessPool can handle large shard ids without any problems, it doesn't keep a Vec indexed by shard_id + #[test] + fn large_shard_id() { + let mut pool = OrphanStateWitnessPool::new(10); + + let large_shard_id = ShardId::MAX; + let witness = make_witness(101, large_shard_id, block(99), 0); + pool.add_orphan_state_witness(witness.clone(), 0); + + let waiting_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); + assert_same(waiting_for_99, vec![witness]); + + assert_empty(&pool); + } + + /// An OrphanStateWitnessPool with 0 capacity shouldn't crash, it should just ignore all witnesses + #[test] + fn zero_capacity() { + let mut pool = OrphanStateWitnessPool::new(0); + + pool.add_orphan_state_witness(make_witness(100, 1, block(99), 0), 0); + pool.add_orphan_state_witness(make_witness(100, 1, block(99), 0), 1); + pool.add_orphan_state_witness(make_witness(100, 2, block(99), 0), 0); + pool.add_orphan_state_witness(make_witness(101, 0, block(100), 0), 0); + + let waiting = pool.take_state_witnesses_waiting_for_block(&block(99)); + assert_same(waiting, vec![]); + + assert_empty(&pool); + } + + /// OrphanStateWitnessPool has a Drop implementation which clears the metrics. + /// It's hard to test it because metrics are global and it could interfere with other tests, + /// but we can at least test that it doesn't crash. That's always something. 
+    #[test]
+    fn destructor_doesnt_crash() {
+        let mut pool = OrphanStateWitnessPool::new(10);
+        pool.add_orphan_state_witness(make_witness(100, 0, block(99), 0), 0);
+        pool.add_orphan_state_witness(make_witness(100, 2, block(99), 0), 0);
+        pool.add_orphan_state_witness(make_witness(100, 2, block(99), 0), 1);
+        pool.add_orphan_state_witness(make_witness(101, 0, block(100), 0), 0);
+        std::mem::drop(pool);
+        println!("Ok!");
+    }
+
+    /// A longer test scenario
+    #[test]
+    fn scenario() {
+        let mut pool = OrphanStateWitnessPool::new(5);
+
+        // Witnesses for shards 0, 1, 2, 3 at height 100, looking for block 99
+        let witness0 = make_witness(100, 0, block(99), 0);
+        let witness1 = make_witness(100, 1, block(99), 0);
+        let witness2 = make_witness(100, 2, block(99), 0);
+        let witness3 = make_witness(100, 3, block(99), 0);
+        pool.add_orphan_state_witness(witness0, 0);
+        pool.add_orphan_state_witness(witness1, 0);
+        pool.add_orphan_state_witness(witness2, 0);
+        pool.add_orphan_state_witness(witness3, 0);
+
+        // Another witness on shard 1, height 100. Should replace witness1
+        let witness5 = make_witness(100, 1, block(99), 1);
+        pool.add_orphan_state_witness(witness5.clone(), 0);
+
+        // Witnesses for shards 0, 1, 2, 3 at height 101, looking for block 100
+        let witness6 = make_witness(101, 0, block(100), 0);
+        let witness7 = make_witness(101, 1, block(100), 0);
+        let witness8 = make_witness(101, 2, block(100), 0);
+        let witness9 = make_witness(101, 3, block(100), 0);
+        pool.add_orphan_state_witness(witness6, 0);
+        pool.add_orphan_state_witness(witness7.clone(), 0);
+        pool.add_orphan_state_witness(witness8.clone(), 0);
+        pool.add_orphan_state_witness(witness9.clone(), 0);
+
+        // Pool capacity is 5, so three witnesses at height 100 should be ejected.
+ // The only surviving witness should be witness5, which was the freshest one among them + let looking_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); + assert_same(looking_for_99, vec![witness5]); + + // Let's add a few more witnesses + let witness10 = make_witness(102, 1, block(101), 0); + let witness11 = make_witness(102, 4, block(100), 0); + let witness12 = make_witness(102, 1, block(77), 0); + pool.add_orphan_state_witness(witness10, 0); + pool.add_orphan_state_witness(witness11.clone(), 0); + pool.add_orphan_state_witness(witness12.clone(), 0); + + // Check that witnesses waiting for block 100 are correct + let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); + assert_same(waiting_for_100, vec![witness7, witness8, witness9, witness11]); + + // At this point the pool contains only witness12, no one should be waiting for block 101. + let waiting_for_101 = pool.take_state_witnesses_waiting_for_block(&block(101)); + assert_same(waiting_for_101, vec![]); + + let waiting_for_77 = pool.take_state_witnesses_waiting_for_block(&block(77)); + assert_same(waiting_for_77, vec![witness12]); + + assert_empty(&pool); + } +} diff --git a/core/primitives/src/stateless_validation.rs b/core/primitives/src/stateless_validation.rs index 12e342854bd..c2d2d06c520 100644 --- a/core/primitives/src/stateless_validation.rs +++ b/core/primitives/src/stateless_validation.rs @@ -1,13 +1,13 @@ use std::collections::{HashMap, HashSet}; use crate::challenge::PartialState; -use crate::sharding::{ChunkHash, ReceiptProof, ShardChunkHeader}; +use crate::sharding::{ChunkHash, ReceiptProof, ShardChunkHeader, ShardChunkHeaderV3}; use crate::transaction::SignedTransaction; -use crate::validator_signer::ValidatorSigner; +use crate::validator_signer::{EmptyValidatorSigner, ValidatorSigner}; use borsh::{BorshDeserialize, BorshSerialize}; use near_crypto::{PublicKey, Signature}; use near_primitives_core::hash::CryptoHash; -use 
near_primitives_core::types::{AccountId, Balance}; +use near_primitives_core::types::{AccountId, Balance, BlockHeight, ShardId}; /// An arbitrary static string to make sure that this struct cannot be /// serialized to look identical to another serialized struct. For chunk @@ -134,6 +134,31 @@ impl ChunkStateWitness { ); ChunkStateWitness { inner, signature: Signature::default() } } + + // Make a new dummy ChunkStateWitness for testing. + pub fn new_dummy( + height: BlockHeight, + shard_id: ShardId, + prev_block_hash: CryptoHash, + ) -> ChunkStateWitness { + let header = ShardChunkHeader::V3(ShardChunkHeaderV3::new( + prev_block_hash, + Default::default(), + Default::default(), + Default::default(), + Default::default(), + height, + shard_id, + Default::default(), + Default::default(), + Default::default(), + Default::default(), + Default::default(), + Default::default(), + &EmptyValidatorSigner::default(), + )); + ChunkStateWitness::empty(header) + } } /// Represents the base state and the expected post-state-root of a chunk's state From 6592ace7e584755b44a154066571c283c9b4c0ef Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Tue, 20 Feb 2024 08:36:15 +0000 Subject: [PATCH 11/34] Add TestEnv::get_client_index() There was no way to get the index of a client with some AccountId, let's add a method that enables it to `TestEnv` --- chain/client/src/test_utils/test_env.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chain/client/src/test_utils/test_env.rs b/chain/client/src/test_utils/test_env.rs index 92e59dca487..b0fc3d9c984 100644 --- a/chain/client/src/test_utils/test_env.rs +++ b/chain/client/src/test_utils/test_env.rs @@ -553,6 +553,11 @@ impl TestEnv { self.clients[idx].validator_signer.as_ref().unwrap().validator_id() } + /// Returns the index of client with the given [`AccoountId`]. 
+ pub fn get_client_index(&self, account_id: &AccountId) -> usize { + self.account_indices.index(account_id) + } + pub fn get_runtime_config(&self, idx: usize, epoch_id: EpochId) -> RuntimeConfig { self.clients[idx].runtime_adapter.get_protocol_config(&epoch_id).unwrap().runtime_config } From a29b8dfc57ff875ba2d632a1d4377502ab8e6647 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Tue, 20 Feb 2024 08:39:57 +0000 Subject: [PATCH 12/34] Add TestEnv::wait_for_chunk_endorsement Add a function that allows to wait until some chunk endorsement is sent out. It will later be used to verify that a validator sends out endorsements for an orphaned chunk state witness when its block arrives. --- chain/client/src/test_utils/test_env.rs | 50 +++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/chain/client/src/test_utils/test_env.rs b/chain/client/src/test_utils/test_env.rs index b0fc3d9c984..2473d6ae1bd 100644 --- a/chain/client/src/test_utils/test_env.rs +++ b/chain/client/src/test_utils/test_env.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; use std::sync::{Arc, Mutex}; -use std::time::Instant; +use std::time::{Duration, Instant}; use crate::adapter::ProcessTxResponse; use crate::stateless_validation::processing_tracker::{ @@ -29,8 +29,8 @@ use near_primitives::epoch_manager::RngSeed; use near_primitives::errors::InvalidTxError; use near_primitives::hash::CryptoHash; use near_primitives::network::PeerId; -use near_primitives::sharding::PartialEncodedChunk; -use near_primitives::stateless_validation::ChunkStateWitness; +use near_primitives::sharding::{ChunkHash, PartialEncodedChunk}; +use near_primitives::stateless_validation::{ChunkEndorsement, ChunkStateWitness}; use near_primitives::test_utils::create_test_signer; use near_primitives::transaction::{Action, FunctionCallAction, SignedTransaction}; use near_primitives::types::{AccountId, Balance, BlockHeight, EpochId, NumSeats}; @@ -47,6 +47,9 @@ use 
super::setup::setup_client_with_runtime; use super::test_env_builder::TestEnvBuilder; use super::TEST_SEED; +/// Timeout used in tests that wait for a specific chunk endorsement to appear +const CHUNK_ENDORSEMENTS_TIMEOUT: Duration = Duration::from_secs(10); + /// An environment for writing integration tests with multiple clients. /// This environment can simulate near nodes without network and it can be configured to use different runtimes. pub struct TestEnv { @@ -382,6 +385,43 @@ impl TestEnv { self.propagate_chunk_endorsements(allow_errors); } + /// Wait until an endorsement for `chunk_hash` appears in the network messages send by + /// the Client with index `client_idx`. Times out after CHUNK_ENDORSEMENTS_TIMEOUT. + /// Doesn't process or consume the message, it just waits until the message appears on the network_adapter. + pub fn wait_for_chunk_endorsement( + &mut self, + client_idx: usize, + chunk_hash: &ChunkHash, + ) -> Result { + let start_time = Instant::now(); + let network_adapter = self.network_adapters[client_idx].clone(); + loop { + let mut endorsement_opt = None; + network_adapter.handle_filtered(|request| { + match &request { + PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ChunkEndorsement(_receiver_account_id, endorsement), + ) if endorsement.chunk_hash() == chunk_hash => { + endorsement_opt = Some(endorsement.clone()); + } + _ => {} + }; + Some(request) + }); + + if let Some(endorsement) = endorsement_opt { + return Ok(endorsement); + } + + let elapsed_since_start = start_time.elapsed(); + if elapsed_since_start > CHUNK_ENDORSEMENTS_TIMEOUT { + return Err(TimeoutError(elapsed_since_start)); + } + + std::thread::sleep(Duration::from_millis(50)); + } + } + pub fn send_money(&mut self, id: usize) -> ProcessTxResponse { let account_id = self.get_client_id(0); let signer = @@ -690,3 +730,7 @@ impl AccountIndices { &mut container[self.0[account_id]] } } + +#[derive(thiserror::Error, Debug)] +#[error("Timed out after {0:?}")] +pub 
struct TimeoutError(Duration); From 22a2b15e89c1e333d350e5afa31b199d0da11332 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Tue, 20 Feb 2024 08:41:20 +0000 Subject: [PATCH 13/34] Expose some items that are needed for the integration tests `stateless_validation` is a private module of `near_client`, so it's impossible to use items defined there in integration tests. Let's expose some of them to make their use in tests possible. --- chain/client/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chain/client/src/lib.rs b/chain/client/src/lib.rs index 29c360a6601..b4e6b82068e 100644 --- a/chain/client/src/lib.rs +++ b/chain/client/src/lib.rs @@ -16,9 +16,13 @@ pub use crate::client::{Client, ProduceChunkResult}; pub use crate::client_actor::NetworkAdversarialMessage; pub use crate::client_actor::{start_client, ClientActor}; pub use crate::config_updater::ConfigUpdater; +pub use crate::stateless_validation::chunk_validator::orphan_witness_handling::{ + HandleOrphanWitnessOutcome, MAX_ORPHAN_WITNESS_SIZE, +}; pub use crate::sync::adapter::{SyncAdapter, SyncMessage}; pub use crate::view_client::{start_view_client, ViewClientActor}; pub use near_client_primitives::debug::DebugStatus; +pub use stateless_validation::processing_tracker::{ProcessingDoneTracker, ProcessingDoneWaiter}; pub mod adapter; pub mod adversarial; From 9974f09637bde794c85dad0bbfceebf072779e3b Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 21 Feb 2024 07:25:08 +0000 Subject: [PATCH 14/34] Move get_block_producer to TestEnv `run_chunk_validation_test` has a very useful function which allows to determine a block producer at some offset. I would like to also use it in my integration tests. Let's add it to `TestEnv` so that other tests can also use it. 
--- chain/client/src/test_utils/test_env.rs | 14 ++++++++++++++ .../tests/client/features/stateless_validation.rs | 14 +------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/chain/client/src/test_utils/test_env.rs b/chain/client/src/test_utils/test_env.rs index 2473d6ae1bd..a629bb2570c 100644 --- a/chain/client/src/test_utils/test_env.rs +++ b/chain/client/src/test_utils/test_env.rs @@ -10,6 +10,7 @@ use crate::Client; use near_async::messaging::{CanSend, IntoMultiSender}; use near_async::time::Clock; use near_chain::test_utils::ValidatorSchedule; +use near_chain::types::Tip; use near_chain::{ChainGenesis, Provenance}; use near_chain_configs::GenesisConfig; use near_chain_primitives::error::QueryError; @@ -598,6 +599,19 @@ impl TestEnv { self.account_indices.index(account_id) } + /// Get block producer responsible for producing the block at height head.height + height_offset. + /// Doesn't handle epoch boundaries with height_offset > 1. With offsets bigger than one, + /// the function assumes that the epoch doesn't change after head.height + 1. 
+ pub fn get_block_producer_at_offset(&self, head: &Tip, height_offset: u64) -> AccountId { + let client = &self.clients[0]; + let epoch_manager = &client.epoch_manager; + let parent_hash = &head.last_block_hash; + let epoch_id = epoch_manager.get_epoch_id_from_prev_block(parent_hash).unwrap(); + let height = head.height + height_offset; + + epoch_manager.get_block_producer(&epoch_id, height).unwrap() + } + pub fn get_runtime_config(&self, idx: usize, epoch_id: EpochId) -> RuntimeConfig { self.clients[idx].runtime_adapter.get_protocol_config(&epoch_id).unwrap().runtime_config } diff --git a/integration-tests/src/tests/client/features/stateless_validation.rs b/integration-tests/src/tests/client/features/stateless_validation.rs index 0f378a4cace..36afb061cb3 100644 --- a/integration-tests/src/tests/client/features/stateless_validation.rs +++ b/integration-tests/src/tests/client/features/stateless_validation.rs @@ -9,7 +9,6 @@ use near_chain_configs::{Genesis, GenesisConfig, GenesisRecords}; use near_client::test_utils::TestEnv; use near_crypto::{InMemorySigner, KeyType}; use near_o11y::testonly::init_integration_logger; -use near_primitives::block::Tip; use near_primitives::epoch_manager::AllEpochConfigTestOverrides; use near_primitives::num_rational::Rational32; use near_primitives::shard_layout::ShardLayout; @@ -164,7 +163,7 @@ fn run_chunk_validation_test(seed: u64, prob_missing_chunk: f64) { let _ = env.clients[0].process_tx(tx, false, false); } - let block_producer = get_block_producer(&env, &tip, 1); + let block_producer = env.get_block_producer_at_offset(&tip, 1); tracing::debug!( target: "stateless_validation", "Producing block at height {} by {}", tip.height + 1, block_producer @@ -238,17 +237,6 @@ fn test_chunk_validation_high_missing_chunks() { run_chunk_validation_test(44, 0.81); } -/// Returns the block producer for the height of head + height_offset. 
-fn get_block_producer(env: &TestEnv, head: &Tip, height_offset: u64) -> AccountId { - let client = &env.clients[0]; - let epoch_manager = &client.epoch_manager; - let parent_hash = &head.last_block_hash; - let epoch_id = epoch_manager.get_epoch_id_from_prev_block(parent_hash).unwrap(); - let height = head.height + height_offset; - let block_producer = epoch_manager.get_block_producer(&epoch_id, height).unwrap(); - block_producer -} - #[test] fn test_protocol_upgrade_81() { init_integration_logger(); From dc197278d852a0cf9d94333f9779c0c1ec1d3ee3 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 21 Feb 2024 07:27:27 +0000 Subject: [PATCH 15/34] Add TestEnv::get_chunk_producer_at_offset Add a function analogous to `get_block_producer_at_offset`, but for chunk producers. --- chain/client/src/test_utils/test_env.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/chain/client/src/test_utils/test_env.rs b/chain/client/src/test_utils/test_env.rs index a629bb2570c..93fd5725f63 100644 --- a/chain/client/src/test_utils/test_env.rs +++ b/chain/client/src/test_utils/test_env.rs @@ -34,7 +34,7 @@ use near_primitives::sharding::{ChunkHash, PartialEncodedChunk}; use near_primitives::stateless_validation::{ChunkEndorsement, ChunkStateWitness}; use near_primitives::test_utils::create_test_signer; use near_primitives::transaction::{Action, FunctionCallAction, SignedTransaction}; -use near_primitives::types::{AccountId, Balance, BlockHeight, EpochId, NumSeats}; +use near_primitives::types::{AccountId, Balance, BlockHeight, EpochId, NumSeats, ShardId}; use near_primitives::utils::MaybeValidated; use near_primitives::version::ProtocolVersion; use near_primitives::views::{ @@ -612,6 +612,24 @@ impl TestEnv { epoch_manager.get_block_producer(&epoch_id, height).unwrap() } + /// Get chunk producer responsible for producing the chunk at height head.height + height_offset. + /// Doesn't handle epoch boundaries with height_offset > 1. 
With offsets bigger than one, + /// the function assumes that the epoch doesn't change after head.height + 1. + pub fn get_chunk_producer_at_offset( + &self, + head: &Tip, + height_offset: u64, + shard_id: ShardId, + ) -> AccountId { + let client = &self.clients[0]; + let epoch_manager = &client.epoch_manager; + let parent_hash = &head.last_block_hash; + let epoch_id = epoch_manager.get_epoch_id_from_prev_block(parent_hash).unwrap(); + let height = head.height + height_offset; + + epoch_manager.get_chunk_producer(&epoch_id, height, shard_id).unwrap() + } + pub fn get_runtime_config(&self, idx: usize, epoch_id: EpochId) -> RuntimeConfig { self.clients[idx].runtime_adapter.get_protocol_config(&epoch_id).unwrap().runtime_config } From 0183075487dcb5d0594bf530e505099d5e3bd8e7 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Wed, 21 Feb 2024 03:26:38 +0000 Subject: [PATCH 16/34] Add integration tests --- .../src/tests/client/features.rs | 1 + .../features/orphan_chunk_state_witness.rs | 416 ++++++++++++++++++ 2 files changed, 417 insertions(+) create mode 100644 integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs diff --git a/integration-tests/src/tests/client/features.rs b/integration-tests/src/tests/client/features.rs index e4c069e63fd..39860298cac 100644 --- a/integration-tests/src/tests/client/features.rs +++ b/integration-tests/src/tests/client/features.rs @@ -18,6 +18,7 @@ mod lower_storage_key_limit; mod nearvm; #[cfg(feature = "protocol_feature_nonrefundable_transfer_nep491")] mod nonrefundable_transfer; +mod orphan_chunk_state_witness; mod restore_receipts_after_fix_apply_chunks; mod restrict_tla; mod stateless_validation; diff --git a/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs b/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs new file mode 100644 index 00000000000..4cc95e84640 --- /dev/null +++ b/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs @@ -0,0 +1,416 
@@ +use std::collections::HashSet; + +use near_chain::{Block, Provenance}; +use near_chain_configs::Genesis; +use near_client::test_utils::TestEnv; +use near_client::{Client, ProcessingDoneTracker, ProcessingDoneWaiter}; +use near_client::{HandleOrphanWitnessOutcome, MAX_ORPHAN_WITNESS_SIZE}; +use near_crypto::Signature; +use near_network::types::{NetworkRequests, PeerManagerMessageRequest}; +use near_o11y::testonly::init_integration_logger; +use near_primitives::merkle::{Direction, MerklePathItem}; +use near_primitives::network::PeerId; +use near_primitives::sharding::{ + ChunkHash, ReceiptProof, ShardChunkHeader, ShardChunkHeaderInner, ShardChunkHeaderInnerV2, + ShardProof, +}; +use near_primitives::stateless_validation::ChunkStateWitness; +use near_primitives_core::checked_feature; +use near_primitives_core::hash::CryptoHash; +use near_primitives_core::types::AccountId; +use near_primitives_core::version::PROTOCOL_VERSION; +use nearcore::config::GenesisExt; +use nearcore::test_utils::TestEnvNightshadeSetupExt; + +struct OrphanWitnessTest { + env: TestEnv, + block1: Block, + block2: Block, + witness: ChunkStateWitness, + excluded_validator: AccountId, + excluded_validator_idx: usize, + chunk_producer: AccountId, +} + +/// This function prepares a scenario in which an orphaned chunk witness will occur. +/// It creates two blocks (`block1` and `block2`), but doesn't pass them to `excluded_validator`. +/// When `excluded_validator` receives a witness for the chunk belonging to `block2`, it doesn't +/// have `block1` which is required to process the witness, so it becomes an orphaned state witness. 
+fn setup_orphan_witness_test() -> OrphanWitnessTest { + let accounts: Vec = (0..4).map(|i| format!("test{i}").parse().unwrap()).collect(); + let genesis = Genesis::test(accounts.clone(), accounts.len().try_into().unwrap()); + let mut env = TestEnv::builder(&genesis.config) + .clients(accounts.clone()) + .validators(accounts.clone()) + .nightshade_runtimes(&genesis) + .build(); + + // Run the blockchain for a few blocks + for height in 1..4 { + // Produce the next block + let tip = env.clients[0].chain.head().unwrap(); + let block_producer = env.get_block_producer_at_offset(&tip, 1); + tracing::info!(target: "test", "Producing block at height: {height} by {block_producer}"); + let block = env.client(&block_producer).produce_block(tip.height + 1).unwrap().unwrap(); + tracing::info!(target: "test", "Block produced at height {} has chunk {:?}", height, block.chunks()[0].chunk_hash()); + + // The first block after genesis doesn't have any chunks, but all other blocks should have a new chunk inside. + if height > 1 { + assert_eq!( + block.chunks()[0].height_created(), + block.header().height(), + "There should be no missing chunks." + ); + } + + // Pass network messages around + for i in 0..env.clients.len() { + let blocks_processed = + env.clients[i].process_block_test(block.clone().into(), Provenance::NONE).unwrap(); + assert_eq!(blocks_processed, vec![*block.hash()]); + } + + env.process_partial_encoded_chunks(); + for client_idx in 0..env.clients.len() { + env.process_shards_manager_responses_and_finish_processing_blocks(client_idx); + } + env.propagate_chunk_state_witnesses_and_endorsements(false); + + // Verify heads + let heads = env + .clients + .iter() + .map(|client| client.chain.head().unwrap().last_block_hash) + .collect::>(); + assert_eq!(heads.len(), 1, "All clients should have the same head"); + } + + // Produce two more blocks (`block1` and `block2`), but don't send them to the `excluded_validator`. 
+ // The `excluded_validator` will receive a chunk witness for the chunk in `block2`, but it won't + // have `block1`, so it will become an orphaned chunk state witness. + let tip = env.clients[0].chain.head().unwrap(); + + let block1_producer = env.get_block_producer_at_offset(&tip, 1); + let block2_producer = env.get_block_producer_at_offset(&tip, 2); + let block2_chunk_producer = env.get_chunk_producer_at_offset(&tip, 2, 0); + + // The excluded validator shouldn't produce any blocks or chunks in the next two blocks. + // There's 4 validators and at most 3 aren't a good candidate, so there's always at least + // one that fits all the criteria, the `unwrap()` won't fail. + let excluded_validator = accounts + .into_iter() + .filter(|acc| { + acc != &block1_producer && acc != &block2_producer && acc != &block2_chunk_producer + }) + .next() + .unwrap(); + let excluded_validator_idx = env.get_client_index(&excluded_validator); + let clients_without_excluded = + (0..env.clients.len()).filter(|idx| *idx != excluded_validator_idx); + + tracing::info!(target:"test", "Producing block1 at height {}", tip.height + 1); + let block1 = env.client(&block1_producer).produce_block(tip.height + 1).unwrap().unwrap(); + assert_eq!( + block1.chunks()[0].height_created(), + block1.header().height(), + "There should be no missing chunks." + ); + + for client_idx in clients_without_excluded.clone() { + let blocks_processed = env.clients[client_idx] + .process_block_test(block1.clone().into(), Provenance::NONE) + .unwrap(); + assert_eq!(blocks_processed, vec![*block1.hash()]); + } + env.process_partial_encoded_chunks(); + for client_idx in 0..env.clients.len() { + env.process_shards_manager_responses(client_idx); + } + + // At this point chunk producer for the chunk belonging to block2 produces + // the chunk and sends out a witness for it. Let's intercept the witness + // and process it on all validators except for `excluded_validator`. 
+ // The witness isn't processed on `excluded_validator` to give users of + // `setup_orphan_witness_test()` full control over the events. + let mut witness_opt = None; + let network_adapter = + env.network_adapters[env.get_client_index(&block2_chunk_producer)].clone(); + network_adapter.handle_filtered(|request| match request { + PeerManagerMessageRequest::NetworkRequests(NetworkRequests::ChunkStateWitness( + account_ids, + state_witness, + )) => { + let mut witness_processing_done_waiters: Vec = Vec::new(); + for account_id in account_ids.iter().filter(|acc| **acc != excluded_validator) { + let processing_done_tracker = ProcessingDoneTracker::new(); + witness_processing_done_waiters.push(processing_done_tracker.make_waiter()); + env.client(account_id) + .process_chunk_state_witness( + state_witness.clone(), + PeerId::random(), + Some(processing_done_tracker), + ) + .unwrap(); + } + for waiter in witness_processing_done_waiters { + waiter.wait(); + } + witness_opt = Some(state_witness); + None + } + _ => Some(request), + }); + let witness = witness_opt.unwrap(); + + env.propagate_chunk_endorsements(false); + + tracing::info!(target:"test", "Producing block2 at height {}", tip.height + 2); + let block2 = env.client(&block2_producer).produce_block(tip.height + 2).unwrap().unwrap(); + assert_eq!( + block2.chunks()[0].height_created(), + block2.header().height(), + "There should be no missing chunks." 
+ ); + assert_eq!(witness.inner.chunk_header.chunk_hash(), block2.chunks()[0].chunk_hash()); + + for client_idx in clients_without_excluded { + let blocks_processed = env.clients[client_idx] + .process_block_test(block2.clone().into(), Provenance::NONE) + .unwrap(); + assert_eq!(blocks_processed, vec![*block2.hash()]); + } + + env.process_partial_encoded_chunks(); + for client_idx in 0..env.clients.len() { + env.process_shards_manager_responses_and_finish_processing_blocks(client_idx); + } + + OrphanWitnessTest { + env, + block1, + block2, + witness, + excluded_validator, + excluded_validator_idx, + chunk_producer: block2_chunk_producer, + } +} + +/// Test that a valid orphan witness is correctly processed once the required block arrives. +#[test] +fn test_orphan_witness_valid() { + init_integration_logger(); + + if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) { + println!("Test not applicable without StatelessValidation enabled"); + return; + } + + let OrphanWitnessTest { + mut env, + block1, + block2, + witness, + excluded_validator, + excluded_validator_idx, + .. + } = setup_orphan_witness_test(); + + // `excluded_validator` receives witness for chunk belonging to `block2`, but it doesn't have `block1`. + // The witness should become an orphaned witness and it should be saved to the orphan pool. + env.client(&excluded_validator) + .process_chunk_state_witness(witness, PeerId::random(), None) + .unwrap(); + + let block_processed = env + .client(&excluded_validator) + .process_block_test(block1.clone().into(), Provenance::NONE) + .unwrap(); + assert_eq!(block_processed, vec![*block1.hash()]); + + // After processing `block1`, `excluded_validator` should process the orphaned witness for the chunk belonging to `block2` + // and it should send out an endorsement for this chunk. This happens asynchronously, so we have to wait for it. 
+ env.wait_for_chunk_endorsement(excluded_validator_idx, &block2.chunks()[0].chunk_hash()) + .unwrap(); +} + +#[test] +fn test_orphan_witness_bad_signature() { + init_integration_logger(); + + if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) { + println!("Test not applicable without StatelessValidation enabled"); + return; + } + + let OrphanWitnessTest { mut env, mut witness, excluded_validator, .. } = + setup_orphan_witness_test(); + + // Modify the witness to contain an invalid signature + witness.signature = Signature::default(); + + let error = env + .client(&excluded_validator) + .process_chunk_state_witness(witness, PeerId::random(), None) + .unwrap_err(); + let error_message = format!("{error}").to_lowercase(); + tracing::info!(target:"test", "Error message: {}", error_message); + assert!(error_message.contains("invalid signature")); +} + +#[test] +fn test_orphan_witness_signature_from_wrong_peer() { + init_integration_logger(); + + if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) { + println!("Test not applicable without StatelessValidation enabled"); + return; + } + + let OrphanWitnessTest { mut env, mut witness, excluded_validator, .. } = + setup_orphan_witness_test(); + + // Sign the witness using another validator's key. + // Only witnesses from the chunk producer that produced this witness should be accepted. 
+ resign_witness(&mut witness, env.client(&excluded_validator)); + + let error = env + .client(&excluded_validator) + .process_chunk_state_witness(witness, PeerId::random(), None) + .unwrap_err(); + let error_message = format!("{error}").to_lowercase(); + tracing::info!(target:"test", "Error message: {}", error_message); + assert!(error_message.contains("invalid signature")); +} + +#[test] +fn test_orphan_witness_invalid_shard_id() { + init_integration_logger(); + + if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) { + println!("Test not applicable without StatelessValidation enabled"); + return; + } + + let OrphanWitnessTest { mut env, mut witness, excluded_validator, chunk_producer, .. } = + setup_orphan_witness_test(); + + // Set invalid shard_id in the witness header + modify_witness_header_inner(&mut witness, |header| header.shard_id = 10000000); + resign_witness(&mut witness, env.client(&chunk_producer)); + + // The witness should be rejected + let error = env + .client(&excluded_validator) + .process_chunk_state_witness(witness, PeerId::random(), None) + .unwrap_err(); + let error_message = format!("{error}").to_lowercase(); + tracing::info!(target:"test", "Error message: {}", error_message); + assert!(error_message.contains("shard")); +} + +#[test] +fn test_orphan_witness_too_large() { + init_integration_logger(); + + if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) { + println!("Test not applicable without StatelessValidation enabled"); + return; + } + + let OrphanWitnessTest { mut env, mut witness, excluded_validator, chunk_producer, .. 
} =
+        setup_orphan_witness_test();
+
+    // Modify the witness to be larger than the allowed limit
+    let dummy_merkle_path_item =
+        MerklePathItem { hash: CryptoHash::default(), direction: Direction::Left };
+    let items_count = MAX_ORPHAN_WITNESS_SIZE / std::mem::size_of::<MerklePathItem>() + 1;
+    let big_path = vec![dummy_merkle_path_item; items_count];
+    let big_receipt_proof =
+        ReceiptProof(Vec::new(), ShardProof { from_shard_id: 0, to_shard_id: 0, proof: big_path });
+    witness.inner.source_receipt_proofs.insert(ChunkHash::default(), big_receipt_proof);
+    resign_witness(&mut witness, env.client(&chunk_producer));
+
+    // The witness should not be saved to the pool, as it's too big
+    let outcome = env.client(&excluded_validator).handle_orphan_state_witness(witness).unwrap();
+    assert!(matches!(outcome, HandleOrphanWitnessOutcome::TooBig(_)))
+}
+
+/// Witnesses which are too far from the chain head should not be saved to the orphan pool
+#[test]
+fn test_orphan_witness_far_from_head() {
+    init_integration_logger();
+
+    if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) {
+        println!("Test not applicable without StatelessValidation enabled");
+        return;
+    }
+
+    let OrphanWitnessTest {
+        mut env, mut witness, chunk_producer, block1, excluded_validator, ..
+    } = setup_orphan_witness_test();
+
+    let bad_height = 10000;
+    modify_witness_header_inner(&mut witness, |header| header.height_created = bad_height);
+    resign_witness(&mut witness, env.client(&chunk_producer));
+
+    let outcome = env.client(&excluded_validator).handle_orphan_state_witness(witness).unwrap();
+    assert_eq!(
+        outcome,
+        HandleOrphanWitnessOutcome::TooFarFromHead {
+            witness_height: bad_height,
+            head_height: block1.header().height() - 1
+        }
+    );
+}
+
+// Test that orphan witnesses are only partially validated - an orphan witness with invalid
+// `source_receipt_proofs` will be accepted and saved into the pool, as there's no way to validate
+// this field without the previous block.
+#[test] +fn test_orphan_witness_not_fully_validated() { + init_integration_logger(); + + if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) { + println!("Test not applicable without StatelessValidation enabled"); + return; + } + + let OrphanWitnessTest { mut env, mut witness, chunk_producer, excluded_validator, .. } = + setup_orphan_witness_test(); + + // Make the witness invalid in a way that won't be detected during orphan witness validation + witness.inner.source_receipt_proofs.insert( + ChunkHash::default(), + ReceiptProof( + vec![], + ShardProof { from_shard_id: 100230230, to_shard_id: 383939, proof: vec![] }, + ), + ); + resign_witness(&mut witness, env.client(&chunk_producer)); + + // The witness should be accepted and saved into the pool, even though it's invalid. + // There is no way to fully validate an orphan witness, so this is the correct behavior. + // The witness will later be fully validated when the required block arrives. + env.client(&excluded_validator) + .process_chunk_state_witness(witness, PeerId::random(), None) + .unwrap(); +} + +fn modify_witness_header_inner( + witness: &mut ChunkStateWitness, + f: impl FnOnce(&mut ShardChunkHeaderInnerV2), +) { + match &mut witness.inner.chunk_header { + ShardChunkHeader::V3(header) => match &mut header.inner { + ShardChunkHeaderInner::V2(header_inner) => f(header_inner), + _ => panic!(), + }, + _ => panic!(), + }; +} + +fn resign_witness(witness: &mut ChunkStateWitness, signer: &Client) { + witness.signature = + signer.validator_signer.as_ref().unwrap().sign_chunk_state_witness(&witness.inner).0; +} From aedb405b9ccf8516a94b6e3c4b7eb104668bd9da Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 16:51:04 +0000 Subject: [PATCH 17/34] Use ByteSize --- Cargo.lock | 1 + chain/client/Cargo.toml | 1 + .../src/stateless_validation/chunk_validator/mod.rs | 9 +++++---- .../chunk_validator/orphan_witness_handling.rs | 9 +++++++-- 
.../tests/client/features/orphan_chunk_state_witness.rs | 3 ++- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62fdcaee91b..9bd74a43136 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3571,6 +3571,7 @@ dependencies = [ "assert_matches", "async-trait", "borsh 1.0.0", + "bytesize", "chrono", "cloud-storage", "derive_more", diff --git a/chain/client/Cargo.toml b/chain/client/Cargo.toml index abe33fdcf90..022911d6921 100644 --- a/chain/client/Cargo.toml +++ b/chain/client/Cargo.toml @@ -41,6 +41,7 @@ thiserror.workspace = true tokio.workspace = true tracing.workspace = true yansi.workspace = true +bytesize.workspace = true near-async.workspace = true near-cache.workspace = true diff --git a/chain/client/src/stateless_validation/chunk_validator/mod.rs b/chain/client/src/stateless_validation/chunk_validator/mod.rs index e7cacc9f0cd..352d2208f8f 100644 --- a/chain/client/src/stateless_validation/chunk_validator/mod.rs +++ b/chain/client/src/stateless_validation/chunk_validator/mod.rs @@ -626,10 +626,11 @@ impl Client { processing_done_tracker: Option, ) -> Result<(), Error> { if witness.inner.chunk_header.prev_block_hash() != prev_block.hash() { - return Err(Error::Other( - "process_chunk_state_witness_with_prev_block - prev_block doesn't match" - .to_string(), - )); + return Err(Error::Other(format!( + "process_chunk_state_witness_with_prev_block - prev_block doesn't match ({} != {})", + witness.inner.chunk_header.prev_block_hash(), + prev_block.hash() + ))); } let prev_chunk_header = Chain::get_prev_chunk_header( diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs index 3a4bc1461b1..bcfd9568434 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs @@ -6,6 +6,7 @@ //! 
arrives, all witnesses that were waiting for it can be processed. use crate::Client; +use bytesize::ByteSize; use itertools::Itertools; use near_chain::Block; use near_chain_primitives::Error; @@ -23,7 +24,8 @@ pub const ALLOWED_ORPHAN_WITNESS_DISTANCE_FROM_HEAD: Range = 2..6; /// We keep only orphan witnesses which are smaller than this size. /// This limits the maximum memory usage of OrphanStateWitnessPool. -pub const MAX_ORPHAN_WITNESS_SIZE: usize = 40_000_000; +/// TODO(#10259) - consider merging this limit with the non-orphan witness size limit. +pub const MAX_ORPHAN_WITNESS_SIZE: ByteSize = ByteSize::mb(40); impl Client { pub fn handle_orphan_state_witness( @@ -54,7 +56,10 @@ impl Client { // Don't save orphaned state witnesses which are bigger than the allowed limit. let witness_size = borsh::to_vec(&witness)?.len(); - if witness_size > MAX_ORPHAN_WITNESS_SIZE { + let witness_size_u64: u64 = witness_size.try_into().map_err(|_| { + Error::Other(format!("Cannot convert witness size to u64: {}", witness_size)) + })?; + if witness_size_u64 > MAX_ORPHAN_WITNESS_SIZE.as_u64() { tracing::warn!( target: "client", witness_height, diff --git a/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs b/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs index 4cc95e84640..ba7b04ee890 100644 --- a/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs +++ b/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs @@ -324,7 +324,8 @@ fn test_orphan_witness_too_large() { // Modify the witness to be larger than the allowed limit let dummy_merkle_path_item = MerklePathItem { hash: CryptoHash::default(), direction: Direction::Left }; - let items_count = MAX_ORPHAN_WITNESS_SIZE / std::mem::size_of::() + 1; + let max_size_usize: usize = MAX_ORPHAN_WITNESS_SIZE.as_u64().try_into().unwrap(); + let items_count = max_size_usize / std::mem::size_of::() + 1; let big_path = vec![dummy_merkle_path_item; 
items_count];
     let big_receipt_proof =
         ReceiptProof(Vec::new(), ShardProof { from_shard_id: 0, to_shard_id: 0, proof: big_path });

From 198a4283ed4a857d38596c54c9116484d49b7610 Mon Sep 17 00:00:00 2001
From: Jan Ciolek
Date: Thu, 22 Feb 2024 16:51:51 +0000
Subject: [PATCH 18/34] Add comment on process_ready_orphan_chunk_state_witnesses

---
 .../chunk_validator/orphan_witness_handling.rs    | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs
index bcfd9568434..3ebe84d6728 100644
--- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs
+++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs
@@ -157,11 +157,13 @@ impl Client {
         Ok(())
     }

-    pub fn process_ready_orphan_chunk_state_witnesses(&mut self, accepted_block: &Block) {
+    /// Once a new block arrives, we can process the orphaned chunk state witnesses that were waiting
+    /// for this block. This function takes the ready witnesses out of the orphan pool and processes them.
+    pub fn process_ready_orphan_chunk_state_witnesses(&mut self, new_block: &Block) {
         let ready_witnesses = self
             .chunk_validator
             .orphan_witness_pool
-            .take_state_witnesses_waiting_for_block(accepted_block.hash());
+            .take_state_witnesses_waiting_for_block(new_block.hash());
         for witness in ready_witnesses {
             let header = &witness.inner.chunk_header;
             tracing::debug!(
@@ -175,7 +177,7 @@ impl Client {
             if let Err(err) = self.process_chunk_state_witness_with_prev_block(
                 witness,
                 PeerId::random(), // TODO: Should peer_id even be here? 
https://github.com/near/stakewars-iv/issues/17 - accepted_block, + new_block, None, ) { tracing::error!(target: "client", ?err, "Error processing orphan chunk state witness"); From 49bea3215d2c4601c8df40a21db0479d493be6ee Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 16:52:16 +0000 Subject: [PATCH 19/34] Use _metrics_tracker instead of #[allow(dead_code)] --- .../chunk_validator/orphan_witness_pool.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index ebe54aaf863..835415fffcd 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -22,10 +22,7 @@ pub struct OrphanStateWitnessPool { struct CacheEntry { witness: ChunkStateWitness, - // cargo complains that metrics_tracker is never read, but it's ok, - // as metrics_tracker does all of its work during initalization and destruction. 
- #[allow(dead_code)] - metrics_tracker: OrphanWitnessMetricsTracker, + _metrics_tracker: OrphanWitnessMetricsTracker, } impl OrphanStateWitnessPool { @@ -55,7 +52,7 @@ impl OrphanStateWitnessPool { let prev_block_hash = *chunk_header.prev_block_hash(); let cache_key = (chunk_header.shard_id(), chunk_header.height_created()); let metrics_tracker = OrphanWitnessMetricsTracker::new(&witness, witness_size); - let cache_entry = CacheEntry { witness, metrics_tracker }; + let cache_entry = CacheEntry { witness, _metrics_tracker: metrics_tracker }; if let Some((_, ejected_entry)) = self.witness_cache.push(cache_key, cache_entry) { // If another witness has been ejected from the cache due to capacity limit, // then remove the ejected witness from `waiting_for_block` to keep them in sync From 36b7391d81f93cea529266fd7e174a3d6c03b4f4 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 16:56:56 +0000 Subject: [PATCH 20/34] use tracing::span to reduce repetition --- .../orphan_witness_handling.rs | 21 +++++++++---------- .../chunk_validator/orphan_witness_pool.rs | 5 ----- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs index 3ebe84d6728..247b0943b6a 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs @@ -36,6 +36,15 @@ impl Client { let witness_height = chunk_header.height_created(); let witness_shard = chunk_header.shard_id(); + let _span = tracing::debug_span!(target: "client", + "handle_orphan_state_witness", + witness_height, + witness_shard, + witness_chunk = ?chunk_header.chunk_hash(), + witness_prev_block = ?chunk_header.prev_block_hash(), + ) + .entered(); + // Don't save orphaned state witnesses which are far away from the current chain head. 
let chain_head = &self.chain.head()?; let head_distance = witness_height.saturating_sub(chain_head.height); @@ -43,10 +52,6 @@ impl Client { tracing::debug!( target: "client", head_height = chain_head.height, - witness_height, - witness_shard, - witness_chunk = ?chunk_header.chunk_hash(), - witness_prev_block = ?chunk_header.prev_block_hash(), "Not saving an orphaned ChunkStateWitness because its height isn't within the allowed height range"); return Ok(HandleOrphanWitnessOutcome::TooFarFromHead { witness_height, @@ -107,13 +112,7 @@ impl Client { } // Orphan witness is OK, save it to the pool - tracing::debug!( - target: "client", - witness_height, - witness_shard, - witness_chunk = ?chunk_header.chunk_hash(), - witness_prev_block = ?chunk_header.prev_block_hash(), - "Saving an orphaned ChunkStateWitness to orphan pool"); + tracing::debug!(target: "client", "Saving an orphaned ChunkStateWitness to orphan pool"); self.chunk_validator.orphan_witness_pool.add_orphan_state_witness(witness, witness_size); Ok(HandleOrphanWitnessOutcome::SavedTooPool) } diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index 835415fffcd..5ab6775d626 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -56,13 +56,8 @@ impl OrphanStateWitnessPool { if let Some((_, ejected_entry)) = self.witness_cache.push(cache_key, cache_entry) { // If another witness has been ejected from the cache due to capacity limit, // then remove the ejected witness from `waiting_for_block` to keep them in sync - let header = &ejected_entry.witness.inner.chunk_header; tracing::debug!( target: "client", - witness_height = header.height_created(), - witness_shard = header.shard_id(), - witness_chunk = ?header.chunk_hash(), - witness_prev_block = ?header.prev_block_hash(), 
"Ejecting an orphaned ChunkStateWitness from the cache due to capacity limit. It will not be processed." ); self.remove_from_waiting_for_block(&ejected_entry.witness); From 3b3f8c40e8712c292d1649bed6e8a121fe0e7dec Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 17:15:44 +0000 Subject: [PATCH 21/34] Remove extra newline --- chain/client/src/adapter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/chain/client/src/adapter.rs b/chain/client/src/adapter.rs index 106ce630a11..3eaf714a4fa 100644 --- a/chain/client/src/adapter.rs +++ b/chain/client/src/adapter.rs @@ -31,7 +31,6 @@ pub(crate) struct RecvPartialEncodedChunkRequest(pub PartialEncodedChunkRequestM pub fn client_sender_for_network( client_addr: actix::Addr, view_client_addr: actix::Addr, - ) -> ClientSenderForNetwork { let client_addr = client_addr.with_auto_span_context(); let view_client_addr = view_client_addr.with_auto_span_context(); From db14aa8f57d1fb029d510a55e92a6ec337fadf8b Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 17:33:02 +0000 Subject: [PATCH 22/34] Fix cargo format, VSCode forgot to format a file o_0 --- chain/client/src/lib.rs | 2 +- chain/network/src/peer/peer_actor.rs | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/chain/client/src/lib.rs b/chain/client/src/lib.rs index 87c6cee37c5..3c1dff251af 100644 --- a/chain/client/src/lib.rs +++ b/chain/client/src/lib.rs @@ -19,10 +19,10 @@ pub use crate::stateless_validation::chunk_validator::orphan_witness_handling::{ pub use crate::sync::adapter::{SyncAdapter, SyncMessage}; pub use crate::view_client::{start_view_client, ViewClientActor}; pub use near_client_primitives::debug::DebugStatus; -pub use stateless_validation::processing_tracker::{ProcessingDoneTracker, ProcessingDoneWaiter}; pub use near_network::client::{ BlockApproval, BlockResponse, ProcessTxRequest, ProcessTxResponse, SetNetworkInfo, }; +pub use stateless_validation::processing_tracker::{ProcessingDoneTracker, 
ProcessingDoneWaiter}; pub mod adapter; pub mod adversarial; diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index c91a3930066..b118a7452fa 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -1012,10 +1012,7 @@ impl PeerActor { RoutedMessageBody::ChunkStateWitness(witness) => { network_state .client - .send_async(ChunkStateWitnessMessage { - witness, - peer_id, - }) + .send_async(ChunkStateWitnessMessage { witness, peer_id }) .await .ok(); None From 3c045e2cca0a6bf46e6b13236d0c38158d5175f3 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 18:57:18 +0000 Subject: [PATCH 23/34] Apply review comments --- .../orphan_witness_handling.rs | 4 +- .../chunk_validator/orphan_witness_pool.rs | 39 +++++++++---------- .../features/orphan_chunk_state_witness.rs | 27 +++++++------ 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs index 247b0943b6a..00bc6aac408 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_handling.rs @@ -114,7 +114,7 @@ impl Client { // Orphan witness is OK, save it to the pool tracing::debug!(target: "client", "Saving an orphaned ChunkStateWitness to orphan pool"); self.chunk_validator.orphan_witness_pool.add_orphan_state_witness(witness, witness_size); - Ok(HandleOrphanWitnessOutcome::SavedTooPool) + Ok(HandleOrphanWitnessOutcome::SavedToPool) } fn partially_validate_orphan_witness_in_epoch( @@ -193,7 +193,7 @@ impl Client { /// It's useful in tests. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HandleOrphanWitnessOutcome { - SavedTooPool, + SavedToPool, TooBig(usize), TooFarFromHead { head_height: BlockHeight, witness_height: BlockHeight }, } diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index 5ab6775d626..a8bc3506dc1 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -185,9 +185,9 @@ mod tests { match &mut witness.inner.chunk_header { ShardChunkHeader::V3(header) => match &mut header.inner { ShardChunkHeaderInner::V2(inner) => inner.encoded_length = encoded_length, - _ => panic!(), + _ => unimplemented!(), }, - _ => panic!(), + _ => unimplemented!(), } witness } @@ -198,7 +198,7 @@ mod tests { } /// Assert that both Vecs are equal after sorting. It's order-independent, unlike the standard assert_eq! 
- fn assert_same(mut observed: Vec, mut expected: Vec) { + fn assert_contents(mut observed: Vec, mut expected: Vec) { let sort_comparator = |witness1: &ChunkStateWitness, witness2: &ChunkStateWitness| { let bytes1 = borsh::to_vec(witness1).unwrap(); let bytes2 = borsh::to_vec(witness2).unwrap(); @@ -222,12 +222,12 @@ mod tests { for witness in expected { print_witness_info(&witness); } - eprintln!("Observed {} witnesse:", observed.len()); + eprintln!("Observed {} witnesses:", observed.len()); for witness in observed { print_witness_info(&witness); } eprintln!("=================="); - panic!("assert_same failed"); + panic!("assert_contents failed"); } } @@ -253,10 +253,10 @@ mod tests { pool.add_orphan_state_witness(witness4.clone(), 0); let waiting_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); - assert_same(waiting_for_99, vec![witness1, witness2]); + assert_contents(waiting_for_99, vec![witness1, witness2]); let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); - assert_same(waiting_for_100, vec![witness3, witness4]); + assert_contents(waiting_for_100, vec![witness3, witness4]); assert_empty(&pool); } @@ -275,7 +275,7 @@ mod tests { pool.add_orphan_state_witness(witness2.clone(), 0); let waiting_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); - assert_same(waiting_for_99, vec![witness2]); + assert_contents(waiting_for_99, vec![witness2]); } // The old witness is replaced when the awaited block is different, waiting_for_block is cleaned as expected @@ -286,10 +286,10 @@ mod tests { pool.add_orphan_state_witness(witness4.clone(), 0); let waiting_for_101 = pool.take_state_witnesses_waiting_for_block(&block(101)); - assert_same(waiting_for_101, vec![witness4]); + assert_contents(waiting_for_101, vec![witness4]); let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); - assert_same(waiting_for_100, vec![]); + assert_contents(waiting_for_100, vec![]); } assert_empty(&pool); @@ 
-311,11 +311,11 @@ mod tests { pool.add_orphan_state_witness(witness3.clone(), 0); let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); - assert_same(waiting_for_100, vec![witness2, witness3]); + assert_contents(waiting_for_100, vec![witness2, witness3]); // witness1 should be ejected, no one is waiting for block 101 let waiting_for_101 = pool.take_state_witnesses_waiting_for_block(&block(101)); - assert_same(waiting_for_101, vec![]); + assert_contents(waiting_for_101, vec![]); assert_empty(&pool); } @@ -341,7 +341,7 @@ mod tests { assert_eq!(pool.waiting_for_block.len(), 1); let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); - assert_same(waiting_for_100, vec![witness2]); + assert_contents(waiting_for_100, vec![witness2]); assert_empty(&pool); } @@ -356,7 +356,7 @@ mod tests { pool.add_orphan_state_witness(witness.clone(), 0); let waiting_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); - assert_same(waiting_for_99, vec![witness]); + assert_contents(waiting_for_99, vec![witness]); assert_empty(&pool); } @@ -372,7 +372,7 @@ mod tests { pool.add_orphan_state_witness(make_witness(101, 0, block(100), 0), 0); let waiting = pool.take_state_witnesses_waiting_for_block(&block(99)); - assert_same(waiting, vec![]); + assert_contents(waiting, vec![]); assert_empty(&pool); } @@ -388,7 +388,6 @@ mod tests { pool.add_orphan_state_witness(make_witness(100, 2, block(99), 0), 1); pool.add_orphan_state_witness(make_witness(101, 0, block(100), 0), 0); std::mem::drop(pool); - println!("Ok!"); } /// A longer test scenario @@ -423,7 +422,7 @@ mod tests { // Pool capacity is 5, so three witnesses at height 100 should be ejected. 
// The only surviving witness should be witness5, which was the freshest one among them let looking_for_99 = pool.take_state_witnesses_waiting_for_block(&block(99)); - assert_same(looking_for_99, vec![witness5]); + assert_contents(looking_for_99, vec![witness5]); // Let's add a few more witnesses let witness10 = make_witness(102, 1, block(101), 0); @@ -435,14 +434,14 @@ mod tests { // Check that witnesses waiting for block 100 are correct let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); - assert_same(waiting_for_100, vec![witness7, witness8, witness9, witness11]); + assert_contents(waiting_for_100, vec![witness7, witness8, witness9, witness11]); // At this point the pool contains only witness12, no one should be waiting for block 101. let waiting_for_101 = pool.take_state_witnesses_waiting_for_block(&block(101)); - assert_same(waiting_for_101, vec![]); + assert_contents(waiting_for_101, vec![]); let waiting_for_77 = pool.take_state_witnesses_waiting_for_block(&block(77)); - assert_same(waiting_for_77, vec![witness12]); + assert_contents(waiting_for_77, vec![witness12]); assert_empty(&pool); } diff --git a/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs b/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs index ba7b04ee890..75faea53f94 100644 --- a/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs +++ b/integration-tests/src/tests/client/features/orphan_chunk_state_witness.rs @@ -22,7 +22,7 @@ use near_primitives_core::version::PROTOCOL_VERSION; use nearcore::config::GenesisExt; use nearcore::test_utils::TestEnvNightshadeSetupExt; -struct OrphanWitnessTest { +struct OrphanWitnessTestEnv { env: TestEnv, block1: Block, block2: Block, @@ -36,7 +36,7 @@ struct OrphanWitnessTest { /// It creates two blocks (`block1` and `block2`), but doesn't pass them to `excluded_validator`. 
/// When `excluded_validator` receives a witness for the chunk belonging to `block2`, it doesn't /// have `block1` which is required to process the witness, so it becomes an orphaned state witness. -fn setup_orphan_witness_test() -> OrphanWitnessTest { +fn setup_orphan_witness_test() -> OrphanWitnessTestEnv { let accounts: Vec = (0..4).map(|i| format!("test{i}").parse().unwrap()).collect(); let genesis = Genesis::test(accounts.clone(), accounts.len().try_into().unwrap()); let mut env = TestEnv::builder(&genesis.config) @@ -185,7 +185,7 @@ fn setup_orphan_witness_test() -> OrphanWitnessTest { env.process_shards_manager_responses_and_finish_processing_blocks(client_idx); } - OrphanWitnessTest { + OrphanWitnessTestEnv { env, block1, block2, @@ -206,7 +206,7 @@ fn test_orphan_witness_valid() { return; } - let OrphanWitnessTest { + let OrphanWitnessTestEnv { mut env, block1, block2, @@ -243,7 +243,7 @@ fn test_orphan_witness_bad_signature() { return; } - let OrphanWitnessTest { mut env, mut witness, excluded_validator, .. } = + let OrphanWitnessTestEnv { mut env, mut witness, excluded_validator, .. } = setup_orphan_witness_test(); // Modify the witness to contain an invalid signature @@ -267,7 +267,7 @@ fn test_orphan_witness_signature_from_wrong_peer() { return; } - let OrphanWitnessTest { mut env, mut witness, excluded_validator, .. } = + let OrphanWitnessTestEnv { mut env, mut witness, excluded_validator, .. } = setup_orphan_witness_test(); // Sign the witness using another validator's key. @@ -292,7 +292,7 @@ fn test_orphan_witness_invalid_shard_id() { return; } - let OrphanWitnessTest { mut env, mut witness, excluded_validator, chunk_producer, .. } = + let OrphanWitnessTestEnv { mut env, mut witness, excluded_validator, chunk_producer, .. 
} = setup_orphan_witness_test(); // Set invalid shard_id in the witness header @@ -318,7 +318,7 @@ fn test_orphan_witness_too_large() { return; } - let OrphanWitnessTest { mut env, mut witness, excluded_validator, chunk_producer, .. } = + let OrphanWitnessTestEnv { mut env, mut witness, excluded_validator, chunk_producer, .. } = setup_orphan_witness_test(); // Modify the witness to be larger than the allowed limit @@ -347,8 +347,13 @@ fn test_orphan_witness_far_from_head() { return; } - let OrphanWitnessTest { - mut env, mut witness, chunk_producer, block1, excluded_validator, .. + let OrphanWitnessTestEnv { + mut env, + mut witness, + chunk_producer, + block1, + excluded_validator, + .. } = setup_orphan_witness_test(); let bad_height = 10000; @@ -377,7 +382,7 @@ fn test_orphan_witness_not_fully_validated() { return; } - let OrphanWitnessTest { mut env, mut witness, chunk_producer, excluded_validator, .. } = + let OrphanWitnessTestEnv { mut env, mut witness, chunk_producer, excluded_validator, .. } = setup_orphan_witness_test(); // Make the witness invalid in a way that won't be detected during orphan witness validation From 3bcd5c511f41c507440774e6f415bfd30c412f77 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 19:00:07 +0000 Subject: [PATCH 24/34] Use ChunkStateWitness::new_dummy in another test There is another test that created a ChunkStateWitness manually. Let's use the newly introduced `ChunkStateWitness::new_dummy` in this test, it simplifies things. 
--- .../client/features/stateless_validation.rs | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/integration-tests/src/tests/client/features/stateless_validation.rs b/integration-tests/src/tests/client/features/stateless_validation.rs index 3c236ccf7db..09913cbe60d 100644 --- a/integration-tests/src/tests/client/features/stateless_validation.rs +++ b/integration-tests/src/tests/client/features/stateless_validation.rs @@ -1,8 +1,6 @@ use near_epoch_manager::{EpochManager, EpochManagerAdapter}; use near_primitives::network::PeerId; -use near_primitives::sharding::{ShardChunkHeader, ShardChunkHeaderV3}; use near_primitives::stateless_validation::ChunkStateWitness; -use near_primitives::validator_signer::EmptyValidatorSigner; use near_store::test_utils::create_test_store; use nearcore::config::GenesisExt; use rand::rngs::StdRng; @@ -334,24 +332,7 @@ fn test_chunk_state_witness_bad_shard_id() { // Create a dummy ChunkStateWitness with an invalid shard_id let previous_block = env.clients[0].chain.head().unwrap().prev_block_hash; let invalid_shard_id = 1000000000; - - let shard_header = ShardChunkHeader::V3(ShardChunkHeaderV3::new( - previous_block, - Default::default(), - Default::default(), - Default::default(), - Default::default(), - upper_height, - invalid_shard_id, - Default::default(), - Default::default(), - Default::default(), - Default::default(), - Default::default(), - Default::default(), - &EmptyValidatorSigner::default(), - )); - let witness = ChunkStateWitness::empty(shard_header); + let witness = ChunkStateWitness::new_dummy(upper_height, invalid_shard_id, previous_block); // Client should reject this ChunkStateWitness and the error message should mention "shard" tracing::info!(target: "test", "Processing invalid ChunkStateWitness"); From 0b126b83f8caec638b215075128a3effeef1fd73 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 19:08:47 +0000 Subject: [PATCH 25/34] Remove comment which no longer applies 
--- chain/client/src/stateless_validation/chunk_validator/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/chain/client/src/stateless_validation/chunk_validator/mod.rs b/chain/client/src/stateless_validation/chunk_validator/mod.rs index 352d2208f8f..4779917303e 100644 --- a/chain/client/src/stateless_validation/chunk_validator/mod.rs +++ b/chain/client/src/stateless_validation/chunk_validator/mod.rs @@ -652,8 +652,6 @@ impl Client { return Ok(()); } - // TODO(#10265): If the previous block does not exist, we should - // queue this (similar to orphans) to retry later. let result = self.chunk_validator.start_validating_chunk( witness, &self.chain, From a6858eba5db5df65360c9db6658ad6c57a8b11b9 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 19:25:15 +0000 Subject: [PATCH 26/34] Increase default orphan pool size to 25 https://github.com/near/nearcore/pull/10613#discussion_r1499507848 --- core/chain-configs/src/client_config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/chain-configs/src/client_config.rs b/core/chain-configs/src/client_config.rs index 0edf07722e7..a7bf2976ca5 100644 --- a/core/chain-configs/src/client_config.rs +++ b/core/chain-configs/src/client_config.rs @@ -302,8 +302,8 @@ pub fn default_produce_chunk_add_transactions_time_limit() -> Option { } pub fn default_orphan_state_witness_pool_size() -> usize { - // With 4 shards, a capacity of 12 witnesses allows to store 3 orphan witnesses per shard. - 12 + // With 5 shards, a capacity of 25 witnesses allows to store 3 orphan witnesses per shard. + 25 } /// Config for the Chunk Distribution Network feature. 
From 0edf57aec56a3f8368ae4709b6731e369fc789d1 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Thu, 22 Feb 2024 19:27:26 +0000 Subject: [PATCH 27/34] I'm bad at math --- core/chain-configs/src/client_config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/chain-configs/src/client_config.rs b/core/chain-configs/src/client_config.rs index a7bf2976ca5..df0af6eae38 100644 --- a/core/chain-configs/src/client_config.rs +++ b/core/chain-configs/src/client_config.rs @@ -302,7 +302,7 @@ pub fn default_produce_chunk_add_transactions_time_limit() -> Option { } pub fn default_orphan_state_witness_pool_size() -> usize { - // With 5 shards, a capacity of 25 witnesses allows to store 3 orphan witnesses per shard. + // With 5 shards, a capacity of 25 witnesses allows to store 5 orphan witnesses per shard. 25 } From f61f34cdbcfd1c52006f5a58087df833461b2c0c Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 14:23:45 +0000 Subject: [PATCH 28/34] fix log message when a witness is ejected In previous commit I added debug spans to avoid writing witness_height, ... everywhere. I also removed them from the ejection message, but that was a mistake. The span contains information about the witness that is being added, not the one that is being ejected. Because of that the message about ejecting a witness would contain invalid witness_height and other parameters. Fix by using the correct parameters, and names which don't collide with the ones in the span. 
--- .../chunk_validator/orphan_witness_pool.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index a8bc3506dc1..f4faa2c207a 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -56,8 +56,13 @@ impl OrphanStateWitnessPool { if let Some((_, ejected_entry)) = self.witness_cache.push(cache_key, cache_entry) { // If another witness has been ejected from the cache due to capacity limit, // then remove the ejected witness from `waiting_for_block` to keep them in sync + let header = &ejected_entry.witness.inner.chunk_header; tracing::debug!( target: "client", + ejected_witness_height = header.height_created(), + ejected_witness_shard = header.shard_id(), + ejected_witness_chunk = ?header.chunk_hash(), + ejected_witness_prev_block = ?header.prev_block_hash(), "Ejecting an orphaned ChunkStateWitness from the cache due to capacity limit. It will not be processed." 
); self.remove_from_waiting_for_block(&ejected_entry.witness); From bb41b3705d07797c3b1ad40beebbfd931b48045c Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 14:35:51 +0000 Subject: [PATCH 29/34] Use tracing for logs in test_possible_epochs_of_height_around_tip --- Cargo.lock | 1 + chain/epoch-manager/Cargo.toml | 1 + chain/epoch-manager/src/tests/mod.rs | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d0ffa4bc08..5fa46c8bd62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3723,6 +3723,7 @@ dependencies = [ "near-chain-configs", "near-chain-primitives", "near-crypto", + "near-o11y", "near-primitives", "near-store", "num-rational", diff --git a/chain/epoch-manager/Cargo.toml b/chain/epoch-manager/Cargo.toml index 51940e234d5..02e2c675cae 100644 --- a/chain/epoch-manager/Cargo.toml +++ b/chain/epoch-manager/Cargo.toml @@ -23,6 +23,7 @@ smart-default.workspace = true tracing.workspace = true # itertools has collect_vec which is useful in quick debugging prints itertools.workspace = true +near-o11y.workspace = true near-crypto.workspace = true near-primitives.workspace = true diff --git a/chain/epoch-manager/src/tests/mod.rs b/chain/epoch-manager/src/tests/mod.rs index 1e72602e0f6..f68976c17b2 100644 --- a/chain/epoch-manager/src/tests/mod.rs +++ b/chain/epoch-manager/src/tests/mod.rs @@ -9,6 +9,7 @@ use crate::test_utils::{ record_with_block_info, reward, setup_default_epoch_manager, setup_epoch_manager, stake, DEFAULT_TOTAL_SUPPLY, }; +use near_o11y::testonly::init_test_logger; use near_primitives::account::id::AccountIdRef; use near_primitives::block::Tip; use near_primitives::challenge::SlashedValidator; @@ -2823,6 +2824,7 @@ fn test_verify_chunk_state_witness() { #[test] fn test_possible_epochs_of_height_around_tip() { use std::str::FromStr; + init_test_logger(); let amount_staked = 1_000_000; let account_id = AccountId::from_str("test1").unwrap(); @@ -2872,12 +2874,12 @@ fn 
test_possible_epochs_of_height_around_tip() { ); let epoch1 = EpochId(h[0]); - dbg!(&epoch1); + tracing::info!(target: "test", ?epoch1); // Add blocks with heights 1..5, a standard epoch with no surprises for i in 1..=5 { let height = genesis_height + i as BlockHeight; - dbg!(height); + tracing::info!(target: "test", height); record_block(&mut epoch_manager, h[i - 1], h[i], height, vec![]); let tip = Tip { height, @@ -2914,12 +2916,12 @@ fn test_possible_epochs_of_height_around_tip() { } let epoch2 = EpochId(h[5]); - dbg!(&epoch2); + tracing::info!(target: "test", ?epoch2); // Add blocks with heights 6..10, also a standard epoch with no surprises for i in 6..=10 { let height = genesis_height + i as BlockHeight; - dbg!(height); + tracing::info!(target: "test", height); record_block(&mut epoch_manager, h[i - 1], h[i], height, vec![]); let tip = Tip { height, @@ -2960,7 +2962,7 @@ fn test_possible_epochs_of_height_around_tip() { } let epoch3 = EpochId(h[10]); - dbg!(&epoch3); + tracing::info!(target: "test", ?epoch3); // Now there is a very long epoch with no final blocks (all odd blocks are missing) // For all the blocks inside this for the last final block will be block #8, as it has #9 and #10 @@ -2969,7 +2971,7 @@ fn test_possible_epochs_of_height_around_tip() { let last_finalized_height = genesis_height + 8; for i in (12..=24).filter(|i| i % 2 == 0) { let height = genesis_height + i as BlockHeight; - dbg!(height); + tracing::info!(target: "test", height); let block_info = block_info( h[i], height, @@ -3032,7 +3034,7 @@ fn test_possible_epochs_of_height_around_tip() { // make block 24 final and finalize epoch2. 
for i in [25, 26] { let height = genesis_height + i as BlockHeight; - dbg!(height); + tracing::info!(target: "test", height); let block_info = block_info( h[i], height, @@ -3095,7 +3097,7 @@ fn test_possible_epochs_of_height_around_tip() { let epoch4 = EpochId(h[12]); for i in 27..=31 { let height = genesis_height + i as BlockHeight; - dbg!(height); + tracing::info!(target: "test", height); record_block(&mut epoch_manager, h[i - 1], h[i], height, vec![]); let tip = Tip { height, From 33248acac4f6c12a9e28734cfb3f37ce0409c2cf Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 15:01:40 +0000 Subject: [PATCH 30/34] Use naive implementation of OrphanStateWitnessPool to reduce complexity --- .../chunk_validator/orphan_witness_pool.rs | 98 +++++-------------- 1 file changed, 23 insertions(+), 75 deletions(-) diff --git a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs index f4faa2c207a..c803e6de376 100644 --- a/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs +++ b/chain/client/src/stateless_validation/chunk_validator/orphan_witness_pool.rs @@ -1,5 +1,3 @@ -use std::collections::{HashMap, HashSet}; - use lru::LruCache; use near_chain_configs::default_orphan_state_witness_pool_size; use near_primitives::hash::CryptoHash; @@ -14,10 +12,6 @@ use metrics_tracker::OrphanWitnessMetricsTracker; /// required block arrives and the witness can be processed. pub struct OrphanStateWitnessPool { witness_cache: LruCache<(ShardId, BlockHeight), CacheEntry>, - /// List of orphaned witnesses that wait for this block to appear. - /// Maps block hash to entries in `witness_cache`. - /// Must be kept in sync with `witness_cache`. - waiting_for_block: HashMap>, } struct CacheEntry { @@ -29,10 +23,15 @@ impl OrphanStateWitnessPool { /// Create a new `OrphanStateWitnessPool` with a capacity of `cache_capacity` witnesses. 
/// The `Default` trait implementation provides reasonable defaults. pub fn new(cache_capacity: usize) -> Self { - OrphanStateWitnessPool { - witness_cache: LruCache::new(cache_capacity), - waiting_for_block: HashMap::new(), + if cache_capacity > 128 { + tracing::warn!( + target: "client", + "OrphanStateWitnessPool capacity is set to {}, which is larger than expected. \ + OrphanStateWitnessPool uses a naive algorithm, using a large capacity might lead \ + to performance problems.", cache_capacity); } + + OrphanStateWitnessPool { witness_cache: LruCache::new(cache_capacity) } } /// Add an orphaned chunk state witness to the pool. The witness will be put in a cache and it'll @@ -42,20 +41,13 @@ impl OrphanStateWitnessPool { /// it'd be possible to fill the whole cache with spam. /// `witness_size` is only used for metrics, it's okay to pass 0 if you don't care about the metrics. pub fn add_orphan_state_witness(&mut self, witness: ChunkStateWitness, witness_size: usize) { - if self.witness_cache.cap() == 0 { - // A cache with 0 capacity doesn't keep anything. 
- return; - } - // Insert the new ChunkStateWitness into the cache let chunk_header = &witness.inner.chunk_header; - let prev_block_hash = *chunk_header.prev_block_hash(); let cache_key = (chunk_header.shard_id(), chunk_header.height_created()); let metrics_tracker = OrphanWitnessMetricsTracker::new(&witness, witness_size); let cache_entry = CacheEntry { witness, _metrics_tracker: metrics_tracker }; if let Some((_, ejected_entry)) = self.witness_cache.push(cache_key, cache_entry) { - // If another witness has been ejected from the cache due to capacity limit, - // then remove the ejected witness from `waiting_for_block` to keep them in sync + // Another witness has been ejected from the cache due to capacity limit let header = &ejected_entry.witness.inner.chunk_header; tracing::debug!( target: "client", @@ -65,25 +57,6 @@ impl OrphanStateWitnessPool { ejected_witness_prev_block = ?header.prev_block_hash(), "Ejecting an orphaned ChunkStateWitness from the cache due to capacity limit. It will not be processed." 
); - self.remove_from_waiting_for_block(&ejected_entry.witness); - } - - // Add the new orphaned state witness to `waiting_for_block` - self.waiting_for_block - .entry(prev_block_hash) - .or_insert_with(|| HashSet::new()) - .insert(cache_key); - } - - fn remove_from_waiting_for_block(&mut self, witness: &ChunkStateWitness) { - let chunk_header = &witness.inner.chunk_header; - let waiting_set = self - .waiting_for_block - .get_mut(chunk_header.prev_block_hash()) - .expect("Every ejected witness must have a corresponding entry in waiting_for_block."); - waiting_set.remove(&(chunk_header.shard_id(), chunk_header.height_created())); - if waiting_set.is_empty() { - self.waiting_for_block.remove(chunk_header.prev_block_hash()); } } @@ -93,17 +66,19 @@ impl OrphanStateWitnessPool { &mut self, prev_block: &CryptoHash, ) -> Vec { - let Some(waiting) = self.waiting_for_block.remove(prev_block) else { - return Vec::new(); - }; + let mut to_remove: Vec<(ShardId, BlockHeight)> = Vec::new(); + for (cache_key, cache_entry) in self.witness_cache.iter() { + if cache_entry.witness.inner.chunk_header.prev_block_hash() == prev_block { + to_remove.push(*cache_key); + } + } let mut result = Vec::new(); - for (shard_id, height) in waiting { - // Remove this witness from `witness_cache` to keep them in sync - let entry = self.witness_cache.pop(&(shard_id, height)).expect( - "Every entry in waiting_for_block must have a corresponding witness in the cache", - ); - - result.push(entry.witness); + for cache_key in to_remove { + let ready_witness = self + .witness_cache + .pop(&cache_key) + .expect("The cache contains this entry, a moment ago it was iterated over"); + result.push(ready_witness.witness); } result } @@ -236,10 +211,9 @@ mod tests { } } - // Check that the pool is empty, all witnesses have been removed from both fields + // Check that the pool is empty, all witnesses have been removed fn assert_empty(pool: &OrphanStateWitnessPool) { - assert!(pool.witness_cache.is_empty()); - 
assert!(pool.waiting_for_block.is_empty()); + assert_eq!(pool.witness_cache.len(), 0); } /// Basic functionality - inserting witnesses and fetching them works as expected @@ -325,32 +299,6 @@ mod tests { assert_empty(&pool); } - /// When a witness is ejected from the cache, it should also be removed from the corresponding waiting_for_block set. - /// But it's not enough to remove it from the set, if the set has become empty we must also remove the set itself - /// from `waiting_for_block`. Otherwise `waiting_for_block` would have a constantly increasing amount of empty sets, - /// which would cause a memory leak. - #[test] - fn empty_waiting_sets_cleared_on_ejection() { - let mut pool = OrphanStateWitnessPool::new(1); - - let witness1 = make_witness(100, 1, block(99), 0); - pool.add_orphan_state_witness(witness1, 0); - - // When witness2 is added to the pool witness1 gets ejected from the cache (capacity is set to 1). - // When this happens the set of witness waiting for block 99 will become empty. Then this empty set should - // be removed from `waiting_for_block`, otherwise there'd be a memory leak. - let witness2 = make_witness(100, 2, block(100), 0); - pool.add_orphan_state_witness(witness2.clone(), 0); - - // waiting_for_block contains only one entry, list of witnesses waiting for block 100. 
- assert_eq!(pool.waiting_for_block.len(), 1); - - let waiting_for_100 = pool.take_state_witnesses_waiting_for_block(&block(100)); - assert_contents(waiting_for_100, vec![witness2]); - - assert_empty(&pool); - } - /// OrphanStateWitnessPool can handle large shard ids without any problems, it doesn't keep a Vec indexed by shard_id #[test] fn large_shard_id() { From 24b25f5fde4ef63e206a0511f61605b76ad67771 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 16:01:05 +0000 Subject: [PATCH 31/34] Fix the import of near-o11y I imported near-o11y to use the function for initializing a tracing-subscriber in epoch manager tests, but I didn't import the corresponding nightly packages, so a CI check failed. Let's fix this by importing the packages that the CI check proposed. --- chain/epoch-manager/Cargo.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chain/epoch-manager/Cargo.toml b/chain/epoch-manager/Cargo.toml index 02e2c675cae..67551f76267 100644 --- a/chain/epoch-manager/Cargo.toml +++ b/chain/epoch-manager/Cargo.toml @@ -23,8 +23,8 @@ smart-default.workspace = true tracing.workspace = true # itertools has collect_vec which is useful in quick debugging prints itertools.workspace = true -near-o11y.workspace = true +near-o11y.workspace = true near-crypto.workspace = true near-primitives.workspace = true near-store.workspace = true @@ -38,6 +38,7 @@ protocol_feature_fix_staking_threshold = [ "near-primitives/protocol_feature_fix_staking_threshold", ] nightly = [ + "near-o11y/nightly", "near-chain-configs/nightly", "near-primitives/nightly", "near-store/nightly", @@ -45,6 +46,7 @@ nightly = [ "protocol_feature_fix_staking_threshold", ] nightly_protocol = [ + "near-o11y/nightly_protocol", "near-chain-configs/nightly_protocol", "near-primitives/nightly_protocol", "near-store/nightly_protocol", From 58be57bf026dd96064d798933f9b90e9fab608ed Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 16:11:59 +0000 Subject: [PATCH
32/34] Fix formatting of epoch-manager/Cargo.toml Ran python3 scripts/fix_nightly_feature_flags.py fix to satisfy the CI --- chain/epoch-manager/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chain/epoch-manager/Cargo.toml b/chain/epoch-manager/Cargo.toml index 67551f76267..5f43d7d3d30 100644 --- a/chain/epoch-manager/Cargo.toml +++ b/chain/epoch-manager/Cargo.toml @@ -38,16 +38,16 @@ protocol_feature_fix_staking_threshold = [ "near-primitives/protocol_feature_fix_staking_threshold", ] nightly = [ - "near-o11y/nightly", "near-chain-configs/nightly", + "near-o11y/nightly", "near-primitives/nightly", "near-store/nightly", "nightly_protocol", "protocol_feature_fix_staking_threshold", ] nightly_protocol = [ - "near-o11y/nightly_protocol", "near-chain-configs/nightly_protocol", + "near-o11y/nightly_protocol", "near-primitives/nightly_protocol", "near-store/nightly_protocol", ] From 5cae3c332e7ff582fe717296bb08205022392191 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 18:52:03 +0000 Subject: [PATCH 33/34] fix: Don't remove the handler for ChunkEndorsementMessage While resolving a merge conflict caused by https://github.com/near/nearcore/pull/10646, I accidentally removed a handler for chunk endorsement messages. I didn't mean to do it, I wanted to only modify handlers for chunk state witnesses. Let's undo the change and bring the chunk endorsement handler back. 
--- chain/client/src/client_actions.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/chain/client/src/client_actions.rs b/chain/client/src/client_actions.rs index 7eb3dada71a..f5f5558382b 100644 --- a/chain/client/src/client_actions.rs +++ b/chain/client/src/client_actions.rs @@ -1846,6 +1846,22 @@ impl ClientActionHandler for ClientActions { } } +impl Handler> for ClientActor { + type Result = (); + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _: &mut Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + if let Err(err) = self.client.process_chunk_endorsement(msg.0) { + tracing::error!(target: "client", ?err, "Error processing chunk endorsement"); + } + } +} + /// Returns random seed sampled from the current thread pub fn random_seed_from_thread() -> RngSeed { let mut rng_seed: RngSeed = [0; 32]; From 6e9b14337bfe99dc7f28342c292f98624115c4d1 Mon Sep 17 00:00:00 2001 From: Jan Ciolek Date: Fri, 23 Feb 2024 18:55:30 +0000 Subject: [PATCH 34/34] Revert "fix: Don't remove the handler for ChunkEndorsementMessage" This reverts commit 5cae3c332e7ff582fe717296bb08205022392191. 
It was actually necessary to remove the endorsement message handler, it doesn't compile otherwise :facepalm: --- chain/client/src/client_actions.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/chain/client/src/client_actions.rs b/chain/client/src/client_actions.rs index f5f5558382b..7eb3dada71a 100644 --- a/chain/client/src/client_actions.rs +++ b/chain/client/src/client_actions.rs @@ -1846,22 +1846,6 @@ impl ClientActionHandler for ClientActions { } } -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - if let Err(err) = self.client.process_chunk_endorsement(msg.0) { - tracing::error!(target: "client", ?err, "Error processing chunk endorsement"); - } - } -} - /// Returns random seed sampled from the current thread pub fn random_seed_from_thread() -> RngSeed { let mut rng_seed: RngSeed = [0; 32];