diff --git a/chain/chain/src/state_snapshot_actor.rs b/chain/chain/src/state_snapshot_actor.rs
index e2662538bad..2cd6e4aeef9 100644
--- a/chain/chain/src/state_snapshot_actor.rs
+++ b/chain/chain/src/state_snapshot_actor.rs
@@ -1,7 +1,7 @@
 use actix::{AsyncContext, Context};
 use near_async::messaging::CanSend;
 use near_network::types::{NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest};
-use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt};
+use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt};
 use near_performance_metrics_macros::perf;
 use near_primitives::block::Block;
 use near_primitives::hash::CryptoHash;
diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs
index 183d5ad9682..fa1e42405e2 100644
--- a/chain/client/src/client.rs
+++ b/chain/client/src/client.rs
@@ -16,8 +16,6 @@ use crate::SyncAdapter;
 use crate::SyncMessage;
 use crate::{metrics, SyncStatus};
 use actix_rt::ArbiterHandle;
-use chrono::DateTime;
-use chrono::Utc;
 use itertools::Itertools;
 use near_async::messaging::IntoSender;
 use near_async::messaging::{CanSend, Sender};
@@ -182,7 +180,7 @@ pub struct Client {
     /// When the "sync block" was requested.
     /// The "sync block" is the last block of the previous epoch, i.e. `prev_hash` of the `sync_hash` block.
-    pub last_time_sync_block_requested: Option<DateTime<Utc>>,
+    pub last_time_sync_block_requested: Option<near_async::time::Utc>,
     /// Helper module for stateless validation functionality like chunk witness production, validation
     /// chunk endorsements tracking etc.
     pub chunk_validator: ChunkValidator,
diff --git a/chain/client/src/client_actions.rs b/chain/client/src/client_actions.rs
new file mode 100644
index 00000000000..11de323e68f
--- /dev/null
+++ b/chain/client/src/client_actions.rs
@@ -0,0 +1,1876 @@
+//! Client actor orchestrates Client and facilitates network connection.
+//! It should just serve as a coordinator class to handle messages and check triggers but immediately
+//! pass the control to Client. This means, any real block processing or production logic should
+//! be put in Client.
+//! Unfortunately, this is not the case today. We are in the process of refactoring ClientActor
+//! https://github.com/near/nearcore/issues/7899
+
+#[cfg(feature = "test_features")]
+use crate::client::AdvProduceBlocksMode;
+use crate::client::{Client, EPOCH_START_INFO_BLOCKS};
+use crate::config_updater::ConfigUpdater;
+use crate::debug::new_network_info_view;
+use crate::info::{display_sync_status, InfoHelper};
+use crate::sync::adapter::{SyncMessage, SyncShardInfo};
+use crate::sync::state::{StateSync, StateSyncResult};
+use crate::{metrics, StatusResponse};
+use actix::{Addr, Arbiter};
+use near_async::futures::{DelayedActionRunner, DelayedActionRunnerExt};
+use near_async::messaging::{CanSend, Sender};
+use near_async::time::{Clock, Utc};
+use near_chain::chain::{
+    ApplyStatePartsRequest, ApplyStatePartsResponse, BlockCatchUpRequest, BlockCatchUpResponse,
+};
+use near_chain::resharding::{ReshardingRequest, ReshardingResponse};
+use near_chain::test_utils::format_hash;
+#[cfg(feature = "test_features")]
+use near_chain::ChainStoreAccess;
+use near_chain::{
+    byzantine_assert, near_chain_primitives, Block, BlockHeader, BlockProcessingArtifact,
+    DoneApplyChunkCallback, Provenance,
+};
+use near_chain_configs::{ClientConfig, LogSummaryStyle};
+use near_chain_primitives::error::EpochErrorResultToChainError;
+use near_chunks::client::ShardsManagerResponse;
+use near_chunks::logic::cares_about_shard_this_or_next_epoch;
+use near_client_primitives::types::{
+    Error, GetClientConfig, GetClientConfigError, GetNetworkInfo, NetworkInfoResponse,
+    StateSyncStatus, Status, StatusError, StatusSyncInfo, SyncStatus,
+};
+use near_network::client::{
+    BlockApproval, BlockHeadersResponse, BlockResponse, ChunkEndorsementMessage,
+    ChunkStateWitnessMessage, ProcessTxRequest, ProcessTxResponse, RecvChallenge, SetNetworkInfo,
+    StateResponse,
+};
+use near_network::types::ReasonForBan;
+use near_network::types::{
+    NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest,
+};
+use near_o11y::WithSpanContextExt;
+use near_performance_metrics;
+use near_performance_metrics_macros::perf;
+use near_primitives::block::Tip;
+use near_primitives::block_header::ApprovalType;
+use near_primitives::epoch_manager::RngSeed;
+use near_primitives::hash::CryptoHash;
+use near_primitives::network::{AnnounceAccount, PeerId};
+use near_primitives::static_clock::StaticClock;
+use near_primitives::types::BlockHeight;
+use near_primitives::unwrap_or_return;
+use near_primitives::utils::{from_timestamp, MaybeValidated};
+use near_primitives::validator_signer::ValidatorSigner;
+use near_primitives::version::PROTOCOL_VERSION;
+use near_primitives::views::{DetailedDebugStatus, ValidatorInfo};
+#[cfg(feature = "test_features")]
+use near_store::DBCol;
+use near_store::ShardUId;
+use near_telemetry::TelemetryActor;
+use rand::seq::SliceRandom;
+use rand::{thread_rng, Rng};
+use std::collections::HashMap;
+use std::fmt;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use tokio::sync::broadcast;
+use tracing::{debug, debug_span, error, info, trace, warn};
+
+/// Multiplier on `max_block_time` to wait until deciding that chain stalled.
+const STATUS_WAIT_TIME_MULTIPLIER: u64 = 10;
+/// `max_block_production_time` times this multiplier is how long we wait before rebroadcasting
+/// the current `head`
+const HEAD_STALL_MULTIPLIER: u32 = 4;
+
+#[derive(Clone, near_async::MultiSend, near_async::MultiSenderFrom)]
+pub struct ClientSenderForClient {
+    pub apply_chunks_done: Sender<ApplyChunksDoneMessage>,
+}
+
+pub struct ClientActions {
+    clock: Clock,
+
+    /// Adversarial controls
+    pub adv: crate::adversarial::Controls,
+
+    // Sender to be able to send a message to myself.
+    myself_sender: ClientSenderForClient,
+    pub(crate) client: Client,
+    network_adapter: PeerManagerAdapter,
+    network_info: NetworkInfo,
+    /// Identity that represents this Client at the network level.
+    /// It is used as part of the messages that identify this client.
+    node_id: PeerId,
+    /// Last time we announced our accounts as validators.
+    last_validator_announce_time: Option<Instant>,
+    /// Info helper.
+    info_helper: InfoHelper,
+
+    /// Last time handle_block_production method was called
+    block_production_next_attempt: near_async::time::Utc,
+
+    // Last time when log_summary method was called.
+    log_summary_timer_next_attempt: near_async::time::Utc,
+
+    block_production_started: bool,
+    doomslug_timer_next_attempt: near_async::time::Utc,
+    sync_timer_next_attempt: near_async::time::Utc,
+    sync_started: bool,
+    state_parts_task_scheduler: Box<dyn Fn(ApplyStatePartsRequest)>,
+    block_catch_up_scheduler: Box<dyn Fn(BlockCatchUpRequest)>,
+    resharding_scheduler: Box<dyn Fn(ReshardingRequest)>,
+    state_parts_client_arbiter: Arbiter,
+
+    #[cfg(feature = "sandbox")]
+    fastforward_delta: near_primitives::types::BlockHeightDelta,
+
+    /// Synchronization measure to allow graceful shutdown.
+    /// Informs the system when a ClientActor gets dropped.
+    shutdown_signal: Option<broadcast::Sender<()>>,
+
+    /// Manages updating the config.
+    config_updater: Option<ConfigUpdater>,
+}
+
+impl ClientActions {
+    pub fn new(
+        clock: Clock,
+        client: Client,
+        myself_sender: ClientSenderForClient,
+        config: ClientConfig,
+        node_id: PeerId,
+        network_adapter: PeerManagerAdapter,
+        validator_signer: Option<Arc<dyn ValidatorSigner>>,
+        telemetry_actor: Addr<TelemetryActor>,
+        shutdown_signal: Option<broadcast::Sender<()>>,
+        adv: crate::adversarial::Controls,
+        config_updater: Option<ConfigUpdater>,
+        state_parts_task_scheduler: Box<dyn Fn(ApplyStatePartsRequest)>,
+        block_catch_up_scheduler: Box<dyn Fn(BlockCatchUpRequest)>,
+        resharding_scheduler: Box<dyn Fn(ReshardingRequest)>,
+        state_parts_client_arbiter: Arbiter,
+    ) -> Result<Self, Error> {
+        if let Some(vs) = &validator_signer {
+            info!(target: "client", "Starting validator node: {}", vs.validator_id());
+        }
+        let info_helper = InfoHelper::new(Some(telemetry_actor), &config, validator_signer.clone());
+
+        let now = clock.now_utc();
+        Ok(ClientActions {
+            clock,
+            adv,
+            myself_sender,
+            client,
+            network_adapter,
+            node_id,
+            network_info: NetworkInfo {
+                connected_peers: vec![],
+                tier1_connections: vec![],
+                num_connected_peers: 0,
+                peer_max_count: 0,
+                highest_height_peers: vec![],
+                received_bytes_per_sec: 0,
+                sent_bytes_per_sec: 0,
+                known_producers: vec![],
+                tier1_accounts_keys: vec![],
+                tier1_accounts_data: vec![],
+            },
+            last_validator_announce_time: None,
+            info_helper,
+            block_production_next_attempt: now,
+            log_summary_timer_next_attempt: now,
+            block_production_started: false,
+            doomslug_timer_next_attempt: now,
+            sync_timer_next_attempt: now,
+            sync_started: false,
+            #[cfg(feature = "sandbox")]
+            fastforward_delta: 0,
+            shutdown_signal,
+            config_updater,
+
+            state_parts_task_scheduler,
+            block_catch_up_scheduler,
+            resharding_scheduler,
+            state_parts_client_arbiter,
+        })
+    }
+}
+
+#[cfg(feature = "test_features")]
+#[derive(actix::Message, Debug)]
+#[rtype(result = "Option<u64>")]
+pub enum NetworkAdversarialMessage {
+    AdvProduceBlocks(u64, bool),
+    AdvSwitchToHeight(u64),
+    AdvDisableHeaderSync,
+    AdvDisableDoomslug,
+    AdvGetSavedBlocks,
+    AdvCheckStorageConsistency,
+}
+
+pub(crate) trait ClientActionHandler<M> {
+    type Result;
+    fn handle(&mut self, msg: M) -> Self::Result;
+}
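The trait above is the piece that detaches message handling from actix: a handler is an ordinary method on plain state, so the same impl can be driven by an actor wrapper or called directly from tests. A minimal self-contained sketch of the pattern (`Counter` and `Ping` are hypothetical stand-ins, not nearcore types):

// Sketch of the ClientActionHandler pattern; types here are illustrative.
trait ActionHandler<M> {
    type Result;
    fn handle(&mut self, msg: M) -> Self::Result;
}

struct Ping;
struct Counter(u64);

impl ActionHandler<Ping> for Counter {
    type Result = u64;
    // No actor context is required: handling a message is a plain method
    // call, which is what lets ClientActions be exercised without actix.
    fn handle(&mut self, _msg: Ping) -> u64 {
        self.0 += 1;
        self.0
    }
}

fn main() {
    let mut actions = Counter(0);
    assert_eq!(actions.handle(Ping), 1);
    assert_eq!(actions.handle(Ping), 2);
}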
+
+#[cfg(feature = "test_features")]
+impl ClientActionHandler<NetworkAdversarialMessage> for ClientActions {
+    type Result = Option<u64>;
+
+    fn handle(&mut self, msg: NetworkAdversarialMessage) -> Self::Result {
+        match msg {
+            NetworkAdversarialMessage::AdvDisableDoomslug => {
+                info!(target: "adversary", "Turning Doomslug off");
+                self.adv.set_disable_doomslug(true);
+                self.client.doomslug.adv_disable();
+                self.client.chain.adv_disable_doomslug();
+                None
+            }
+            NetworkAdversarialMessage::AdvDisableHeaderSync => {
+                info!(target: "adversary", "Blocking header sync");
+                self.adv.set_disable_header_sync(true);
+                None
+            }
+            NetworkAdversarialMessage::AdvProduceBlocks(num_blocks, only_valid) => {
+                info!(target: "adversary", num_blocks, "Starting adversary blocks production");
+                if only_valid {
+                    self.client.adv_produce_blocks = Some(AdvProduceBlocksMode::OnlyValid);
+                } else {
+                    self.client.adv_produce_blocks = Some(AdvProduceBlocksMode::All);
+                }
+                let start_height =
+                    self.client.chain.mut_chain_store().get_latest_known().unwrap().height + 1;
+                let mut blocks_produced = 0;
+                for height in start_height.. {
+                    let block =
+                        self.client.produce_block(height).expect("block should be produced");
+                    if only_valid && block == None {
+                        continue;
+                    }
+                    let block = block.expect("block should exist after produced");
+                    info!(target: "adversary", blocks_produced, num_blocks, height, "Producing adversary block");
+                    self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests(
+                        NetworkRequests::Block { block: block.clone() },
+                    ));
+                    let _ = self.client.start_process_block(
+                        block.into(),
+                        Provenance::PRODUCED,
+                        self.get_apply_chunks_done_callback(),
+                    );
+                    blocks_produced += 1;
+                    if blocks_produced == num_blocks {
+                        break;
+                    }
+                }
+                None
+            }
+            NetworkAdversarialMessage::AdvSwitchToHeight(height) => {
+                info!(target: "adversary", "Switching to height {:?}", height);
+                let mut chain_store_update = self.client.chain.mut_chain_store().store_update();
+                chain_store_update.save_largest_target_height(height);
+                chain_store_update
+                    .adv_save_latest_known(height)
+                    .expect("adv method should not fail");
+                chain_store_update.commit().expect("adv method should not fail");
+                None
+            }
+            NetworkAdversarialMessage::AdvGetSavedBlocks => {
+                info!(target: "adversary", "Requested number of saved blocks");
+                let store = self.client.chain.chain_store().store();
+                let mut num_blocks = 0;
+                for _ in store.iter(DBCol::Block) {
+                    num_blocks += 1;
+                }
+                Some(num_blocks)
+            }
+            NetworkAdversarialMessage::AdvCheckStorageConsistency => {
+                // timeout is set to 1.5 seconds to give some room as we wait in Nightly for 2 seconds
+                let timeout = 1500;
+                info!(target: "adversary", "Check Storage Consistency, timeout set to {:?} milliseconds", timeout);
+                let mut genesis = near_chain_configs::GenesisConfig::default();
+                genesis.genesis_height = self.client.chain.chain_store().get_genesis_height();
+                let mut store_validator = near_chain::store_validator::StoreValidator::new(
+                    self.client.validator_signer.as_ref().map(|x| x.validator_id().clone()),
+                    genesis,
+                    self.client.epoch_manager.clone(),
+                    self.client.shard_tracker.clone(),
+                    self.client.runtime_adapter.clone(),
+                    self.client.chain.chain_store().store().clone(),
+                    self.adv.is_archival(),
+                );
+                store_validator.set_timeout(timeout);
+                store_validator.validate();
+                if store_validator.is_failed() {
+                    error!(target: "client", "Storage Validation failed, {:?}", store_validator.errors);
+                    Some(0)
+                } else {
+                    Some(store_validator.tests_done())
+                }
+            }
+        }
+    }
+}
+
+impl ClientActionHandler<ProcessTxRequest> for ClientActions {
+    type Result = ProcessTxResponse;
+
+    fn handle(&mut self, msg: ProcessTxRequest) -> Self::Result {
+        let ProcessTxRequest { transaction, is_forwarded, check_only } = msg;
+        self.client.process_tx(transaction, is_forwarded, check_only)
+    }
+}
+
+impl ClientActionHandler<BlockResponse> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: BlockResponse) {
+        let BlockResponse { block, peer_id, was_requested } = msg;
+        debug!(target: "client", block_height = block.header().height(), block_hash = ?block.header().hash(), "BlockResponse");
+        let blocks_at_height =
+            self.client.chain.chain_store().get_all_block_hashes_by_height(block.header().height());
+        if was_requested
+            || blocks_at_height.is_err()
+            || blocks_at_height.as_ref().unwrap().is_empty()
+        {
+            // This is a very sneaky piece of logic.
+            if self.maybe_receive_state_sync_blocks(&block) {
+                // A node is syncing its state. Don't consider receiving
+                // blocks other than the few special ones that State Sync expects.
+                return;
+            }
+            self.client.receive_block(
+                block,
+                peer_id,
+                was_requested,
+                self.get_apply_chunks_done_callback(),
+            );
+        } else {
+            match self.client.epoch_manager.get_epoch_id_from_prev_block(block.header().prev_hash())
+            {
+                Ok(epoch_id) => {
+                    if let Some(hashes) = blocks_at_height.unwrap().get(&epoch_id) {
+                        if !hashes.contains(block.header().hash()) {
+                            warn!(target: "client", "Rejecting unrequested block {}, height {}", block.header().hash(), block.header().height());
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+}
+
+impl ClientActionHandler<BlockHeadersResponse> for ClientActions {
+    type Result = Result<(), ReasonForBan>;
+
+    fn handle(&mut self, msg: BlockHeadersResponse) -> Self::Result {
+        let BlockHeadersResponse(headers, peer_id) = msg;
+        if self.receive_headers(headers, peer_id) {
+            Ok(())
+        } else {
+            warn!(target: "client", "Banning node for sending invalid block headers");
+            Err(ReasonForBan::BadBlockHeader)
+        }
+    }
+}
+
+impl ClientActionHandler<BlockApproval> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: BlockApproval) {
+        let BlockApproval(approval, peer_id) = msg;
+        debug!(target: "client", "Receive approval {:?} from peer {:?}", approval, peer_id);
+        self.client.collect_block_approval(&approval, ApprovalType::PeerApproval(peer_id));
+    }
+}
+
+/// StateResponse is used during StateSync and catchup.
+/// It contains either StateSync header information (that tells us how many parts there are etc) or a single part.
+impl ClientActionHandler<StateResponse> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: StateResponse) {
+        let StateResponse(state_response_info) = msg;
+        let shard_id = state_response_info.shard_id();
+        let hash = state_response_info.sync_hash();
+        let state_response = state_response_info.take_state_response();
+
+        trace!(target: "sync", "Received state response shard_id: {} sync_hash: {:?} part(id/size): {:?}",
+            shard_id,
+            hash,
+            state_response.part().as_ref().map(|(part_id, data)| (part_id, data.len()))
+        );
+        // Get the download that matches the shard_id and hash
+
+        // ... It could be that the state was requested by the state sync
+        if let SyncStatus::StateSync(StateSyncStatus {
+            sync_hash,
+            sync_status: shards_to_download,
+        }) = &mut self.client.sync_status
+        {
+            if hash == *sync_hash {
+                if let Some(shard_download) = shards_to_download.get_mut(&shard_id) {
+                    self.client.state_sync.update_download_on_state_response_message(
+                        shard_download,
+                        hash,
+                        shard_id,
+                        state_response,
+                        &mut self.client.chain,
+                    );
+                    return;
+                }
+            }
+        }
+
+        // ... Or one of the catchups
+        if let Some((state_sync, shards_to_download, _)) =
+            self.client.catchup_state_syncs.get_mut(&hash)
+        {
+            if let Some(shard_download) = shards_to_download.get_mut(&shard_id) {
+                state_sync.update_download_on_state_response_message(
+                    shard_download,
+                    hash,
+                    shard_id,
+                    state_response,
+                    &mut self.client.chain,
+                );
+                return;
+            }
+        }
+
+        error!(target: "sync", "State sync received hash {} that we're not expecting, potential malicious peer or a very delayed response.", hash);
+    }
+}
+
+impl ClientActionHandler<RecvChallenge> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: RecvChallenge) {
+        let RecvChallenge(challenge) = msg;
+        match self.client.process_challenge(challenge) {
+            Ok(_) => {}
+            Err(err) => error!(target: "client", "Error processing challenge: {}", err),
+        }
+    }
+}
+
+impl ClientActionHandler<SetNetworkInfo> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: SetNetworkInfo) {
+        // SetNetworkInfo is a large message. Avoid printing it at the `debug` verbosity.
+        let SetNetworkInfo(network_info) = msg;
+        self.network_info = network_info;
+    }
+}
+
+#[cfg(feature = "sandbox")]
+impl ClientActionHandler<near_client_primitives::types::SandboxMessage> for ClientActions {
+    type Result = near_client_primitives::types::SandboxResponse;
+
+    fn handle(
+        &mut self,
+        msg: near_client_primitives::types::SandboxMessage,
+    ) -> near_client_primitives::types::SandboxResponse {
+        match msg {
+            near_client_primitives::types::SandboxMessage::SandboxPatchState(state) => {
+                self.client.chain.patch_state(
+                    near_primitives::sandbox::state_patch::SandboxStatePatch::new(state),
+                );
+                near_client_primitives::types::SandboxResponse::SandboxNoResponse
+            }
+            near_client_primitives::types::SandboxMessage::SandboxPatchStateStatus => {
+                near_client_primitives::types::SandboxResponse::SandboxPatchStateFinished(
+                    !self.client.chain.patch_state_in_progress(),
+                )
+            }
+            near_client_primitives::types::SandboxMessage::SandboxFastForward(delta_height) => {
+                if self.fastforward_delta > 0 {
+                    return near_client_primitives::types::SandboxResponse::SandboxFastForwardFailed(
+                        "Consecutive fast_forward requests cannot be made while a current one is going on.".to_string());
+                }
+
+                self.fastforward_delta = delta_height;
+                near_client_primitives::types::SandboxResponse::SandboxNoResponse
+            }
+            near_client_primitives::types::SandboxMessage::SandboxFastForwardStatus => {
+                near_client_primitives::types::SandboxResponse::SandboxFastForwardFinished(
+                    self.fastforward_delta == 0,
+                )
+            }
+        }
+    }
+}
+
+impl ClientActionHandler<Status> for ClientActions {
+    type Result = Result<StatusResponse, StatusError>;
+
+    fn handle(&mut self, msg: Status) -> Self::Result {
+        let head = self.client.chain.head()?;
+        let head_header = self.client.chain.get_block_header(&head.last_block_hash)?;
+        let latest_block_time = head_header.raw_timestamp();
+        let latest_state_root = *head_header.prev_state_root();
+        if msg.is_health_check {
+            let now = self.clock.now_utc();
+            let block_timestamp =
+                Utc::from_unix_timestamp_nanos(latest_block_time as i128).unwrap();
+            if now > block_timestamp {
+                let elapsed = (now - block_timestamp).unsigned_abs();
+                if elapsed
+                    > Duration::from_millis(
+                        self.client.config.max_block_production_delay.as_millis() as u64
+                            * STATUS_WAIT_TIME_MULTIPLIER,
+                    )
+                {
+                    return Err(StatusError::NoNewBlocks { elapsed });
+                }
+            }
+
+            if self.client.sync_status.is_syncing() {
+                return Err(StatusError::NodeIsSyncing);
+            }
+        }
+        let validators: Vec<ValidatorInfo> = self
+            .client
+            .epoch_manager
+            .get_epoch_block_producers_ordered(&head.epoch_id, &head.last_block_hash)
+            .into_chain_error()?
+            .into_iter()
+            .map(|(validator_stake, is_slashed)| ValidatorInfo {
+                account_id: validator_stake.take_account_id(),
+                is_slashed,
+            })
+            .collect();
+
+        let epoch_start_height =
+            self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash).ok();
+
+        let protocol_version = self
+            .client
+            .epoch_manager
+            .get_epoch_protocol_version(&head.epoch_id)
+            .into_chain_error()?;
+
+        let node_public_key = self.node_id.public_key().clone();
+        let (validator_account_id, validator_public_key) = match &self.client.validator_signer {
+            Some(vs) => (Some(vs.validator_id().clone()), Some(vs.public_key())),
+            None => (None, None),
+        };
+        let node_key = validator_public_key.clone();
+
+        let mut earliest_block_hash = None;
+        let mut earliest_block_height = None;
+        let mut earliest_block_time = None;
+        if let Some(earliest_block_hash_value) = self.client.chain.get_earliest_block_hash()? {
+            earliest_block_hash = Some(earliest_block_hash_value);
+            if let Ok(earliest_block) =
+                self.client.chain.get_block_header(&earliest_block_hash_value)
+            {
+                earliest_block_height = Some(earliest_block.height());
+                earliest_block_time = Some(earliest_block.timestamp());
+            }
+        }
+        // Provide more detailed information about the current state of chain.
+        // For now - provide info about last 50 blocks.
+        let detailed_debug_status = if msg.detailed {
+            Some(DetailedDebugStatus {
+                network_info: new_network_info_view(&self.client.chain, &self.network_info),
+                sync_status: format!(
+                    "{} ({})",
+                    self.client.sync_status.as_variant_name(),
+                    display_sync_status(
+                        &self.client.sync_status,
+                        &self.client.chain.head()?,
+                        &self.client.config.state_sync.sync,
+                    ),
+                ),
+                catchup_status: self.client.get_catchup_status()?,
+                current_head_status: head.clone().into(),
+                current_header_head_status: self.client.chain.header_head()?.into(),
+                block_production_delay_millis: self
+                    .client
+                    .config
+                    .min_block_production_delay
+                    .as_millis() as u64,
+            })
+        } else {
+            None
+        };
+        let uptime_sec = StaticClock::utc().timestamp() - self.info_helper.boot_time_seconds;
+        Ok(StatusResponse {
+            version: self.client.config.version.clone(),
+            protocol_version,
+            latest_protocol_version: PROTOCOL_VERSION,
+            chain_id: self.client.config.chain_id.clone(),
+            rpc_addr: self.client.config.rpc_addr.clone(),
+            validators,
+            sync_info: StatusSyncInfo {
+                latest_block_hash: head.last_block_hash,
+                latest_block_height: head.height,
+                latest_state_root,
+                latest_block_time: from_timestamp(latest_block_time),
+                syncing: self.client.sync_status.is_syncing(),
+                earliest_block_hash,
+                earliest_block_height,
+                earliest_block_time,
+                epoch_id: Some(head.epoch_id),
+                epoch_start_height,
+            },
+            validator_account_id,
+            validator_public_key,
+            node_public_key,
+            node_key,
+            uptime_sec,
+            detailed_debug_status,
+        })
+    }
+}
+
+/// Private to public API conversion.
+fn make_peer_info(from: near_network::types::PeerInfo) -> near_client_primitives::types::PeerInfo {
+    near_client_primitives::types::PeerInfo {
+        id: from.id,
+        addr: from.addr,
+        account_id: from.account_id,
+    }
+}
+
+/// Private to public API conversion.
+fn make_known_producer(
+    from: near_network::types::KnownProducer,
+) -> near_client_primitives::types::KnownProducer {
+    near_client_primitives::types::KnownProducer {
+        peer_id: from.peer_id,
+        account_id: from.account_id,
+        addr: from.addr,
+        next_hops: from.next_hops,
+    }
+}
+
+impl ClientActionHandler<GetNetworkInfo> for ClientActions {
+    type Result = Result<NetworkInfoResponse, String>;
+
+    fn handle(&mut self, _msg: GetNetworkInfo) -> Self::Result {
+        Ok(NetworkInfoResponse {
+            connected_peers: (self.network_info.connected_peers.iter())
+                .map(|fpi| make_peer_info(fpi.full_peer_info.peer_info.clone()))
+                .collect(),
+            num_connected_peers: self.network_info.num_connected_peers,
+            peer_max_count: self.network_info.peer_max_count,
+            sent_bytes_per_sec: self.network_info.sent_bytes_per_sec,
+            received_bytes_per_sec: self.network_info.received_bytes_per_sec,
+            known_producers: self
+                .network_info
+                .known_producers
+                .iter()
+                .map(|p| make_known_producer(p.clone()))
+                .collect(),
+        })
+    }
+}
+
+/// `ApplyChunksDoneMessage` is a message that signals the finishing of applying chunks of a block.
+/// Upon receiving this message, ClientActors knows that it's time to finish processing the blocks that
+/// just finished applying chunks.
+#[derive(actix::Message, Debug)]
+#[rtype(result = "()")]
+pub struct ApplyChunksDoneMessage;
+
+impl ClientActionHandler<ApplyChunksDoneMessage> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, _msg: ApplyChunksDoneMessage) -> Self::Result {
+        self.try_process_unfinished_blocks();
+    }
+}
+
+#[derive(Debug)]
+enum SyncRequirement {
+    SyncNeeded { peer_id: PeerId, highest_height: BlockHeight, head: Tip },
+    AlreadyCaughtUp { peer_id: PeerId, highest_height: BlockHeight, head: Tip },
+    NoPeers,
+    AdvHeaderSyncDisabled,
+}
+
+impl SyncRequirement {
+    fn sync_needed(&self) -> bool {
+        matches!(self, Self::SyncNeeded { .. })
+    }
+
+    fn to_metrics_string(&self) -> String {
+        match self {
+            Self::SyncNeeded { .. } => "SyncNeeded",
+            Self::AlreadyCaughtUp { .. } => "AlreadyCaughtUp",
+            Self::NoPeers => "NoPeers",
+            Self::AdvHeaderSyncDisabled { .. } => "AdvHeaderSyncDisabled",
+        }
+        .to_string()
+    }
+}
+
+impl fmt::Display for SyncRequirement {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::SyncNeeded { peer_id, highest_height, head: my_head } => write!(
+                f,
+                "sync needed at #{} [{}]. highest height peer: {} at #{}",
+                my_head.height,
+                format_hash(my_head.last_block_hash),
+                peer_id,
+                highest_height
+            ),
+            Self::AlreadyCaughtUp { peer_id, highest_height, head: my_head } => write!(
+                f,
+                "synced at #{} [{}]. highest height peer: {} at #{}",
+                my_head.height,
+                format_hash(my_head.last_block_hash),
+                peer_id,
+                highest_height
+            ),
+            Self::NoPeers => write!(f, "no available peers"),
+            Self::AdvHeaderSyncDisabled => {
+                write!(f, "syncing disabled via adv_disable_header_sync")
+            }
+        }
+    }
+}
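The `ApplyChunksDoneMessage` plumbing above forms a self-messaging loop: the chunk-application job fires a callback, the callback sends the message back to the actor, and the handler resumes block post-processing. A stripped-down sketch of that round trip over a plain channel (names illustrative, no actix):

use std::sync::mpsc;

struct ApplyChunksDone;

fn main() {
    let (tx, rx) = mpsc::channel::<ApplyChunksDone>();

    // Callback handed to the chunk-application job. It carries no result:
    // it only signals readiness; results live in shared chain state.
    let done_callback = move || {
        let _ = tx.send(ApplyChunksDone);
    };

    // Worker side: after applying chunks, signal the "actor".
    done_callback();

    // Actor side: each signal triggers another pass over unfinished blocks.
    while rx.try_recv().is_ok() {
        println!("try_process_unfinished_blocks()");
    }
}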
+
+impl ClientActions {
+    pub(crate) fn start(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        self.start_flat_storage_creation(ctx);
+
+        // Start syncing job.
+        self.start_sync(ctx);
+
+        // Start block production tracking if have block producer info.
+        if self.client.validator_signer.is_some() {
+            self.block_production_started = true;
+        }
+
+        // Start triggers
+        self.schedule_triggers(ctx);
+
+        // Start catchup job.
+        self.catchup(ctx);
+
+        if let Err(err) = self.client.send_network_chain_info() {
+            tracing::error!(target: "client", ?err, "Failed to update network chain info");
+        }
+    }
+
+    /// Check if client Account Id should be sent and send it.
+    /// Account Id is sent when is not current a validator but are becoming a validator soon.
+    fn check_send_announce_account(&mut self, prev_block_hash: CryptoHash) {
+        // If no peers, there is no one to announce to.
+        if self.network_info.num_connected_peers == 0 {
+            debug!(target: "client", "No peers: skip account announce");
+            return;
+        }
+
+        // First check that we currently have an AccountId
+        let validator_signer = match self.client.validator_signer.as_ref() {
+            None => return,
+            Some(signer) => signer,
+        };
+
+        let now = StaticClock::instant();
+        // Check that we haven't announced it too recently
+        if let Some(last_validator_announce_time) = self.last_validator_announce_time {
+            // Don't make announcement if have passed less than half of the time in which other peers
+            // should remove our Account Id from their Routing Tables.
+            if 2 * (now - last_validator_announce_time) < self.client.config.ttl_account_id_router {
+                return;
+            }
+        }
+
+        debug!(target: "client", "Check announce account for {}, last announce time {:?}", validator_signer.validator_id(), self.last_validator_announce_time);
+
+        // Announce AccountId if client is becoming a validator soon.
+        let next_epoch_id = unwrap_or_return!(self
+            .client
+            .epoch_manager
+            .get_next_epoch_id_from_prev_block(&prev_block_hash));
+
+        // Check client is part of the futures validators
+        if self.client.is_validator(&next_epoch_id, &prev_block_hash) {
+            debug!(target: "client", "Sending announce account for {}", validator_signer.validator_id());
+            self.last_validator_announce_time = Some(now);
+
+            let signature = validator_signer.sign_account_announce(
+                validator_signer.validator_id(),
+                &self.node_id,
+                &next_epoch_id,
+            );
+            self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests(
+                NetworkRequests::AnnounceAccount(AnnounceAccount {
+                    account_id: validator_signer.validator_id().clone(),
+                    peer_id: self.node_id.clone(),
+                    epoch_id: next_epoch_id,
+                    signature,
+                }),
+            ));
+        }
+    }
+
+    /// Process the sandbox fast forward request. If the change in block height is past an epoch,
+    /// we fast forward to just right before the epoch, produce some blocks to get past and into
+    /// a new epoch, then we continue on with the residual amount to fast forward.
+    #[cfg(feature = "sandbox")]
+    fn sandbox_process_fast_forward(
+        &mut self,
+        block_height: BlockHeight,
+    ) -> Result<Option<near_chain::types::LatestKnown>, Error> {
+        let mut delta_height = std::mem::replace(&mut self.fastforward_delta, 0);
+        if delta_height == 0 {
+            return Ok(None);
+        }
+
+        let epoch_length = self.client.config.epoch_length;
+        if epoch_length <= 3 {
+            return Err(Error::Other(
+                "Unsupported: fast_forward with an epoch length of 3 or less".to_string(),
+            ));
+        }
+
+        // Check if we are at epoch boundary. If we are, do not fast forward until new
+        // epoch is here. Decrement the fast_forward count by 1 when a block is produced
+        // during this period of waiting
+        let block_height_wrt_epoch = block_height % epoch_length;
+        if epoch_length - block_height_wrt_epoch <= 3 || block_height_wrt_epoch == 0 {
+            // wait for doomslug to call into produce block
+            self.fastforward_delta = delta_height;
+            return Ok(None);
+        }
+
+        let delta_height = if block_height_wrt_epoch + delta_height >= epoch_length {
+            // fast forward to just right before epoch boundary to have epoch_manager
+            // handle the epoch_height updates as normal. `- 3` since this is being
+            // done 3 blocks before the epoch ends.
+            let right_before_epoch_update = epoch_length - block_height_wrt_epoch - 3;
+
+            delta_height -= right_before_epoch_update;
+            self.fastforward_delta = delta_height;
+            right_before_epoch_update
+        } else {
+            delta_height
+        };
+
+        self.client.accrued_fastforward_delta += delta_height;
+        let delta_time = self.client.sandbox_delta_time();
+        let new_latest_known = near_chain::types::LatestKnown {
+            height: block_height + delta_height,
+            seen: near_primitives::utils::to_timestamp(StaticClock::utc() + delta_time),
+        };
+
+        Ok(Some(new_latest_known))
+    }
+
+    fn pre_block_production(&mut self) -> Result<(), Error> {
+        #[cfg(feature = "sandbox")]
+        {
+            let latest_known = self.client.chain.mut_chain_store().get_latest_known()?;
+            if let Some(new_latest_known) =
+                self.sandbox_process_fast_forward(latest_known.height)?
+            {
+                self.client.chain.mut_chain_store().save_latest_known(new_latest_known.clone())?;
+                self.client.sandbox_update_tip(new_latest_known.height)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn post_block_production(&mut self) {
+        #[cfg(feature = "sandbox")]
+        if self.fastforward_delta > 0 {
+            // Decrease the delta_height by 1 since we've produced a single block. This
+            // ensures that we advanced the right amount of blocks when fast forwarding
+            // and fast forwarding triggers regular block production in the case of
+            // stepping between epoch boundaries.
+            self.fastforward_delta -= 1;
+        }
+    }
+
+    /// Retrieves latest height, and checks if must produce next block.
+    /// Otherwise wait for block arrival or suggest to skip after timeout.
+    fn handle_block_production(&mut self) -> Result<(), Error> {
+        let _span = tracing::debug_span!(target: "client", "handle_block_production").entered();
+        // If syncing, don't try to produce blocks.
+        if self.client.sync_status.is_syncing() {
+            debug!(target:"client", sync_status=format!("{:#?}", self.client.sync_status), "Syncing - block production disabled");
+            return Ok(());
+        }
+
+        let _ = self.client.check_and_update_doomslug_tip();
+
+        self.pre_block_production()?;
+        let head = self.client.chain.head()?;
+        let latest_known = self.client.chain.chain_store().get_latest_known()?;
+
+        assert!(
+            head.height <= latest_known.height,
+            "Latest known height is invalid {} vs {}",
+            head.height,
+            latest_known.height
+        );
+
+        let epoch_id =
+            self.client.epoch_manager.get_epoch_id_from_prev_block(&head.last_block_hash)?;
+        let log_block_production_info =
+            if self.client.epoch_manager.is_next_block_epoch_start(&head.last_block_hash)? {
+                true
+            } else {
+                // the next block is still the same epoch
+                let epoch_start_height =
+                    self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash)?;
+                latest_known.height - epoch_start_height < EPOCH_START_INFO_BLOCKS
+            };
+
+        // We try to produce block for multiple heights (up to the highest height for which we've seen 2/3 of approvals).
+        if latest_known.height + 1 <= self.client.doomslug.get_largest_height_crossing_threshold() {
+            debug!(target: "client", "Considering blocks for production between {} and {} ", latest_known.height + 1, self.client.doomslug.get_largest_height_crossing_threshold());
+        } else {
+            debug!(target: "client", "Cannot produce any block: not enough approvals beyond {}", latest_known.height);
+        }
+
+        let me = if let Some(me) = &self.client.validator_signer {
+            me.validator_id().clone()
+        } else {
+            return Ok(());
+        };
+
+        // For debug purpose, we record the approvals we have seen so far to the future blocks
+        for height in latest_known.height + 1..=self.client.doomslug.get_largest_approval_height() {
+            let next_block_producer_account =
+                self.client.epoch_manager.get_block_producer(&epoch_id, height)?;
+
+            if me == next_block_producer_account {
+                self.client.block_production_info.record_approvals(
+                    height,
+                    self.client.doomslug.approval_status_at_height(&height),
+                );
+            }
+        }
+
+        for height in
+            latest_known.height + 1..=self.client.doomslug.get_largest_height_crossing_threshold()
+        {
+            let next_block_producer_account =
+                self.client.epoch_manager.get_block_producer(&epoch_id, height)?;
+
+            if me == next_block_producer_account {
+                self.client.chunk_inclusion_tracker.prepare_chunk_headers_ready_for_inclusion(
+                    &head.last_block_hash,
+                    self.client.chunk_endorsement_tracker.as_ref(),
+                )?;
+                let num_chunks = self
+                    .client
+                    .chunk_inclusion_tracker
+                    .num_chunk_headers_ready_for_inclusion(&epoch_id, &head.last_block_hash);
+                let have_all_chunks = head.height == 0
+                    || num_chunks == self.client.epoch_manager.shard_ids(&epoch_id).unwrap().len();
+
+                if self.client.doomslug.ready_to_produce_block(
+                    StaticClock::instant(),
+                    height,
+                    have_all_chunks,
+                    log_block_production_info,
+                ) {
+                    if let Err(err) = self.produce_block(height) {
+                        // If there is an error, report it and let it retry on the next loop step.
+                        error!(target: "client", height, "Block production failed: {}", err);
+                    } else {
+                        self.post_block_production();
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn schedule_triggers(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        let wait = self.check_triggers(ctx);
+
+        ctx.run_later(wait, move |act, ctx| {
+            act.schedule_triggers(ctx);
+        });
+    }
+
+    /// Check if the scheduled time of any "triggers" has passed, and if so, call the trigger.
+    /// Triggers are important functions of client, like running single step of state sync or
+    /// checking if we can produce a block.
+    ///
+    /// It is called before processing Actix message and also in schedule_triggers.
+    /// This is to ensure all triggers enjoy higher priority than any actix message.
+    /// Otherwise due to a bug in Actix library Actix prioritizes processing messages
+    /// while there are messages in mailbox. Because of that we handle scheduling
+    /// triggers with custom `run_timer` function instead of `run_later` in Actix.
+    ///
+    /// Returns the delay before the next time `check_triggers` should be called, which is
+    /// min(time until the closest trigger, 1 second).
+    pub(crate) fn check_triggers(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) -> Duration {
+        let _span = tracing::debug_span!(target: "client", "check_triggers").entered();
+        if let Some(config_updater) = &mut self.config_updater {
+            config_updater.try_update(&|updateable_client_config| {
+                self.client.update_client_config(updateable_client_config)
+            });
+        }
+
+        // Check block height to trigger expected shutdown
+        if let Ok(head) = self.client.chain.head() {
+            if let Some(block_height_to_shutdown) = self.client.config.expected_shutdown.get() {
+                if head.height >= block_height_to_shutdown {
+                    info!(target: "client", "Expected shutdown triggered: head block({}) >= ({:?})", head.height, block_height_to_shutdown);
+                    if let Some(tx) = self.shutdown_signal.take() {
+                        let _ = tx.send(()); // Ignore send signal fail, it will send again in next trigger
+                    }
+                }
+            }
+        }
+
+        self.try_process_unfinished_blocks();
+
+        let mut delay = near_async::time::Duration::seconds(1);
+        let now = self.clock.now_utc();
+
+        let timer = metrics::CHECK_TRIGGERS_TIME.start_timer();
+        if self.sync_started {
+            self.sync_timer_next_attempt = self.run_timer(
+                self.sync_wait_period(),
+                self.sync_timer_next_attempt,
+                ctx,
+                |act, _| act.run_sync_step(),
+                "sync",
+            );
+
+            delay = std::cmp::min(delay, self.sync_timer_next_attempt - now);
+
+            self.doomslug_timer_next_attempt = self.run_timer(
+                self.client.config.doosmslug_step_period,
+                self.doomslug_timer_next_attempt,
+                ctx,
+                |act, _| act.try_doomslug_timer(),
+                "doomslug",
+            );
+            delay = core::cmp::min(delay, self.doomslug_timer_next_attempt - now)
+        }
+        if self.block_production_started {
+            self.block_production_next_attempt = self.run_timer(
+                self.client.config.block_production_tracking_delay,
+                self.block_production_next_attempt,
+                ctx,
+                |act, _ctx| act.try_handle_block_production(),
+                "block_production",
+            );
+
+            let _ = self.client.check_head_progress_stalled(
+                self.client.config.max_block_production_delay * HEAD_STALL_MULTIPLIER,
+            );
+
+            delay = core::cmp::min(delay, self.block_production_next_attempt - now)
+        }
+
+        self.log_summary_timer_next_attempt = self.run_timer(
+            self.client.config.log_summary_period,
+            self.log_summary_timer_next_attempt,
+            ctx,
+            |act, _ctx| act.log_summary(),
+            "log_summary",
+        );
+        delay = core::cmp::min(delay, self.log_summary_timer_next_attempt - now);
+        timer.observe_duration();
+        core::cmp::max(delay, near_async::time::Duration::ZERO).unsigned_abs()
+    }
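Reduced to its skeleton, `check_triggers` runs every due timer and returns `min(time until the nearest trigger, 1 second)` so that `schedule_triggers` can re-arm itself. Roughly (a sketch with std types only, no actix or metrics):

use std::time::{Duration, Instant};

/// Run `f` if `next_attempt` has passed; return when it should run next.
fn run_timer(delay: Duration, next_attempt: Instant, f: &mut impl FnMut()) -> Instant {
    let now = Instant::now();
    if now < next_attempt {
        return next_attempt;
    }
    f();
    now + delay
}

fn main() {
    let mut ticks = 0u32;
    let mut next_block_attempt = Instant::now();
    // Upper bound: even with no due triggers, re-check at least once a second.
    let mut wait = Duration::from_secs(1);
    next_block_attempt =
        run_timer(Duration::from_millis(100), next_block_attempt, &mut || ticks += 1);
    wait = wait.min(next_block_attempt.saturating_duration_since(Instant::now()));
    println!("ran {ticks} trigger(s); sleep {wait:?} before the next check_triggers");
}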
+
+    /// "Unfinished" blocks means that blocks that client has started the processing and haven't
+    /// finished because it was waiting for applying chunks to be done. This function checks
+    /// if there are any "unfinished" blocks that are ready to be processed again and finish processing
+    /// these blocks.
+    /// This function is called at two places, upon receiving ApplyChunkDoneMessage and `check_triggers`.
+    /// The job that executes applying chunks will send an ApplyChunkDoneMessage to ClientActor after
+    /// applying chunks is done, so when receiving ApplyChunkDoneMessage messages, ClientActor
+    /// calls this function to finish processing the unfinished blocks. ClientActor also calls
+    /// this function in `check_triggers`, because the actix queue may be blocked by other messages
+    /// and we want to prioritize block processing.
+    fn try_process_unfinished_blocks(&mut self) {
+        let _span = debug_span!(target: "client", "try_process_unfinished_blocks").entered();
+        let (accepted_blocks, errors) =
+            self.client.postprocess_ready_blocks(self.get_apply_chunks_done_callback(), true);
+        if !errors.is_empty() {
+            error!(target: "client", ?errors, "try_process_unfinished_blocks got errors");
+        }
+        self.process_accepted_blocks(accepted_blocks);
+    }
+
+    fn try_handle_block_production(&mut self) {
+        let _span = debug_span!(target: "client", "try_handle_block_production").entered();
+        if let Err(err) = self.handle_block_production() {
+            tracing::error!(target: "client", ?err, "Handle block production failed")
+        }
+    }
+
+    fn try_doomslug_timer(&mut self) {
+        let _span = tracing::debug_span!(target: "client", "try_doomslug_timer").entered();
+        let _ = self.client.check_and_update_doomslug_tip();
+        let approvals = self.client.doomslug.process_timer(StaticClock::instant());
+
+        // Important to save the largest approval target height before sending approvals, so
+        // that if the node crashes in the meantime, we cannot get slashed on recovery
+        let mut chain_store_update = self.client.chain.mut_chain_store().store_update();
+        chain_store_update
+            .save_largest_target_height(self.client.doomslug.get_largest_target_height());
+
+        match chain_store_update.commit() {
+            Ok(_) => {
+                let head = unwrap_or_return!(self.client.chain.head());
+                if self.client.is_validator(&head.epoch_id, &head.last_block_hash)
+                    || self.client.is_validator(&head.next_epoch_id, &head.last_block_hash)
+                {
+                    for approval in approvals {
+                        if let Err(e) =
+                            self.client.send_approval(&self.client.doomslug.get_tip().0, approval)
+                        {
+                            error!("Error while sending an approval {:?}", e);
+                        }
+                    }
+                }
+            }
+            Err(e) => error!("Error while committing largest skipped height {:?}", e),
+        };
+    }
+
+    /// Produce block if we are block producer for given `next_height` height.
+    /// Can return error, should be called with `produce_block` to handle errors and reschedule.
+    fn produce_block(&mut self, next_height: BlockHeight) -> Result<(), Error> {
+        let _span = tracing::debug_span!(target: "client", "produce_block", next_height).entered();
+        if let Some(block) = self.client.produce_block_on_head(next_height, false)? {
+            // If we produced the block, send it out before we apply the block.
+            self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests(
+                NetworkRequests::Block { block: block.clone() },
+            ));
+            // We’ve produced the block so that counts as validated block.
+            let block = MaybeValidated::from_validated(block);
+            let res = self.client.start_process_block(
+                block,
+                Provenance::PRODUCED,
+                self.get_apply_chunks_done_callback(),
+            );
+            if let Err(e) = &res {
+                match e {
+                    near_chain::Error::ChunksMissing(_) => {
+                        debug!(target: "client", "chunks missing");
+                        // missing chunks were already handled in Client::process_block, we don't need to
+                        // do anything here
+                        return Ok(());
+                    }
+                    _ => {
+                        error!(target: "client", ?res, "Failed to process freshly produced block");
+                        byzantine_assert!(false);
+                        return res.map_err(|err| err.into());
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn send_chunks_metrics(&mut self, block: &Block) {
+        let chunks = block.chunks();
+        for (chunk, &included) in chunks.iter().zip(block.header().chunk_mask().iter()) {
+            if included {
+                self.info_helper.chunk_processed(
+                    chunk.shard_id(),
+                    chunk.prev_gas_used(),
+                    chunk.prev_balance_burnt(),
+                );
+            } else {
+                self.info_helper.chunk_skipped(chunk.shard_id());
+            }
+        }
+    }
+
+    fn send_block_metrics(&mut self, block: &Block) {
+        let chunks_in_block = block.header().chunk_mask().iter().filter(|&&m| m).count();
+        let gas_used = Block::compute_gas_used(block.chunks().iter(), block.header().height());
+
+        let last_final_hash = block.header().last_final_block();
+        let last_final_ds_hash = block.header().last_ds_final_block();
+        let last_final_block_height = self
+            .client
+            .chain
+            .get_block(&last_final_hash)
+            .map_or(0, |block| block.header().height());
+        let last_final_ds_block_height = self
+            .client
+            .chain
+            .get_block(&last_final_ds_hash)
+            .map_or(0, |block| block.header().height());
+
+        let epoch_height =
+            self.client.epoch_manager.get_epoch_height_from_prev_block(block.hash()).unwrap_or(0);
+        let epoch_start_height = self
+            .client
+            .epoch_manager
+            .get_epoch_start_height(&last_final_hash)
+            .unwrap_or(last_final_block_height);
+        let last_final_block_height_in_epoch =
+            last_final_block_height.checked_sub(epoch_start_height);
+
+        self.info_helper.block_processed(
+            gas_used,
+            chunks_in_block as u64,
+            block.header().next_gas_price(),
+            block.header().total_supply(),
+            last_final_block_height,
+            last_final_ds_block_height,
+            epoch_height,
+            last_final_block_height_in_epoch,
+        );
+    }
+
+    /// Process all blocks that were accepted by calling other relevant services.
+    fn process_accepted_blocks(&mut self, accepted_blocks: Vec<CryptoHash>) {
+        let _span = tracing::debug_span!(
+            target: "client",
+            "process_accepted_blocks",
+            num_blocks = accepted_blocks.len())
+        .entered();
+        for accepted_block in accepted_blocks {
+            let block = self.client.chain.get_block(&accepted_block).unwrap().clone();
+            self.send_chunks_metrics(&block);
+            self.send_block_metrics(&block);
+            self.check_send_announce_account(*block.header().last_final_block());
+        }
+    }
+
+    /// Returns the callback function that will be passed to various functions that may trigger
+    /// the processing of new blocks. This callback will be called at the end of applying chunks
+    /// for every block.
+    fn get_apply_chunks_done_callback(&self) -> DoneApplyChunkCallback {
+        let myself_sender = self.myself_sender.clone();
+        Arc::new(move |_| {
+            myself_sender.send(ApplyChunksDoneMessage {});
+        })
+    }
+
+    fn receive_headers(&mut self, headers: Vec<BlockHeader>, peer_id: PeerId) -> bool {
+        let _span =
+            debug_span!(target: "client", "receive_headers", num_headers = headers.len(), ?peer_id)
+                .entered();
+        if headers.is_empty() {
+            info!(target: "client", "Received an empty set of block headers");
+            return true;
+        }
+        match self.client.sync_block_headers(headers) {
+            Ok(_) => true,
+            Err(err) => {
+                if err.is_bad_data() {
+                    error!(target: "client", ?err, "Error processing sync blocks");
+                    false
+                } else {
+                    debug!(target: "client", ?err, "Block headers refused by chain");
+                    true
+                }
+            }
+        }
+    }
+
+    /// Check whether need to (continue) sync.
+    /// Also return higher height with known peers at that height.
+    fn syncing_info(&self) -> Result<SyncRequirement, near_chain::Error> {
+        if self.adv.disable_header_sync() {
+            return Ok(SyncRequirement::AdvHeaderSyncDisabled);
+        }
+
+        let head = self.client.chain.head()?;
+        let is_syncing = self.client.sync_status.is_syncing();
+
+        // Only consider peers whose latest block is not invalid blocks
+        let eligible_peers: Vec<_> = self
+            .network_info
+            .highest_height_peers
+            .iter()
+            .filter(|p| !self.client.chain.is_block_invalid(&p.highest_block_hash))
+            .collect();
+        metrics::PEERS_WITH_INVALID_HASH
+            .set(self.network_info.highest_height_peers.len() as i64 - eligible_peers.len() as i64);
+        let peer_info = if let Some(peer_info) = eligible_peers.choose(&mut thread_rng()) {
+            peer_info
+        } else {
+            return Ok(SyncRequirement::NoPeers);
+        };
+
+        let peer_id = peer_info.peer_info.id.clone();
+        let highest_height = peer_info.highest_block_height;
+
+        if is_syncing {
+            if highest_height <= head.height {
+                Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head })
+            } else {
+                Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head })
+            }
+        } else {
+            if highest_height > head.height + self.client.config.sync_height_threshold {
+                Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head })
+            } else {
+                Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head })
+            }
+        }
+    }
+
+    fn start_flat_storage_creation(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        if !self.client.config.flat_storage_creation_enabled {
+            return;
+        }
+        match self.client.run_flat_storage_creation_step() {
+            Ok(false) => {}
+            Ok(true) => {
+                return;
+            }
+            Err(err) => {
+                error!(target: "client", "Error occurred during flat storage creation step: {:?}", err);
+            }
+        }
+
+        ctx.run_later(self.client.config.flat_storage_creation_period, move |act, ctx| {
+            act.start_flat_storage_creation(ctx);
+        });
+    }
+
+    /// Starts syncing and then switches to either syncing or regular mode.
+    fn start_sync(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        // Wait for connections reach at least minimum peers unless skipping sync.
+        if self.network_info.num_connected_peers < self.client.config.min_num_peers
+            && !self.client.config.skip_sync_wait
+        {
+            ctx.run_later(self.client.config.sync_step_period, move |act, ctx| {
+                act.start_sync(ctx);
+            });
+            return;
+        }
+        self.sync_started = true;
+
+        // Sync loop will be started by check_triggers.
+    }
+
+    /// Select the block hash we are using to sync state. It will sync with the state before applying the
+    /// content of such block.
+    ///
+    /// The selected block will always be the first block on a new epoch:
+    /// <https://github.com/nearprotocol/nearcore/issues/2021#issuecomment-583039862>.
+    fn find_sync_hash(&mut self) -> Result<CryptoHash, near_chain::Error> {
+        let header_head = self.client.chain.header_head()?;
+        let sync_hash = header_head.last_block_hash;
+        let epoch_start_sync_hash =
+            StateSync::get_epoch_start_sync_hash(&mut self.client.chain, &sync_hash)?;
+
+        let genesis_hash = self.client.chain.genesis().hash();
+        tracing::debug!(
+            target: "sync",
+            ?header_head,
+            ?sync_hash,
+            ?epoch_start_sync_hash,
+            ?genesis_hash,
+            "find_sync_hash");
+        assert_ne!(&epoch_start_sync_hash, genesis_hash);
+        Ok(epoch_start_sync_hash)
+    }
+
+    /// Runs catchup on repeat, if this client is a validator.
+    /// Schedules itself again if it was not ran as response to state parts job result
+    fn catchup(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        {
+            // An extra scope to limit the lifetime of the span.
+            let _span = tracing::debug_span!(target: "client", "catchup").entered();
+            if let Err(err) = self.client.run_catchup(
+                &self.network_info.highest_height_peers,
+                &self.state_parts_task_scheduler,
+                &self.block_catch_up_scheduler,
+                &self.resharding_scheduler,
+                self.get_apply_chunks_done_callback(),
+                &self.state_parts_client_arbiter.handle(),
+            ) {
+                error!(target: "client", "{:?} Error occurred during catchup for the next epoch: {:?}", self.client.validator_signer.as_ref().map(|vs| vs.validator_id()), err);
+            }
+        }
+
+        ctx.run_later(self.client.config.catchup_step_period, move |act, ctx| {
+            act.catchup(ctx);
+        });
+    }
+
+    /// Runs given callback if the time now is at least `next_attempt`.
+    /// Returns time for next run which should be made based on given `delay` between runs.
+    fn run_timer<F>(
+        &mut self,
+        delay: Duration,
+        next_attempt: Utc,
+        ctx: &mut dyn DelayedActionRunner<Self>,
+        f: F,
+        timer_label: &str,
+    ) -> Utc
+    where
+        F: FnOnce(&mut Self, &mut dyn DelayedActionRunner<Self>) + 'static,
+    {
+        let now = self.clock.now_utc();
+        if now < next_attempt {
+            return next_attempt;
+        }
+
+        let timer =
+            metrics::CLIENT_TRIGGER_TIME_BY_TYPE.with_label_values(&[timer_label]).start_timer();
+        f(self, ctx);
+        timer.observe_duration();
+
+        now + delay
+    }
+
+    fn sync_wait_period(&self) -> Duration {
+        if let Ok(sync) = self.syncing_info() {
+            if !sync.sync_needed() {
+                // If we don't need syncing - retry the sync call rarely.
+                self.client.config.sync_check_period
+            } else {
+                // If we need syncing - retry the sync call often.
+                self.client.config.sync_step_period
+            }
+        } else {
+            self.client.config.sync_step_period
+        }
+    }
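`find_sync_hash` (earlier in this impl) leans on one invariant: walking back from any header, the state-sync hash is the first block whose parent belongs to the previous epoch. The walk, reduced to a toy parent map (all types here are illustrative, not nearcore's):

use std::collections::HashMap;

/// Toy chain: hash -> (parent hash, epoch id); epoch ids change at epoch starts.
fn epoch_start(mut cur: u64, chain: &HashMap<u64, (u64, u32)>) -> u64 {
    loop {
        let (parent, epoch) = chain[&cur];
        match chain.get(&parent) {
            // Parent is in the same epoch: keep walking back.
            Some(&(_, parent_epoch)) if parent_epoch == epoch => cur = parent,
            // Parent is in the previous epoch (or genesis): `cur` opened the epoch.
            _ => return cur,
        }
    }
}

fn main() {
    let chain = HashMap::from([(1, (0, 0)), (2, (1, 1)), (3, (2, 1))]);
    assert_eq!(epoch_start(3, &chain), 2); // block 2 is the first block of epoch 1
}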
+
+    /// Main syncing job responsible for syncing client with other peers.
+    /// Runs itself iff it was not ran as reaction for message with results of
+    /// finishing state part job
+    fn run_sync_step(&mut self) {
+        let _span = tracing::debug_span!(target: "client", "run_sync_step").entered();
+
+        macro_rules! unwrap_and_report (($obj: expr) => (match $obj {
+            Ok(v) => v,
+            Err(err) => {
+                error!(target: "sync", "Sync: Unexpected error: {}", err);
+                return;
+            }
+        }));
+
+        let currently_syncing = self.client.sync_status.is_syncing();
+        let sync = unwrap_and_report!(self.syncing_info());
+        self.info_helper.update_sync_requirements_metrics(sync.to_metrics_string());
+
+        match sync {
+            SyncRequirement::AlreadyCaughtUp { .. }
+            | SyncRequirement::NoPeers
+            | SyncRequirement::AdvHeaderSyncDisabled => {
+                if currently_syncing {
+                    // Initial transition out of "syncing" state.
+                    debug!(target: "sync", prev_sync_status = ?self.client.sync_status, "disabling sync");
+                    self.client.sync_status.update(SyncStatus::NoSync);
+                    // Announce this client's account id if their epoch is coming up.
+                    let head = unwrap_and_report!(self.client.chain.head());
+                    self.check_send_announce_account(head.prev_block_hash);
+                }
+            }
+
+            SyncRequirement::SyncNeeded { highest_height, .. } => {
+                let mut notify_start_sync = false;
+                if !currently_syncing {
+                    info!(target: "client", ?sync, "enabling sync");
+                }
+                // Run each step of syncing separately.
+                unwrap_and_report!(self.client.header_sync.run(
+                    &mut self.client.sync_status,
+                    &mut self.client.chain,
+                    highest_height,
+                    &self.network_info.highest_height_peers
+                ));
+                // Only body / state sync if header height is close to the latest.
+                let header_head = unwrap_and_report!(self.client.chain.header_head());
+
+                // Sync state if already running sync state or if block sync is too far.
+                let sync_state = match self.client.sync_status {
+                    SyncStatus::StateSync(_) => true,
+                    _ if header_head.height
+                        >= highest_height
+                            .saturating_sub(self.client.config.block_header_fetch_horizon) =>
+                    {
+                        unwrap_and_report!(self.client.block_sync.run(
+                            &mut self.client.sync_status,
+                            &self.client.chain,
+                            highest_height,
+                            &self.network_info.highest_height_peers
+                        ))
+                    }
+                    _ => false,
+                };
+                if sync_state {
+                    match self.client.sync_status {
+                        SyncStatus::StateSync(_) => (),
+                        _ => {
+                            let sync_hash = unwrap_and_report!(self.find_sync_hash());
+                            if !self.client.config.archive {
+                                unwrap_and_report!(self
+                                    .client
+                                    .chain
+                                    .reset_data_pre_state_sync(sync_hash));
+                            }
+                            self.client.sync_status.update(SyncStatus::StateSync(
+                                StateSyncStatus { sync_hash, sync_status: HashMap::default() },
+                            ));
+                            // This is the first time we run state sync.
+                            notify_start_sync = true;
+                        }
+                    };
+                    let sync_hash = match &self.client.sync_status {
+                        SyncStatus::StateSync(s) => s.sync_hash,
+                        _ => unreachable!("Sync status should have been StateSync!"),
+                    };
+
+                    let me =
+                        self.client.validator_signer.as_ref().map(|x| x.validator_id().clone());
+                    let block_header =
+                        unwrap_and_report!(self.client.chain.get_block_header(&sync_hash));
+                    let prev_hash = *block_header.prev_hash();
+                    let epoch_id = block_header.epoch_id().clone();
+                    let shards_to_sync: Vec<_> = self
+                        .client
+                        .epoch_manager
+                        .shard_ids(&epoch_id)
+                        .unwrap()
+                        .into_iter()
+                        .filter(|&shard_id| {
+                            cares_about_shard_this_or_next_epoch(
+                                me.as_ref(),
+                                &prev_hash,
+                                shard_id,
+                                true,
+                                &self.client.shard_tracker,
+                            )
+                        })
+                        .collect();
+
+                    let use_colour =
+                        matches!(self.client.config.log_summary_style, LogSummaryStyle::Colored);
+
+                    // Notify each shard to sync.
+                    if notify_start_sync {
+                        let shard_layout = self
+                            .client
+                            .epoch_manager
+                            .get_shard_layout(&epoch_id)
+                            .expect("Cannot get shard layout");
+                        for &shard_id in &shards_to_sync {
+                            let shard_uid =
+                                ShardUId::from_shard_id_and_layout(shard_id, &shard_layout);
+                            match self.client.state_sync_adapter.clone().read() {
+                                Ok(sync_adapter) => sync_adapter.send(
+                                    shard_uid,
+                                    (SyncMessage::StartSync(SyncShardInfo {
+                                        shard_uid,
+                                        sync_hash,
+                                    }))
+                                    .with_span_context(),
+                                ),
+                                Err(_) => {
+                                    error!(target:"client", "State sync adapter lock is poisoned.")
+                                }
+                            }
+                        }
+                    }
+
+                    let now = self.clock.now_utc();
+
+                    // FIXME: it checks if the block exists.. but I have no idea why..
+                    // seems that we don't really use this block in case of catchup - we use it only for state sync.
+                    // Seems it is related to some bug with block getting orphaned after state sync? but not sure.
+                    let (request_block, have_block) =
+                        unwrap_and_report!(self.sync_block_status(&prev_hash, now));
+
+                    if request_block {
+                        self.client.last_time_sync_block_requested = Some(now);
+                        if let Some(peer_info) =
+                            self.network_info.highest_height_peers.choose(&mut thread_rng())
+                        {
+                            let id = peer_info.peer_info.id.clone();
+
+                            for hash in
+                                vec![*block_header.prev_hash(), *block_header.hash()].into_iter()
+                            {
+                                self.client.request_block(hash, id.clone());
+                            }
+                        }
+                    }
+                    if have_block {
+                        self.client.last_time_sync_block_requested = None;
+                    }
+
+                    let state_sync_status = match &mut self.client.sync_status {
+                        SyncStatus::StateSync(s) => s,
+                        _ => unreachable!("Sync status should have been StateSync!"),
+                    };
+                    match unwrap_and_report!(self.client.state_sync.run(
+                        &me,
+                        sync_hash,
+                        &mut state_sync_status.sync_status,
+                        &mut self.client.chain,
+                        self.client.epoch_manager.as_ref(),
+                        &self.network_info.highest_height_peers,
+                        shards_to_sync,
+                        &self.state_parts_task_scheduler,
+                        &self.resharding_scheduler,
+                        &self.state_parts_client_arbiter.handle(),
+                        use_colour,
+                        self.client.runtime_adapter.clone(),
+                    )) {
+                        StateSyncResult::InProgress => (),
+                        StateSyncResult::Completed => {
+                            if !have_block {
+                                trace!(target: "sync", "Sync done. Waiting for sync block.");
+                                return;
+                            }
+                            info!(target: "sync", "State sync: all shards are done");
+
+                            let mut block_processing_artifacts = BlockProcessingArtifact::default();
+
+                            unwrap_and_report!(self.client.chain.reset_heads_post_state_sync(
+                                &me,
+                                sync_hash,
+                                &mut block_processing_artifacts,
+                                self.get_apply_chunks_done_callback(),
+                            ));
+
+                            self.client
+                                .process_block_processing_artifact(block_processing_artifacts);
+                            self.client.sync_status.update(SyncStatus::BlockSync {
+                                start_height: 0,
+                                current_height: 0,
+                                highest_height: 0,
+                            });
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Verifies if the node possesses sync block.
+    /// It is the last block of the previous epoch.
+    /// If the block is absent, the node requests it from peers.
+    fn sync_block_status(
+        &self,
+        prev_hash: &CryptoHash,
+        now: Utc,
+    ) -> Result<(bool, bool), near_chain::Error> {
+        let (request_block, have_block) = if !self.client.chain.block_exists(prev_hash)? {
+            let timeout =
+                near_async::time::Duration::try_from(self.client.config.state_sync_timeout)
+                    .unwrap();
+            match self.client.last_time_sync_block_requested {
+                None => (true, false),
+                Some(last_time) => {
+                    if (now - last_time) >= timeout {
+                        tracing::error!(
+                            target: "sync",
+                            %prev_hash,
+                            ?timeout,
+                            "State sync: block request timed out");
+                        (true, false)
+                    } else {
+                        (false, false)
+                    }
+                }
+            }
+        } else {
+            (false, true)
+        };
+        Ok((request_block, have_block))
+    }
+
+    /// Print current summary.
+    fn log_summary(&mut self) {
+        let _span = tracing::debug_span!(target: "client", "log_summary").entered();
+        self.info_helper.log_summary(
+            &self.client,
+            &self.node_id,
+            &self.network_info,
+            &self.config_updater,
+        )
+    }
+
+    /// Checks if the node is syncing its State and applies special logic in that case.
+    /// A node usually ignores blocks that are too far ahead, but in case of a node syncing its state it is looking for 2 specific blocks:
+    /// * The first block of the new epoch
+    /// * The last block of the prev epoch
+    /// Returns whether the node is syncing its state.
+    fn maybe_receive_state_sync_blocks(&mut self, block: &Block) -> bool {
+        let SyncStatus::StateSync(StateSyncStatus { sync_hash, .. }) = self.client.sync_status
+        else {
+            return false;
+        };
}) = self.client.sync_status
+        else {
+            return false;
+        };
+        if let Ok(header) = self.client.chain.get_block_header(&sync_hash) {
+            let block: MaybeValidated<Block> = (*block).clone().into();
+            let block_hash = *block.hash();
+            if let Err(err) = self.client.chain.validate_block(&block) {
+                byzantine_assert!(false);
+                error!(target: "client", ?err, ?block_hash, "Received an invalid block during state sync");
+            }
+            // Notice that two blocks are saved differently:
+            // * save_block() for one block.
+            // * save_orphan() for another block.
+            if &block_hash == header.prev_hash() {
+                // The last block of the previous epoch.
+                if let Err(err) = self.client.chain.save_block(block) {
+                    error!(target: "client", ?err, ?block_hash, "Failed to save a block during state sync");
+                }
+            } else if block_hash == sync_hash {
+                // The first block of the new epoch.
+                self.client.chain.save_orphan(block, Provenance::NONE, None, false);
+            }
+        }
+        true
+    }
+}
+
+impl ClientActionHandler<ApplyStatePartsResponse> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: ApplyStatePartsResponse) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) {
+            // We are doing catchup
+            sync.set_apply_result(msg.shard_id, msg.apply_result);
+        } else {
+            self.client.state_sync.set_apply_result(msg.shard_id, msg.apply_result);
+        }
+    }
+}
+
+impl ClientActionHandler<BlockCatchUpResponse> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: BlockCatchUpResponse) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        if let Some((_, _, blocks_catch_up_state)) =
+            self.client.catchup_state_syncs.get_mut(&msg.sync_hash)
+        {
+            assert!(blocks_catch_up_state.scheduled_blocks.remove(&msg.block_hash));
+            blocks_catch_up_state.processed_blocks.insert(
+                msg.block_hash,
+                msg.results.into_iter().map(|res| res.1).collect::<Vec<_>>(),
+            );
+        } else {
+            panic!("block catch up processing result from unknown sync hash");
+        }
+    }
+}
+
+impl ClientActionHandler<ReshardingResponse> for ClientActions {
+    type Result = ();
+
+    #[perf]
+    fn handle(&mut self, msg: ReshardingResponse) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) {
+            // We are doing catchup
+            sync.set_resharding_result(msg.shard_id, msg.new_state_roots);
+        } else {
+            self.client.state_sync.set_resharding_result(msg.shard_id, msg.new_state_roots);
+        }
+    }
+}
+
+impl ClientActionHandler<ShardsManagerResponse> for ClientActions {
+    type Result = ();
+
+    #[perf]
+    fn handle(&mut self, msg: ShardsManagerResponse) -> Self::Result {
+        match msg {
+            ShardsManagerResponse::ChunkCompleted { partial_chunk, shard_chunk } => {
+                self.client.on_chunk_completed(
+                    partial_chunk,
+                    shard_chunk,
+                    self.get_apply_chunks_done_callback(),
+                );
+            }
+            ShardsManagerResponse::InvalidChunk(encoded_chunk) => {
+                self.client.on_invalid_chunk(encoded_chunk);
+            }
+            ShardsManagerResponse::ChunkHeaderReadyForInclusion {
+                chunk_header,
+                chunk_producer,
+            } => {
+                self.client
+                    .chunk_inclusion_tracker
+                    .mark_chunk_header_ready_for_inclusion(chunk_header, chunk_producer);
+            }
+        }
+    }
+}
+
+impl ClientActionHandler<GetClientConfig> for ClientActions {
+    type Result = Result<ClientConfig, GetClientConfigError>;
+
+    fn handle(&mut self, msg: GetClientConfig) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+
+        Ok(self.client.config.clone())
+    }
+}
+
+impl ClientActionHandler<SyncMessage> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: SyncMessage) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        // TODO
+        // process messages from SyncActors
+    }
+}
+
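// Illustrative sketch (editorial, not part of this patch): `handle_state_witness_message`
// below applies a bounded-retry pattern. When a witness cannot be processed yet because
// its parent block is missing, the message is re-queued after a fixed delay until the
// attempt budget runs out. Stripped of the actor plumbing, the shape is roughly
// (names hypothetical):
fn process_with_retry(
    attempts_remaining: u32,
    mut try_once: impl FnMut() -> bool, // false = parent block still missing
    mut reschedule: impl FnMut(u32),    // stands in for ctx.run_later(delay, ...)
) {
    if try_once() {
        return; // processed, or failed for a non-retryable reason
    }
    if attempts_remaining > 0 {
        reschedule(attempts_remaining - 1); // retry later with a smaller budget
    } else {
        eprintln!("giving up: parent block still missing after all attempts");
    }
}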
+impl ClientActions {
+    pub fn handle_state_witness_message(
+        &mut self,
+        msg: ChunkStateWitnessMessage,
+        ctx: &mut dyn DelayedActionRunner<Self>,
+    ) {
+        let peer_id = msg.peer_id.clone();
+        let attempts_remaining = msg.attempts_remaining;
+        match self.client.process_chunk_state_witness(msg.witness, msg.peer_id, None) {
+            Err(err) => {
+                tracing::error!(target: "client", ?err, "Error processing chunk state witness");
+            }
+            Ok(Some(witness)) => {
+                if attempts_remaining > 0 {
+                    ctx.run_later(Duration::from_millis(100), move |actions, ctx| {
+                        actions.handle_state_witness_message(
+                            ChunkStateWitnessMessage {
+                                witness,
+                                peer_id,
+                                attempts_remaining: attempts_remaining - 1,
+                            },
+                            ctx,
+                        );
+                    });
+                } else {
+                    tracing::error!(target: "client", "Failed to process chunk state witness even after 5 tries due to missing parent block");
+                }
+            }
+            Ok(None) => {}
+        }
+    }
+}
+
+impl ClientActionHandler<ChunkEndorsementMessage> for ClientActions {
+    type Result = ();
+
+    #[perf]
+    fn handle(&mut self, msg: ChunkEndorsementMessage) -> Self::Result {
+        if let Err(err) = self.client.process_chunk_endorsement(msg.0) {
+            tracing::error!(target: "client", ?err, "Error processing chunk endorsement");
+        }
+    }
+}
+
+/// Returns a random seed sampled from the current thread.
+pub fn random_seed_from_thread() -> RngSeed {
+    let mut rng_seed: RngSeed = [0; 32];
+    rand::thread_rng().fill(&mut rng_seed);
+    rng_seed
+}
diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs
index 29504e6ddfe..74033415dd4 100644
--- a/chain/client/src/client_actor.rs
+++ b/chain/client/src/client_actor.rs
@@ -5,156 +5,61 @@
 //! Unfortunately, this is not the case today. We are in the process of refactoring ClientActor
 //! https://github.com/near/nearcore/issues/7899
 
-#[cfg(feature = "test_features")]
-use crate::client::AdvProduceBlocksMode;
-use crate::client::{Client, EPOCH_START_INFO_BLOCKS};
-use crate::config_updater::ConfigUpdater;
-use crate::debug::new_network_info_view;
-use crate::info::{display_sync_status, InfoHelper};
-use crate::sync::adapter::{SyncMessage, SyncShardInfo};
-use crate::sync::state::{StateSync, StateSyncResult};
-use crate::sync_jobs_actor::{create_sync_job_scheduler, SyncJobsActor};
-use crate::{metrics, StatusResponse, SyncAdapter};
-use actix::{Actor, Addr, Arbiter, AsyncContext, Context, Handler};
-use actix_rt::ArbiterHandle;
+use actix::{Actor, Addr, AsyncContext, Context, Handler};
+use actix_rt::{Arbiter, ArbiterHandle};
 use chrono::{DateTime, Utc};
-use itertools::Itertools;
-use near_async::messaging::{CanSend, Sender};
-use near_chain::chain::{
-    ApplyStatePartsRequest, ApplyStatePartsResponse, BlockCatchUpRequest, BlockCatchUpResponse,
-};
-use near_chain::resharding::{ReshardingRequest, ReshardingResponse};
+use near_async::actix::AddrWithAutoSpanContextExt;
+use near_async::messaging::{IntoMultiSender, Sender};
+use near_async::time::Clock;
+use near_chain::chain::{ApplyStatePartsRequest, BlockCatchUpRequest};
+use near_chain::resharding::ReshardingRequest;
 use near_chain::state_snapshot_actor::SnapshotCallbacks;
-use near_chain::test_utils::format_hash;
 use near_chain::types::RuntimeAdapter;
-#[cfg(feature = "test_features")]
-use near_chain::ChainStoreAccess;
-use near_chain::{
-    byzantine_assert, near_chain_primitives, Block, BlockHeader, BlockProcessingArtifact,
-    ChainGenesis, DoneApplyChunkCallback, Provenance,
-};
-use near_chain_configs::{ClientConfig, LogSummaryStyle, ReshardingHandle};
-use 
near_chain_primitives::error::EpochErrorResultToChainError; +use near_chain::ChainGenesis; +use near_chain_configs::{ClientConfig, ReshardingHandle}; use near_chunks::adapter::ShardsManagerRequestFromClient; -use near_chunks::client::ShardsManagerResponse; -use near_chunks::logic::cares_about_shard_this_or_next_epoch; -use near_client_primitives::types::{ - Error, GetClientConfig, GetClientConfigError, GetNetworkInfo, NetworkInfoResponse, - StateSyncStatus, Status, StatusError, StatusSyncInfo, SyncStatus, -}; +use near_client_primitives::types::Error; use near_epoch_manager::shard_tracker::ShardTracker; -use near_epoch_manager::EpochManagerAdapter; -use near_network::client::{ - BlockApproval, BlockHeadersResponse, BlockResponse, ChunkEndorsementMessage, - ChunkStateWitnessMessage, ProcessTxRequest, ProcessTxResponse, RecvChallenge, SetNetworkInfo, - StateResponse, -}; -use near_network::types::ReasonForBan; -use near_network::types::{ - NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, -}; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; -use near_performance_metrics; -use near_performance_metrics_macros::perf; -use near_primitives::block::Tip; -use near_primitives::block_header::ApprovalType; -use near_primitives::epoch_manager::RngSeed; -use near_primitives::hash::CryptoHash; -use near_primitives::network::{AnnounceAccount, PeerId}; +use near_epoch_manager::{EpochManagerAdapter, RngSeed}; +use near_network::client::ChunkStateWitnessMessage; +use near_network::types::PeerManagerAdapter; +use near_o11y::{handler_debug_span, WithSpanContext}; +use near_primitives::network::PeerId; use near_primitives::static_clock::StaticClock; -use near_primitives::types::BlockHeight; -use near_primitives::unwrap_or_return; -use near_primitives::utils::{from_timestamp, MaybeValidated}; use near_primitives::validator_signer::ValidatorSigner; -use near_primitives::version::PROTOCOL_VERSION; -use near_primitives::views::{DetailedDebugStatus, ValidatorInfo}; -#[cfg(feature = "test_features")] -use near_store::DBCol; -use near_store::ShardUId; use near_telemetry::TelemetryActor; -use rand::seq::SliceRandom; -use rand::{thread_rng, Rng}; -use std::collections::HashMap; -use std::fmt; +use rand::Rng; +use std::fmt::Debug; +use std::ops::{Deref, DerefMut}; use std::sync::{Arc, RwLock}; -use std::thread; -use std::time::{Duration, Instant}; use tokio::sync::broadcast; -use tracing::{debug, debug_span, error, info, trace, warn}; -/// Multiplier on `max_block_time` to wait until deciding that chain stalled. -const STATUS_WAIT_TIME_MULTIPLIER: u64 = 10; -/// `max_block_production_time` times this multiplier is how long we wait before rebroadcasting -/// the current `head` -const HEAD_STALL_MULTIPLIER: u32 = 4; +use crate::client_actions::{ClientActionHandler, ClientActions, ClientSenderForClient}; +use crate::sync_jobs_actor::{create_sync_job_scheduler, SyncJobsActor}; +use crate::{metrics, Client, ConfigUpdater, SyncAdapter}; pub struct ClientActor { - /// Adversarial controls - pub adv: crate::adversarial::Controls, - - // Address of this ClientActor. Can be used to send messages to self. - my_address: Addr, - pub(crate) client: Client, - network_adapter: PeerManagerAdapter, - network_info: NetworkInfo, - /// Identity that represents this Client at the network level. - /// It is used as part of the messages that identify this client. - node_id: PeerId, - /// Last time we announced our accounts as validators. 
- last_validator_announce_time: Option, - /// Info helper. - info_helper: InfoHelper, - - /// Last time handle_block_production method was called - block_production_next_attempt: DateTime, - - // Last time when log_summary method was called. - log_summary_timer_next_attempt: DateTime, - - block_production_started: bool, - doomslug_timer_next_attempt: DateTime, - sync_timer_next_attempt: DateTime, - sync_started: bool, - state_parts_task_scheduler: Box, - block_catch_up_scheduler: Box, - resharding_scheduler: Box, - state_parts_client_arbiter: Arbiter, - - #[cfg(feature = "sandbox")] - fastforward_delta: near_primitives::types::BlockHeightDelta, - - /// Synchronization measure to allow graceful shutdown. - /// Informs the system when a ClientActor gets dropped. - shutdown_signal: Option>, + actions: ClientActions, +} - /// Manages updating the config. - config_updater: Option, +impl Deref for ClientActor { + type Target = ClientActions; + fn deref(&self) -> &ClientActions { + &self.actions + } } -/// Blocks the program until given genesis time arrives. -fn wait_until_genesis(genesis_time: &DateTime) { - loop { - // Get chrono::Duration::num_seconds() by deducting genesis_time from now. - let duration = genesis_time.signed_duration_since(StaticClock::utc()); - let chrono_seconds = duration.num_seconds(); - // Check if number of seconds in chrono::Duration larger than zero. - if chrono_seconds <= 0 { - break; - } - info!(target: "near", "Waiting until genesis: {}d {}h {}m {}s", duration.num_days(), - (duration.num_hours() % 24), - (duration.num_minutes() % 60), - (duration.num_seconds() % 60)); - let wait = - std::cmp::min(Duration::from_secs(10), Duration::from_secs(chrono_seconds as u64)); - thread::sleep(wait); +impl DerefMut for ClientActor { + fn deref_mut(&mut self) -> &mut ClientActions { + &mut self.actions } } impl ClientActor { pub fn new( + clock: Clock, client: Client, - address: Addr, + myself_sender: ClientSenderForClient, config: ClientConfig, node_id: PeerId, network_adapter: PeerManagerAdapter, @@ -175,80 +80,34 @@ impl ClientActor { SyncJobsActor { client_addr: self_addr_clone } }, ); - if let Some(vs) = &validator_signer { - info!(target: "client", "Starting validator node: {}", vs.validator_id()); - } - let info_helper = InfoHelper::new(Some(telemetry_actor), &config, validator_signer.clone()); - - let now = Utc::now(); - Ok(ClientActor { - adv, - my_address: address, + let actions = ClientActions::new( + clock, client, - network_adapter, + myself_sender, + config, node_id, - network_info: NetworkInfo { - connected_peers: vec![], - tier1_connections: vec![], - num_connected_peers: 0, - peer_max_count: 0, - highest_height_peers: vec![], - received_bytes_per_sec: 0, - sent_bytes_per_sec: 0, - known_producers: vec![], - tier1_accounts_keys: vec![], - tier1_accounts_data: vec![], - }, - last_validator_announce_time: None, - info_helper, - block_production_next_attempt: now, - log_summary_timer_next_attempt: now, - block_production_started: false, - doomslug_timer_next_attempt: now, - sync_timer_next_attempt: now, - sync_started: false, - state_parts_task_scheduler: create_sync_job_scheduler::( - sync_jobs_actor_addr.clone(), - ), - block_catch_up_scheduler: create_sync_job_scheduler::( - sync_jobs_actor_addr.clone(), - ), - resharding_scheduler: create_sync_job_scheduler::( - sync_jobs_actor_addr, - ), - state_parts_client_arbiter: state_parts_arbiter, - - #[cfg(feature = "sandbox")] - fastforward_delta: 0, + network_adapter, + validator_signer, + telemetry_actor, 
shutdown_signal, + adv, config_updater, - }) + create_sync_job_scheduler::(sync_jobs_actor_addr.clone()), + create_sync_job_scheduler::(sync_jobs_actor_addr.clone()), + create_sync_job_scheduler::(sync_jobs_actor_addr), + state_parts_arbiter, + )?; + Ok(Self { actions }) } + + // NOTE: Do not add any more functionality to ClientActor. Add to ClientActions instead. } impl Actor for ClientActor { type Context = Context; fn started(&mut self, ctx: &mut Self::Context) { - self.start_flat_storage_creation(ctx); - - // Start syncing job. - self.start_sync(ctx); - - // Start block production tracking if have block producer info. - if self.client.validator_signer.is_some() { - self.block_production_started = true; - } - - // Start triggers - self.schedule_triggers(ctx); - - // Start catchup job. - self.catchup(ctx); - - if let Err(err) = self.client.send_network_chain_info() { - error!(target: "client", ?err, "Failed to update network chain info"); - } + self.actions.start(ctx); } } @@ -265,1790 +124,42 @@ impl ClientActor { msg: WithSpanContext, ctx: &mut Context, msg_type: &str, - f: impl FnOnce(&mut Self, Req) -> Res, + f: impl FnOnce(&mut Self, Req, &mut Context) -> Res, ) -> Res { let (_span, msg) = handler_debug_span!(target: "client", msg, msg_type); - self.check_triggers(ctx); + self.actions.check_triggers(ctx); let _span_inner = tracing::debug_span!(target: "client", "NetworkClientMessage").entered(); metrics::CLIENT_MESSAGES_COUNT.with_label_values(&[msg_type]).inc(); let timer = metrics::CLIENT_MESSAGES_PROCESSING_TIME.with_label_values(&[msg_type]).start_timer(); - let res = f(self, msg); + let res = f(self, msg, ctx); timer.observe_duration(); res } } -#[cfg(feature = "test_features")] -#[derive(actix::Message, Debug)] -#[rtype(result = "Option")] -pub enum NetworkAdversarialMessage { - AdvProduceBlocks(u64, bool), - AdvSwitchToHeight(u64), - AdvDisableHeaderSync, - AdvDisableDoomslug, - AdvGetSavedBlocks, - AdvCheckStorageConsistency, -} - -#[cfg(feature = "test_features")] -impl Handler> for ClientActor { - type Result = Option; - - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - self.wrap(msg, ctx, "NetworkAdversarialMessage", |this, msg| match msg { - NetworkAdversarialMessage::AdvDisableDoomslug => { - info!(target: "adversary", "Turning Doomslug off"); - this.adv.set_disable_doomslug(true); - this.client.doomslug.adv_disable(); - this.client.chain.adv_disable_doomslug(); - None - } - NetworkAdversarialMessage::AdvDisableHeaderSync => { - info!(target: "adversary", "Blocking header sync"); - this.adv.set_disable_header_sync(true); - None - } - NetworkAdversarialMessage::AdvProduceBlocks( - num_blocks, - only_valid, - ) => { - info!(target: "adversary", num_blocks, "Starting adversary blocks production"); - if only_valid { - this.client.adv_produce_blocks = Some(AdvProduceBlocksMode::OnlyValid); - } else { - this.client.adv_produce_blocks = Some(AdvProduceBlocksMode::All); - } - let start_height = - this.client.chain.mut_chain_store().get_latest_known().unwrap().height + 1; - let mut blocks_produced = 0; - for height in start_height.. 
{ - let block = this - .client - .produce_block(height) - .expect("block should be produced"); - if only_valid && block == None { - continue; - } - let block = block.expect("block should exist after produced"); - info!(target: "adversary", blocks_produced, num_blocks, height, "Producing adversary block"); - this.network_adapter.send( - PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::Block { block: block.clone() }, - ) - ); - let _ = this.client.start_process_block( - block.into(), - Provenance::PRODUCED, - this.get_apply_chunks_done_callback(), - ); - blocks_produced += 1; - if blocks_produced == num_blocks { - break; - } - } - None - } - NetworkAdversarialMessage::AdvSwitchToHeight(height) => { - info!(target: "adversary", "Switching to height {:?}", height); - let mut chain_store_update = this.client.chain.mut_chain_store().store_update(); - chain_store_update.save_largest_target_height(height); - chain_store_update - .adv_save_latest_known(height) - .expect("adv method should not fail"); - chain_store_update.commit().expect("adv method should not fail"); - None - } - NetworkAdversarialMessage::AdvGetSavedBlocks => { - info!(target: "adversary", "Requested number of saved blocks"); - let store = this.client.chain.chain_store().store(); - let mut num_blocks = 0; - for _ in store.iter(DBCol::Block) { - num_blocks += 1; - } - Some(num_blocks) - } - NetworkAdversarialMessage::AdvCheckStorageConsistency => { - // timeout is set to 1.5 seconds to give some room as we wait in Nightly for 2 seconds - let timeout = 1500; - info!(target: "adversary", "Check Storage Consistency, timeout set to {:?} milliseconds", timeout); - let mut genesis = near_chain_configs::GenesisConfig::default(); - genesis.genesis_height = this.client.chain.chain_store().get_genesis_height(); - let mut store_validator = near_chain::store_validator::StoreValidator::new( - this.client.validator_signer.as_ref().map(|x| x.validator_id().clone()), - genesis, - this.client.epoch_manager.clone(), - this.client.shard_tracker.clone(), - this.client.runtime_adapter.clone(), - this.client.chain.chain_store().store().clone(), - this.adv.is_archival(), - ); - store_validator.set_timeout(timeout); - store_validator.validate(); - if store_validator.is_failed() { - error!(target: "client", "Storage Validation failed, {:?}", store_validator.errors); - Some(0) - } else { - Some(store_validator.tests_done()) - } - } - }) - } -} - -impl Handler> for ClientActor { - type Result = ProcessTxResponse; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - self.wrap(msg, ctx, "ProcessTxRequest", |this: &mut Self, msg| { - let ProcessTxRequest { transaction, is_forwarded, check_only } = msg; - this.client.process_tx(transaction, is_forwarded, check_only) - }) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "BlockResponse", |this: &mut Self, msg|{ - let BlockResponse{ block, peer_id, was_requested } = msg; - debug!(target: "client", block_height = block.header().height(), block_hash = ?block.header().hash(), "BlockResponse"); - let blocks_at_height = this - .client - .chain - .chain_store() - .get_all_block_hashes_by_height(block.header().height()); - if was_requested || blocks_at_height.is_err() || blocks_at_height.as_ref().unwrap().is_empty() { - // This is a very sneaky piece of logic. - if this.maybe_receive_state_sync_blocks(&block) { - // A node is syncing its state. 
Don't consider receiving - // blocks other than the few special ones that State Sync expects. - return; - } - this.client.receive_block( - block, - peer_id, - was_requested, - this.get_apply_chunks_done_callback(), - ); - } else { - match this - .client - .epoch_manager - .get_epoch_id_from_prev_block(block.header().prev_hash()) - { - Ok(epoch_id) => { - if let Some(hashes) = blocks_at_height.unwrap().get(&epoch_id) { - if !hashes.contains(block.header().hash()) { - warn!(target: "client", "Rejecting unrequested block {}, height {}", block.header().hash(), block.header().height()); - } - } - } - _ => {} - } - } - }) - } -} - -impl Handler> for ClientActor { - type Result = Result<(), ReasonForBan>; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - self.wrap(msg, ctx, "BlockHeadersResponse", |this, msg| { - let BlockHeadersResponse(headers, peer_id) = msg; - if this.receive_headers(headers, peer_id) { - Ok(()) - } else { - warn!(target: "client", "Banning node for sending invalid block headers"); - Err(ReasonForBan::BadBlockHeader) - } - }) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "BlockApproval", |this, msg| { - let BlockApproval(approval, peer_id) = msg; - debug!(target: "client", "Receive approval {:?} from peer {:?}", approval, peer_id); - this.client.collect_block_approval(&approval, ApprovalType::PeerApproval(peer_id)); - }) - } -} - -/// StateResponse is used during StateSync and catchup. -/// It contains either StateSync header information (that tells us how many parts there are etc) or a single part. -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "StateResponse", |this, msg| { - let StateResponse(state_response_info) = msg; - let shard_id = state_response_info.shard_id(); - let hash = state_response_info.sync_hash(); - let state_response = state_response_info.take_state_response(); - - trace!(target: "sync", "Received state response shard_id: {} sync_hash: {:?} part(id/size): {:?}", - shard_id, - hash, - state_response.part().as_ref().map(|(part_id, data)| (part_id, data.len())) - ); - // Get the download that matches the shard_id and hash - - // ... It could be that the state was requested by the state sync - if let SyncStatus::StateSync(StateSyncStatus{ sync_hash, sync_status: shards_to_download }) = - &mut this.client.sync_status - { - if hash == *sync_hash { - if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { - this.client.state_sync.update_download_on_state_response_message(shard_download, hash, shard_id, state_response, &mut this.client.chain); - return; - } - } - } - - // ... 
Or one of the catchups - if let Some((state_sync, shards_to_download, _)) = - this.client.catchup_state_syncs.get_mut(&hash) - { - if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { - state_sync.update_download_on_state_response_message(shard_download, hash, shard_id, state_response, &mut this.client.chain); - return; - } - } - - error!(target: "sync", "State sync received hash {} that we're not expecting, potential malicious peer or a very delayed response.", hash); - }) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "RecvChallenge", |this, msg| { - let RecvChallenge(challenge) = msg; - match this.client.process_challenge(challenge) { - Ok(_) => {} - Err(err) => error!(target: "client", "Error processing challenge: {}", err), - } - }); - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - // SetNetworkInfo is a large message. Avoid printing it at the `debug` verbosity. - self.wrap(msg, ctx, "SetNetworkInfo", |this, msg| { - let SetNetworkInfo(network_info) = msg; - this.network_info = network_info; - }) - } -} - -#[cfg(feature = "sandbox")] -impl Handler> for ClientActor { - type Result = near_client_primitives::types::SandboxResponse; - - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Context, - ) -> near_client_primitives::types::SandboxResponse { - let (_span, msg) = handler_debug_span!(target: "client", msg); - match msg { - near_client_primitives::types::SandboxMessage::SandboxPatchState(state) => { - self.client.chain.patch_state( - near_primitives::sandbox::state_patch::SandboxStatePatch::new(state), - ); - near_client_primitives::types::SandboxResponse::SandboxNoResponse - } - near_client_primitives::types::SandboxMessage::SandboxPatchStateStatus => { - near_client_primitives::types::SandboxResponse::SandboxPatchStateFinished( - !self.client.chain.patch_state_in_progress(), - ) - } - near_client_primitives::types::SandboxMessage::SandboxFastForward(delta_height) => { - if self.fastforward_delta > 0 { - return near_client_primitives::types::SandboxResponse::SandboxFastForwardFailed( - "Consecutive fast_forward requests cannot be made while a current one is going on.".to_string()); - } - - self.fastforward_delta = delta_height; - near_client_primitives::types::SandboxResponse::SandboxNoResponse - } - near_client_primitives::types::SandboxMessage::SandboxFastForwardStatus => { - near_client_primitives::types::SandboxResponse::SandboxFastForwardFinished( - self.fastforward_delta == 0, - ) - } - } - } -} - -impl Handler> for ClientActor { - type Result = Result; - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - self.check_triggers(ctx); - - let head = self.client.chain.head()?; - let head_header = self.client.chain.get_block_header(&head.last_block_hash)?; - let latest_block_time = head_header.raw_timestamp(); - let latest_state_root = *head_header.prev_state_root(); - if msg.is_health_check { - let now = Utc::now(); - let block_timestamp = from_timestamp(latest_block_time); - if now > block_timestamp { - let elapsed = (now - block_timestamp).to_std().unwrap(); - if elapsed - > Duration::from_millis( - self.client.config.max_block_production_delay.as_millis() as u64 - * STATUS_WAIT_TIME_MULTIPLIER, - ) 
- { - return Err(StatusError::NoNewBlocks { elapsed }); - } - } - - if self.client.sync_status.is_syncing() { - return Err(StatusError::NodeIsSyncing); - } - } - let validators: Vec = self - .client - .epoch_manager - .get_epoch_block_producers_ordered(&head.epoch_id, &head.last_block_hash) - .into_chain_error()? - .into_iter() - .map(|(validator_stake, is_slashed)| ValidatorInfo { - account_id: validator_stake.take_account_id(), - is_slashed, - }) - .collect(); - - let epoch_start_height = - self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash).ok(); - - let protocol_version = self - .client - .epoch_manager - .get_epoch_protocol_version(&head.epoch_id) - .into_chain_error()?; - - let node_public_key = self.node_id.public_key().clone(); - let (validator_account_id, validator_public_key) = match &self.client.validator_signer { - Some(vs) => (Some(vs.validator_id().clone()), Some(vs.public_key())), - None => (None, None), - }; - let node_key = validator_public_key.clone(); - - let mut earliest_block_hash = None; - let mut earliest_block_height = None; - let mut earliest_block_time = None; - if let Some(earliest_block_hash_value) = self.client.chain.get_earliest_block_hash()? { - earliest_block_hash = Some(earliest_block_hash_value); - if let Ok(earliest_block) = - self.client.chain.get_block_header(&earliest_block_hash_value) - { - earliest_block_height = Some(earliest_block.height()); - earliest_block_time = Some(earliest_block.timestamp()); - } - } - // Provide more detailed information about the current state of chain. - // For now - provide info about last 50 blocks. - let detailed_debug_status = if msg.detailed { - Some(DetailedDebugStatus { - network_info: new_network_info_view(&self.client.chain, &self.network_info), - sync_status: format!( - "{} ({})", - self.client.sync_status.as_variant_name(), - display_sync_status( - &self.client.sync_status, - &self.client.chain.head()?, - &self.client.config.state_sync.sync, - ), - ), - catchup_status: self.client.get_catchup_status()?, - current_head_status: head.clone().into(), - current_header_head_status: self.client.chain.header_head()?.into(), - block_production_delay_millis: self - .client - .config - .min_block_production_delay - .as_millis() as u64, - }) - } else { - None - }; - let uptime_sec = StaticClock::utc().timestamp() - self.info_helper.boot_time_seconds; - Ok(StatusResponse { - version: self.client.config.version.clone(), - protocol_version, - latest_protocol_version: PROTOCOL_VERSION, - chain_id: self.client.config.chain_id.clone(), - rpc_addr: self.client.config.rpc_addr.clone(), - validators, - sync_info: StatusSyncInfo { - latest_block_hash: head.last_block_hash, - latest_block_height: head.height, - latest_state_root, - latest_block_time: from_timestamp(latest_block_time), - syncing: self.client.sync_status.is_syncing(), - earliest_block_hash, - earliest_block_height, - earliest_block_time, - epoch_id: Some(head.epoch_id), - epoch_start_height, - }, - validator_account_id, - validator_public_key, - node_public_key, - node_key, - uptime_sec, - detailed_debug_status, - }) - } -} - -/// Private to public API conversion. -fn make_peer_info(from: near_network::types::PeerInfo) -> near_client_primitives::types::PeerInfo { - near_client_primitives::types::PeerInfo { - id: from.id, - addr: from.addr, - account_id: from.account_id, - } -} - -/// Private to public API conversion. 
-fn make_known_producer( - from: near_network::types::KnownProducer, -) -> near_client_primitives::types::KnownProducer { - near_client_primitives::types::KnownProducer { - peer_id: from.peer_id, - account_id: from.account_id, - addr: from.addr, - next_hops: from.next_hops, - } -} - -impl Handler> for ClientActor { - type Result = Result; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - let (_span, _msg) = handler_debug_span!(target: "client", msg); - self.check_triggers(ctx); - - Ok(NetworkInfoResponse { - connected_peers: (self.network_info.connected_peers.iter()) - .map(|fpi| make_peer_info(fpi.full_peer_info.peer_info.clone())) - .collect(), - num_connected_peers: self.network_info.num_connected_peers, - peer_max_count: self.network_info.peer_max_count, - sent_bytes_per_sec: self.network_info.sent_bytes_per_sec, - received_bytes_per_sec: self.network_info.received_bytes_per_sec, - known_producers: self - .network_info - .known_producers - .iter() - .map(|p| make_known_producer(p.clone())) - .collect(), - }) - } -} - -/// `ApplyChunksDoneMessage` is a message that signals the finishing of applying chunks of a block. -/// Upon receiving this message, ClientActors knows that it's time to finish processing the blocks that -/// just finished applying chunks. -#[derive(actix::Message, Debug)] -#[rtype(result = "()")] -pub struct ApplyChunksDoneMessage; - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Self::Context, - ) -> Self::Result { - let (_span, _msg) = handler_debug_span!(target: "client", msg); - self.try_process_unfinished_blocks(); - } -} - -#[derive(Debug)] -enum SyncRequirement { - SyncNeeded { peer_id: PeerId, highest_height: BlockHeight, head: Tip }, - AlreadyCaughtUp { peer_id: PeerId, highest_height: BlockHeight, head: Tip }, - NoPeers, - AdvHeaderSyncDisabled, -} - -impl SyncRequirement { - fn sync_needed(&self) -> bool { - matches!(self, Self::SyncNeeded { .. }) - } - - fn to_metrics_string(&self) -> String { - match self { - Self::SyncNeeded { .. } => "SyncNeeded", - Self::AlreadyCaughtUp { .. } => "AlreadyCaughtUp", - Self::NoPeers => "NoPeers", - Self::AdvHeaderSyncDisabled { .. } => "AdvHeaderSyncDisabled", - } - .to_string() - } -} - -impl fmt::Display for SyncRequirement { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::SyncNeeded { peer_id, highest_height, head: my_head } => write!( - f, - "sync needed at #{} [{}]. highest height peer: {} at #{}", - my_head.height, - format_hash(my_head.last_block_hash), - peer_id, - highest_height - ), - Self::AlreadyCaughtUp { peer_id, highest_height, head: my_head } => write!( - f, - "synced at #{} [{}]. highest height peer: {} at #{}", - my_head.height, - format_hash(my_head.last_block_hash), - peer_id, - highest_height - ), - Self::NoPeers => write!(f, "no available peers"), - Self::AdvHeaderSyncDisabled => { - write!(f, "syncing disabled via adv_disable_header_sync") - } - } - } -} - -impl ClientActor { - /// Check if client Account Id should be sent and send it. - /// Account Id is sent when is not current a validator but are becoming a validator soon. - fn check_send_announce_account(&mut self, prev_block_hash: CryptoHash) { - // If no peers, there is no one to announce to. 
- if self.network_info.num_connected_peers == 0 { - debug!(target: "client", "No peers: skip account announce"); - return; - } - - // First check that we currently have an AccountId - let validator_signer = match self.client.validator_signer.as_ref() { - None => return, - Some(signer) => signer, - }; - - let now = StaticClock::instant(); - // Check that we haven't announced it too recently - if let Some(last_validator_announce_time) = self.last_validator_announce_time { - // Don't make announcement if have passed less than half of the time in which other peers - // should remove our Account Id from their Routing Tables. - if 2 * (now - last_validator_announce_time) < self.client.config.ttl_account_id_router { - return; - } - } - - debug!(target: "client", "Check announce account for {}, last announce time {:?}", validator_signer.validator_id(), self.last_validator_announce_time); - - // Announce AccountId if client is becoming a validator soon. - let next_epoch_id = unwrap_or_return!(self - .client - .epoch_manager - .get_next_epoch_id_from_prev_block(&prev_block_hash)); - - // Check client is part of the futures validators - if self.client.is_validator(&next_epoch_id, &prev_block_hash) { - debug!(target: "client", "Sending announce account for {}", validator_signer.validator_id()); - self.last_validator_announce_time = Some(now); - - let signature = validator_signer.sign_account_announce( - validator_signer.validator_id(), - &self.node_id, - &next_epoch_id, - ); - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::AnnounceAccount(AnnounceAccount { - account_id: validator_signer.validator_id().clone(), - peer_id: self.node_id.clone(), - epoch_id: next_epoch_id, - signature, - }), - )); - } - } - - /// Process the sandbox fast forward request. If the change in block height is past an epoch, - /// we fast forward to just right before the epoch, produce some blocks to get past and into - /// a new epoch, then we continue on with the residual amount to fast forward. - #[cfg(feature = "sandbox")] - fn sandbox_process_fast_forward( - &mut self, - block_height: BlockHeight, - ) -> Result, Error> { - let mut delta_height = std::mem::replace(&mut self.fastforward_delta, 0); - if delta_height == 0 { - return Ok(None); - } - - let epoch_length = self.client.config.epoch_length; - if epoch_length <= 3 { - return Err(Error::Other( - "Unsupported: fast_forward with an epoch length of 3 or less".to_string(), - )); - } - - // Check if we are at epoch boundary. If we are, do not fast forward until new - // epoch is here. Decrement the fast_forward count by 1 when a block is produced - // during this period of waiting - let block_height_wrt_epoch = block_height % epoch_length; - if epoch_length - block_height_wrt_epoch <= 3 || block_height_wrt_epoch == 0 { - // wait for doomslug to call into produce block - self.fastforward_delta = delta_height; - return Ok(None); - } - - let delta_height = if block_height_wrt_epoch + delta_height >= epoch_length { - // fast forward to just right before epoch boundary to have epoch_manager - // handle the epoch_height updates as normal. `- 3` since this is being - // done 3 blocks before the epoch ends. 
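// Illustrative sketch (editorial, not part of this patch): the height bookkeeping
// computed just below, as a standalone helper with a worked example. With
// epoch_length = 100 and block_height_wrt_epoch = 50, a requested delta of 80 would
// cross the epoch boundary, so we fast-forward 100 - 50 - 3 = 47 blocks now and carry
// the remaining 33 over to after the epoch switch. (The function name is hypothetical;
// the caller has already handled the case of being within 3 blocks of the boundary.)
fn split_fast_forward(epoch_length: u64, height_in_epoch: u64, delta: u64) -> (u64, u64) {
    if height_in_epoch + delta >= epoch_length {
        // Stop 3 blocks short of the boundary; normal production steps through it.
        let now = epoch_length - height_in_epoch - 3;
        (now, delta - now)
    } else {
        (delta, 0) // no boundary crossed: fast-forward the whole delta at once
    }
}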
- let right_before_epoch_update = epoch_length - block_height_wrt_epoch - 3; - - delta_height -= right_before_epoch_update; - self.fastforward_delta = delta_height; - right_before_epoch_update - } else { - delta_height - }; - - self.client.accrued_fastforward_delta += delta_height; - let delta_time = self.client.sandbox_delta_time(); - let new_latest_known = near_chain::types::LatestKnown { - height: block_height + delta_height, - seen: near_primitives::utils::to_timestamp(StaticClock::utc() + delta_time), - }; - - Ok(Some(new_latest_known)) - } - - fn pre_block_production(&mut self) -> Result<(), Error> { - #[cfg(feature = "sandbox")] - { - let latest_known = self.client.chain.mut_chain_store().get_latest_known()?; - if let Some(new_latest_known) = - self.sandbox_process_fast_forward(latest_known.height)? - { - self.client.chain.mut_chain_store().save_latest_known(new_latest_known.clone())?; - self.client.sandbox_update_tip(new_latest_known.height)?; - } - } - Ok(()) - } - - fn post_block_production(&mut self) { - #[cfg(feature = "sandbox")] - if self.fastforward_delta > 0 { - // Decrease the delta_height by 1 since we've produced a single block. This - // ensures that we advanced the right amount of blocks when fast forwarding - // and fast forwarding triggers regular block production in the case of - // stepping between epoch boundaries. - self.fastforward_delta -= 1; - } - } - - /// Retrieves latest height, and checks if must produce next block. - /// Otherwise wait for block arrival or suggest to skip after timeout. - fn handle_block_production(&mut self) -> Result<(), Error> { - let _span = tracing::debug_span!(target: "client", "handle_block_production").entered(); - // If syncing, don't try to produce blocks. - if self.client.sync_status.is_syncing() { - debug!(target:"client", sync_status=format!("{:#?}", self.client.sync_status), "Syncing - block production disabled"); - return Ok(()); - } - - let _ = self.client.check_and_update_doomslug_tip(); - - self.pre_block_production()?; - let head = self.client.chain.head()?; - let latest_known = self.client.chain.chain_store().get_latest_known()?; - - assert!( - head.height <= latest_known.height, - "Latest known height is invalid {} vs {}", - head.height, - latest_known.height - ); - - let epoch_id = - self.client.epoch_manager.get_epoch_id_from_prev_block(&head.last_block_hash)?; - let log_block_production_info = - if self.client.epoch_manager.is_next_block_epoch_start(&head.last_block_hash)? { - true - } else { - // the next block is still the same epoch - let epoch_start_height = - self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash)?; - latest_known.height - epoch_start_height < EPOCH_START_INFO_BLOCKS - }; - - // We try to produce block for multiple heights (up to the highest height for which we've seen 2/3 of approvals). 
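// Illustrative sketch (editorial, not part of this patch): the candidate heights
// examined below form one contiguous window, from the first height we do not know
// yet up to the highest height whose approvals crossed the 2/3 threshold in doomslug
// (helper name hypothetical):
fn production_window(latest_known: u64, largest_threshold_height: u64) -> std::ops::RangeInclusive<u64> {
    (latest_known + 1)..=largest_threshold_height
}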
- if latest_known.height + 1 <= self.client.doomslug.get_largest_height_crossing_threshold() { - debug!(target: "client", "Considering blocks for production between {} and {} ", latest_known.height + 1, self.client.doomslug.get_largest_height_crossing_threshold()); - } else { - debug!(target: "client", "Cannot produce any block: not enough approvals beyond {}", latest_known.height); - } - - let me = if let Some(me) = &self.client.validator_signer { - me.validator_id().clone() - } else { - return Ok(()); - }; - - // For debug purpose, we record the approvals we have seen so far to the future blocks - for height in latest_known.height + 1..=self.client.doomslug.get_largest_approval_height() { - let next_block_producer_account = - self.client.epoch_manager.get_block_producer(&epoch_id, height)?; - - if me == next_block_producer_account { - self.client.block_production_info.record_approvals( - height, - self.client.doomslug.approval_status_at_height(&height), - ); - } - } - - for height in - latest_known.height + 1..=self.client.doomslug.get_largest_height_crossing_threshold() - { - let next_block_producer_account = - self.client.epoch_manager.get_block_producer(&epoch_id, height)?; - - if me == next_block_producer_account { - self.client.chunk_inclusion_tracker.prepare_chunk_headers_ready_for_inclusion( - &head.last_block_hash, - self.client.chunk_endorsement_tracker.as_ref(), - )?; - let num_chunks = self - .client - .chunk_inclusion_tracker - .num_chunk_headers_ready_for_inclusion(&epoch_id, &head.last_block_hash); - let have_all_chunks = head.height == 0 - || num_chunks == self.client.epoch_manager.shard_ids(&epoch_id).unwrap().len(); - - if self.client.doomslug.ready_to_produce_block( - StaticClock::instant(), - height, - have_all_chunks, - log_block_production_info, - ) { - if let Err(err) = self.produce_block(height) { - // If there is an error, report it and let it retry on the next loop step. - error!(target: "client", height, "Block production failed: {}", err); - } else { - self.post_block_production(); - } - } - } - } - - Ok(()) - } - - fn schedule_triggers(&mut self, ctx: &mut Context) { - let wait = self.check_triggers(ctx); - - near_performance_metrics::actix::run_later(ctx, wait, move |act, ctx| { - act.schedule_triggers(ctx); - }); - } - - /// Check if the scheduled time of any "triggers" has passed, and if so, call the trigger. - /// Triggers are important functions of client, like running single step of state sync or - /// checking if we can produce a block. - /// - /// It is called before processing Actix message and also in schedule_triggers. - /// This is to ensure all triggers enjoy higher priority than any actix message. - /// Otherwise due to a bug in Actix library Actix prioritizes processing messages - /// while there are messages in mailbox. Because of that we handle scheduling - /// triggers with custom `run_timer` function instead of `run_later` in Actix. - /// - /// Returns the delay before the next time `check_triggers` should be called, which is - /// min(time until the closest trigger, 1 second). 
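// Illustrative sketch (editorial, not part of this patch) of the scheduling rule
// described above: run every trigger that is due, push its next attempt out by its
// period, then report the soonest next attempt, capped at one second. Types and
// names are simplified stand-ins for the actual trigger fields:
use std::time::{Duration, Instant};

struct Trigger {
    next_attempt: Instant,
    period: Duration,
}

fn check_triggers_sketch(triggers: &mut [Trigger], now: Instant, mut run: impl FnMut(usize)) -> Duration {
    let mut delay = Duration::from_secs(1); // upper bound on the sleep
    for (i, t) in triggers.iter_mut().enumerate() {
        if now >= t.next_attempt {
            run(i); // trigger is due: run it ...
            t.next_attempt = now + t.period; // ... and schedule its next attempt
        }
        delay = delay.min(t.next_attempt.saturating_duration_since(now));
    }
    delay
}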
- fn check_triggers(&mut self, ctx: &mut Context) -> Duration { - let _span = tracing::debug_span!(target: "client", "check_triggers").entered(); - if let Some(config_updater) = &mut self.config_updater { - config_updater.try_update(&|updateable_client_config| { - self.client.update_client_config(updateable_client_config) - }); - } - - // Check block height to trigger expected shutdown - if let Ok(head) = self.client.chain.head() { - if let Some(block_height_to_shutdown) = self.client.config.expected_shutdown.get() { - if head.height >= block_height_to_shutdown { - info!(target: "client", "Expected shutdown triggered: head block({}) >= ({:?})", head.height, block_height_to_shutdown); - if let Some(tx) = self.shutdown_signal.take() { - let _ = tx.send(()); // Ignore send signal fail, it will send again in next trigger - } - } - } - } - - self.try_process_unfinished_blocks(); - - let mut delay = Duration::from_secs(1); - let now = Utc::now(); - - let timer = metrics::CHECK_TRIGGERS_TIME.start_timer(); - if self.sync_started { - self.sync_timer_next_attempt = self.run_timer( - self.sync_wait_period(), - self.sync_timer_next_attempt, - ctx, - |act, _| act.run_sync_step(), - "sync", - ); - - delay = std::cmp::min( - delay, - self.sync_timer_next_attempt.signed_duration_since(now).to_std().unwrap_or(delay), - ); - - self.doomslug_timer_next_attempt = self.run_timer( - self.client.config.doosmslug_step_period, - self.doomslug_timer_next_attempt, - ctx, - |act, ctx| act.try_doomslug_timer(ctx), - "doomslug", - ); - delay = core::cmp::min( - delay, - self.doomslug_timer_next_attempt - .signed_duration_since(now) - .to_std() - .unwrap_or(delay), - ) - } - if self.block_production_started { - self.block_production_next_attempt = self.run_timer( - self.client.config.block_production_tracking_delay, - self.block_production_next_attempt, - ctx, - |act, _ctx| act.try_handle_block_production(), - "block_production", - ); - - let _ = self.client.check_head_progress_stalled( - self.client.config.max_block_production_delay * HEAD_STALL_MULTIPLIER, - ); - - delay = core::cmp::min( - delay, - self.block_production_next_attempt - .signed_duration_since(now) - .to_std() - .unwrap_or(delay), - ) - } - - self.log_summary_timer_next_attempt = self.run_timer( - self.client.config.log_summary_period, - self.log_summary_timer_next_attempt, - ctx, - |act, _ctx| act.log_summary(), - "log_summary", - ); - delay = core::cmp::min( - delay, - self.log_summary_timer_next_attempt - .signed_duration_since(now) - .to_std() - .unwrap_or(delay), - ); - timer.observe_duration(); - delay - } - - /// "Unfinished" blocks means that blocks that client has started the processing and haven't - /// finished because it was waiting for applying chunks to be done. This function checks - /// if there are any "unfinished" blocks that are ready to be processed again and finish processing - /// these blocks. - /// This function is called at two places, upon receiving ApplyChunkDoneMessage and `check_triggers`. - /// The job that executes applying chunks will send an ApplyChunkDoneMessage to ClientActor after - /// applying chunks is done, so when receiving ApplyChunkDoneMessage messages, ClientActor - /// calls this function to finish processing the unfinished blocks. ClientActor also calls - /// this function in `check_triggers`, because the actix queue may be blocked by other messages - /// and we want to prioritize block processing. 
- fn try_process_unfinished_blocks(&mut self) { - let _span = debug_span!(target: "client", "try_process_unfinished_blocks").entered(); - let (accepted_blocks, errors) = - self.client.postprocess_ready_blocks(self.get_apply_chunks_done_callback(), true); - if !errors.is_empty() { - error!(target: "client", ?errors, "try_process_unfinished_blocks got errors"); - } - self.process_accepted_blocks(accepted_blocks); - } - - fn try_handle_block_production(&mut self) { - let _span = debug_span!(target: "client", "try_handle_block_production").entered(); - if let Err(err) = self.handle_block_production() { - tracing::error!(target: "client", ?err, "Handle block production failed") - } - } - - fn try_doomslug_timer(&mut self, _: &mut Context) { - let _span = tracing::debug_span!(target: "client", "try_doomslug_timer").entered(); - let _ = self.client.check_and_update_doomslug_tip(); - let approvals = self.client.doomslug.process_timer(StaticClock::instant()); - - // Important to save the largest approval target height before sending approvals, so - // that if the node crashes in the meantime, we cannot get slashed on recovery - let mut chain_store_update = self.client.chain.mut_chain_store().store_update(); - chain_store_update - .save_largest_target_height(self.client.doomslug.get_largest_target_height()); - - match chain_store_update.commit() { - Ok(_) => { - let head = unwrap_or_return!(self.client.chain.head()); - if self.client.is_validator(&head.epoch_id, &head.last_block_hash) - || self.client.is_validator(&head.next_epoch_id, &head.last_block_hash) - { - for approval in approvals { - if let Err(e) = - self.client.send_approval(&self.client.doomslug.get_tip().0, approval) - { - error!("Error while sending an approval {:?}", e); - } - } - } - } - Err(e) => error!("Error while committing largest skipped height {:?}", e), - }; - } - - /// Produce block if we are block producer for given `next_height` height. - /// Can return error, should be called with `produce_block` to handle errors and reschedule. - fn produce_block(&mut self, next_height: BlockHeight) -> Result<(), Error> { - let _span = tracing::debug_span!(target: "client", "produce_block", next_height).entered(); - if let Some(block) = self.client.produce_block_on_head(next_height, false)? { - // If we produced the block, send it out before we apply the block. - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::Block { block: block.clone() }, - )); - // We’ve produced the block so that counts as validated block. 
- let block = MaybeValidated::from_validated(block); - let res = self.client.start_process_block( - block, - Provenance::PRODUCED, - self.get_apply_chunks_done_callback(), - ); - if let Err(e) = &res { - match e { - near_chain::Error::ChunksMissing(_) => { - debug!(target: "client", "chunks missing"); - // missing chunks were already handled in Client::process_block, we don't need to - // do anything here - return Ok(()); - } - _ => { - error!(target: "client", ?res, "Failed to process freshly produced block"); - byzantine_assert!(false); - return res.map_err(|err| err.into()); - } - } - } - } - Ok(()) - } - - fn send_chunks_metrics(&mut self, block: &Block) { - let chunks = block.chunks(); - for (chunk, &included) in chunks.iter().zip(block.header().chunk_mask().iter()) { - if included { - self.info_helper.chunk_processed( - chunk.shard_id(), - chunk.prev_gas_used(), - chunk.prev_balance_burnt(), - ); - } else { - self.info_helper.chunk_skipped(chunk.shard_id()); - } - } - } - - fn send_block_metrics(&mut self, block: &Block) { - let chunks_in_block = block.header().chunk_mask().iter().filter(|&&m| m).count(); - let gas_used = Block::compute_gas_used(block.chunks().iter(), block.header().height()); - - let last_final_hash = block.header().last_final_block(); - let last_final_ds_hash = block.header().last_ds_final_block(); - let last_final_block_height = self - .client - .chain - .get_block(&last_final_hash) - .map_or(0, |block| block.header().height()); - let last_final_ds_block_height = self - .client - .chain - .get_block(&last_final_ds_hash) - .map_or(0, |block| block.header().height()); - - let epoch_height = - self.client.epoch_manager.get_epoch_height_from_prev_block(block.hash()).unwrap_or(0); - let epoch_start_height = self - .client - .epoch_manager - .get_epoch_start_height(&last_final_hash) - .unwrap_or(last_final_block_height); - let last_final_block_height_in_epoch = - last_final_block_height.checked_sub(epoch_start_height); - - self.info_helper.block_processed( - gas_used, - chunks_in_block as u64, - block.header().next_gas_price(), - block.header().total_supply(), - last_final_block_height, - last_final_ds_block_height, - epoch_height, - last_final_block_height_in_epoch, - ); - } - - /// Process all blocks that were accepted by calling other relevant services. - fn process_accepted_blocks(&mut self, accepted_blocks: Vec) { - let _span = tracing::debug_span!( - target: "client", - "process_accepted_blocks", - num_blocks = accepted_blocks.len()) - .entered(); - for accepted_block in accepted_blocks { - let block = self.client.chain.get_block(&accepted_block).unwrap().clone(); - self.send_chunks_metrics(&block); - self.send_block_metrics(&block); - self.check_send_announce_account(*block.header().last_final_block()); - } - } - - /// Returns the callback function that will be passed to various functions that may trigger - /// the processing of new blocks. This callback will be called at the end of applying chunks - /// for every block. 
- fn get_apply_chunks_done_callback(&self) -> DoneApplyChunkCallback { - let addr = self.my_address.clone(); - Arc::new(move |_| { - addr.do_send(ApplyChunksDoneMessage {}.with_span_context()); - }) - } - - fn receive_headers(&mut self, headers: Vec, peer_id: PeerId) -> bool { - let _span = - debug_span!(target: "client", "receive_headers", num_headers = headers.len(), ?peer_id) - .entered(); - if headers.is_empty() { - info!(target: "client", "Received an empty set of block headers"); - return true; - } - match self.client.sync_block_headers(headers) { - Ok(_) => true, - Err(err) => { - if err.is_bad_data() { - error!(target: "client", ?err, "Error processing sync blocks"); - false - } else { - debug!(target: "client", ?err, "Block headers refused by chain"); - true - } - } - } - } - - /// Check whether need to (continue) sync. - /// Also return higher height with known peers at that height. - fn syncing_info(&self) -> Result { - if self.adv.disable_header_sync() { - return Ok(SyncRequirement::AdvHeaderSyncDisabled); - } - - let head = self.client.chain.head()?; - let is_syncing = self.client.sync_status.is_syncing(); - - // Only consider peers whose latest block is not invalid blocks - let eligible_peers: Vec<_> = self - .network_info - .highest_height_peers - .iter() - .filter(|p| !self.client.chain.is_block_invalid(&p.highest_block_hash)) - .collect(); - metrics::PEERS_WITH_INVALID_HASH - .set(self.network_info.highest_height_peers.len() as i64 - eligible_peers.len() as i64); - let peer_info = if let Some(peer_info) = eligible_peers.choose(&mut thread_rng()) { - peer_info - } else { - return Ok(SyncRequirement::NoPeers); - }; - - let peer_id = peer_info.peer_info.id.clone(); - let highest_height = peer_info.highest_block_height; - - if is_syncing { - if highest_height <= head.height { - Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head }) - } else { - Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head }) - } - } else { - if highest_height > head.height + self.client.config.sync_height_threshold { - Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head }) - } else { - Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head }) - } - } - } - - fn start_flat_storage_creation(&mut self, ctx: &mut Context) { - if !self.client.config.flat_storage_creation_enabled { - return; - } - match self.client.run_flat_storage_creation_step() { - Ok(false) => {} - Ok(true) => { - return; - } - Err(err) => { - error!(target: "client", "Error occurred during flat storage creation step: {:?}", err); - } - } - - near_performance_metrics::actix::run_later( - ctx, - self.client.config.flat_storage_creation_period, - move |act, ctx| { - act.start_flat_storage_creation(ctx); - }, - ); - } - - /// Starts syncing and then switches to either syncing or regular mode. - fn start_sync(&mut self, ctx: &mut Context) { - // Wait for connections reach at least minimum peers unless skipping sync. - if self.network_info.num_connected_peers < self.client.config.min_num_peers - && !self.client.config.skip_sync_wait - { - near_performance_metrics::actix::run_later( - ctx, - self.client.config.sync_step_period, - move |act, ctx| { - act.start_sync(ctx); - }, - ); - return; - } - self.sync_started = true; - - // Sync loop will be started by check_triggers. - } - - /// Select the block hash we are using to sync state. It will sync with the state before applying the - /// content of such block. - /// - /// The selected block will always be the first block on a new epoch: - /// . 
- fn find_sync_hash(&mut self) -> Result { - let header_head = self.client.chain.header_head()?; - let sync_hash = header_head.last_block_hash; - let epoch_start_sync_hash = - StateSync::get_epoch_start_sync_hash(&mut self.client.chain, &sync_hash)?; - - let genesis_hash = self.client.chain.genesis().hash(); - tracing::debug!( - target: "sync", - ?header_head, - ?sync_hash, - ?epoch_start_sync_hash, - ?genesis_hash, - "find_sync_hash"); - assert_ne!(&epoch_start_sync_hash, genesis_hash); - Ok(epoch_start_sync_hash) - } - - /// Runs catchup on repeat, if this client is a validator. - /// Schedules itself again if it was not ran as response to state parts job result - fn catchup(&mut self, ctx: &mut Context) { - { - // An extra scope to limit the lifetime of the span. - let _span = tracing::debug_span!(target: "client", "catchup").entered(); - if let Err(err) = self.client.run_catchup( - &self.network_info.highest_height_peers, - &self.state_parts_task_scheduler, - &self.block_catch_up_scheduler, - &self.resharding_scheduler, - self.get_apply_chunks_done_callback(), - &self.state_parts_client_arbiter.handle(), - ) { - error!(target: "client", "{:?} Error occurred during catchup for the next epoch: {:?}", self.client.validator_signer.as_ref().map(|vs| vs.validator_id()), err); - } - } - - near_performance_metrics::actix::run_later( - ctx, - self.client.config.catchup_step_period, - move |act, ctx| { - act.catchup(ctx); - }, - ); - } - - /// Runs given callback if the time now is at least `next_attempt`. - /// Returns time for next run which should be made based on given `delay` between runs. - fn run_timer( - &mut self, - delay: Duration, - next_attempt: DateTime, - ctx: &mut Context, - f: F, - timer_label: &str, - ) -> DateTime - where - F: FnOnce(&mut Self, &mut ::Context) + 'static, - { - let now = Utc::now(); - if now < next_attempt { - return next_attempt; - } - - let timer = - metrics::CLIENT_TRIGGER_TIME_BY_TYPE.with_label_values(&[timer_label]).start_timer(); - f(self, ctx); - timer.observe_duration(); - - now.checked_add_signed(chrono::Duration::from_std(delay).unwrap()).unwrap() - } - - fn sync_wait_period(&self) -> Duration { - if let Ok(sync) = self.syncing_info() { - if !sync.sync_needed() { - // If we don't need syncing - retry the sync call rarely. - self.client.config.sync_check_period - } else { - // If we need syncing - retry the sync call often. - self.client.config.sync_step_period - } - } else { - self.client.config.sync_step_period - } - } - - /// Main syncing job responsible for syncing client with other peers. - /// Runs itself iff it was not ran as reaction for message with results of - /// finishing state part job - fn run_sync_step(&mut self) { - let _span = tracing::debug_span!(target: "client", "run_sync_step").entered(); - - macro_rules! unwrap_and_report (($obj: expr) => (match $obj { - Ok(v) => v, - Err(err) => { - error!(target: "sync", "Sync: Unexpected error: {}", err); - return; - } - })); - - let currently_syncing = self.client.sync_status.is_syncing(); - let sync = unwrap_and_report!(self.syncing_info()); - self.info_helper.update_sync_requirements_metrics(sync.to_metrics_string()); - - match sync { - SyncRequirement::AlreadyCaughtUp { .. } - | SyncRequirement::NoPeers - | SyncRequirement::AdvHeaderSyncDisabled => { - if currently_syncing { - // Initial transition out of "syncing" state. 
- debug!(target: "sync", prev_sync_status = ?self.client.sync_status, "disabling sync"); - self.client.sync_status.update(SyncStatus::NoSync); - // Announce this client's account id if their epoch is coming up. - let head = unwrap_and_report!(self.client.chain.head()); - self.check_send_announce_account(head.prev_block_hash); - } - } - - SyncRequirement::SyncNeeded { highest_height, .. } => { - let mut notify_start_sync = false; - if !currently_syncing { - info!(target: "client", ?sync, "enabling sync"); - } - // Run each step of syncing separately. - unwrap_and_report!(self.client.header_sync.run( - &mut self.client.sync_status, - &mut self.client.chain, - highest_height, - &self.network_info.highest_height_peers - )); - // Only body / state sync if header height is close to the latest. - let header_head = unwrap_and_report!(self.client.chain.header_head()); - - // Sync state if already running sync state or if block sync is too far. - let sync_state = match self.client.sync_status { - SyncStatus::StateSync(_) => true, - _ if header_head.height - >= highest_height - .saturating_sub(self.client.config.block_header_fetch_horizon) => - { - unwrap_and_report!(self.client.block_sync.run( - &mut self.client.sync_status, - &self.client.chain, - highest_height, - &self.network_info.highest_height_peers - )) - } - _ => false, - }; - if sync_state { - match self.client.sync_status { - SyncStatus::StateSync(_) => (), - _ => { - let sync_hash = unwrap_and_report!(self.find_sync_hash()); - if !self.client.config.archive { - unwrap_and_report!(self - .client - .chain - .reset_data_pre_state_sync(sync_hash)); - } - self.client.sync_status.update(SyncStatus::StateSync( - StateSyncStatus { sync_hash, sync_status: HashMap::default() }, - )); - // This is the first time we run state sync. - notify_start_sync = true; - } - }; - let sync_hash = match &self.client.sync_status { - SyncStatus::StateSync(s) => s.sync_hash, - _ => unreachable!("Sync status should have been StateSync!"), - }; - - let me = - self.client.validator_signer.as_ref().map(|x| x.validator_id().clone()); - let block_header = - unwrap_and_report!(self.client.chain.get_block_header(&sync_hash)); - let prev_hash = *block_header.prev_hash(); - let epoch_id = block_header.epoch_id().clone(); - let shards_to_sync: Vec<_> = self - .client - .epoch_manager - .shard_ids(&epoch_id) - .unwrap() - .into_iter() - .filter(|&shard_id| { - cares_about_shard_this_or_next_epoch( - me.as_ref(), - &prev_hash, - shard_id, - true, - &self.client.shard_tracker, - ) - }) - .collect(); - - let use_colour = - matches!(self.client.config.log_summary_style, LogSummaryStyle::Colored); - - // Notify each shard to sync. - if notify_start_sync { - let shard_layout = self - .client - .epoch_manager - .get_shard_layout(&epoch_id) - .expect("Cannot get shard layout"); - for &shard_id in &shards_to_sync { - let shard_uid = - ShardUId::from_shard_id_and_layout(shard_id, &shard_layout); - match self.client.state_sync_adapter.clone().read() { - Ok(sync_adapter) => sync_adapter.send( - shard_uid, - (SyncMessage::StartSync(SyncShardInfo { - shard_uid, - sync_hash, - })) - .with_span_context(), - ), - Err(_) => { - error!(target:"client", "State sync adapter lock is poisoned.") - } - } - } - } - - let now = StaticClock::utc(); - - // FIXME: it checks if the block exists.. but I have no idea why.. - // seems that we don't really use this block in case of catchup - we use it only for state sync. - // Seems it is related to some bug with block getting orphaned after state sync? 
but not sure. - let (request_block, have_block) = - unwrap_and_report!(self.sync_block_status(&prev_hash, now)); - - if request_block { - self.client.last_time_sync_block_requested = Some(now); - if let Some(peer_info) = - self.network_info.highest_height_peers.choose(&mut thread_rng()) - { - let id = peer_info.peer_info.id.clone(); - - for hash in - vec![*block_header.prev_hash(), *block_header.hash()].into_iter() - { - self.client.request_block(hash, id.clone()); - } - } - } - if have_block { - self.client.last_time_sync_block_requested = None; - } - - let state_sync_status = match &mut self.client.sync_status { - SyncStatus::StateSync(s) => s, - _ => unreachable!("Sync status should have been StateSync!"), - }; - match unwrap_and_report!(self.client.state_sync.run( - &me, - sync_hash, - &mut state_sync_status.sync_status, - &mut self.client.chain, - self.client.epoch_manager.as_ref(), - &self.network_info.highest_height_peers, - shards_to_sync, - &self.state_parts_task_scheduler, - &self.resharding_scheduler, - &self.state_parts_client_arbiter.handle(), - use_colour, - self.client.runtime_adapter.clone(), - )) { - StateSyncResult::InProgress => (), - StateSyncResult::Completed => { - if !have_block { - trace!(target: "sync", "Sync done. Waiting for sync block."); - return; - } - info!(target: "sync", "State sync: all shards are done"); - - let mut block_processing_artifacts = BlockProcessingArtifact::default(); - - unwrap_and_report!(self.client.chain.reset_heads_post_state_sync( - &me, - sync_hash, - &mut block_processing_artifacts, - self.get_apply_chunks_done_callback(), - )); - - self.client - .process_block_processing_artifact(block_processing_artifacts); - self.client.sync_status.update(SyncStatus::BlockSync { - start_height: 0, - current_height: 0, - highest_height: 0, - }); - } - } - } - } - } - } - - /// Verifies if the node possesses sync block. - /// It is the last block of the previous epoch. - /// If the block is absent, the node requests it from peers. - fn sync_block_status( - &self, - prev_hash: &CryptoHash, - now: DateTime, - ) -> Result<(bool, bool), near_chain::Error> { - let (request_block, have_block) = if !self.client.chain.block_exists(prev_hash)? { - let timeout = - chrono::Duration::from_std(self.client.config.state_sync_timeout).unwrap(); - match self.client.last_time_sync_block_requested { - None => (true, false), - Some(last_time) => { - if (now - last_time) >= timeout { - tracing::error!( - target: "sync", - %prev_hash, - ?timeout, - "State sync: block request timed out"); - (true, false) - } else { - (false, false) - } - } - } - } else { - (false, true) - }; - Ok((request_block, have_block)) - } - - /// Print current summary. - fn log_summary(&mut self) { - let _span = tracing::debug_span!(target: "client", "log_summary").entered(); - self.info_helper.log_summary( - &self.client, - &self.node_id, - &self.network_info, - &self.config_updater, - ) - } - - /// Checks if the node is syncing its State and applies special logic in that case. - /// A node usually ignores blocks that are too far ahead, but in case of a node syncing its state it is looking for 2 specific blocks: - /// * The first block of the new epoch - /// * The last block of the prev epoch - /// Returns whether the node is syncing its state. - fn maybe_receive_state_sync_blocks(&mut self, block: &Block) -> bool { - let SyncStatus::StateSync(StateSyncStatus { sync_hash, .. 
}) = self.client.sync_status - else { - return false; - }; - if let Ok(header) = self.client.chain.get_block_header(&sync_hash) { - let block: MaybeValidated = (*block).clone().into(); - let block_hash = *block.hash(); - if let Err(err) = self.client.chain.validate_block(&block) { - byzantine_assert!(false); - error!(target: "client", ?err, ?block_hash, "Received an invalid block during state sync"); - } - // Notice that two blocks are saved differently: - // * save_block() for one block. - // * save_orphan() for another block. - if &block_hash == header.prev_hash() { - // The last block of the previous epoch. - if let Err(err) = self.client.chain.save_block(block) { - error!(target: "client", ?err, ?block_hash, "Failed to save a block during state sync"); - } - } else if block_hash == sync_hash { - // The first block of the new epoch. - self.client.chain.save_orphan(block, Provenance::NONE, None, false); - } - } - true - } -} - -impl Drop for ClientActor { - fn drop(&mut self) { - let _span = tracing::debug_span!(target: "client", "drop").entered(); - self.state_parts_client_arbiter.stop(); - } -} - -impl Handler> for ClientActor { - type Result = (); +impl Handler> for ClientActor +where + T: actix::Message + Debug, + ClientActions: ClientActionHandler, + T::Result: actix::dev::MessageResponse>, +{ + type Result = T::Result; - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Self::Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) { - // We are doing catchup - sync.set_apply_result(msg.shard_id, msg.apply_result); - } else { - self.client.state_sync.set_apply_result(msg.shard_id, msg.apply_result); - } - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Self::Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - if let Some((_, _, blocks_catch_up_state)) = - self.client.catchup_state_syncs.get_mut(&msg.sync_hash) - { - assert!(blocks_catch_up_state.scheduled_blocks.remove(&msg.block_hash)); - blocks_catch_up_state - .processed_blocks - .insert(msg.block_hash, msg.results.into_iter().map(|res| res.1).collect_vec()); - } else { - panic!("block catch up processing result from unknown sync hash"); - } - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Self::Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) { - // We are doing catchup - sync.set_resharding_result(msg.shard_id, msg.new_state_roots); - } else { - self.client.state_sync.set_resharding_result(msg.shard_id, msg.new_state_roots); - } - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Self::Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - match msg { - ShardsManagerResponse::ChunkCompleted { partial_chunk, shard_chunk } => { - self.client.on_chunk_completed( - partial_chunk, - shard_chunk, - self.get_apply_chunks_done_callback(), - ); - } - ShardsManagerResponse::InvalidChunk(encoded_chunk) => { 
- self.client.on_invalid_chunk(encoded_chunk); - } - ShardsManagerResponse::ChunkHeaderReadyForInclusion { - chunk_header, - chunk_producer, - } => { - self.client - .chunk_inclusion_tracker - .mark_chunk_header_ready_for_inclusion(chunk_header, chunk_producer); - } - } - } -} - -impl Handler> for ClientActor { - type Result = Result; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - - Ok(self.client.config.clone()) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, _: &mut Context) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - // TODO - // process messages from SyncActors + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) -> Self::Result { + self.wrap(msg, ctx, std::any::type_name::(), |this, msg, _| this.actions.handle(msg)) } } +// This one requires the context for further scheduling of messages, so +// we can't use the generic wrapper above. impl Handler> for ClientActor { type Result = (); - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - let peer_id = msg.peer_id.clone(); - let attempts_remaining = msg.attempts_remaining; - match self.client.process_chunk_state_witness(msg.witness, msg.peer_id, None) { - Err(err) => { - tracing::error!(target: "client", ?err, "Error processing chunk state witness"); - } - Ok(Some(witness)) => { - if attempts_remaining > 0 { - ctx.run_later(Duration::from_millis(100), move |_, ctx| { - ctx.address().do_send( - ChunkStateWitnessMessage { - witness, - peer_id, - attempts_remaining: attempts_remaining - 1, - } - .with_span_context(), - ); - }); - } else { - tracing::error!(target: "client", "Failed to process chunk state witness even after 5 tries due to missing parent block"); - } - } - Ok(None) => {} - } - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _: &mut Context, - ) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - if let Err(err) = self.client.process_chunk_endorsement(msg.0) { - tracing::error!(target: "client", ?err, "Error processing chunk endorsement"); - } + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg, ctx, "ChunkStateWitnessMessage", |this, msg, ctx| { + this.actions.handle_state_witness_message(msg, ctx) + }) } } @@ -2059,8 +170,31 @@ pub fn random_seed_from_thread() -> RngSeed { rng_seed } +/// Blocks the program until given genesis time arrives. +fn wait_until_genesis(genesis_time: &DateTime) { + loop { + // Get chrono::Duration::num_seconds() by deducting genesis_time from now. + let duration = genesis_time.signed_duration_since(StaticClock::utc()); + let chrono_seconds = duration.num_seconds(); + // Check if number of seconds in chrono::Duration larger than zero. 
+ if chrono_seconds <= 0 { + break; + } + tracing::info!(target: "near", "Waiting until genesis: {}d {}h {}m {}s", duration.num_days(), + (duration.num_hours() % 24), + (duration.num_minutes() % 60), + (duration.num_seconds() % 60)); + let wait = std::cmp::min( + std::time::Duration::from_secs(10), + std::time::Duration::from_secs(chrono_seconds as u64), + ); + std::thread::sleep(wait); + } +} + /// Starts client in a separate Arbiter (thread). pub fn start_client( + clock: Clock, client_config: ClientConfig, chain_genesis: ChainGenesis, epoch_manager: Arc, @@ -2099,8 +233,9 @@ pub fn start_client( let resharding_handle = client.chain.resharding_handle.clone(); let client_addr = ClientActor::start_in_arbiter(&client_arbiter_handle, move |ctx| { ClientActor::new( + clock, client, - ctx.address(), + ctx.address().with_auto_span_context().into_multi_sender(), client_config, node_id, network_adapter, diff --git a/chain/client/src/debug.rs b/chain/client/src/debug.rs index fe5dfc7568e..5f12e17618e 100644 --- a/chain/client/src/debug.rs +++ b/chain/client/src/debug.rs @@ -16,7 +16,7 @@ use near_client_primitives::{ types::StatusError, }; use near_epoch_manager::EpochManagerAdapter; -use near_o11y::{handler_debug_span, log_assert, OpenTelemetrySpanExt, WithSpanContext}; +use near_o11y::{handler_debug_span, log_assert, WithSpanContext}; use near_performance_metrics_macros::perf; use near_primitives::state_sync::get_num_state_parts; use near_primitives::types::{AccountId, BlockHeight, NumShards, ShardId, ValidatorInfoIdentifier}; diff --git a/chain/client/src/lib.rs b/chain/client/src/lib.rs index 5e92ea7b56c..af34791cae9 100644 --- a/chain/client/src/lib.rs +++ b/chain/client/src/lib.rs @@ -10,7 +10,7 @@ pub use near_client_primitives::types::{ pub use crate::client::{Client, ProduceChunkResult}; #[cfg(feature = "test_features")] -pub use crate::client_actor::NetworkAdversarialMessage; +pub use crate::client_actions::NetworkAdversarialMessage; pub use crate::client_actor::{start_client, ClientActor}; pub use crate::config_updater::ConfigUpdater; pub use crate::sync::adapter::{SyncAdapter, SyncMessage}; @@ -25,6 +25,7 @@ pub mod adversarial; mod chunk_distribution_network; mod chunk_inclusion_tracker; mod client; +pub mod client_actions; mod client_actor; mod config_updater; pub mod debug; diff --git a/chain/client/src/sync/sync_actor.rs b/chain/client/src/sync/sync_actor.rs index bb853868250..02b9b2dfb96 100644 --- a/chain/client/src/sync/sync_actor.rs +++ b/chain/client/src/sync/sync_actor.rs @@ -1,7 +1,7 @@ use super::adapter::{SyncMessage as ClientSyncMessage, SyncShardInfo}; use near_async::messaging::Sender; use near_network::types::{PeerManagerMessageRequest, StateSyncResponse}; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext}; +use near_o11y::{handler_debug_span, WithSpanContext}; use near_performance_metrics_macros::perf; use near_primitives::hash::CryptoHash; use near_store::ShardUId; diff --git a/chain/client/src/sync_jobs_actor.rs b/chain/client/src/sync_jobs_actor.rs index 0f5c1394146..2411fc21423 100644 --- a/chain/client/src/sync_jobs_actor.rs +++ b/chain/client/src/sync_jobs_actor.rs @@ -8,7 +8,7 @@ use near_chain::chain::{ }; use near_chain::resharding::ReshardingRequest; use near_chain::Chain; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; +use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt}; use near_performance_metrics_macros::perf; use near_primitives::state_part::PartId; use 
near_primitives::state_sync::StatePartKey; diff --git a/chain/client/src/test_utils/setup.rs b/chain/client/src/test_utils/setup.rs index e405c39cb2f..a99512c3b7a 100644 --- a/chain/client/src/test_utils/setup.rs +++ b/chain/client/src/test_utils/setup.rs @@ -185,8 +185,9 @@ pub fn setup( ) .unwrap(); let client_actor = ClientActor::new( + Clock::real(), // TODO: use fake clock client, - ctx.address(), + ctx.address().with_auto_span_context().into_multi_sender(), config, PeerId::new(PublicKey::empty(KeyType::ED25519)), network_adapter, diff --git a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs index ef8679f352a..548e84d9742 100644 --- a/chain/client/src/view_client.rs +++ b/chain/client/src/view_client.rs @@ -33,7 +33,7 @@ use near_network::types::{ NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, ReasonForBan, StateResponseInfo, StateResponseInfoV2, }; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; +use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt}; use near_performance_metrics_macros::perf; use near_primitives::block::{Block, BlockHeader}; use near_primitives::epoch_manager::epoch_info::EpochInfo; diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index 1142eef245b..36f81d834c9 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -38,7 +38,7 @@ use lru::LruCache; use near_async::messaging::SendAsync; use near_async::time; use near_crypto::Signature; -use near_o11y::{handler_debug_span, log_assert, OpenTelemetrySpanExt, WithSpanContext}; +use near_o11y::{handler_debug_span, log_assert, WithSpanContext}; use near_performance_metrics_macros::perf; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index 6b1d70afad6..cace48ae3ff 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -24,7 +24,7 @@ use actix::{Actor as _, AsyncContext as _}; use anyhow::Context as _; use near_async::messaging::{SendAsync, Sender}; use near_async::time; -use near_o11y::{handler_debug_span, handler_trace_span, OpenTelemetrySpanExt, WithSpanContext}; +use near_o11y::{handler_debug_span, handler_trace_span, WithSpanContext}; use near_performance_metrics_macros::perf; use near_primitives::block::GenesisId; use near_primitives::network::{AnnounceAccount, PeerId}; diff --git a/chain/network/src/test_utils.rs b/chain/network/src/test_utils.rs index 42f070584e6..4806752517f 100644 --- a/chain/network/src/test_utils.rs +++ b/chain/network/src/test_utils.rs @@ -8,7 +8,7 @@ use actix::{Actor, ActorContext, Context, Handler}; use futures::{future, Future, FutureExt}; use near_async::messaging::{CanSend, MessageWithCallback}; use near_crypto::{KeyType, SecretKey}; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext}; +use near_o11y::{handler_debug_span, WithSpanContext}; use near_primitives::hash::hash; use near_primitives::network::PeerId; use near_primitives::types::EpochId; diff --git a/chain/telemetry/src/lib.rs b/chain/telemetry/src/lib.rs index 11961aaa378..7e39a154bc6 100644 --- a/chain/telemetry/src/lib.rs +++ b/chain/telemetry/src/lib.rs @@ -3,7 +3,7 @@ mod metrics; use actix::{Actor, Addr, Context, Handler}; use awc::{Client, Connector}; use futures::FutureExt; -use 
near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt};
+use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt};
 use near_performance_metrics_macros::perf;
 use near_primitives::static_clock::StaticClock;
 use std::ops::Sub;
diff --git a/core/async/src/examples/actix_component.rs b/core/async/src/examples/actix_component.rs
new file mode 100644
index 00000000000..5daafbd1d05
--- /dev/null
+++ b/core/async/src/examples/actix_component.rs
@@ -0,0 +1,123 @@
+use crate as near_async; // only needed because we're in this crate itself
+use crate::futures::{DelayedActionRunner, DelayedActionRunnerExt};
+use crate::messaging::{AsyncSender, SendAsync, Sender};
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use near_async_derive::{MultiSend, MultiSendMessage, MultiSenderFrom};
+use std::ops::{Deref, DerefMut};
+
+#[derive(actix::Message, Debug, Clone, PartialEq, Eq)]
+#[rtype(result = "ExampleResponse")]
+pub struct ExampleRequest {
+    pub id: u32,
+}
+
+#[derive(actix::MessageResponse, Debug, Clone, PartialEq, Eq)]
+pub struct ExampleResponse {
+    pub id: u32,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PeriodicRequest {
+    pub id: u32,
+}
+
+/// An example component that represents the backing state of an actor.
+///
+/// It supports two functionalities: processing of ExampleRequest, as well as
+/// sending a PeriodicRequest every second. We'll be testing both of these
+/// functionalities.
+pub struct ExampleComponent {
+    next_periodic_request_id: u32,
+    periodic_request_sender: Sender<PeriodicRequest>,
+}
+
+impl ExampleComponent {
+    pub fn new(periodic_request_sender: Sender<PeriodicRequest>) -> Self {
+        Self { next_periodic_request_id: 0, periodic_request_sender }
+    }
+
+    /// Example function that processes a request received by the actor.
+    pub fn process_request(&mut self, request: ExampleRequest) -> ExampleResponse {
+        ExampleResponse { id: request.id }
+    }
+
+    /// Example start function that is called at the start of the actor,
+    /// to schedule timers.
+    pub fn start(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        self.schedule_periodic_request(ctx);
+    }
+
+    fn schedule_periodic_request(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        ctx.run_later(std::time::Duration::from_secs(1), |component, ctx| {
+            component
+                .periodic_request_sender
+                .send(PeriodicRequest { id: component.next_periodic_request_id });
+            component.next_periodic_request_id += 1;
+            component.schedule_periodic_request(ctx);
+        });
+    }
+}
+
+/// Example actix Actor. Actors should have nothing but a shell that
+/// forwards messages to the implementing component, because in the
+/// TestLoop tests we aren't able to use this part at all.
+struct ExampleActor {
+    component: ExampleComponent,
+}
+
+// This Deref and DerefMut is used to support the DelayedActionRunner.
+impl Deref for ExampleActor {
+    type Target = ExampleComponent;
+
+    fn deref(&self) -> &Self::Target {
+        &self.component
+    }
+}
+
+impl DerefMut for ExampleActor {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.component
+    }
+}
+
+impl actix::Actor for ExampleActor {
+    type Context = actix::Context<Self>;
+
+    fn started(&mut self, ctx: &mut Self::Context) {
+        self.component.start(ctx);
+    }
+}
+
+impl actix::Handler<ExampleRequest> for ExampleActor {
+    type Result = ExampleResponse;
+
+    fn handle(&mut self, msg: ExampleRequest, _ctx: &mut Self::Context) -> Self::Result {
+        self.component.process_request(msg)
+    }
+}
+
+/// Typically an actor would handle multiple messages, and this is where
+/// multisenders come in handy. For this example we have only one message, but
+/// we still demonstrate the use of multisenders.
+#[derive(MultiSend, MultiSenderFrom, MultiSendMessage)]
+#[multi_send_message_derive(Debug)]
+pub struct ExampleComponentAdapter {
+    pub example: AsyncSender<ExampleRequest, ExampleResponse>,
+}
+
+/// Just another component that will send a request to the ExampleComponent.
+pub struct OuterComponent {
+    example: ExampleComponentAdapter,
+}
+
+impl OuterComponent {
+    pub fn new(example: ExampleComponentAdapter) -> Self {
+        Self { example }
+    }
+
+    pub fn call_example_component_for_response(&self, id: u32) -> BoxFuture<'static, u32> {
+        let response = self.example.send_async(ExampleRequest { id });
+        async move { response.await.unwrap().id }.boxed()
+    }
+}
diff --git a/core/async/src/examples/actix_component_test.rs b/core/async/src/examples/actix_component_test.rs
new file mode 100644
index 00000000000..6f2c12bd9ac
--- /dev/null
+++ b/core/async/src/examples/actix_component_test.rs
@@ -0,0 +1,102 @@
+use super::actix_component::{
+    ExampleComponent, ExampleComponentAdapterMessage, OuterComponent, PeriodicRequest,
+};
+use crate::futures::FutureSpawnerExt;
+use crate::messaging::{IntoMultiSender, IntoSender};
+use crate::test_loop::event_handler::{capture_events, LoopEventHandler};
+use crate::test_loop::futures::{
+    drive_delayed_action_runners, drive_futures, TestLoopDelayedActionEvent, TestLoopTask,
+};
+use crate::test_loop::TestLoopBuilder;
+use derive_enum_from_into::{EnumFrom, EnumTryInto};
+use std::sync::Arc;
+use time::Duration;
+
+#[derive(derive_more::AsMut, derive_more::AsRef)]
+struct ExampleComponentTestData {
+    dummy: (),
+    example: ExampleComponent,
+    outer: OuterComponent,
+    periodic_requests_captured: Vec<PeriodicRequest>,
+}
+
+#[derive(Debug, EnumTryInto, EnumFrom)]
+enum ExampleComponentTestEvent {
+    PeriodicRequest(PeriodicRequest),
+    ExampleRequest(ExampleComponentAdapterMessage),
+    // Needed to support DelayedActionRunner on the ExampleComponent.
+    DelayedAction(TestLoopDelayedActionEvent<ExampleComponent>),
+    // Arc is needed to support futures.
+    Task(Arc<TestLoopTask>),
+}
+
+fn example_handler() -> LoopEventHandler<ExampleComponent, ExampleComponentAdapterMessage> {
+    LoopEventHandler::new_simple(
+        |event: ExampleComponentAdapterMessage, data: &mut ExampleComponent| match event {
+            ExampleComponentAdapterMessage::_example(request) => {
+                let response = data.process_request(request.message);
+                (request.callback)(Ok(response));
+            }
+        },
+    )
+}
+
+#[test]
+fn test_actix_component() {
+    let builder = TestLoopBuilder::<ExampleComponentTestEvent>::new();
+    let sender = builder.sender();
+    let mut test = builder.build(ExampleComponentTestData {
+        dummy: (),
+        example: ExampleComponent::new(sender.clone().into_sender()),
+        outer: OuterComponent::new(
+            // We need to specify the explicit type here because this is a
+            // three-step conversion. First, we're converting the
+            // `DelaySender<ExampleComponentTestEvent>` into a
+            // `Sender<ExampleComponentAdapterMessage>`, which is a single
+            // sender that accepts an enum (generated by MultiSendMessage)
+            // that contains every type of message that can be sent to
+            // ExampleComponent. Then, we break it apart so that it becomes
+            // something that can send any of the messages that the enum
+            // contains, and finally we convert it to a multisender
+            // ExampleComponentAdapter.
+            IntoSender::<ExampleComponentAdapterMessage>::into_sender(sender)
+                .break_apart()
+                .into_multi_sender(),
+        ),
+        periodic_requests_captured: vec![],
+    });
+    // This is to allow futures to be used in the test even though the
+    // test itself is synchronous.
+    test.register_handler(drive_futures().widen());
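For readers new to the multisender derives used above, the `ExampleComponentAdapterMessage` enum that `MultiSendMessage` generates looks roughly like the sketch below. This is an approximation inferred from how `example_handler` destructures it (`request.message`, `request.callback`), not the literal macro expansion:

```rust
use near_async::messaging::MessageWithCallback;

// Approximate shape of the generated message enum: one variant per
// AsyncSender field of ExampleComponentAdapter. Each variant wraps the
// request together with a callback that delivers the response back to
// the awaiting AsyncSender.
pub enum ExampleComponentAdapterMessage {
    _example(MessageWithCallback<ExampleRequest, ExampleResponse>),
}
```

The `break_apart()` step in the test then turns a `Sender` of this enum into per-variant senders, which `into_multi_sender()` bundles into the typed adapter.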
+    // This is to allow the ExampleComponent to run delayed actions (timers).
+    test.register_handler(drive_delayed_action_runners::<ExampleComponent>().widen());
+    // This is to capture the periodic requests sent by the ExampleComponent
+    // so we can assert against it.
+    test.register_handler(capture_events::<PeriodicRequest>().widen());
+    // This is to handle the ExampleComponentAdapterMessage events by
+    // forwarding them to the ExampleComponent.
+    test.register_handler(example_handler().widen());
+
+    // We need to redo whatever the ExampleActor does in its `started` method.
+    test.data.example.start(&mut test.delayed_action_runner());
+    // Send some requests; this can be done in the asynchronous context.
+    test.future_spawner().spawn("wait for 5", {
+        let res = test.data.outer.call_example_component_for_response(5);
+        async move {
+            assert_eq!(res.await, 5);
+        }
+    });
+    test.future_spawner().spawn("wait for 6", {
+        let res = test.data.outer.call_example_component_for_response(6);
+        async move {
+            assert_eq!(res.await, 6);
+        }
+    });
+    // Run for 3 seconds (not real time, but in the test loop time).
+    // It should result in sending 3 periodic requests.
+    test.run_for(Duration::seconds(3));
+    assert_eq!(
+        test.data.periodic_requests_captured,
+        vec![PeriodicRequest { id: 0 }, PeriodicRequest { id: 1 }, PeriodicRequest { id: 2 },]
+    );
+}
diff --git a/core/async/src/examples/async_component.rs b/core/async/src/examples/async_component.rs
index 1ebd03766d3..542957b775a 100644
--- a/core/async/src/examples/async_component.rs
+++ b/core/async/src/examples/async_component.rs
@@ -1,5 +1,5 @@
 use crate::{
-    futures::FutureSpawner,
+    futures::{FutureSpawner, FutureSpawnerExt},
     messaging::{AsyncSender, Sender},
 };
diff --git a/core/async/src/examples/mod.rs b/core/async/src/examples/mod.rs
index e48d526ab19..710c4525f93 100644
--- a/core/async/src/examples/mod.rs
+++ b/core/async/src/examples/mod.rs
@@ -1,3 +1,5 @@
+mod actix_component;
+mod actix_component_test;
 mod async_component;
 mod async_component_test;
 mod multi_instance_test;
diff --git a/core/async/src/futures.rs b/core/async/src/futures.rs
index fc936b2d5c6..4fd55ef920d 100644
--- a/core/async/src/futures.rs
+++ b/core/async/src/futures.rs
@@ -1,5 +1,7 @@
+use actix::Actor;
 pub use futures::future::BoxFuture; // pub for macros
 use futures::FutureExt;
+use std::ops::DerefMut;
 
 /// Abstraction for something that can drive futures.
 ///
@@ -14,13 +16,27 @@ pub trait FutureSpawner {
     fn spawn_boxed(&self, description: &'static str, f: BoxFuture<'static, ()>);
 }
 
-impl<'a> dyn FutureSpawner + 'a {
-    /// Spawns a future by automatically boxing it.
-    pub fn spawn<F>(&self, description: &'static str, f: F)
+pub trait FutureSpawnerExt {
+    fn spawn<F>(&self, description: &'static str, f: F)
+    where
+        F: futures::Future<Output = ()> + Send + 'static;
+}
+
+impl<T: FutureSpawner> FutureSpawnerExt for T {
+    fn spawn<F>(&self, description: &'static str, f: F)
     where
         F: futures::Future<Output = ()> + Send + 'static,
     {
-        self.spawn_boxed(description, f.boxed())
+        self.spawn_boxed(description, f.boxed());
+    }
+}
+
+impl FutureSpawnerExt for dyn FutureSpawner + '_ {
+    fn spawn<F>(&self, description: &'static str, f: F)
+    where
+        F: futures::Future<Output = ()> + Send + 'static,
+    {
+        self.spawn_boxed(description, f.boxed());
     }
 }
 
@@ -32,3 +48,68 @@ impl FutureSpawner for ActixFutureSpawner {
         near_performance_metrics::actix::spawn(description, f);
     }
 }
+
+/// Abstraction for something that can schedule something to run after.
+/// This isn't the same as just delaying a closure. Rather, it has the
+/// additional power of providing the closure a mutable reference to some
+/// object. With the Actix framework, for example, the object (of type `T`)
+/// would be the actor, and the `DelayedActionRunner<T>` is implemented by
+/// the actix `Context`.
+pub trait DelayedActionRunner<T> {
+    fn run_later_boxed(
+        &mut self,
+        dur: std::time::Duration,
+        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+    );
+}
+
+pub trait DelayedActionRunnerExt<T> {
+    fn run_later(
+        &mut self,
+        dur: std::time::Duration,
+        f: impl FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static,
+    );
+}
+
+impl<T, Runner> DelayedActionRunnerExt<T> for Runner
+where
+    Runner: DelayedActionRunner<T>,
+{
+    fn run_later(
+        &mut self,
+        dur: std::time::Duration,
+        f: impl FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static,
+    ) {
+        self.run_later_boxed(dur, Box::new(f));
+    }
+}
+
+impl<T> DelayedActionRunnerExt<T> for dyn DelayedActionRunner<T> + '_ {
+    fn run_later(
+        &mut self,
+        dur: std::time::Duration,
+        f: impl FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static,
+    ) {
+        self.run_later_boxed(dur, Box::new(f));
+    }
+}
+
+/// Implementation of `DelayedActionRunner` for Actix. With this, any code
+/// that used to take a `&mut actix::Context<T>` can now take a
+/// `&mut dyn DelayedActionRunner<T>` instead, which isn't actix-specific.
+impl<T, Outer> DelayedActionRunner<T> for actix::Context<Outer>
+where
+    T: 'static,
+    Outer: DerefMut<Target = T>,
+    Outer: Actor<Context = actix::Context<Outer>>,
+{
+    fn run_later_boxed(
+        &mut self,
+        dur: std::time::Duration,
+        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+    ) {
+        near_performance_metrics::actix::run_later(self, dur, move |obj, ctx| {
+            f(obj.deref_mut(), ctx)
+        });
+    }
+}
diff --git a/core/async/src/test_loop.rs b/core/async/src/test_loop.rs
index 127795e30d0..7e01ff98c2d 100644
--- a/core/async/src/test_loop.rs
+++ b/core/async/src/test_loop.rs
@@ -68,7 +68,10 @@ pub mod multi_instance;
 use self::{
     delay_sender::DelaySender,
     event_handler::LoopEventHandler,
-    futures::{TestLoopFutureSpawner, TestLoopTask},
+    futures::{
+        TestLoopDelayedActionEvent, TestLoopDelayedActionRunner, TestLoopFutureSpawner,
+        TestLoopTask,
+    },
 };
 use crate::test_loop::event_handler::LoopHandlerContext;
 use crate::time;
@@ -322,6 +325,20 @@ impl<Data: 'static, Event: Debug + Send + 'static> TestLoop<Data, Event> {
     pub fn run_instant(&mut self) {
         self.run_for(time::Duration::ZERO);
     }
+
+    pub fn future_spawner(&self) -> TestLoopFutureSpawner
+    where
+        Event: From<Arc<TestLoopTask>>,
+    {
+        self.sender().narrow()
+    }
+
+    pub fn delayed_action_runner<InnerData>(&self) -> TestLoopDelayedActionRunner<InnerData>
+    where
+        Event: From<TestLoopDelayedActionEvent<InnerData>>,
+    {
+        TestLoopDelayedActionRunner { sender: self.sender().narrow() }
+    }
 }
 
 impl<Data, Event> Drop for TestLoop<Data, Event> {
diff --git a/core/async/src/test_loop/event_handler.rs b/core/async/src/test_loop/event_handler.rs
index 37304d55591..a9730ce3525 100644
--- a/core/async/src/test_loop/event_handler.rs
+++ b/core/async/src/test_loop/event_handler.rs
@@ -44,6 +44,20 @@ impl<Data, Event> LoopEventHandler<Data, Event> {
         })
     }
 
+    pub fn new_with_drop(
+        handler: impl FnMut(Event, &mut Data, &LoopHandlerContext<Event>) -> Result<(), Event> + 'static,
+        ok_to_drop: impl Fn(&Event) -> bool + 'static,
+    ) -> Self {
+        Self {
+            inner: Box::new(LoopEventHandlerImplByFunction {
+                initial_event: None,
+                handler: Box::new(handler),
+                ok_to_drop: Box::new(ok_to_drop),
+                context: None,
+            }),
+        }
+    }
+
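`new_with_drop` exists because timer-style events are often still queued when a test ends. A registration might look like the sketch below, where `TickEvent` and `MyComponent` are hypothetical stand-ins for a real event and data type, not names from this PR:

```rust
// Hypothetical usage sketch: a handler for `TickEvent`s that may safely
// remain unhandled in the TestLoop queue when the test finishes.
fn drive_ticks() -> LoopEventHandler<MyComponent, TickEvent> {
    LoopEventHandler::new_with_drop(
        |event: TickEvent, data: &mut MyComponent, _ctx| {
            data.on_tick(event); // hypothetical method on the component
            Ok(())
        },
        // Ticks are periodic, so leftover ones are fine to drop at shutdown.
        |_event| true,
    )
}
```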
    /// Like new(), but additionally sends an initial event with an initial
    /// delay. See periodic_interval() for why this is useful.
    pub fn new_with_initial_event(
diff --git a/core/async/src/test_loop/futures.rs b/core/async/src/test_loop/futures.rs
index d64da6c992e..c36c4a237e7 100644
--- a/core/async/src/test_loop/futures.rs
+++ b/core/async/src/test_loop/futures.rs
@@ -1,4 +1,5 @@
 use super::{delay_sender::DelaySender, event_handler::LoopEventHandler};
+use crate::futures::DelayedActionRunner;
 use crate::{futures::FutureSpawner, messaging::CanSend};
 use futures::future::BoxFuture;
 use futures::task::{waker_ref, ArcWake};
@@ -95,3 +96,50 @@ impl FutureSpawner for TestLoopFutureSpawner {
         self.send(task);
     }
 }
+
+/// Represents an action that was scheduled to run later, by using
+/// `DelayedActionRunner::run_later`.
+pub struct TestLoopDelayedActionEvent<T>(
+    Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+);
+
+impl<T> Debug for TestLoopDelayedActionEvent<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_tuple("DelayedAction").finish()
+    }
+}
+
+/// An event handler that handles only `TestLoopDelayedActionEvent`s, by
+/// running the action encapsulated in the event.
+pub fn drive_delayed_action_runners<T>() -> LoopEventHandler<T, TestLoopDelayedActionEvent<T>> {
+    LoopEventHandler::new_with_drop(
+        |event, data, ctx| {
+            let mut runner = TestLoopDelayedActionRunner { sender: ctx.sender.clone() };
+            (event.0)(data, &mut runner);
+            Ok(())
+        },
+        |_| {
+            // Delayed actions are usually used for timers, so let's just say
+            // it's OK to drop them at the end of the test. It would be hard
+            // to distinguish what sort of delayed action was being scheduled
+            // anyways.
+            true
+        },
+    )
+}
+
+/// `DelayedActionRunner` that schedules the action to be run later by the
+/// TestLoop event loop.
+pub struct TestLoopDelayedActionRunner<T> {
+    pub(crate) sender: DelaySender<TestLoopDelayedActionEvent<T>>,
+}
+
+impl<T> DelayedActionRunner<T> for TestLoopDelayedActionRunner<T> {
+    fn run_later_boxed(
+        &mut self,
+        dur: std::time::Duration,
+        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+    ) {
+        self.sender.send_with_delay(TestLoopDelayedActionEvent(f), dur.try_into().unwrap());
+    }
+}
diff --git a/core/o11y/src/macros.rs b/core/o11y/src/macros.rs
index 64b06abd89c..8082abadad1 100644
--- a/core/o11y/src/macros.rs
+++ b/core/o11y/src/macros.rs
@@ -11,7 +11,7 @@ macro_rules! handler_span {
             actor = near_o11y::macros::last_component_of_name(std::any::type_name::<Self>()),
             $($extra_fields)*)
         .entered();
-        span.set_parent(context);
+        <tracing::Span as near_o11y::OpenTelemetrySpanExt>::set_parent(&span, context);
         (span, msg)
     }};
 }
diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs
index 787bbe3cd15..eeabf869356 100644
--- a/integration-tests/src/tests/network/runner.rs
+++ b/integration-tests/src/tests/network/runner.rs
@@ -2,7 +2,7 @@ use actix::{Actor, Addr};
 use anyhow::{anyhow, bail, Context};
 use near_async::actix::AddrWithAutoSpanContextExt;
 use near_async::messaging::{noop, IntoMultiSender, IntoSender, LateBoundSender};
-use near_async::time;
+use near_async::time::{self, Clock};
 use near_chain::test_utils::{KeyValueRuntime, MockEpochManager, ValidatorSchedule};
 use near_chain::types::RuntimeAdapter;
 use near_chain::{Chain, ChainGenesis};
@@ -77,6 +77,7 @@ fn setup_network_node(
     let state_sync_adapter =
         Arc::new(RwLock::new(SyncAdapter::new(noop().into_sender(), noop().into_sender())));
     let client_actor = start_client(
+        Clock::real(),
         client_config.clone(),
         chain_genesis.clone(),
         epoch_manager.clone(),
diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs
index e4ff9c8629f..771c7ad2c8b 100644
--- a/nearcore/src/lib.rs
+++ b/nearcore/src/lib.rs
@@ -11,7 +11,7 @@ use anyhow::Context;
 use cold_storage::ColdStoreLoopHandle;
 use near_async::actix::AddrWithAutoSpanContextExt;
 use near_async::messaging::{IntoMultiSender, IntoSender, LateBoundSender};
-use near_async::time;
+use near_async::time::{self, Clock};
 use near_chain::state_snapshot_actor::{
     get_delete_snapshot_callback, get_make_snapshot_callback, SnapshotCallbacks, StateSnapshotActor,
 };
@@ -335,6 +335,7 @@ pub fn start_with_config_and_synchronization(
     let snapshot_callbacks =
         SnapshotCallbacks { make_snapshot_callback, delete_snapshot_callback };
     let (client_actor, client_arbiter_handle, resharding_handle) = start_client(
+        Clock::real(),
        config.client_config.clone(),
        chain_genesis.clone(),
        epoch_manager.clone(),
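The `set_parent` change in macros.rs is what lets every other file in this diff drop its `OpenTelemetrySpanExt` import: fully qualified syntax names the trait by path at the call site, so callers no longer need it in scope. A self-contained toy illustration of the pattern (toy trait and types, not the real o11y API):

```rust
trait Greet {
    fn greet(&self) -> String;
}

struct Span;

impl Greet for Span {
    fn greet(&self) -> String {
        "hello".to_string()
    }
}

mod caller {
    // Note: no `use super::Greet;` here. Fully qualified syntax,
    // `<Type as Trait>::method(receiver)`, resolves the trait by path,
    // which is exactly why handler_span!'s expansion sites no longer
    // need the trait imported.
    pub fn call(span: &super::Span) -> String {
        <super::Span as super::Greet>::greet(span)
    }
}

fn main() {
    assert_eq!(caller::call(&Span), "hello");
}
```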