From 461c1a90e32abd65baa904532f3c21efb1228e5e Mon Sep 17 00:00:00 2001 From: robin-near <111538878+robin-near@users.noreply.github.com> Date: Fri, 23 Feb 2024 09:07:39 -0800 Subject: [PATCH] [TestLoop Refactoring] (3/n) Split ClientActions from ClientActor. (#10646) This makes it so that ClientActor has no logic, and is purely an Actix shell. This way, ClientActions can be independently tested. Also, this adds another example to the core-async library that shows how to test an Actix component. It demonstrates the use of TestLoop to test both asynchronous actix requests and periodic timers. The same idea can be used to test the client actor by constructing and using ClientActions in a TestLoop test. --- chain/chain/src/state_snapshot_actor.rs | 2 +- chain/client/src/client.rs | 4 +- chain/client/src/client_actions.rs | 1876 +++++++++++++++ chain/client/src/client_actor.rs | 2051 +---------------- chain/client/src/debug.rs | 2 +- chain/client/src/lib.rs | 3 +- chain/client/src/sync/sync_actor.rs | 2 +- chain/client/src/sync_jobs_actor.rs | 2 +- chain/client/src/test_utils/setup.rs | 3 +- chain/client/src/view_client.rs | 2 +- chain/network/src/peer/peer_actor.rs | 2 +- .../src/peer_manager/peer_manager_actor.rs | 2 +- chain/network/src/test_utils.rs | 2 +- chain/telemetry/src/lib.rs | 2 +- core/async/src/examples/actix_component.rs | 123 + .../src/examples/actix_component_test.rs | 102 + core/async/src/examples/async_component.rs | 2 +- core/async/src/examples/mod.rs | 2 + core/async/src/futures.rs | 89 +- core/async/src/test_loop.rs | 19 +- core/async/src/test_loop/event_handler.rs | 14 + core/async/src/test_loop/futures.rs | 48 + core/o11y/src/macros.rs | 2 +- integration-tests/src/tests/network/runner.rs | 3 +- nearcore/src/lib.rs | 3 +- 25 files changed, 2381 insertions(+), 1981 deletions(-) create mode 100644 chain/client/src/client_actions.rs create mode 100644 core/async/src/examples/actix_component.rs create mode 100644 core/async/src/examples/actix_component_test.rs diff --git a/chain/chain/src/state_snapshot_actor.rs b/chain/chain/src/state_snapshot_actor.rs index e2662538bad..2cd6e4aeef9 100644 --- a/chain/chain/src/state_snapshot_actor.rs +++ b/chain/chain/src/state_snapshot_actor.rs @@ -1,7 +1,7 @@ use actix::{AsyncContext, Context}; use near_async::messaging::CanSend; use near_network::types::{NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest}; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; +use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt}; use near_performance_metrics_macros::perf; use near_primitives::block::Block; use near_primitives::hash::CryptoHash; diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 183d5ad9682..fa1e42405e2 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -16,8 +16,6 @@ use crate::SyncAdapter; use crate::SyncMessage; use crate::{metrics, SyncStatus}; use actix_rt::ArbiterHandle; -use chrono::DateTime; -use chrono::Utc; use itertools::Itertools; use near_async::messaging::IntoSender; use near_async::messaging::{CanSend, Sender}; @@ -182,7 +180,7 @@ pub struct Client { /// When the "sync block" was requested. /// The "sync block" is the last block of the previous epoch, i.e. `prev_hash` of the `sync_hash` block. 
- pub last_time_sync_block_requested: Option<DateTime<Utc>>, + pub last_time_sync_block_requested: Option<near_async::time::Utc>, /// Helper module for stateless validation functionality like chunk witness production, validation /// chunk endorsements tracking etc. pub chunk_validator: ChunkValidator, diff --git a/chain/client/src/client_actions.rs b/chain/client/src/client_actions.rs new file mode 100644 index 00000000000..11de323e68f --- /dev/null +++ b/chain/client/src/client_actions.rs @@ -0,0 +1,1876 @@ +//! Client actor orchestrates Client and facilitates network connection. +//! It should just serve as a coordinator class to handle messages and check triggers but immediately +//! pass control to Client. This means that any real block processing or production logic should +//! be put in Client. +//! Unfortunately, this is not the case today. We are in the process of refactoring ClientActor +//! https://github.com/near/nearcore/issues/7899 + +#[cfg(feature = "test_features")] +use crate::client::AdvProduceBlocksMode; +use crate::client::{Client, EPOCH_START_INFO_BLOCKS}; +use crate::config_updater::ConfigUpdater; +use crate::debug::new_network_info_view; +use crate::info::{display_sync_status, InfoHelper}; +use crate::sync::adapter::{SyncMessage, SyncShardInfo}; +use crate::sync::state::{StateSync, StateSyncResult}; +use crate::{metrics, StatusResponse}; +use actix::{Addr, Arbiter}; +use near_async::futures::{DelayedActionRunner, DelayedActionRunnerExt}; +use near_async::messaging::{CanSend, Sender}; +use near_async::time::{Clock, Utc}; +use near_chain::chain::{ + ApplyStatePartsRequest, ApplyStatePartsResponse, BlockCatchUpRequest, BlockCatchUpResponse, +}; +use near_chain::resharding::{ReshardingRequest, ReshardingResponse}; +use near_chain::test_utils::format_hash; +#[cfg(feature = "test_features")] +use near_chain::ChainStoreAccess; +use near_chain::{ + byzantine_assert, near_chain_primitives, Block, BlockHeader, BlockProcessingArtifact, + DoneApplyChunkCallback, Provenance, +}; +use near_chain_configs::{ClientConfig, LogSummaryStyle}; +use near_chain_primitives::error::EpochErrorResultToChainError; +use near_chunks::client::ShardsManagerResponse; +use near_chunks::logic::cares_about_shard_this_or_next_epoch; +use near_client_primitives::types::{ + Error, GetClientConfig, GetClientConfigError, GetNetworkInfo, NetworkInfoResponse, + StateSyncStatus, Status, StatusError, StatusSyncInfo, SyncStatus, +}; +use near_network::client::{ + BlockApproval, BlockHeadersResponse, BlockResponse, ChunkEndorsementMessage, + ChunkStateWitnessMessage, ProcessTxRequest, ProcessTxResponse, RecvChallenge, SetNetworkInfo, + StateResponse, +}; +use near_network::types::ReasonForBan; +use near_network::types::{ + NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, +}; +use near_o11y::WithSpanContextExt; +use near_performance_metrics; +use near_performance_metrics_macros::perf; +use near_primitives::block::Tip; +use near_primitives::block_header::ApprovalType; +use near_primitives::epoch_manager::RngSeed; +use near_primitives::hash::CryptoHash; +use near_primitives::network::{AnnounceAccount, PeerId}; +use near_primitives::static_clock::StaticClock; +use near_primitives::types::BlockHeight; +use near_primitives::unwrap_or_return; +use near_primitives::utils::{from_timestamp, MaybeValidated}; +use near_primitives::validator_signer::ValidatorSigner; +use near_primitives::version::PROTOCOL_VERSION; +use near_primitives::views::{DetailedDebugStatus, ValidatorInfo}; +#[cfg(feature = "test_features")] +use
near_store::DBCol; +use near_store::ShardUId; +use near_telemetry::TelemetryActor; +use rand::seq::SliceRandom; +use rand::{thread_rng, Rng}; +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::broadcast; +use tracing::{debug, debug_span, error, info, trace, warn}; + +/// Multiplier on `max_block_time` to wait until deciding that chain stalled. +const STATUS_WAIT_TIME_MULTIPLIER: u64 = 10; +/// `max_block_production_time` times this multiplier is how long we wait before rebroadcasting +/// the current `head` +const HEAD_STALL_MULTIPLIER: u32 = 4; + +#[derive(Clone, near_async::MultiSend, near_async::MultiSenderFrom)] +pub struct ClientSenderForClient { + pub apply_chunks_done: Sender<ApplyChunksDoneMessage>, +} + +pub struct ClientActions { + clock: Clock, + + /// Adversarial controls + pub adv: crate::adversarial::Controls, + + // Sender to be able to send a message to myself. + myself_sender: ClientSenderForClient, + pub(crate) client: Client, + network_adapter: PeerManagerAdapter, + network_info: NetworkInfo, + /// Identity that represents this Client at the network level. + /// It is used as part of the messages that identify this client. + node_id: PeerId, + /// Last time we announced our accounts as validators. + last_validator_announce_time: Option<Instant>, + /// Info helper. + info_helper: InfoHelper, + + /// Last time handle_block_production method was called + block_production_next_attempt: near_async::time::Utc, + + // Last time when log_summary method was called. + log_summary_timer_next_attempt: near_async::time::Utc, + + block_production_started: bool, + doomslug_timer_next_attempt: near_async::time::Utc, + sync_timer_next_attempt: near_async::time::Utc, + sync_started: bool, + state_parts_task_scheduler: Box<dyn Fn(ApplyStatePartsRequest)>, + block_catch_up_scheduler: Box<dyn Fn(BlockCatchUpRequest)>, + resharding_scheduler: Box<dyn Fn(ReshardingRequest)>, + state_parts_client_arbiter: Arbiter, + + #[cfg(feature = "sandbox")] + fastforward_delta: near_primitives::types::BlockHeightDelta, + + /// Synchronization measure to allow graceful shutdown. + /// Informs the system when a ClientActor gets dropped. + shutdown_signal: Option<broadcast::Sender<()>>, + + /// Manages updating the config.
+ config_updater: Option<ConfigUpdater>, +} + +impl ClientActions { + pub fn new( + clock: Clock, + client: Client, + myself_sender: ClientSenderForClient, + config: ClientConfig, + node_id: PeerId, + network_adapter: PeerManagerAdapter, + validator_signer: Option<Arc<dyn ValidatorSigner>>, + telemetry_actor: Addr<TelemetryActor>, + shutdown_signal: Option<broadcast::Sender<()>>, + adv: crate::adversarial::Controls, + config_updater: Option<ConfigUpdater>, + state_parts_task_scheduler: Box<dyn Fn(ApplyStatePartsRequest)>, + block_catch_up_scheduler: Box<dyn Fn(BlockCatchUpRequest)>, + resharding_scheduler: Box<dyn Fn(ReshardingRequest)>, + state_parts_client_arbiter: Arbiter, + ) -> Result<Self, Error> { + if let Some(vs) = &validator_signer { + info!(target: "client", "Starting validator node: {}", vs.validator_id()); + } + let info_helper = InfoHelper::new(Some(telemetry_actor), &config, validator_signer.clone()); + + let now = clock.now_utc(); + Ok(ClientActions { + clock, + adv, + myself_sender, + client, + network_adapter, + node_id, + network_info: NetworkInfo { + connected_peers: vec![], + tier1_connections: vec![], + num_connected_peers: 0, + peer_max_count: 0, + highest_height_peers: vec![], + received_bytes_per_sec: 0, + sent_bytes_per_sec: 0, + known_producers: vec![], + tier1_accounts_keys: vec![], + tier1_accounts_data: vec![], + }, + last_validator_announce_time: None, + info_helper, + block_production_next_attempt: now, + log_summary_timer_next_attempt: now, + block_production_started: false, + doomslug_timer_next_attempt: now, + sync_timer_next_attempt: now, + sync_started: false, + #[cfg(feature = "sandbox")] + fastforward_delta: 0, + shutdown_signal, + config_updater, + + state_parts_task_scheduler, + block_catch_up_scheduler, + resharding_scheduler, + state_parts_client_arbiter, + }) + } +} + +#[cfg(feature = "test_features")] +#[derive(actix::Message, Debug)] +#[rtype(result = "Option<u64>")] +pub enum NetworkAdversarialMessage { + AdvProduceBlocks(u64, bool), + AdvSwitchToHeight(u64), + AdvDisableHeaderSync, + AdvDisableDoomslug, + AdvGetSavedBlocks, + AdvCheckStorageConsistency, +} + +pub(crate) trait ClientActionHandler<M> { + type Result; + fn handle(&mut self, msg: M) -> Self::Result; +} + +#[cfg(feature = "test_features")] +impl ClientActionHandler<NetworkAdversarialMessage> for ClientActions { + type Result = Option<u64>; + + fn handle(&mut self, msg: NetworkAdversarialMessage) -> Self::Result { + match msg { + NetworkAdversarialMessage::AdvDisableDoomslug => { + info!(target: "adversary", "Turning Doomslug off"); + self.adv.set_disable_doomslug(true); + self.client.doomslug.adv_disable(); + self.client.chain.adv_disable_doomslug(); + None + } + NetworkAdversarialMessage::AdvDisableHeaderSync => { + info!(target: "adversary", "Blocking header sync"); + self.adv.set_disable_header_sync(true); + None + } + NetworkAdversarialMessage::AdvProduceBlocks(num_blocks, only_valid) => { + info!(target: "adversary", num_blocks, "Starting adversary blocks production"); + if only_valid { + self.client.adv_produce_blocks = Some(AdvProduceBlocksMode::OnlyValid); + } else { + self.client.adv_produce_blocks = Some(AdvProduceBlocksMode::All); + } + let start_height = + self.client.chain.mut_chain_store().get_latest_known().unwrap().height + 1; + let mut blocks_produced = 0; + for height in start_height..
{ + let block = + self.client.produce_block(height).expect("block should be produced"); + if only_valid && block == None { + continue; + } + let block = block.expect("block should exist after produced"); + info!(target: "adversary", blocks_produced, num_blocks, height, "Producing adversary block"); + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::Block { block: block.clone() }, + )); + let _ = self.client.start_process_block( + block.into(), + Provenance::PRODUCED, + self.get_apply_chunks_done_callback(), + ); + blocks_produced += 1; + if blocks_produced == num_blocks { + break; + } + } + None + } + NetworkAdversarialMessage::AdvSwitchToHeight(height) => { + info!(target: "adversary", "Switching to height {:?}", height); + let mut chain_store_update = self.client.chain.mut_chain_store().store_update(); + chain_store_update.save_largest_target_height(height); + chain_store_update + .adv_save_latest_known(height) + .expect("adv method should not fail"); + chain_store_update.commit().expect("adv method should not fail"); + None + } + NetworkAdversarialMessage::AdvGetSavedBlocks => { + info!(target: "adversary", "Requested number of saved blocks"); + let store = self.client.chain.chain_store().store(); + let mut num_blocks = 0; + for _ in store.iter(DBCol::Block) { + num_blocks += 1; + } + Some(num_blocks) + } + NetworkAdversarialMessage::AdvCheckStorageConsistency => { + // timeout is set to 1.5 seconds to give some room as we wait in Nightly for 2 seconds + let timeout = 1500; + info!(target: "adversary", "Check Storage Consistency, timeout set to {:?} milliseconds", timeout); + let mut genesis = near_chain_configs::GenesisConfig::default(); + genesis.genesis_height = self.client.chain.chain_store().get_genesis_height(); + let mut store_validator = near_chain::store_validator::StoreValidator::new( + self.client.validator_signer.as_ref().map(|x| x.validator_id().clone()), + genesis, + self.client.epoch_manager.clone(), + self.client.shard_tracker.clone(), + self.client.runtime_adapter.clone(), + self.client.chain.chain_store().store().clone(), + self.adv.is_archival(), + ); + store_validator.set_timeout(timeout); + store_validator.validate(); + if store_validator.is_failed() { + error!(target: "client", "Storage Validation failed, {:?}", store_validator.errors); + Some(0) + } else { + Some(store_validator.tests_done()) + } + } + } + } +} + +impl ClientActionHandler<ProcessTxRequest> for ClientActions { + type Result = ProcessTxResponse; + + fn handle(&mut self, msg: ProcessTxRequest) -> Self::Result { + let ProcessTxRequest { transaction, is_forwarded, check_only } = msg; + self.client.process_tx(transaction, is_forwarded, check_only) + } +} + +impl ClientActionHandler<BlockResponse> for ClientActions { + type Result = (); + + fn handle(&mut self, msg: BlockResponse) { + let BlockResponse { block, peer_id, was_requested } = msg; + debug!(target: "client", block_height = block.header().height(), block_hash = ?block.header().hash(), "BlockResponse"); + let blocks_at_height = + self.client.chain.chain_store().get_all_block_hashes_by_height(block.header().height()); + if was_requested + || blocks_at_height.is_err() + || blocks_at_height.as_ref().unwrap().is_empty() + { + // This is a very sneaky piece of logic. + if self.maybe_receive_state_sync_blocks(&block) { + // A node is syncing its state. Don't consider receiving + // blocks other than the few special ones that State Sync expects.
+ return; + } + self.client.receive_block( + block, + peer_id, + was_requested, + self.get_apply_chunks_done_callback(), + ); + } else { + match self.client.epoch_manager.get_epoch_id_from_prev_block(block.header().prev_hash()) + { + Ok(epoch_id) => { + if let Some(hashes) = blocks_at_height.unwrap().get(&epoch_id) { + if !hashes.contains(block.header().hash()) { + warn!(target: "client", "Rejecting unrequested block {}, height {}", block.header().hash(), block.header().height()); + } + } + } + _ => {} + } + } + } +} + +impl ClientActionHandler<BlockHeadersResponse> for ClientActions { + type Result = Result<(), ReasonForBan>; + + fn handle(&mut self, msg: BlockHeadersResponse) -> Self::Result { + let BlockHeadersResponse(headers, peer_id) = msg; + if self.receive_headers(headers, peer_id) { + Ok(()) + } else { + warn!(target: "client", "Banning node for sending invalid block headers"); + Err(ReasonForBan::BadBlockHeader) + } + } +} + +impl ClientActionHandler<BlockApproval> for ClientActions { + type Result = (); + + fn handle(&mut self, msg: BlockApproval) { + let BlockApproval(approval, peer_id) = msg; + debug!(target: "client", "Receive approval {:?} from peer {:?}", approval, peer_id); + self.client.collect_block_approval(&approval, ApprovalType::PeerApproval(peer_id)); + } +} + +/// StateResponse is used during StateSync and catchup. +/// It contains either StateSync header information (that tells us how many parts there are etc) or a single part. +impl ClientActionHandler<StateResponse> for ClientActions { + type Result = (); + + fn handle(&mut self, msg: StateResponse) { + let StateResponse(state_response_info) = msg; + let shard_id = state_response_info.shard_id(); + let hash = state_response_info.sync_hash(); + let state_response = state_response_info.take_state_response(); + + trace!(target: "sync", "Received state response shard_id: {} sync_hash: {:?} part(id/size): {:?}", + shard_id, + hash, + state_response.part().as_ref().map(|(part_id, data)| (part_id, data.len())) + ); + // Get the download that matches the shard_id and hash + + // ... It could be that the state was requested by the state sync + if let SyncStatus::StateSync(StateSyncStatus { + sync_hash, + sync_status: shards_to_download, + }) = &mut self.client.sync_status + { + if hash == *sync_hash { + if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { + self.client.state_sync.update_download_on_state_response_message( + shard_download, + hash, + shard_id, + state_response, + &mut self.client.chain, + ); + return; + } + } + } + + // ... Or one of the catchups + if let Some((state_sync, shards_to_download, _)) = + self.client.catchup_state_syncs.get_mut(&hash) + { + if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { + state_sync.update_download_on_state_response_message( + shard_download, + hash, + shard_id, + state_response, + &mut self.client.chain, + ); + return; + } + } + + error!(target: "sync", "State sync received hash {} that we're not expecting, potential malicious peer or a very delayed response.", hash); + } +} + +impl ClientActionHandler<RecvChallenge> for ClientActions { + type Result = (); + + fn handle(&mut self, msg: RecvChallenge) { + let RecvChallenge(challenge) = msg; + match self.client.process_challenge(challenge) { + Ok(_) => {} + Err(err) => error!(target: "client", "Error processing challenge: {}", err), + } + } +} + +impl ClientActionHandler<SetNetworkInfo> for ClientActions { + type Result = (); + + fn handle(&mut self, msg: SetNetworkInfo) { + // SetNetworkInfo is a large message. Avoid printing it at the `debug` verbosity.
+ let SetNetworkInfo(network_info) = msg; + self.network_info = network_info; + } +} + +#[cfg(feature = "sandbox")] +impl ClientActionHandler<near_client_primitives::types::SandboxMessage> for ClientActions { + type Result = near_client_primitives::types::SandboxResponse; + + fn handle( + &mut self, + msg: near_client_primitives::types::SandboxMessage, + ) -> near_client_primitives::types::SandboxResponse { + match msg { + near_client_primitives::types::SandboxMessage::SandboxPatchState(state) => { + self.client.chain.patch_state( + near_primitives::sandbox::state_patch::SandboxStatePatch::new(state), + ); + near_client_primitives::types::SandboxResponse::SandboxNoResponse + } + near_client_primitives::types::SandboxMessage::SandboxPatchStateStatus => { + near_client_primitives::types::SandboxResponse::SandboxPatchStateFinished( + !self.client.chain.patch_state_in_progress(), + ) + } + near_client_primitives::types::SandboxMessage::SandboxFastForward(delta_height) => { + if self.fastforward_delta > 0 { + return near_client_primitives::types::SandboxResponse::SandboxFastForwardFailed( + "Consecutive fast_forward requests cannot be made while a current one is going on.".to_string()); + } + + self.fastforward_delta = delta_height; + near_client_primitives::types::SandboxResponse::SandboxNoResponse + } + near_client_primitives::types::SandboxMessage::SandboxFastForwardStatus => { + near_client_primitives::types::SandboxResponse::SandboxFastForwardFinished( + self.fastforward_delta == 0, + ) + } + } + } +} + +impl ClientActionHandler<Status> for ClientActions { + type Result = Result<StatusResponse, StatusError>; + + fn handle(&mut self, msg: Status) -> Self::Result { + let head = self.client.chain.head()?; + let head_header = self.client.chain.get_block_header(&head.last_block_hash)?; + let latest_block_time = head_header.raw_timestamp(); + let latest_state_root = *head_header.prev_state_root(); + if msg.is_health_check { + let now = self.clock.now_utc(); + let block_timestamp = + Utc::from_unix_timestamp_nanos(latest_block_time as i128).unwrap(); + if now > block_timestamp { + let elapsed = (now - block_timestamp).unsigned_abs(); + if elapsed + > Duration::from_millis( + self.client.config.max_block_production_delay.as_millis() as u64 + * STATUS_WAIT_TIME_MULTIPLIER, + ) + { + return Err(StatusError::NoNewBlocks { elapsed }); + } + } + + if self.client.sync_status.is_syncing() { + return Err(StatusError::NodeIsSyncing); + } + } + let validators: Vec<ValidatorInfo> = self + .client + .epoch_manager + .get_epoch_block_producers_ordered(&head.epoch_id, &head.last_block_hash) + .into_chain_error()? + .into_iter() + .map(|(validator_stake, is_slashed)| ValidatorInfo { + account_id: validator_stake.take_account_id(), + is_slashed, + }) + .collect(); + + let epoch_start_height = + self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash).ok(); + + let protocol_version = self + .client + .epoch_manager + .get_epoch_protocol_version(&head.epoch_id) + .into_chain_error()?; + + let node_public_key = self.node_id.public_key().clone(); + let (validator_account_id, validator_public_key) = match &self.client.validator_signer { + Some(vs) => (Some(vs.validator_id().clone()), Some(vs.public_key())), + None => (None, None), + }; + let node_key = validator_public_key.clone(); + + let mut earliest_block_hash = None; + let mut earliest_block_height = None; + let mut earliest_block_time = None; + if let Some(earliest_block_hash_value) = self.client.chain.get_earliest_block_hash()?
{ + earliest_block_hash = Some(earliest_block_hash_value); + if let Ok(earliest_block) = + self.client.chain.get_block_header(&earliest_block_hash_value) + { + earliest_block_height = Some(earliest_block.height()); + earliest_block_time = Some(earliest_block.timestamp()); + } + } + // Provide more detailed information about the current state of chain. + // For now - provide info about last 50 blocks. + let detailed_debug_status = if msg.detailed { + Some(DetailedDebugStatus { + network_info: new_network_info_view(&self.client.chain, &self.network_info), + sync_status: format!( + "{} ({})", + self.client.sync_status.as_variant_name(), + display_sync_status( + &self.client.sync_status, + &self.client.chain.head()?, + &self.client.config.state_sync.sync, + ), + ), + catchup_status: self.client.get_catchup_status()?, + current_head_status: head.clone().into(), + current_header_head_status: self.client.chain.header_head()?.into(), + block_production_delay_millis: self + .client + .config + .min_block_production_delay + .as_millis() as u64, + }) + } else { + None + }; + let uptime_sec = StaticClock::utc().timestamp() - self.info_helper.boot_time_seconds; + Ok(StatusResponse { + version: self.client.config.version.clone(), + protocol_version, + latest_protocol_version: PROTOCOL_VERSION, + chain_id: self.client.config.chain_id.clone(), + rpc_addr: self.client.config.rpc_addr.clone(), + validators, + sync_info: StatusSyncInfo { + latest_block_hash: head.last_block_hash, + latest_block_height: head.height, + latest_state_root, + latest_block_time: from_timestamp(latest_block_time), + syncing: self.client.sync_status.is_syncing(), + earliest_block_hash, + earliest_block_height, + earliest_block_time, + epoch_id: Some(head.epoch_id), + epoch_start_height, + }, + validator_account_id, + validator_public_key, + node_public_key, + node_key, + uptime_sec, + detailed_debug_status, + }) + } +} + +/// Private to public API conversion. +fn make_peer_info(from: near_network::types::PeerInfo) -> near_client_primitives::types::PeerInfo { + near_client_primitives::types::PeerInfo { + id: from.id, + addr: from.addr, + account_id: from.account_id, + } +} + +/// Private to public API conversion. +fn make_known_producer( + from: near_network::types::KnownProducer, +) -> near_client_primitives::types::KnownProducer { + near_client_primitives::types::KnownProducer { + peer_id: from.peer_id, + account_id: from.account_id, + addr: from.addr, + next_hops: from.next_hops, + } +} + +impl ClientActionHandler<GetNetworkInfo> for ClientActions { + type Result = Result<NetworkInfoResponse, String>; + + fn handle(&mut self, _msg: GetNetworkInfo) -> Self::Result { + Ok(NetworkInfoResponse { + connected_peers: (self.network_info.connected_peers.iter()) + .map(|fpi| make_peer_info(fpi.full_peer_info.peer_info.clone())) + .collect(), + num_connected_peers: self.network_info.num_connected_peers, + peer_max_count: self.network_info.peer_max_count, + sent_bytes_per_sec: self.network_info.sent_bytes_per_sec, + received_bytes_per_sec: self.network_info.received_bytes_per_sec, + known_producers: self + .network_info + .known_producers + .iter() + .map(|p| make_known_producer(p.clone())) + .collect(), + }) + } +} + +/// `ApplyChunksDoneMessage` is a message that signals the finishing of applying chunks of a block. +/// Upon receiving this message, ClientActor knows that it's time to finish processing the blocks that +/// just finished applying chunks.
+#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub struct ApplyChunksDoneMessage; + +impl ClientActionHandler<ApplyChunksDoneMessage> for ClientActions { + type Result = (); + + fn handle(&mut self, _msg: ApplyChunksDoneMessage) -> Self::Result { + self.try_process_unfinished_blocks(); + } +} + +#[derive(Debug)] +enum SyncRequirement { + SyncNeeded { peer_id: PeerId, highest_height: BlockHeight, head: Tip }, + AlreadyCaughtUp { peer_id: PeerId, highest_height: BlockHeight, head: Tip }, + NoPeers, + AdvHeaderSyncDisabled, +} + +impl SyncRequirement { + fn sync_needed(&self) -> bool { + matches!(self, Self::SyncNeeded { .. }) + } + + fn to_metrics_string(&self) -> String { + match self { + Self::SyncNeeded { .. } => "SyncNeeded", + Self::AlreadyCaughtUp { .. } => "AlreadyCaughtUp", + Self::NoPeers => "NoPeers", + Self::AdvHeaderSyncDisabled { .. } => "AdvHeaderSyncDisabled", + } + .to_string() + } +} + +impl fmt::Display for SyncRequirement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::SyncNeeded { peer_id, highest_height, head: my_head } => write!( + f, + "sync needed at #{} [{}]. highest height peer: {} at #{}", + my_head.height, + format_hash(my_head.last_block_hash), + peer_id, + highest_height + ), + Self::AlreadyCaughtUp { peer_id, highest_height, head: my_head } => write!( + f, + "synced at #{} [{}]. highest height peer: {} at #{}", + my_head.height, + format_hash(my_head.last_block_hash), + peer_id, + highest_height + ), + Self::NoPeers => write!(f, "no available peers"), + Self::AdvHeaderSyncDisabled => { + write!(f, "syncing disabled via adv_disable_header_sync") + } + } + } +} + +impl ClientActions { + pub(crate) fn start(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) { + self.start_flat_storage_creation(ctx); + + // Start syncing job. + self.start_sync(ctx); + + // Start block production tracking if we have block producer info. + if self.client.validator_signer.is_some() { + self.block_production_started = true; + } + + // Start triggers + self.schedule_triggers(ctx); + + // Start catchup job. + self.catchup(ctx); + + if let Err(err) = self.client.send_network_chain_info() { + tracing::error!(target: "client", ?err, "Failed to update network chain info"); + } + } + + /// Check if the client's Account Id should be announced, and send it if so. + /// The Account Id is sent when the node is not currently a validator but is becoming one soon. + fn check_send_announce_account(&mut self, prev_block_hash: CryptoHash) { + // If no peers, there is no one to announce to. + if self.network_info.num_connected_peers == 0 { + debug!(target: "client", "No peers: skip account announce"); + return; + } + + // First check that we currently have an AccountId + let validator_signer = match self.client.validator_signer.as_ref() { + None => return, + Some(signer) => signer, + }; + + let now = StaticClock::instant(); + // Check that we haven't announced it too recently + if let Some(last_validator_announce_time) = self.last_validator_announce_time { + // Don't make an announcement if less than half of the time has passed in which other peers + // would remove our Account Id from their routing tables. + if 2 * (now - last_validator_announce_time) < self.client.config.ttl_account_id_router { + return; + } + } + + debug!(target: "client", "Check announce account for {}, last announce time {:?}", validator_signer.validator_id(), self.last_validator_announce_time); + + // Announce AccountId if client is becoming a validator soon.
+ let next_epoch_id = unwrap_or_return!(self + .client + .epoch_manager + .get_next_epoch_id_from_prev_block(&prev_block_hash)); + + // Check that the client is part of the future validators + if self.client.is_validator(&next_epoch_id, &prev_block_hash) { + debug!(target: "client", "Sending announce account for {}", validator_signer.validator_id()); + self.last_validator_announce_time = Some(now); + + let signature = validator_signer.sign_account_announce( + validator_signer.validator_id(), + &self.node_id, + &next_epoch_id, + ); + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::AnnounceAccount(AnnounceAccount { + account_id: validator_signer.validator_id().clone(), + peer_id: self.node_id.clone(), + epoch_id: next_epoch_id, + signature, + }), + )); + } + } + + /// Process the sandbox fast forward request. If the change in block height is past an epoch, + /// we fast forward to just right before the epoch, produce some blocks to get past and into + /// a new epoch, then we continue on with the residual amount to fast forward. + #[cfg(feature = "sandbox")] + fn sandbox_process_fast_forward( + &mut self, + block_height: BlockHeight, + ) -> Result<Option<near_chain::types::LatestKnown>, Error> { + let mut delta_height = std::mem::replace(&mut self.fastforward_delta, 0); + if delta_height == 0 { + return Ok(None); + } + + let epoch_length = self.client.config.epoch_length; + if epoch_length <= 3 { + return Err(Error::Other( + "Unsupported: fast_forward with an epoch length of 3 or less".to_string(), + )); + } + + // Check if we are at epoch boundary. If we are, do not fast forward until new + // epoch is here. Decrement the fast_forward count by 1 when a block is produced + // during this period of waiting + let block_height_wrt_epoch = block_height % epoch_length; + if epoch_length - block_height_wrt_epoch <= 3 || block_height_wrt_epoch == 0 { + // wait for doomslug to call into produce block + self.fastforward_delta = delta_height; + return Ok(None); + } + + let delta_height = if block_height_wrt_epoch + delta_height >= epoch_length { + // fast forward to just right before epoch boundary to have epoch_manager + // handle the epoch_height updates as normal. `- 3` since this is being + // done 3 blocks before the epoch ends. + let right_before_epoch_update = epoch_length - block_height_wrt_epoch - 3; + + delta_height -= right_before_epoch_update; + self.fastforward_delta = delta_height; + right_before_epoch_update + } else { + delta_height + }; + + self.client.accrued_fastforward_delta += delta_height; + let delta_time = self.client.sandbox_delta_time(); + let new_latest_known = near_chain::types::LatestKnown { + height: block_height + delta_height, + seen: near_primitives::utils::to_timestamp(StaticClock::utc() + delta_time), + }; + + Ok(Some(new_latest_known)) + } + + fn pre_block_production(&mut self) -> Result<(), Error> { + #[cfg(feature = "sandbox")] + { + let latest_known = self.client.chain.mut_chain_store().get_latest_known()?; + if let Some(new_latest_known) = + self.sandbox_process_fast_forward(latest_known.height)? + { + self.client.chain.mut_chain_store().save_latest_known(new_latest_known.clone())?; + self.client.sandbox_update_tip(new_latest_known.height)?; + } + } + Ok(()) + } + + fn post_block_production(&mut self) { + #[cfg(feature = "sandbox")] + if self.fastforward_delta > 0 { + // Decrease the delta_height by 1 since we've produced a single block.
This + // ensures that we advanced the right number of blocks when fast forwarding + // and fast forwarding triggers regular block production in the case of + // stepping between epoch boundaries. + self.fastforward_delta -= 1; + } + } + + /// Retrieves the latest height and checks if we must produce the next block. + /// Otherwise waits for a block to arrive, or suggests skipping after a timeout. + fn handle_block_production(&mut self) -> Result<(), Error> { + let _span = tracing::debug_span!(target: "client", "handle_block_production").entered(); + // If syncing, don't try to produce blocks. + if self.client.sync_status.is_syncing() { + debug!(target:"client", sync_status=format!("{:#?}", self.client.sync_status), "Syncing - block production disabled"); + return Ok(()); + } + + let _ = self.client.check_and_update_doomslug_tip(); + + self.pre_block_production()?; + let head = self.client.chain.head()?; + let latest_known = self.client.chain.chain_store().get_latest_known()?; + + assert!( + head.height <= latest_known.height, + "Latest known height is invalid {} vs {}", + head.height, + latest_known.height + ); + + let epoch_id = + self.client.epoch_manager.get_epoch_id_from_prev_block(&head.last_block_hash)?; + let log_block_production_info = + if self.client.epoch_manager.is_next_block_epoch_start(&head.last_block_hash)? { + true + } else { + // the next block is still the same epoch + let epoch_start_height = + self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash)?; + latest_known.height - epoch_start_height < EPOCH_START_INFO_BLOCKS + }; + + // We try to produce block for multiple heights (up to the highest height for which we've seen 2/3 of approvals). + if latest_known.height + 1 <= self.client.doomslug.get_largest_height_crossing_threshold() { + debug!(target: "client", "Considering blocks for production between {} and {} ", latest_known.height + 1, self.client.doomslug.get_largest_height_crossing_threshold()); + } else { + debug!(target: "client", "Cannot produce any block: not enough approvals beyond {}", latest_known.height); + } + + let me = if let Some(me) = &self.client.validator_signer { + me.validator_id().clone() + } else { + return Ok(()); + }; + + // For debug purpose, we record the approvals we have seen so far to the future blocks + for height in latest_known.height + 1..=self.client.doomslug.get_largest_approval_height() { + let next_block_producer_account = + self.client.epoch_manager.get_block_producer(&epoch_id, height)?; + + if me == next_block_producer_account { + self.client.block_production_info.record_approvals( + height, + self.client.doomslug.approval_status_at_height(&height), + ); + } + } + + for height in + latest_known.height + 1..=self.client.doomslug.get_largest_height_crossing_threshold() + { + let next_block_producer_account = + self.client.epoch_manager.get_block_producer(&epoch_id, height)?; + + if me == next_block_producer_account { + self.client.chunk_inclusion_tracker.prepare_chunk_headers_ready_for_inclusion( + &head.last_block_hash, + self.client.chunk_endorsement_tracker.as_ref(), + )?; + let num_chunks = self + .client + .chunk_inclusion_tracker + .num_chunk_headers_ready_for_inclusion(&epoch_id, &head.last_block_hash); + let have_all_chunks = head.height == 0 + || num_chunks == self.client.epoch_manager.shard_ids(&epoch_id).unwrap().len(); + + if self.client.doomslug.ready_to_produce_block( + StaticClock::instant(), + height, + have_all_chunks, + log_block_production_info, + ) { + if let Err(err) = self.produce_block(height) { + // If there
is an error, report it and let it retry on the next loop step. + error!(target: "client", height, "Block production failed: {}", err); + } else { + self.post_block_production(); + } + } + } + } + + Ok(()) + } + + fn schedule_triggers(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) { + let wait = self.check_triggers(ctx); + + ctx.run_later(wait, move |act, ctx| { + act.schedule_triggers(ctx); + }); + } + + /// Check if the scheduled time of any "triggers" has passed, and if so, call the trigger. + /// Triggers are important functions of the client, like running a single step of state sync or + /// checking if we can produce a block. + /// + /// It is called before processing an Actix message and also in `schedule_triggers`. + /// This is to ensure all triggers enjoy higher priority than any actix message. + /// Otherwise, due to a bug in the Actix library, Actix prioritizes processing messages + /// while there are messages in the mailbox. Because of that we handle scheduling + /// triggers with a custom `run_timer` function instead of `run_later` in Actix. + /// + /// Returns the delay before the next time `check_triggers` should be called, which is + /// min(time until the closest trigger, 1 second). + pub(crate) fn check_triggers(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) -> Duration { + let _span = tracing::debug_span!(target: "client", "check_triggers").entered(); + if let Some(config_updater) = &mut self.config_updater { + config_updater.try_update(&|updateable_client_config| { + self.client.update_client_config(updateable_client_config) + }); + } + + // Check block height to trigger expected shutdown + if let Ok(head) = self.client.chain.head() { + if let Some(block_height_to_shutdown) = self.client.config.expected_shutdown.get() { + if head.height >= block_height_to_shutdown { + info!(target: "client", "Expected shutdown triggered: head block({}) >= ({:?})", head.height, block_height_to_shutdown); + if let Some(tx) = self.shutdown_signal.take() { + let _ = tx.send(()); // Ignore a failed send; it will be sent again on the next trigger + } + } + } + } + + self.try_process_unfinished_blocks(); + + let mut delay = near_async::time::Duration::seconds(1); + let now = self.clock.now_utc(); + + let timer = metrics::CHECK_TRIGGERS_TIME.start_timer(); + if self.sync_started { + self.sync_timer_next_attempt = self.run_timer( + self.sync_wait_period(), + self.sync_timer_next_attempt, + ctx, + |act, _| act.run_sync_step(), + "sync", + ); + + delay = std::cmp::min(delay, self.sync_timer_next_attempt - now); + + self.doomslug_timer_next_attempt = self.run_timer( + self.client.config.doosmslug_step_period, + self.doomslug_timer_next_attempt, + ctx, + |act, _| act.try_doomslug_timer(), + "doomslug", + ); + delay = core::cmp::min(delay, self.doomslug_timer_next_attempt - now) + } + if self.block_production_started { + self.block_production_next_attempt = self.run_timer( + self.client.config.block_production_tracking_delay, + self.block_production_next_attempt, + ctx, + |act, _ctx| act.try_handle_block_production(), + "block_production", + ); + + let _ = self.client.check_head_progress_stalled( + self.client.config.max_block_production_delay * HEAD_STALL_MULTIPLIER, + ); + + delay = core::cmp::min(delay, self.block_production_next_attempt - now) + } + + self.log_summary_timer_next_attempt = self.run_timer( + self.client.config.log_summary_period, + self.log_summary_timer_next_attempt, + ctx, + |act, _ctx| act.log_summary(), + "log_summary", + ); + delay = core::cmp::min(delay, self.log_summary_timer_next_attempt - now); +
timer.observe_duration(); + core::cmp::max(delay, near_async::time::Duration::ZERO).unsigned_abs() + } + + /// "Unfinished" blocks are blocks whose processing the client has started but hasn't + /// finished, because it was waiting for applying chunks to be done. This function checks + /// if there are any "unfinished" blocks that are ready to be processed again, and finishes + /// processing these blocks. + /// This function is called in two places: upon receiving ApplyChunksDoneMessage and in `check_triggers`. + /// The job that executes applying chunks will send an ApplyChunksDoneMessage to ClientActor after + /// applying chunks is done, so when receiving ApplyChunksDoneMessage messages, ClientActor + /// calls this function to finish processing the unfinished blocks. ClientActor also calls + /// this function in `check_triggers`, because the actix queue may be blocked by other messages + /// and we want to prioritize block processing. + fn try_process_unfinished_blocks(&mut self) { + let _span = debug_span!(target: "client", "try_process_unfinished_blocks").entered(); + let (accepted_blocks, errors) = + self.client.postprocess_ready_blocks(self.get_apply_chunks_done_callback(), true); + if !errors.is_empty() { + error!(target: "client", ?errors, "try_process_unfinished_blocks got errors"); + } + self.process_accepted_blocks(accepted_blocks); + } + + fn try_handle_block_production(&mut self) { + let _span = debug_span!(target: "client", "try_handle_block_production").entered(); + if let Err(err) = self.handle_block_production() { + tracing::error!(target: "client", ?err, "Handle block production failed") + } + } + + fn try_doomslug_timer(&mut self) { + let _span = tracing::debug_span!(target: "client", "try_doomslug_timer").entered(); + let _ = self.client.check_and_update_doomslug_tip(); + let approvals = self.client.doomslug.process_timer(StaticClock::instant()); + + // Important to save the largest approval target height before sending approvals, so + // that if the node crashes in the meantime, we cannot get slashed on recovery + let mut chain_store_update = self.client.chain.mut_chain_store().store_update(); + chain_store_update + .save_largest_target_height(self.client.doomslug.get_largest_target_height()); + + match chain_store_update.commit() { + Ok(_) => { + let head = unwrap_or_return!(self.client.chain.head()); + if self.client.is_validator(&head.epoch_id, &head.last_block_hash) + || self.client.is_validator(&head.next_epoch_id, &head.last_block_hash) + { + for approval in approvals { + if let Err(e) = + self.client.send_approval(&self.client.doomslug.get_tip().0, approval) + { + error!("Error while sending an approval {:?}", e); + } + } + } + } + Err(e) => error!("Error while committing largest skipped height {:?}", e), + }; + } + + /// Produce block if we are block producer for given `next_height` height. + /// Can return an error; should be called via `try_handle_block_production` to handle errors and reschedule. + fn produce_block(&mut self, next_height: BlockHeight) -> Result<(), Error> { + let _span = tracing::debug_span!(target: "client", "produce_block", next_height).entered(); + if let Some(block) = self.client.produce_block_on_head(next_height, false)? { + // If we produced the block, send it out before we apply the block. + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::Block { block: block.clone() }, + )); + // We’ve produced the block, so that counts as a validated block.
+ let block = MaybeValidated::from_validated(block); + let res = self.client.start_process_block( + block, + Provenance::PRODUCED, + self.get_apply_chunks_done_callback(), + ); + if let Err(e) = &res { + match e { + near_chain::Error::ChunksMissing(_) => { + debug!(target: "client", "chunks missing"); + // missing chunks were already handled in Client::process_block, we don't need to + // do anything here + return Ok(()); + } + _ => { + error!(target: "client", ?res, "Failed to process freshly produced block"); + byzantine_assert!(false); + return res.map_err(|err| err.into()); + } + } + } + } + Ok(()) + } + + fn send_chunks_metrics(&mut self, block: &Block) { + let chunks = block.chunks(); + for (chunk, &included) in chunks.iter().zip(block.header().chunk_mask().iter()) { + if included { + self.info_helper.chunk_processed( + chunk.shard_id(), + chunk.prev_gas_used(), + chunk.prev_balance_burnt(), + ); + } else { + self.info_helper.chunk_skipped(chunk.shard_id()); + } + } + } + + fn send_block_metrics(&mut self, block: &Block) { + let chunks_in_block = block.header().chunk_mask().iter().filter(|&&m| m).count(); + let gas_used = Block::compute_gas_used(block.chunks().iter(), block.header().height()); + + let last_final_hash = block.header().last_final_block(); + let last_final_ds_hash = block.header().last_ds_final_block(); + let last_final_block_height = self + .client + .chain + .get_block(&last_final_hash) + .map_or(0, |block| block.header().height()); + let last_final_ds_block_height = self + .client + .chain + .get_block(&last_final_ds_hash) + .map_or(0, |block| block.header().height()); + + let epoch_height = + self.client.epoch_manager.get_epoch_height_from_prev_block(block.hash()).unwrap_or(0); + let epoch_start_height = self + .client + .epoch_manager + .get_epoch_start_height(&last_final_hash) + .unwrap_or(last_final_block_height); + let last_final_block_height_in_epoch = + last_final_block_height.checked_sub(epoch_start_height); + + self.info_helper.block_processed( + gas_used, + chunks_in_block as u64, + block.header().next_gas_price(), + block.header().total_supply(), + last_final_block_height, + last_final_ds_block_height, + epoch_height, + last_final_block_height_in_epoch, + ); + } + + /// Process all blocks that were accepted by calling other relevant services. + fn process_accepted_blocks(&mut self, accepted_blocks: Vec<CryptoHash>) { + let _span = tracing::debug_span!( + target: "client", + "process_accepted_blocks", + num_blocks = accepted_blocks.len()) + .entered(); + for accepted_block in accepted_blocks { + let block = self.client.chain.get_block(&accepted_block).unwrap().clone(); + self.send_chunks_metrics(&block); + self.send_block_metrics(&block); + self.check_send_announce_account(*block.header().last_final_block()); + } + } + + /// Returns the callback function that will be passed to various functions that may trigger + /// the processing of new blocks. This callback will be called at the end of applying chunks + /// for every block.
+ fn get_apply_chunks_done_callback(&self) -> DoneApplyChunkCallback { + let myself_sender = self.myself_sender.clone(); + Arc::new(move |_| { + myself_sender.send(ApplyChunksDoneMessage {}); + }) + } + + fn receive_headers(&mut self, headers: Vec<BlockHeader>, peer_id: PeerId) -> bool { + let _span = + debug_span!(target: "client", "receive_headers", num_headers = headers.len(), ?peer_id) + .entered(); + if headers.is_empty() { + info!(target: "client", "Received an empty set of block headers"); + return true; + } + match self.client.sync_block_headers(headers) { + Ok(_) => true, + Err(err) => { + if err.is_bad_data() { + error!(target: "client", ?err, "Error processing sync blocks"); + false + } else { + debug!(target: "client", ?err, "Block headers refused by chain"); + true + } + } + } + } + + /// Check whether we need to (continue to) sync. + /// Also returns the highest height among peers known at that height. + fn syncing_info(&self) -> Result<SyncRequirement, near_chain::Error> { + if self.adv.disable_header_sync() { + return Ok(SyncRequirement::AdvHeaderSyncDisabled); + } + + let head = self.client.chain.head()?; + let is_syncing = self.client.sync_status.is_syncing(); + + // Only consider peers whose latest block is not an invalid block + let eligible_peers: Vec<_> = self + .network_info + .highest_height_peers + .iter() + .filter(|p| !self.client.chain.is_block_invalid(&p.highest_block_hash)) + .collect(); + metrics::PEERS_WITH_INVALID_HASH + .set(self.network_info.highest_height_peers.len() as i64 - eligible_peers.len() as i64); + let peer_info = if let Some(peer_info) = eligible_peers.choose(&mut thread_rng()) { + peer_info + } else { + return Ok(SyncRequirement::NoPeers); + }; + + let peer_id = peer_info.peer_info.id.clone(); + let highest_height = peer_info.highest_block_height; + + if is_syncing { + if highest_height <= head.height { + Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head }) + } else { + Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head }) + } + } else { + if highest_height > head.height + self.client.config.sync_height_threshold { + Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head }) + } else { + Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head }) + } + } + } + + fn start_flat_storage_creation(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) { + if !self.client.config.flat_storage_creation_enabled { + return; + } + match self.client.run_flat_storage_creation_step() { + Ok(false) => {} + Ok(true) => { + return; + } + Err(err) => { + error!(target: "client", "Error occurred during flat storage creation step: {:?}", err); + } + } + + ctx.run_later(self.client.config.flat_storage_creation_period, move |act, ctx| { + act.start_flat_storage_creation(ctx); + }); + } + + /// Starts syncing and then switches to either syncing or regular mode. + fn start_sync(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) { + // Wait until connections reach at least the minimum number of peers, unless we skip the sync wait. + if self.network_info.num_connected_peers < self.client.config.min_num_peers + && !self.client.config.skip_sync_wait + { + ctx.run_later(self.client.config.sync_step_period, move |act, ctx| { + act.start_sync(ctx); + }); + return; + } + self.sync_started = true; + + // Sync loop will be started by check_triggers. + } + + /// Select the block hash we are using to sync state. It will sync with the state before applying the + /// content of that block. + /// + /// The selected block will always be the first block of a new epoch.
+ fn find_sync_hash(&mut self) -> Result<CryptoHash, near_chain::Error> { + let header_head = self.client.chain.header_head()?; + let sync_hash = header_head.last_block_hash; + let epoch_start_sync_hash = + StateSync::get_epoch_start_sync_hash(&mut self.client.chain, &sync_hash)?; + + let genesis_hash = self.client.chain.genesis().hash(); + tracing::debug!( + target: "sync", + ?header_head, + ?sync_hash, + ?epoch_start_sync_hash, + ?genesis_hash, + "find_sync_hash"); + assert_ne!(&epoch_start_sync_hash, genesis_hash); + Ok(epoch_start_sync_hash) + } + + /// Runs catchup on repeat, if this client is a validator. + /// Schedules itself again if it was not run in response to a state-parts job result + fn catchup(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) { + { + // An extra scope to limit the lifetime of the span. + let _span = tracing::debug_span!(target: "client", "catchup").entered(); + if let Err(err) = self.client.run_catchup( + &self.network_info.highest_height_peers, + &self.state_parts_task_scheduler, + &self.block_catch_up_scheduler, + &self.resharding_scheduler, + self.get_apply_chunks_done_callback(), + &self.state_parts_client_arbiter.handle(), + ) { + error!(target: "client", "{:?} Error occurred during catchup for the next epoch: {:?}", self.client.validator_signer.as_ref().map(|vs| vs.validator_id()), err); + } + } + + ctx.run_later(self.client.config.catchup_step_period, move |act, ctx| { + act.catchup(ctx); + }); + } + + /// Runs the given callback if the time now is at least `next_attempt`. + /// Returns the time for the next run, based on the given `delay` between runs. + fn run_timer<F>( + &mut self, + delay: Duration, + next_attempt: Utc, + ctx: &mut dyn DelayedActionRunner<Self>, + f: F, + timer_label: &str, + ) -> Utc + where + F: FnOnce(&mut Self, &mut dyn DelayedActionRunner<Self>) + 'static, + { + let now = self.clock.now_utc(); + if now < next_attempt { + return next_attempt; + } + + let timer = + metrics::CLIENT_TRIGGER_TIME_BY_TYPE.with_label_values(&[timer_label]).start_timer(); + f(self, ctx); + timer.observe_duration(); + + now + delay + } + + fn sync_wait_period(&self) -> Duration { + if let Ok(sync) = self.syncing_info() { + if !sync.sync_needed() { + // If we don't need syncing - retry the sync call rarely. + self.client.config.sync_check_period + } else { + // If we need syncing - retry the sync call often. + self.client.config.sync_step_period + } + } else { + self.client.config.sync_step_period + } + } + + /// Main syncing job responsible for syncing client with other peers. + /// Reschedules itself only if it was not run as a reaction to a message with the results of + /// a finished state-part job + fn run_sync_step(&mut self) { + let _span = tracing::debug_span!(target: "client", "run_sync_step").entered(); + + macro_rules! unwrap_and_report (($obj: expr) => (match $obj { + Ok(v) => v, + Err(err) => { + error!(target: "sync", "Sync: Unexpected error: {}", err); + return; + } + })); + + let currently_syncing = self.client.sync_status.is_syncing(); + let sync = unwrap_and_report!(self.syncing_info()); + self.info_helper.update_sync_requirements_metrics(sync.to_metrics_string()); + + match sync { + SyncRequirement::AlreadyCaughtUp { .. } + | SyncRequirement::NoPeers + | SyncRequirement::AdvHeaderSyncDisabled => { + if currently_syncing { + // Initial transition out of "syncing" state. + debug!(target: "sync", prev_sync_status = ?self.client.sync_status, "disabling sync"); + self.client.sync_status.update(SyncStatus::NoSync); + // Announce this client's account id if its epoch is coming up.
+ let head = unwrap_and_report!(self.client.chain.head()); + self.check_send_announce_account(head.prev_block_hash); + } + } + + SyncRequirement::SyncNeeded { highest_height, .. } => { + let mut notify_start_sync = false; + if !currently_syncing { + info!(target: "client", ?sync, "enabling sync"); + } + // Run each step of syncing separately. + unwrap_and_report!(self.client.header_sync.run( + &mut self.client.sync_status, + &mut self.client.chain, + highest_height, + &self.network_info.highest_height_peers + )); + // Only body / state sync if header height is close to the latest. + let header_head = unwrap_and_report!(self.client.chain.header_head()); + + // Sync state if already running sync state or if block sync is too far. + let sync_state = match self.client.sync_status { + SyncStatus::StateSync(_) => true, + _ if header_head.height + >= highest_height + .saturating_sub(self.client.config.block_header_fetch_horizon) => + { + unwrap_and_report!(self.client.block_sync.run( + &mut self.client.sync_status, + &self.client.chain, + highest_height, + &self.network_info.highest_height_peers + )) + } + _ => false, + }; + if sync_state { + match self.client.sync_status { + SyncStatus::StateSync(_) => (), + _ => { + let sync_hash = unwrap_and_report!(self.find_sync_hash()); + if !self.client.config.archive { + unwrap_and_report!(self + .client + .chain + .reset_data_pre_state_sync(sync_hash)); + } + self.client.sync_status.update(SyncStatus::StateSync( + StateSyncStatus { sync_hash, sync_status: HashMap::default() }, + )); + // This is the first time we run state sync. + notify_start_sync = true; + } + }; + let sync_hash = match &self.client.sync_status { + SyncStatus::StateSync(s) => s.sync_hash, + _ => unreachable!("Sync status should have been StateSync!"), + }; + + let me = + self.client.validator_signer.as_ref().map(|x| x.validator_id().clone()); + let block_header = + unwrap_and_report!(self.client.chain.get_block_header(&sync_hash)); + let prev_hash = *block_header.prev_hash(); + let epoch_id = block_header.epoch_id().clone(); + let shards_to_sync: Vec<_> = self + .client + .epoch_manager + .shard_ids(&epoch_id) + .unwrap() + .into_iter() + .filter(|&shard_id| { + cares_about_shard_this_or_next_epoch( + me.as_ref(), + &prev_hash, + shard_id, + true, + &self.client.shard_tracker, + ) + }) + .collect(); + + let use_colour = + matches!(self.client.config.log_summary_style, LogSummaryStyle::Colored); + + // Notify each shard to sync. + if notify_start_sync { + let shard_layout = self + .client + .epoch_manager + .get_shard_layout(&epoch_id) + .expect("Cannot get shard layout"); + for &shard_id in &shards_to_sync { + let shard_uid = + ShardUId::from_shard_id_and_layout(shard_id, &shard_layout); + match self.client.state_sync_adapter.clone().read() { + Ok(sync_adapter) => sync_adapter.send( + shard_uid, + (SyncMessage::StartSync(SyncShardInfo { + shard_uid, + sync_hash, + })) + .with_span_context(), + ), + Err(_) => { + error!(target:"client", "State sync adapter lock is poisoned.") + } + } + } + } + + let now = self.clock.now_utc(); + + // FIXME: it checks if the block exists.. but I have no idea why.. + // seems that we don't really use this block in case of catchup - we use it only for state sync. + // Seems it is related to some bug with block getting orphaned after state sync? but not sure. 
+ let (request_block, have_block) = + unwrap_and_report!(self.sync_block_status(&prev_hash, now)); + + if request_block { + self.client.last_time_sync_block_requested = Some(now); + if let Some(peer_info) = + self.network_info.highest_height_peers.choose(&mut thread_rng()) + { + let id = peer_info.peer_info.id.clone(); + + for hash in + vec![*block_header.prev_hash(), *block_header.hash()].into_iter() + { + self.client.request_block(hash, id.clone()); + } + } + } + if have_block { + self.client.last_time_sync_block_requested = None; + } + + let state_sync_status = match &mut self.client.sync_status { + SyncStatus::StateSync(s) => s, + _ => unreachable!("Sync status should have been StateSync!"), + }; + match unwrap_and_report!(self.client.state_sync.run( + &me, + sync_hash, + &mut state_sync_status.sync_status, + &mut self.client.chain, + self.client.epoch_manager.as_ref(), + &self.network_info.highest_height_peers, + shards_to_sync, + &self.state_parts_task_scheduler, + &self.resharding_scheduler, + &self.state_parts_client_arbiter.handle(), + use_colour, + self.client.runtime_adapter.clone(), + )) { + StateSyncResult::InProgress => (), + StateSyncResult::Completed => { + if !have_block { + trace!(target: "sync", "Sync done. Waiting for sync block."); + return; + } + info!(target: "sync", "State sync: all shards are done"); + + let mut block_processing_artifacts = BlockProcessingArtifact::default(); + + unwrap_and_report!(self.client.chain.reset_heads_post_state_sync( + &me, + sync_hash, + &mut block_processing_artifacts, + self.get_apply_chunks_done_callback(), + )); + + self.client + .process_block_processing_artifact(block_processing_artifacts); + self.client.sync_status.update(SyncStatus::BlockSync { + start_height: 0, + current_height: 0, + highest_height: 0, + }); + } + } + } + } + } + } + + /// Verifies if the node possesses sync block. + /// It is the last block of the previous epoch. + /// If the block is absent, the node requests it from peers. + fn sync_block_status( + &self, + prev_hash: &CryptoHash, + now: Utc, + ) -> Result<(bool, bool), near_chain::Error> { + let (request_block, have_block) = if !self.client.chain.block_exists(prev_hash)? { + let timeout = + near_async::time::Duration::try_from(self.client.config.state_sync_timeout) + .unwrap(); + match self.client.last_time_sync_block_requested { + None => (true, false), + Some(last_time) => { + if (now - last_time) >= timeout { + tracing::error!( + target: "sync", + %prev_hash, + ?timeout, + "State sync: block request timed out"); + (true, false) + } else { + (false, false) + } + } + } + } else { + (false, true) + }; + Ok((request_block, have_block)) + } + + /// Print current summary. + fn log_summary(&mut self) { + let _span = tracing::debug_span!(target: "client", "log_summary").entered(); + self.info_helper.log_summary( + &self.client, + &self.node_id, + &self.network_info, + &self.config_updater, + ) + } + + /// Checks if the node is syncing its State and applies special logic in that case. + /// A node usually ignores blocks that are too far ahead, but in case of a node syncing its state it is looking for 2 specific blocks: + /// * The first block of the new epoch + /// * The last block of the prev epoch + /// Returns whether the node is syncing its state. + fn maybe_receive_state_sync_blocks(&mut self, block: &Block) -> bool { + let SyncStatus::StateSync(StateSyncStatus { sync_hash, .. 
+        }) = self.client.sync_status
+        else {
+            return false;
+        };
+        if let Ok(header) = self.client.chain.get_block_header(&sync_hash) {
+            let block: MaybeValidated<Block> = (*block).clone().into();
+            let block_hash = *block.hash();
+            if let Err(err) = self.client.chain.validate_block(&block) {
+                byzantine_assert!(false);
+                error!(target: "client", ?err, ?block_hash, "Received an invalid block during state sync");
+            }
+            // Notice that the two blocks are saved differently:
+            // * save_block() for one block.
+            // * save_orphan() for the other block.
+            if &block_hash == header.prev_hash() {
+                // The last block of the previous epoch.
+                if let Err(err) = self.client.chain.save_block(block) {
+                    error!(target: "client", ?err, ?block_hash, "Failed to save a block during state sync");
+                }
+            } else if block_hash == sync_hash {
+                // The first block of the new epoch.
+                self.client.chain.save_orphan(block, Provenance::NONE, None, false);
+            }
+        }
+        true
+    }
+}
+
+impl ClientActionHandler<ApplyStatePartsResponse> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: ApplyStatePartsResponse) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) {
+            // We are doing catchup
+            sync.set_apply_result(msg.shard_id, msg.apply_result);
+        } else {
+            self.client.state_sync.set_apply_result(msg.shard_id, msg.apply_result);
+        }
+    }
+}
+
+impl ClientActionHandler<BlockCatchUpResponse> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: BlockCatchUpResponse) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        if let Some((_, _, blocks_catch_up_state)) =
+            self.client.catchup_state_syncs.get_mut(&msg.sync_hash)
+        {
+            assert!(blocks_catch_up_state.scheduled_blocks.remove(&msg.block_hash));
+            blocks_catch_up_state.processed_blocks.insert(
+                msg.block_hash,
+                msg.results.into_iter().map(|res| res.1).collect::<Vec<_>>(),
+            );
+        } else {
+            panic!("block catch up processing result from unknown sync hash");
+        }
+    }
+}
+
+impl ClientActionHandler<ReshardingResponse> for ClientActions {
+    type Result = ();
+
+    #[perf]
+    fn handle(&mut self, msg: ReshardingResponse) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) {
+            // We are doing catchup
+            sync.set_resharding_result(msg.shard_id, msg.new_state_roots);
+        } else {
+            self.client.state_sync.set_resharding_result(msg.shard_id, msg.new_state_roots);
+        }
+    }
+}
+
+impl ClientActionHandler<ShardsManagerResponse> for ClientActions {
+    type Result = ();
+
+    #[perf]
+    fn handle(&mut self, msg: ShardsManagerResponse) -> Self::Result {
+        match msg {
+            ShardsManagerResponse::ChunkCompleted { partial_chunk, shard_chunk } => {
+                self.client.on_chunk_completed(
+                    partial_chunk,
+                    shard_chunk,
+                    self.get_apply_chunks_done_callback(),
+                );
+            }
+            ShardsManagerResponse::InvalidChunk(encoded_chunk) => {
+                self.client.on_invalid_chunk(encoded_chunk);
+            }
+            ShardsManagerResponse::ChunkHeaderReadyForInclusion {
+                chunk_header,
+                chunk_producer,
+            } => {
+                self.client
+                    .chunk_inclusion_tracker
+                    .mark_chunk_header_ready_for_inclusion(chunk_header, chunk_producer);
+            }
+        }
+    }
+}
+
+impl ClientActionHandler<GetClientConfig> for ClientActions {
+    type Result = Result<ClientConfig, GetClientConfigError>;
+
+    fn handle(&mut self, msg: GetClientConfig) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+
+        Ok(self.client.config.clone())
+    }
+}
+
+impl ClientActionHandler<SyncMessage> for ClientActions {
+    type Result = ();
+
+    fn handle(&mut self, msg: SyncMessage) -> Self::Result {
+        tracing::debug!(target: "client", ?msg);
+        // TODO
+        // process messages from SyncActors
+    }
+}
+
+impl ClientActions {
+    pub fn handle_state_witness_message(
+        &mut self,
+        msg: ChunkStateWitnessMessage,
+        ctx: &mut dyn DelayedActionRunner<Self>,
+    ) {
+        let peer_id = msg.peer_id.clone();
+        let attempts_remaining = msg.attempts_remaining;
+        match self.client.process_chunk_state_witness(msg.witness, msg.peer_id, None) {
+            Err(err) => {
+                tracing::error!(target: "client", ?err, "Error processing chunk state witness");
+            }
+            Ok(Some(witness)) => {
+                if attempts_remaining > 0 {
+                    ctx.run_later(Duration::from_millis(100), move |actions, ctx| {
+                        actions.handle_state_witness_message(
+                            ChunkStateWitnessMessage {
+                                witness,
+                                peer_id,
+                                attempts_remaining: attempts_remaining - 1,
+                            },
+                            ctx,
+                        );
+                    });
+                } else {
+                    tracing::error!(target: "client", "Failed to process chunk state witness even after 5 tries due to missing parent block");
+                }
+            }
+            Ok(None) => {}
+        }
+    }
+}
+
+impl ClientActionHandler<ChunkEndorsementMessage> for ClientActions {
+    type Result = ();
+
+    #[perf]
+    fn handle(&mut self, msg: ChunkEndorsementMessage) -> Self::Result {
+        if let Err(err) = self.client.process_chunk_endorsement(msg.0) {
+            tracing::error!(target: "client", ?err, "Error processing chunk endorsement");
+        }
+    }
+}
+
+/// Returns a random seed sampled from the current thread's RNG.
+pub fn random_seed_from_thread() -> RngSeed {
+    let mut rng_seed: RngSeed = [0; 32];
+    rand::thread_rng().fill(&mut rng_seed);
+    rng_seed
+}
diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs
index 29504e6ddfe..74033415dd4 100644
--- a/chain/client/src/client_actor.rs
+++ b/chain/client/src/client_actor.rs
@@ -5,156 +5,61 @@
 //! Unfortunately, this is not the case today. We are in the process of refactoring ClientActor
 //! https://github.com/near/nearcore/issues/7899
 
-#[cfg(feature = "test_features")]
-use crate::client::AdvProduceBlocksMode;
-use crate::client::{Client, EPOCH_START_INFO_BLOCKS};
-use crate::config_updater::ConfigUpdater;
-use crate::debug::new_network_info_view;
-use crate::info::{display_sync_status, InfoHelper};
-use crate::sync::adapter::{SyncMessage, SyncShardInfo};
-use crate::sync::state::{StateSync, StateSyncResult};
-use crate::sync_jobs_actor::{create_sync_job_scheduler, SyncJobsActor};
-use crate::{metrics, StatusResponse, SyncAdapter};
-use actix::{Actor, Addr, Arbiter, AsyncContext, Context, Handler};
-use actix_rt::ArbiterHandle;
+use actix::{Actor, Addr, AsyncContext, Context, Handler};
+use actix_rt::{Arbiter, ArbiterHandle};
 use chrono::{DateTime, Utc};
-use itertools::Itertools;
-use near_async::messaging::{CanSend, Sender};
-use near_chain::chain::{
-    ApplyStatePartsRequest, ApplyStatePartsResponse, BlockCatchUpRequest, BlockCatchUpResponse,
-};
-use near_chain::resharding::{ReshardingRequest, ReshardingResponse};
+use near_async::actix::AddrWithAutoSpanContextExt;
+use near_async::messaging::{IntoMultiSender, Sender};
+use near_async::time::Clock;
+use near_chain::chain::{ApplyStatePartsRequest, BlockCatchUpRequest};
+use near_chain::resharding::ReshardingRequest;
 use near_chain::state_snapshot_actor::SnapshotCallbacks;
-use near_chain::test_utils::format_hash;
 use near_chain::types::RuntimeAdapter;
-#[cfg(feature = "test_features")]
-use near_chain::ChainStoreAccess;
-use near_chain::{
-    byzantine_assert, near_chain_primitives, Block, BlockHeader, BlockProcessingArtifact,
-    ChainGenesis, DoneApplyChunkCallback, Provenance,
-};
-use near_chain_configs::{ClientConfig, LogSummaryStyle, ReshardingHandle};
-use 
near_chain_primitives::error::EpochErrorResultToChainError; +use near_chain::ChainGenesis; +use near_chain_configs::{ClientConfig, ReshardingHandle}; use near_chunks::adapter::ShardsManagerRequestFromClient; -use near_chunks::client::ShardsManagerResponse; -use near_chunks::logic::cares_about_shard_this_or_next_epoch; -use near_client_primitives::types::{ - Error, GetClientConfig, GetClientConfigError, GetNetworkInfo, NetworkInfoResponse, - StateSyncStatus, Status, StatusError, StatusSyncInfo, SyncStatus, -}; +use near_client_primitives::types::Error; use near_epoch_manager::shard_tracker::ShardTracker; -use near_epoch_manager::EpochManagerAdapter; -use near_network::client::{ - BlockApproval, BlockHeadersResponse, BlockResponse, ChunkEndorsementMessage, - ChunkStateWitnessMessage, ProcessTxRequest, ProcessTxResponse, RecvChallenge, SetNetworkInfo, - StateResponse, -}; -use near_network::types::ReasonForBan; -use near_network::types::{ - NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, -}; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; -use near_performance_metrics; -use near_performance_metrics_macros::perf; -use near_primitives::block::Tip; -use near_primitives::block_header::ApprovalType; -use near_primitives::epoch_manager::RngSeed; -use near_primitives::hash::CryptoHash; -use near_primitives::network::{AnnounceAccount, PeerId}; +use near_epoch_manager::{EpochManagerAdapter, RngSeed}; +use near_network::client::ChunkStateWitnessMessage; +use near_network::types::PeerManagerAdapter; +use near_o11y::{handler_debug_span, WithSpanContext}; +use near_primitives::network::PeerId; use near_primitives::static_clock::StaticClock; -use near_primitives::types::BlockHeight; -use near_primitives::unwrap_or_return; -use near_primitives::utils::{from_timestamp, MaybeValidated}; use near_primitives::validator_signer::ValidatorSigner; -use near_primitives::version::PROTOCOL_VERSION; -use near_primitives::views::{DetailedDebugStatus, ValidatorInfo}; -#[cfg(feature = "test_features")] -use near_store::DBCol; -use near_store::ShardUId; use near_telemetry::TelemetryActor; -use rand::seq::SliceRandom; -use rand::{thread_rng, Rng}; -use std::collections::HashMap; -use std::fmt; +use rand::Rng; +use std::fmt::Debug; +use std::ops::{Deref, DerefMut}; use std::sync::{Arc, RwLock}; -use std::thread; -use std::time::{Duration, Instant}; use tokio::sync::broadcast; -use tracing::{debug, debug_span, error, info, trace, warn}; -/// Multiplier on `max_block_time` to wait until deciding that chain stalled. -const STATUS_WAIT_TIME_MULTIPLIER: u64 = 10; -/// `max_block_production_time` times this multiplier is how long we wait before rebroadcasting -/// the current `head` -const HEAD_STALL_MULTIPLIER: u32 = 4; +use crate::client_actions::{ClientActionHandler, ClientActions, ClientSenderForClient}; +use crate::sync_jobs_actor::{create_sync_job_scheduler, SyncJobsActor}; +use crate::{metrics, Client, ConfigUpdater, SyncAdapter}; pub struct ClientActor { - /// Adversarial controls - pub adv: crate::adversarial::Controls, - - // Address of this ClientActor. Can be used to send messages to self. - my_address: Addr, - pub(crate) client: Client, - network_adapter: PeerManagerAdapter, - network_info: NetworkInfo, - /// Identity that represents this Client at the network level. - /// It is used as part of the messages that identify this client. - node_id: PeerId, - /// Last time we announced our accounts as validators. 
- last_validator_announce_time: Option, - /// Info helper. - info_helper: InfoHelper, - - /// Last time handle_block_production method was called - block_production_next_attempt: DateTime, - - // Last time when log_summary method was called. - log_summary_timer_next_attempt: DateTime, - - block_production_started: bool, - doomslug_timer_next_attempt: DateTime, - sync_timer_next_attempt: DateTime, - sync_started: bool, - state_parts_task_scheduler: Box, - block_catch_up_scheduler: Box, - resharding_scheduler: Box, - state_parts_client_arbiter: Arbiter, - - #[cfg(feature = "sandbox")] - fastforward_delta: near_primitives::types::BlockHeightDelta, - - /// Synchronization measure to allow graceful shutdown. - /// Informs the system when a ClientActor gets dropped. - shutdown_signal: Option>, + actions: ClientActions, +} - /// Manages updating the config. - config_updater: Option, +impl Deref for ClientActor { + type Target = ClientActions; + fn deref(&self) -> &ClientActions { + &self.actions + } } -/// Blocks the program until given genesis time arrives. -fn wait_until_genesis(genesis_time: &DateTime) { - loop { - // Get chrono::Duration::num_seconds() by deducting genesis_time from now. - let duration = genesis_time.signed_duration_since(StaticClock::utc()); - let chrono_seconds = duration.num_seconds(); - // Check if number of seconds in chrono::Duration larger than zero. - if chrono_seconds <= 0 { - break; - } - info!(target: "near", "Waiting until genesis: {}d {}h {}m {}s", duration.num_days(), - (duration.num_hours() % 24), - (duration.num_minutes() % 60), - (duration.num_seconds() % 60)); - let wait = - std::cmp::min(Duration::from_secs(10), Duration::from_secs(chrono_seconds as u64)); - thread::sleep(wait); +impl DerefMut for ClientActor { + fn deref_mut(&mut self) -> &mut ClientActions { + &mut self.actions } } impl ClientActor { pub fn new( + clock: Clock, client: Client, - address: Addr, + myself_sender: ClientSenderForClient, config: ClientConfig, node_id: PeerId, network_adapter: PeerManagerAdapter, @@ -175,80 +80,34 @@ impl ClientActor { SyncJobsActor { client_addr: self_addr_clone } }, ); - if let Some(vs) = &validator_signer { - info!(target: "client", "Starting validator node: {}", vs.validator_id()); - } - let info_helper = InfoHelper::new(Some(telemetry_actor), &config, validator_signer.clone()); - - let now = Utc::now(); - Ok(ClientActor { - adv, - my_address: address, + let actions = ClientActions::new( + clock, client, - network_adapter, + myself_sender, + config, node_id, - network_info: NetworkInfo { - connected_peers: vec![], - tier1_connections: vec![], - num_connected_peers: 0, - peer_max_count: 0, - highest_height_peers: vec![], - received_bytes_per_sec: 0, - sent_bytes_per_sec: 0, - known_producers: vec![], - tier1_accounts_keys: vec![], - tier1_accounts_data: vec![], - }, - last_validator_announce_time: None, - info_helper, - block_production_next_attempt: now, - log_summary_timer_next_attempt: now, - block_production_started: false, - doomslug_timer_next_attempt: now, - sync_timer_next_attempt: now, - sync_started: false, - state_parts_task_scheduler: create_sync_job_scheduler::( - sync_jobs_actor_addr.clone(), - ), - block_catch_up_scheduler: create_sync_job_scheduler::( - sync_jobs_actor_addr.clone(), - ), - resharding_scheduler: create_sync_job_scheduler::( - sync_jobs_actor_addr, - ), - state_parts_client_arbiter: state_parts_arbiter, - - #[cfg(feature = "sandbox")] - fastforward_delta: 0, + network_adapter, + validator_signer, + telemetry_actor, 
+            shutdown_signal,
+            adv,
+            config_updater,
-        })
+            create_sync_job_scheduler::<ApplyStatePartsRequest>(sync_jobs_actor_addr.clone()),
+            create_sync_job_scheduler::<BlockCatchUpRequest>(sync_jobs_actor_addr.clone()),
+            create_sync_job_scheduler::<ReshardingRequest>(sync_jobs_actor_addr),
+            state_parts_arbiter,
+        )?;
+        Ok(Self { actions })
     }
+
+    // NOTE: Do not add any more functionality to ClientActor. Add to ClientActions instead.
 }
 
 impl Actor for ClientActor {
     type Context = Context<Self>;
 
     fn started(&mut self, ctx: &mut Self::Context) {
-        self.start_flat_storage_creation(ctx);
-
-        // Start syncing job.
-        self.start_sync(ctx);
-
-        // Start block production tracking if we have block producer info.
-        if self.client.validator_signer.is_some() {
-            self.block_production_started = true;
-        }
-
-        // Start triggers
-        self.schedule_triggers(ctx);
-
-        // Start catchup job.
-        self.catchup(ctx);
-
-        if let Err(err) = self.client.send_network_chain_info() {
-            error!(target: "client", ?err, "Failed to update network chain info");
-        }
+        self.actions.start(ctx);
     }
 }
 
@@ -265,1790 +124,42 @@ impl ClientActor {
         msg: WithSpanContext<Req>,
         ctx: &mut Context<Self>,
         msg_type: &str,
-        f: impl FnOnce(&mut Self, Req) -> Res,
+        f: impl FnOnce(&mut Self, Req, &mut Context<Self>) -> Res,
     ) -> Res {
         let (_span, msg) = handler_debug_span!(target: "client", msg, msg_type);
-        self.check_triggers(ctx);
+        self.actions.check_triggers(ctx);
         let _span_inner = tracing::debug_span!(target: "client", "NetworkClientMessage").entered();
         metrics::CLIENT_MESSAGES_COUNT.with_label_values(&[msg_type]).inc();
         let timer =
             metrics::CLIENT_MESSAGES_PROCESSING_TIME.with_label_values(&[msg_type]).start_timer();
-        let res = f(self, msg);
+        let res = f(self, msg, ctx);
         timer.observe_duration();
         res
     }
 }
 
-#[cfg(feature = "test_features")]
-#[derive(actix::Message, Debug)]
-#[rtype(result = "Option<u64>")]
-pub enum NetworkAdversarialMessage {
-    AdvProduceBlocks(u64, bool),
-    AdvSwitchToHeight(u64),
-    AdvDisableHeaderSync,
-    AdvDisableDoomslug,
-    AdvGetSavedBlocks,
-    AdvCheckStorageConsistency,
-}
-
-#[cfg(feature = "test_features")]
-impl Handler<WithSpanContext<NetworkAdversarialMessage>> for ClientActor {
-    type Result = Option<u64>;
-
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<NetworkAdversarialMessage>,
-        ctx: &mut Context<Self>,
-    ) -> Self::Result {
-        self.wrap(msg, ctx, "NetworkAdversarialMessage", |this, msg| match msg {
-            NetworkAdversarialMessage::AdvDisableDoomslug => {
-                info!(target: "adversary", "Turning Doomslug off");
-                this.adv.set_disable_doomslug(true);
-                this.client.doomslug.adv_disable();
-                this.client.chain.adv_disable_doomslug();
-                None
-            }
-            NetworkAdversarialMessage::AdvDisableHeaderSync => {
-                info!(target: "adversary", "Blocking header sync");
-                this.adv.set_disable_header_sync(true);
-                None
-            }
-            NetworkAdversarialMessage::AdvProduceBlocks(
-                num_blocks,
-                only_valid,
-            ) => {
-                info!(target: "adversary", num_blocks, "Starting adversary blocks production");
-                if only_valid {
-                    this.client.adv_produce_blocks = Some(AdvProduceBlocksMode::OnlyValid);
-                } else {
-                    this.client.adv_produce_blocks = Some(AdvProduceBlocksMode::All);
-                }
-                let start_height =
-                    this.client.chain.mut_chain_store().get_latest_known().unwrap().height + 1;
-                let mut blocks_produced = 0;
-                for height in start_height..
{ - let block = this - .client - .produce_block(height) - .expect("block should be produced"); - if only_valid && block == None { - continue; - } - let block = block.expect("block should exist after produced"); - info!(target: "adversary", blocks_produced, num_blocks, height, "Producing adversary block"); - this.network_adapter.send( - PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::Block { block: block.clone() }, - ) - ); - let _ = this.client.start_process_block( - block.into(), - Provenance::PRODUCED, - this.get_apply_chunks_done_callback(), - ); - blocks_produced += 1; - if blocks_produced == num_blocks { - break; - } - } - None - } - NetworkAdversarialMessage::AdvSwitchToHeight(height) => { - info!(target: "adversary", "Switching to height {:?}", height); - let mut chain_store_update = this.client.chain.mut_chain_store().store_update(); - chain_store_update.save_largest_target_height(height); - chain_store_update - .adv_save_latest_known(height) - .expect("adv method should not fail"); - chain_store_update.commit().expect("adv method should not fail"); - None - } - NetworkAdversarialMessage::AdvGetSavedBlocks => { - info!(target: "adversary", "Requested number of saved blocks"); - let store = this.client.chain.chain_store().store(); - let mut num_blocks = 0; - for _ in store.iter(DBCol::Block) { - num_blocks += 1; - } - Some(num_blocks) - } - NetworkAdversarialMessage::AdvCheckStorageConsistency => { - // timeout is set to 1.5 seconds to give some room as we wait in Nightly for 2 seconds - let timeout = 1500; - info!(target: "adversary", "Check Storage Consistency, timeout set to {:?} milliseconds", timeout); - let mut genesis = near_chain_configs::GenesisConfig::default(); - genesis.genesis_height = this.client.chain.chain_store().get_genesis_height(); - let mut store_validator = near_chain::store_validator::StoreValidator::new( - this.client.validator_signer.as_ref().map(|x| x.validator_id().clone()), - genesis, - this.client.epoch_manager.clone(), - this.client.shard_tracker.clone(), - this.client.runtime_adapter.clone(), - this.client.chain.chain_store().store().clone(), - this.adv.is_archival(), - ); - store_validator.set_timeout(timeout); - store_validator.validate(); - if store_validator.is_failed() { - error!(target: "client", "Storage Validation failed, {:?}", store_validator.errors); - Some(0) - } else { - Some(store_validator.tests_done()) - } - } - }) - } -} - -impl Handler> for ClientActor { - type Result = ProcessTxResponse; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - self.wrap(msg, ctx, "ProcessTxRequest", |this: &mut Self, msg| { - let ProcessTxRequest { transaction, is_forwarded, check_only } = msg; - this.client.process_tx(transaction, is_forwarded, check_only) - }) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "BlockResponse", |this: &mut Self, msg|{ - let BlockResponse{ block, peer_id, was_requested } = msg; - debug!(target: "client", block_height = block.header().height(), block_hash = ?block.header().hash(), "BlockResponse"); - let blocks_at_height = this - .client - .chain - .chain_store() - .get_all_block_hashes_by_height(block.header().height()); - if was_requested || blocks_at_height.is_err() || blocks_at_height.as_ref().unwrap().is_empty() { - // This is a very sneaky piece of logic. - if this.maybe_receive_state_sync_blocks(&block) { - // A node is syncing its state. 
Don't consider receiving - // blocks other than the few special ones that State Sync expects. - return; - } - this.client.receive_block( - block, - peer_id, - was_requested, - this.get_apply_chunks_done_callback(), - ); - } else { - match this - .client - .epoch_manager - .get_epoch_id_from_prev_block(block.header().prev_hash()) - { - Ok(epoch_id) => { - if let Some(hashes) = blocks_at_height.unwrap().get(&epoch_id) { - if !hashes.contains(block.header().hash()) { - warn!(target: "client", "Rejecting unrequested block {}, height {}", block.header().hash(), block.header().height()); - } - } - } - _ => {} - } - } - }) - } -} - -impl Handler> for ClientActor { - type Result = Result<(), ReasonForBan>; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - self.wrap(msg, ctx, "BlockHeadersResponse", |this, msg| { - let BlockHeadersResponse(headers, peer_id) = msg; - if this.receive_headers(headers, peer_id) { - Ok(()) - } else { - warn!(target: "client", "Banning node for sending invalid block headers"); - Err(ReasonForBan::BadBlockHeader) - } - }) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "BlockApproval", |this, msg| { - let BlockApproval(approval, peer_id) = msg; - debug!(target: "client", "Receive approval {:?} from peer {:?}", approval, peer_id); - this.client.collect_block_approval(&approval, ApprovalType::PeerApproval(peer_id)); - }) - } -} - -/// StateResponse is used during StateSync and catchup. -/// It contains either StateSync header information (that tells us how many parts there are etc) or a single part. -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "StateResponse", |this, msg| { - let StateResponse(state_response_info) = msg; - let shard_id = state_response_info.shard_id(); - let hash = state_response_info.sync_hash(); - let state_response = state_response_info.take_state_response(); - - trace!(target: "sync", "Received state response shard_id: {} sync_hash: {:?} part(id/size): {:?}", - shard_id, - hash, - state_response.part().as_ref().map(|(part_id, data)| (part_id, data.len())) - ); - // Get the download that matches the shard_id and hash - - // ... It could be that the state was requested by the state sync - if let SyncStatus::StateSync(StateSyncStatus{ sync_hash, sync_status: shards_to_download }) = - &mut this.client.sync_status - { - if hash == *sync_hash { - if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { - this.client.state_sync.update_download_on_state_response_message(shard_download, hash, shard_id, state_response, &mut this.client.chain); - return; - } - } - } - - // ... 
Or one of the catchups - if let Some((state_sync, shards_to_download, _)) = - this.client.catchup_state_syncs.get_mut(&hash) - { - if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { - state_sync.update_download_on_state_response_message(shard_download, hash, shard_id, state_response, &mut this.client.chain); - return; - } - } - - error!(target: "sync", "State sync received hash {} that we're not expecting, potential malicious peer or a very delayed response.", hash); - }) - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - self.wrap(msg, ctx, "RecvChallenge", |this, msg| { - let RecvChallenge(challenge) = msg; - match this.client.process_challenge(challenge) { - Ok(_) => {} - Err(err) => error!(target: "client", "Error processing challenge: {}", err), - } - }); - } -} - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { - // SetNetworkInfo is a large message. Avoid printing it at the `debug` verbosity. - self.wrap(msg, ctx, "SetNetworkInfo", |this, msg| { - let SetNetworkInfo(network_info) = msg; - this.network_info = network_info; - }) - } -} - -#[cfg(feature = "sandbox")] -impl Handler> for ClientActor { - type Result = near_client_primitives::types::SandboxResponse; - - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Context, - ) -> near_client_primitives::types::SandboxResponse { - let (_span, msg) = handler_debug_span!(target: "client", msg); - match msg { - near_client_primitives::types::SandboxMessage::SandboxPatchState(state) => { - self.client.chain.patch_state( - near_primitives::sandbox::state_patch::SandboxStatePatch::new(state), - ); - near_client_primitives::types::SandboxResponse::SandboxNoResponse - } - near_client_primitives::types::SandboxMessage::SandboxPatchStateStatus => { - near_client_primitives::types::SandboxResponse::SandboxPatchStateFinished( - !self.client.chain.patch_state_in_progress(), - ) - } - near_client_primitives::types::SandboxMessage::SandboxFastForward(delta_height) => { - if self.fastforward_delta > 0 { - return near_client_primitives::types::SandboxResponse::SandboxFastForwardFailed( - "Consecutive fast_forward requests cannot be made while a current one is going on.".to_string()); - } - - self.fastforward_delta = delta_height; - near_client_primitives::types::SandboxResponse::SandboxNoResponse - } - near_client_primitives::types::SandboxMessage::SandboxFastForwardStatus => { - near_client_primitives::types::SandboxResponse::SandboxFastForwardFinished( - self.fastforward_delta == 0, - ) - } - } - } -} - -impl Handler> for ClientActor { - type Result = Result; - - #[perf] - fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) -> Self::Result { - let (_span, msg) = handler_debug_span!(target: "client", msg); - tracing::debug!(target: "client", ?msg); - self.check_triggers(ctx); - - let head = self.client.chain.head()?; - let head_header = self.client.chain.get_block_header(&head.last_block_hash)?; - let latest_block_time = head_header.raw_timestamp(); - let latest_state_root = *head_header.prev_state_root(); - if msg.is_health_check { - let now = Utc::now(); - let block_timestamp = from_timestamp(latest_block_time); - if now > block_timestamp { - let elapsed = (now - block_timestamp).to_std().unwrap(); - if elapsed - > Duration::from_millis( - self.client.config.max_block_production_delay.as_millis() as u64 - * STATUS_WAIT_TIME_MULTIPLIER, - ) 
- { - return Err(StatusError::NoNewBlocks { elapsed }); - } - } - - if self.client.sync_status.is_syncing() { - return Err(StatusError::NodeIsSyncing); - } - } - let validators: Vec = self - .client - .epoch_manager - .get_epoch_block_producers_ordered(&head.epoch_id, &head.last_block_hash) - .into_chain_error()? - .into_iter() - .map(|(validator_stake, is_slashed)| ValidatorInfo { - account_id: validator_stake.take_account_id(), - is_slashed, - }) - .collect(); - - let epoch_start_height = - self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash).ok(); - - let protocol_version = self - .client - .epoch_manager - .get_epoch_protocol_version(&head.epoch_id) - .into_chain_error()?; - - let node_public_key = self.node_id.public_key().clone(); - let (validator_account_id, validator_public_key) = match &self.client.validator_signer { - Some(vs) => (Some(vs.validator_id().clone()), Some(vs.public_key())), - None => (None, None), - }; - let node_key = validator_public_key.clone(); - - let mut earliest_block_hash = None; - let mut earliest_block_height = None; - let mut earliest_block_time = None; - if let Some(earliest_block_hash_value) = self.client.chain.get_earliest_block_hash()? { - earliest_block_hash = Some(earliest_block_hash_value); - if let Ok(earliest_block) = - self.client.chain.get_block_header(&earliest_block_hash_value) - { - earliest_block_height = Some(earliest_block.height()); - earliest_block_time = Some(earliest_block.timestamp()); - } - } - // Provide more detailed information about the current state of chain. - // For now - provide info about last 50 blocks. - let detailed_debug_status = if msg.detailed { - Some(DetailedDebugStatus { - network_info: new_network_info_view(&self.client.chain, &self.network_info), - sync_status: format!( - "{} ({})", - self.client.sync_status.as_variant_name(), - display_sync_status( - &self.client.sync_status, - &self.client.chain.head()?, - &self.client.config.state_sync.sync, - ), - ), - catchup_status: self.client.get_catchup_status()?, - current_head_status: head.clone().into(), - current_header_head_status: self.client.chain.header_head()?.into(), - block_production_delay_millis: self - .client - .config - .min_block_production_delay - .as_millis() as u64, - }) - } else { - None - }; - let uptime_sec = StaticClock::utc().timestamp() - self.info_helper.boot_time_seconds; - Ok(StatusResponse { - version: self.client.config.version.clone(), - protocol_version, - latest_protocol_version: PROTOCOL_VERSION, - chain_id: self.client.config.chain_id.clone(), - rpc_addr: self.client.config.rpc_addr.clone(), - validators, - sync_info: StatusSyncInfo { - latest_block_hash: head.last_block_hash, - latest_block_height: head.height, - latest_state_root, - latest_block_time: from_timestamp(latest_block_time), - syncing: self.client.sync_status.is_syncing(), - earliest_block_hash, - earliest_block_height, - earliest_block_time, - epoch_id: Some(head.epoch_id), - epoch_start_height, - }, - validator_account_id, - validator_public_key, - node_public_key, - node_key, - uptime_sec, - detailed_debug_status, - }) - } -} - -/// Private to public API conversion. -fn make_peer_info(from: near_network::types::PeerInfo) -> near_client_primitives::types::PeerInfo { - near_client_primitives::types::PeerInfo { - id: from.id, - addr: from.addr, - account_id: from.account_id, - } -} - -/// Private to public API conversion. 
-fn make_known_producer( - from: near_network::types::KnownProducer, -) -> near_client_primitives::types::KnownProducer { - near_client_primitives::types::KnownProducer { - peer_id: from.peer_id, - account_id: from.account_id, - addr: from.addr, - next_hops: from.next_hops, - } -} - -impl Handler> for ClientActor { - type Result = Result; - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - ctx: &mut Context, - ) -> Self::Result { - let (_span, _msg) = handler_debug_span!(target: "client", msg); - self.check_triggers(ctx); - - Ok(NetworkInfoResponse { - connected_peers: (self.network_info.connected_peers.iter()) - .map(|fpi| make_peer_info(fpi.full_peer_info.peer_info.clone())) - .collect(), - num_connected_peers: self.network_info.num_connected_peers, - peer_max_count: self.network_info.peer_max_count, - sent_bytes_per_sec: self.network_info.sent_bytes_per_sec, - received_bytes_per_sec: self.network_info.received_bytes_per_sec, - known_producers: self - .network_info - .known_producers - .iter() - .map(|p| make_known_producer(p.clone())) - .collect(), - }) - } -} - -/// `ApplyChunksDoneMessage` is a message that signals the finishing of applying chunks of a block. -/// Upon receiving this message, ClientActors knows that it's time to finish processing the blocks that -/// just finished applying chunks. -#[derive(actix::Message, Debug)] -#[rtype(result = "()")] -pub struct ApplyChunksDoneMessage; - -impl Handler> for ClientActor { - type Result = (); - - #[perf] - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Self::Context, - ) -> Self::Result { - let (_span, _msg) = handler_debug_span!(target: "client", msg); - self.try_process_unfinished_blocks(); - } -} - -#[derive(Debug)] -enum SyncRequirement { - SyncNeeded { peer_id: PeerId, highest_height: BlockHeight, head: Tip }, - AlreadyCaughtUp { peer_id: PeerId, highest_height: BlockHeight, head: Tip }, - NoPeers, - AdvHeaderSyncDisabled, -} - -impl SyncRequirement { - fn sync_needed(&self) -> bool { - matches!(self, Self::SyncNeeded { .. }) - } - - fn to_metrics_string(&self) -> String { - match self { - Self::SyncNeeded { .. } => "SyncNeeded", - Self::AlreadyCaughtUp { .. } => "AlreadyCaughtUp", - Self::NoPeers => "NoPeers", - Self::AdvHeaderSyncDisabled { .. } => "AdvHeaderSyncDisabled", - } - .to_string() - } -} - -impl fmt::Display for SyncRequirement { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::SyncNeeded { peer_id, highest_height, head: my_head } => write!( - f, - "sync needed at #{} [{}]. highest height peer: {} at #{}", - my_head.height, - format_hash(my_head.last_block_hash), - peer_id, - highest_height - ), - Self::AlreadyCaughtUp { peer_id, highest_height, head: my_head } => write!( - f, - "synced at #{} [{}]. highest height peer: {} at #{}", - my_head.height, - format_hash(my_head.last_block_hash), - peer_id, - highest_height - ), - Self::NoPeers => write!(f, "no available peers"), - Self::AdvHeaderSyncDisabled => { - write!(f, "syncing disabled via adv_disable_header_sync") - } - } - } -} - -impl ClientActor { - /// Check if client Account Id should be sent and send it. - /// Account Id is sent when is not current a validator but are becoming a validator soon. - fn check_send_announce_account(&mut self, prev_block_hash: CryptoHash) { - // If no peers, there is no one to announce to. 
- if self.network_info.num_connected_peers == 0 { - debug!(target: "client", "No peers: skip account announce"); - return; - } - - // First check that we currently have an AccountId - let validator_signer = match self.client.validator_signer.as_ref() { - None => return, - Some(signer) => signer, - }; - - let now = StaticClock::instant(); - // Check that we haven't announced it too recently - if let Some(last_validator_announce_time) = self.last_validator_announce_time { - // Don't make announcement if have passed less than half of the time in which other peers - // should remove our Account Id from their Routing Tables. - if 2 * (now - last_validator_announce_time) < self.client.config.ttl_account_id_router { - return; - } - } - - debug!(target: "client", "Check announce account for {}, last announce time {:?}", validator_signer.validator_id(), self.last_validator_announce_time); - - // Announce AccountId if client is becoming a validator soon. - let next_epoch_id = unwrap_or_return!(self - .client - .epoch_manager - .get_next_epoch_id_from_prev_block(&prev_block_hash)); - - // Check client is part of the futures validators - if self.client.is_validator(&next_epoch_id, &prev_block_hash) { - debug!(target: "client", "Sending announce account for {}", validator_signer.validator_id()); - self.last_validator_announce_time = Some(now); - - let signature = validator_signer.sign_account_announce( - validator_signer.validator_id(), - &self.node_id, - &next_epoch_id, - ); - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::AnnounceAccount(AnnounceAccount { - account_id: validator_signer.validator_id().clone(), - peer_id: self.node_id.clone(), - epoch_id: next_epoch_id, - signature, - }), - )); - } - } - - /// Process the sandbox fast forward request. If the change in block height is past an epoch, - /// we fast forward to just right before the epoch, produce some blocks to get past and into - /// a new epoch, then we continue on with the residual amount to fast forward. - #[cfg(feature = "sandbox")] - fn sandbox_process_fast_forward( - &mut self, - block_height: BlockHeight, - ) -> Result, Error> { - let mut delta_height = std::mem::replace(&mut self.fastforward_delta, 0); - if delta_height == 0 { - return Ok(None); - } - - let epoch_length = self.client.config.epoch_length; - if epoch_length <= 3 { - return Err(Error::Other( - "Unsupported: fast_forward with an epoch length of 3 or less".to_string(), - )); - } - - // Check if we are at epoch boundary. If we are, do not fast forward until new - // epoch is here. Decrement the fast_forward count by 1 when a block is produced - // during this period of waiting - let block_height_wrt_epoch = block_height % epoch_length; - if epoch_length - block_height_wrt_epoch <= 3 || block_height_wrt_epoch == 0 { - // wait for doomslug to call into produce block - self.fastforward_delta = delta_height; - return Ok(None); - } - - let delta_height = if block_height_wrt_epoch + delta_height >= epoch_length { - // fast forward to just right before epoch boundary to have epoch_manager - // handle the epoch_height updates as normal. `- 3` since this is being - // done 3 blocks before the epoch ends. 
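(Worked example for the computation below: with epoch_length = 100, block_height_wrt_epoch = 90, and delta_height = 50, the guard 90 + 50 >= 100 holds, so 100 - 90 - 3 = 7 blocks are fast-forwarded now, the remaining 43 stay in fastforward_delta, and regular block production carries the chain across the epoch boundary before the residual amount is applied.)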
- let right_before_epoch_update = epoch_length - block_height_wrt_epoch - 3; - - delta_height -= right_before_epoch_update; - self.fastforward_delta = delta_height; - right_before_epoch_update - } else { - delta_height - }; - - self.client.accrued_fastforward_delta += delta_height; - let delta_time = self.client.sandbox_delta_time(); - let new_latest_known = near_chain::types::LatestKnown { - height: block_height + delta_height, - seen: near_primitives::utils::to_timestamp(StaticClock::utc() + delta_time), - }; - - Ok(Some(new_latest_known)) - } - - fn pre_block_production(&mut self) -> Result<(), Error> { - #[cfg(feature = "sandbox")] - { - let latest_known = self.client.chain.mut_chain_store().get_latest_known()?; - if let Some(new_latest_known) = - self.sandbox_process_fast_forward(latest_known.height)? - { - self.client.chain.mut_chain_store().save_latest_known(new_latest_known.clone())?; - self.client.sandbox_update_tip(new_latest_known.height)?; - } - } - Ok(()) - } - - fn post_block_production(&mut self) { - #[cfg(feature = "sandbox")] - if self.fastforward_delta > 0 { - // Decrease the delta_height by 1 since we've produced a single block. This - // ensures that we advanced the right amount of blocks when fast forwarding - // and fast forwarding triggers regular block production in the case of - // stepping between epoch boundaries. - self.fastforward_delta -= 1; - } - } - - /// Retrieves latest height, and checks if must produce next block. - /// Otherwise wait for block arrival or suggest to skip after timeout. - fn handle_block_production(&mut self) -> Result<(), Error> { - let _span = tracing::debug_span!(target: "client", "handle_block_production").entered(); - // If syncing, don't try to produce blocks. - if self.client.sync_status.is_syncing() { - debug!(target:"client", sync_status=format!("{:#?}", self.client.sync_status), "Syncing - block production disabled"); - return Ok(()); - } - - let _ = self.client.check_and_update_doomslug_tip(); - - self.pre_block_production()?; - let head = self.client.chain.head()?; - let latest_known = self.client.chain.chain_store().get_latest_known()?; - - assert!( - head.height <= latest_known.height, - "Latest known height is invalid {} vs {}", - head.height, - latest_known.height - ); - - let epoch_id = - self.client.epoch_manager.get_epoch_id_from_prev_block(&head.last_block_hash)?; - let log_block_production_info = - if self.client.epoch_manager.is_next_block_epoch_start(&head.last_block_hash)? { - true - } else { - // the next block is still the same epoch - let epoch_start_height = - self.client.epoch_manager.get_epoch_start_height(&head.last_block_hash)?; - latest_known.height - epoch_start_height < EPOCH_START_INFO_BLOCKS - }; - - // We try to produce block for multiple heights (up to the highest height for which we've seen 2/3 of approvals). 
- if latest_known.height + 1 <= self.client.doomslug.get_largest_height_crossing_threshold() { - debug!(target: "client", "Considering blocks for production between {} and {} ", latest_known.height + 1, self.client.doomslug.get_largest_height_crossing_threshold()); - } else { - debug!(target: "client", "Cannot produce any block: not enough approvals beyond {}", latest_known.height); - } - - let me = if let Some(me) = &self.client.validator_signer { - me.validator_id().clone() - } else { - return Ok(()); - }; - - // For debug purpose, we record the approvals we have seen so far to the future blocks - for height in latest_known.height + 1..=self.client.doomslug.get_largest_approval_height() { - let next_block_producer_account = - self.client.epoch_manager.get_block_producer(&epoch_id, height)?; - - if me == next_block_producer_account { - self.client.block_production_info.record_approvals( - height, - self.client.doomslug.approval_status_at_height(&height), - ); - } - } - - for height in - latest_known.height + 1..=self.client.doomslug.get_largest_height_crossing_threshold() - { - let next_block_producer_account = - self.client.epoch_manager.get_block_producer(&epoch_id, height)?; - - if me == next_block_producer_account { - self.client.chunk_inclusion_tracker.prepare_chunk_headers_ready_for_inclusion( - &head.last_block_hash, - self.client.chunk_endorsement_tracker.as_ref(), - )?; - let num_chunks = self - .client - .chunk_inclusion_tracker - .num_chunk_headers_ready_for_inclusion(&epoch_id, &head.last_block_hash); - let have_all_chunks = head.height == 0 - || num_chunks == self.client.epoch_manager.shard_ids(&epoch_id).unwrap().len(); - - if self.client.doomslug.ready_to_produce_block( - StaticClock::instant(), - height, - have_all_chunks, - log_block_production_info, - ) { - if let Err(err) = self.produce_block(height) { - // If there is an error, report it and let it retry on the next loop step. - error!(target: "client", height, "Block production failed: {}", err); - } else { - self.post_block_production(); - } - } - } - } - - Ok(()) - } - - fn schedule_triggers(&mut self, ctx: &mut Context) { - let wait = self.check_triggers(ctx); - - near_performance_metrics::actix::run_later(ctx, wait, move |act, ctx| { - act.schedule_triggers(ctx); - }); - } - - /// Check if the scheduled time of any "triggers" has passed, and if so, call the trigger. - /// Triggers are important functions of client, like running single step of state sync or - /// checking if we can produce a block. - /// - /// It is called before processing Actix message and also in schedule_triggers. - /// This is to ensure all triggers enjoy higher priority than any actix message. - /// Otherwise due to a bug in Actix library Actix prioritizes processing messages - /// while there are messages in mailbox. Because of that we handle scheduling - /// triggers with custom `run_timer` function instead of `run_later` in Actix. - /// - /// Returns the delay before the next time `check_triggers` should be called, which is - /// min(time until the closest trigger, 1 second). 
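In miniature, the trigger loop described above behaves like the following sketch; the Trigger struct and step function are illustrative stand-ins, not part of the codebase:

    use std::time::{Duration, Instant};

    struct Trigger {
        next_attempt: Instant,
        period: Duration,
    }

    /// Fire every trigger whose deadline has passed, then report how long the
    /// caller may sleep before checking again, capped at one second.
    fn step(triggers: &mut [Trigger], mut run: impl FnMut(usize)) -> Duration {
        let now = Instant::now();
        let mut delay = Duration::from_secs(1);
        for (i, trigger) in triggers.iter_mut().enumerate() {
            if now >= trigger.next_attempt {
                run(i); // the real code runs sync, doomslug, or block production here
                trigger.next_attempt = now + trigger.period;
            }
            delay = delay.min(trigger.next_attempt.saturating_duration_since(now));
        }
        delay
    }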
- fn check_triggers(&mut self, ctx: &mut Context) -> Duration { - let _span = tracing::debug_span!(target: "client", "check_triggers").entered(); - if let Some(config_updater) = &mut self.config_updater { - config_updater.try_update(&|updateable_client_config| { - self.client.update_client_config(updateable_client_config) - }); - } - - // Check block height to trigger expected shutdown - if let Ok(head) = self.client.chain.head() { - if let Some(block_height_to_shutdown) = self.client.config.expected_shutdown.get() { - if head.height >= block_height_to_shutdown { - info!(target: "client", "Expected shutdown triggered: head block({}) >= ({:?})", head.height, block_height_to_shutdown); - if let Some(tx) = self.shutdown_signal.take() { - let _ = tx.send(()); // Ignore send signal fail, it will send again in next trigger - } - } - } - } - - self.try_process_unfinished_blocks(); - - let mut delay = Duration::from_secs(1); - let now = Utc::now(); - - let timer = metrics::CHECK_TRIGGERS_TIME.start_timer(); - if self.sync_started { - self.sync_timer_next_attempt = self.run_timer( - self.sync_wait_period(), - self.sync_timer_next_attempt, - ctx, - |act, _| act.run_sync_step(), - "sync", - ); - - delay = std::cmp::min( - delay, - self.sync_timer_next_attempt.signed_duration_since(now).to_std().unwrap_or(delay), - ); - - self.doomslug_timer_next_attempt = self.run_timer( - self.client.config.doosmslug_step_period, - self.doomslug_timer_next_attempt, - ctx, - |act, ctx| act.try_doomslug_timer(ctx), - "doomslug", - ); - delay = core::cmp::min( - delay, - self.doomslug_timer_next_attempt - .signed_duration_since(now) - .to_std() - .unwrap_or(delay), - ) - } - if self.block_production_started { - self.block_production_next_attempt = self.run_timer( - self.client.config.block_production_tracking_delay, - self.block_production_next_attempt, - ctx, - |act, _ctx| act.try_handle_block_production(), - "block_production", - ); - - let _ = self.client.check_head_progress_stalled( - self.client.config.max_block_production_delay * HEAD_STALL_MULTIPLIER, - ); - - delay = core::cmp::min( - delay, - self.block_production_next_attempt - .signed_duration_since(now) - .to_std() - .unwrap_or(delay), - ) - } - - self.log_summary_timer_next_attempt = self.run_timer( - self.client.config.log_summary_period, - self.log_summary_timer_next_attempt, - ctx, - |act, _ctx| act.log_summary(), - "log_summary", - ); - delay = core::cmp::min( - delay, - self.log_summary_timer_next_attempt - .signed_duration_since(now) - .to_std() - .unwrap_or(delay), - ); - timer.observe_duration(); - delay - } - - /// "Unfinished" blocks means that blocks that client has started the processing and haven't - /// finished because it was waiting for applying chunks to be done. This function checks - /// if there are any "unfinished" blocks that are ready to be processed again and finish processing - /// these blocks. - /// This function is called at two places, upon receiving ApplyChunkDoneMessage and `check_triggers`. - /// The job that executes applying chunks will send an ApplyChunkDoneMessage to ClientActor after - /// applying chunks is done, so when receiving ApplyChunkDoneMessage messages, ClientActor - /// calls this function to finish processing the unfinished blocks. ClientActor also calls - /// this function in `check_triggers`, because the actix queue may be blocked by other messages - /// and we want to prioritize block processing. 
- fn try_process_unfinished_blocks(&mut self) { - let _span = debug_span!(target: "client", "try_process_unfinished_blocks").entered(); - let (accepted_blocks, errors) = - self.client.postprocess_ready_blocks(self.get_apply_chunks_done_callback(), true); - if !errors.is_empty() { - error!(target: "client", ?errors, "try_process_unfinished_blocks got errors"); - } - self.process_accepted_blocks(accepted_blocks); - } - - fn try_handle_block_production(&mut self) { - let _span = debug_span!(target: "client", "try_handle_block_production").entered(); - if let Err(err) = self.handle_block_production() { - tracing::error!(target: "client", ?err, "Handle block production failed") - } - } - - fn try_doomslug_timer(&mut self, _: &mut Context) { - let _span = tracing::debug_span!(target: "client", "try_doomslug_timer").entered(); - let _ = self.client.check_and_update_doomslug_tip(); - let approvals = self.client.doomslug.process_timer(StaticClock::instant()); - - // Important to save the largest approval target height before sending approvals, so - // that if the node crashes in the meantime, we cannot get slashed on recovery - let mut chain_store_update = self.client.chain.mut_chain_store().store_update(); - chain_store_update - .save_largest_target_height(self.client.doomslug.get_largest_target_height()); - - match chain_store_update.commit() { - Ok(_) => { - let head = unwrap_or_return!(self.client.chain.head()); - if self.client.is_validator(&head.epoch_id, &head.last_block_hash) - || self.client.is_validator(&head.next_epoch_id, &head.last_block_hash) - { - for approval in approvals { - if let Err(e) = - self.client.send_approval(&self.client.doomslug.get_tip().0, approval) - { - error!("Error while sending an approval {:?}", e); - } - } - } - } - Err(e) => error!("Error while committing largest skipped height {:?}", e), - }; - } - - /// Produce block if we are block producer for given `next_height` height. - /// Can return error, should be called with `produce_block` to handle errors and reschedule. - fn produce_block(&mut self, next_height: BlockHeight) -> Result<(), Error> { - let _span = tracing::debug_span!(target: "client", "produce_block", next_height).entered(); - if let Some(block) = self.client.produce_block_on_head(next_height, false)? { - // If we produced the block, send it out before we apply the block. - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::Block { block: block.clone() }, - )); - // We’ve produced the block so that counts as validated block. 
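(MaybeValidated is, in essence, a payload tagged with whether validation has already happened, letting downstream code skip re-validating self-produced blocks. A simplified model of the idea, not the actual near-primitives definition:)

    // Simplified model of the MaybeValidated idea, for illustration only.
    enum MaybeValidated<T> {
        Validated(T),
        NotValidated(T),
    }

    impl<T> MaybeValidated<T> {
        // Wrap a value that is already known to be valid.
        fn from_validated(payload: T) -> Self {
            MaybeValidated::Validated(payload)
        }
    }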
- let block = MaybeValidated::from_validated(block); - let res = self.client.start_process_block( - block, - Provenance::PRODUCED, - self.get_apply_chunks_done_callback(), - ); - if let Err(e) = &res { - match e { - near_chain::Error::ChunksMissing(_) => { - debug!(target: "client", "chunks missing"); - // missing chunks were already handled in Client::process_block, we don't need to - // do anything here - return Ok(()); - } - _ => { - error!(target: "client", ?res, "Failed to process freshly produced block"); - byzantine_assert!(false); - return res.map_err(|err| err.into()); - } - } - } - } - Ok(()) - } - - fn send_chunks_metrics(&mut self, block: &Block) { - let chunks = block.chunks(); - for (chunk, &included) in chunks.iter().zip(block.header().chunk_mask().iter()) { - if included { - self.info_helper.chunk_processed( - chunk.shard_id(), - chunk.prev_gas_used(), - chunk.prev_balance_burnt(), - ); - } else { - self.info_helper.chunk_skipped(chunk.shard_id()); - } - } - } - - fn send_block_metrics(&mut self, block: &Block) { - let chunks_in_block = block.header().chunk_mask().iter().filter(|&&m| m).count(); - let gas_used = Block::compute_gas_used(block.chunks().iter(), block.header().height()); - - let last_final_hash = block.header().last_final_block(); - let last_final_ds_hash = block.header().last_ds_final_block(); - let last_final_block_height = self - .client - .chain - .get_block(&last_final_hash) - .map_or(0, |block| block.header().height()); - let last_final_ds_block_height = self - .client - .chain - .get_block(&last_final_ds_hash) - .map_or(0, |block| block.header().height()); - - let epoch_height = - self.client.epoch_manager.get_epoch_height_from_prev_block(block.hash()).unwrap_or(0); - let epoch_start_height = self - .client - .epoch_manager - .get_epoch_start_height(&last_final_hash) - .unwrap_or(last_final_block_height); - let last_final_block_height_in_epoch = - last_final_block_height.checked_sub(epoch_start_height); - - self.info_helper.block_processed( - gas_used, - chunks_in_block as u64, - block.header().next_gas_price(), - block.header().total_supply(), - last_final_block_height, - last_final_ds_block_height, - epoch_height, - last_final_block_height_in_epoch, - ); - } - - /// Process all blocks that were accepted by calling other relevant services. - fn process_accepted_blocks(&mut self, accepted_blocks: Vec) { - let _span = tracing::debug_span!( - target: "client", - "process_accepted_blocks", - num_blocks = accepted_blocks.len()) - .entered(); - for accepted_block in accepted_blocks { - let block = self.client.chain.get_block(&accepted_block).unwrap().clone(); - self.send_chunks_metrics(&block); - self.send_block_metrics(&block); - self.check_send_announce_account(*block.header().last_final_block()); - } - } - - /// Returns the callback function that will be passed to various functions that may trigger - /// the processing of new blocks. This callback will be called at the end of applying chunks - /// for every block. 
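The callback is simply a shared closure that posts a message back to the actor's own mailbox once apply-chunks finishes; the same pattern in miniature (DoneCallback and make_done_callback are illustrative names, not the real API):

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    // Illustrative stand-in for DoneApplyChunkCallback: cloneable and callable
    // from any thread that finishes applying chunks.
    type DoneCallback = Arc<dyn Fn() + Send + Sync>;

    fn make_done_callback(counter: Arc<AtomicUsize>) -> DoneCallback {
        Arc::new(move || {
            // The real callback sends ApplyChunksDoneMessage to the actor;
            // here we merely record the completion.
            counter.fetch_add(1, Ordering::SeqCst);
        })
    }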
- fn get_apply_chunks_done_callback(&self) -> DoneApplyChunkCallback { - let addr = self.my_address.clone(); - Arc::new(move |_| { - addr.do_send(ApplyChunksDoneMessage {}.with_span_context()); - }) - } - - fn receive_headers(&mut self, headers: Vec, peer_id: PeerId) -> bool { - let _span = - debug_span!(target: "client", "receive_headers", num_headers = headers.len(), ?peer_id) - .entered(); - if headers.is_empty() { - info!(target: "client", "Received an empty set of block headers"); - return true; - } - match self.client.sync_block_headers(headers) { - Ok(_) => true, - Err(err) => { - if err.is_bad_data() { - error!(target: "client", ?err, "Error processing sync blocks"); - false - } else { - debug!(target: "client", ?err, "Block headers refused by chain"); - true - } - } - } - } - - /// Check whether need to (continue) sync. - /// Also return higher height with known peers at that height. - fn syncing_info(&self) -> Result { - if self.adv.disable_header_sync() { - return Ok(SyncRequirement::AdvHeaderSyncDisabled); - } - - let head = self.client.chain.head()?; - let is_syncing = self.client.sync_status.is_syncing(); - - // Only consider peers whose latest block is not invalid blocks - let eligible_peers: Vec<_> = self - .network_info - .highest_height_peers - .iter() - .filter(|p| !self.client.chain.is_block_invalid(&p.highest_block_hash)) - .collect(); - metrics::PEERS_WITH_INVALID_HASH - .set(self.network_info.highest_height_peers.len() as i64 - eligible_peers.len() as i64); - let peer_info = if let Some(peer_info) = eligible_peers.choose(&mut thread_rng()) { - peer_info - } else { - return Ok(SyncRequirement::NoPeers); - }; - - let peer_id = peer_info.peer_info.id.clone(); - let highest_height = peer_info.highest_block_height; - - if is_syncing { - if highest_height <= head.height { - Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head }) - } else { - Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head }) - } - } else { - if highest_height > head.height + self.client.config.sync_height_threshold { - Ok(SyncRequirement::SyncNeeded { peer_id, highest_height, head }) - } else { - Ok(SyncRequirement::AlreadyCaughtUp { peer_id, highest_height, head }) - } - } - } - - fn start_flat_storage_creation(&mut self, ctx: &mut Context) { - if !self.client.config.flat_storage_creation_enabled { - return; - } - match self.client.run_flat_storage_creation_step() { - Ok(false) => {} - Ok(true) => { - return; - } - Err(err) => { - error!(target: "client", "Error occurred during flat storage creation step: {:?}", err); - } - } - - near_performance_metrics::actix::run_later( - ctx, - self.client.config.flat_storage_creation_period, - move |act, ctx| { - act.start_flat_storage_creation(ctx); - }, - ); - } - - /// Starts syncing and then switches to either syncing or regular mode. - fn start_sync(&mut self, ctx: &mut Context) { - // Wait for connections reach at least minimum peers unless skipping sync. - if self.network_info.num_connected_peers < self.client.config.min_num_peers - && !self.client.config.skip_sync_wait - { - near_performance_metrics::actix::run_later( - ctx, - self.client.config.sync_step_period, - move |act, ctx| { - act.start_sync(ctx); - }, - ); - return; - } - self.sync_started = true; - - // Sync loop will be started by check_triggers. - } - - /// Select the block hash we are using to sync state. It will sync with the state before applying the - /// content of such block. - /// - /// The selected block will always be the first block on a new epoch: - /// . 
-    /// Select the block hash we are using to sync state. It will sync with the state before applying the
-    /// content of such a block.
-    ///
-    /// The selected block will always be the first block on a new epoch:
-    /// .
-    fn find_sync_hash(&mut self) -> Result<CryptoHash, near_chain::Error> {
-        let header_head = self.client.chain.header_head()?;
-        let sync_hash = header_head.last_block_hash;
-        let epoch_start_sync_hash =
-            StateSync::get_epoch_start_sync_hash(&mut self.client.chain, &sync_hash)?;
-
-        let genesis_hash = self.client.chain.genesis().hash();
-        tracing::debug!(
-            target: "sync",
-            ?header_head,
-            ?sync_hash,
-            ?epoch_start_sync_hash,
-            ?genesis_hash,
-            "find_sync_hash");
-        assert_ne!(&epoch_start_sync_hash, genesis_hash);
-        Ok(epoch_start_sync_hash)
-    }
-
-    /// Runs catchup on repeat, if this client is a validator.
-    /// Schedules itself again if it was not run as a response to a state parts job result.
-    fn catchup(&mut self, ctx: &mut Context<ClientActor>) {
-        {
-            // An extra scope to limit the lifetime of the span.
-            let _span = tracing::debug_span!(target: "client", "catchup").entered();
-            if let Err(err) = self.client.run_catchup(
-                &self.network_info.highest_height_peers,
-                &self.state_parts_task_scheduler,
-                &self.block_catch_up_scheduler,
-                &self.resharding_scheduler,
-                self.get_apply_chunks_done_callback(),
-                &self.state_parts_client_arbiter.handle(),
-            ) {
-                error!(target: "client", "{:?} Error occurred during catchup for the next epoch: {:?}", self.client.validator_signer.as_ref().map(|vs| vs.validator_id()), err);
-            }
-        }
-
-        near_performance_metrics::actix::run_later(
-            ctx,
-            self.client.config.catchup_step_period,
-            move |act, ctx| {
-                act.catchup(ctx);
-            },
-        );
-    }
-
-    /// Runs the given callback if the time now is at least `next_attempt`.
-    /// Returns the time for the next run, which should be made based on the given `delay` between runs.
-    fn run_timer<F>(
-        &mut self,
-        delay: Duration,
-        next_attempt: DateTime<Utc>,
-        ctx: &mut Context<ClientActor>,
-        f: F,
-        timer_label: &str,
-    ) -> DateTime<Utc>
-    where
-        F: FnOnce(&mut Self, &mut <Self as Actor>::Context) + 'static,
-    {
-        let now = Utc::now();
-        if now < next_attempt {
-            return next_attempt;
-        }
-
-        let timer =
-            metrics::CLIENT_TRIGGER_TIME_BY_TYPE.with_label_values(&[timer_label]).start_timer();
-        f(self, ctx);
-        timer.observe_duration();
-
-        now.checked_add_signed(chrono::Duration::from_std(delay).unwrap()).unwrap()
-    }
-
-    fn sync_wait_period(&self) -> Duration {
-        if let Ok(sync) = self.syncing_info() {
-            if !sync.sync_needed() {
-                // If we don't need syncing - retry the sync call rarely.
-                self.client.config.sync_check_period
-            } else {
-                // If we need syncing - retry the sync call often.
-                self.client.config.sync_step_period
-            }
-        } else {
-            self.client.config.sync_step_period
-        }
-    }
-
-    /// Main syncing job responsible for syncing client with other peers.
-    /// Runs itself again iff it was not run as a reaction to a message with the results of
-    /// a finished state part job.
-    fn run_sync_step(&mut self) {
-        let _span = tracing::debug_span!(target: "client", "run_sync_step").entered();
-
-        macro_rules! unwrap_and_report (($obj: expr) => (match $obj {
-            Ok(v) => v,
-            Err(err) => {
-                error!(target: "sync", "Sync: Unexpected error: {}", err);
-                return;
-            }
-        }));
-
-        let currently_syncing = self.client.sync_status.is_syncing();
-        let sync = unwrap_and_report!(self.syncing_info());
-        self.info_helper.update_sync_requirements_metrics(sync.to_metrics_string());
-
-        match sync {
-            SyncRequirement::AlreadyCaughtUp { .. }
-            | SyncRequirement::NoPeers
-            | SyncRequirement::AdvHeaderSyncDisabled => {
-                if currently_syncing {
-                    // Initial transition out of "syncing" state.
- debug!(target: "sync", prev_sync_status = ?self.client.sync_status, "disabling sync"); - self.client.sync_status.update(SyncStatus::NoSync); - // Announce this client's account id if their epoch is coming up. - let head = unwrap_and_report!(self.client.chain.head()); - self.check_send_announce_account(head.prev_block_hash); - } - } - - SyncRequirement::SyncNeeded { highest_height, .. } => { - let mut notify_start_sync = false; - if !currently_syncing { - info!(target: "client", ?sync, "enabling sync"); - } - // Run each step of syncing separately. - unwrap_and_report!(self.client.header_sync.run( - &mut self.client.sync_status, - &mut self.client.chain, - highest_height, - &self.network_info.highest_height_peers - )); - // Only body / state sync if header height is close to the latest. - let header_head = unwrap_and_report!(self.client.chain.header_head()); - - // Sync state if already running sync state or if block sync is too far. - let sync_state = match self.client.sync_status { - SyncStatus::StateSync(_) => true, - _ if header_head.height - >= highest_height - .saturating_sub(self.client.config.block_header_fetch_horizon) => - { - unwrap_and_report!(self.client.block_sync.run( - &mut self.client.sync_status, - &self.client.chain, - highest_height, - &self.network_info.highest_height_peers - )) - } - _ => false, - }; - if sync_state { - match self.client.sync_status { - SyncStatus::StateSync(_) => (), - _ => { - let sync_hash = unwrap_and_report!(self.find_sync_hash()); - if !self.client.config.archive { - unwrap_and_report!(self - .client - .chain - .reset_data_pre_state_sync(sync_hash)); - } - self.client.sync_status.update(SyncStatus::StateSync( - StateSyncStatus { sync_hash, sync_status: HashMap::default() }, - )); - // This is the first time we run state sync. - notify_start_sync = true; - } - }; - let sync_hash = match &self.client.sync_status { - SyncStatus::StateSync(s) => s.sync_hash, - _ => unreachable!("Sync status should have been StateSync!"), - }; - - let me = - self.client.validator_signer.as_ref().map(|x| x.validator_id().clone()); - let block_header = - unwrap_and_report!(self.client.chain.get_block_header(&sync_hash)); - let prev_hash = *block_header.prev_hash(); - let epoch_id = block_header.epoch_id().clone(); - let shards_to_sync: Vec<_> = self - .client - .epoch_manager - .shard_ids(&epoch_id) - .unwrap() - .into_iter() - .filter(|&shard_id| { - cares_about_shard_this_or_next_epoch( - me.as_ref(), - &prev_hash, - shard_id, - true, - &self.client.shard_tracker, - ) - }) - .collect(); - - let use_colour = - matches!(self.client.config.log_summary_style, LogSummaryStyle::Colored); - - // Notify each shard to sync. - if notify_start_sync { - let shard_layout = self - .client - .epoch_manager - .get_shard_layout(&epoch_id) - .expect("Cannot get shard layout"); - for &shard_id in &shards_to_sync { - let shard_uid = - ShardUId::from_shard_id_and_layout(shard_id, &shard_layout); - match self.client.state_sync_adapter.clone().read() { - Ok(sync_adapter) => sync_adapter.send( - shard_uid, - (SyncMessage::StartSync(SyncShardInfo { - shard_uid, - sync_hash, - })) - .with_span_context(), - ), - Err(_) => { - error!(target:"client", "State sync adapter lock is poisoned.") - } - } - } - } - - let now = StaticClock::utc(); - - // FIXME: it checks if the block exists.. but I have no idea why.. - // seems that we don't really use this block in case of catchup - we use it only for state sync. - // Seems it is related to some bug with block getting orphaned after state sync? 
-                    let (request_block, have_block) =
-                        unwrap_and_report!(self.sync_block_status(&prev_hash, now));
-
-                    if request_block {
-                        self.client.last_time_sync_block_requested = Some(now);
-                        if let Some(peer_info) =
-                            self.network_info.highest_height_peers.choose(&mut thread_rng())
-                        {
-                            let id = peer_info.peer_info.id.clone();
-
-                            for hash in
-                                vec![*block_header.prev_hash(), *block_header.hash()].into_iter()
-                            {
-                                self.client.request_block(hash, id.clone());
-                            }
-                        }
-                    }
-                    if have_block {
-                        self.client.last_time_sync_block_requested = None;
-                    }
-
-                    let state_sync_status = match &mut self.client.sync_status {
-                        SyncStatus::StateSync(s) => s,
-                        _ => unreachable!("Sync status should have been StateSync!"),
-                    };
-                    match unwrap_and_report!(self.client.state_sync.run(
-                        &me,
-                        sync_hash,
-                        &mut state_sync_status.sync_status,
-                        &mut self.client.chain,
-                        self.client.epoch_manager.as_ref(),
-                        &self.network_info.highest_height_peers,
-                        shards_to_sync,
-                        &self.state_parts_task_scheduler,
-                        &self.resharding_scheduler,
-                        &self.state_parts_client_arbiter.handle(),
-                        use_colour,
-                        self.client.runtime_adapter.clone(),
-                    )) {
-                        StateSyncResult::InProgress => (),
-                        StateSyncResult::Completed => {
-                            if !have_block {
-                                trace!(target: "sync", "Sync done. Waiting for sync block.");
-                                return;
-                            }
-                            info!(target: "sync", "State sync: all shards are done");
-
-                            let mut block_processing_artifacts = BlockProcessingArtifact::default();
-
-                            unwrap_and_report!(self.client.chain.reset_heads_post_state_sync(
-                                &me,
-                                sync_hash,
-                                &mut block_processing_artifacts,
-                                self.get_apply_chunks_done_callback(),
-                            ));
-
-                            self.client
-                                .process_block_processing_artifact(block_processing_artifacts);
-                            self.client.sync_status.update(SyncStatus::BlockSync {
-                                start_height: 0,
-                                current_height: 0,
-                                highest_height: 0,
-                            });
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /// Verifies if the node possesses the sync block.
-    /// It is the last block of the previous epoch.
-    /// If the block is absent, the node requests it from peers.
-    fn sync_block_status(
-        &self,
-        prev_hash: &CryptoHash,
-        now: DateTime<Utc>,
-    ) -> Result<(bool, bool), near_chain::Error> {
-        let (request_block, have_block) = if !self.client.chain.block_exists(prev_hash)? {
-            let timeout =
-                chrono::Duration::from_std(self.client.config.state_sync_timeout).unwrap();
-            match self.client.last_time_sync_block_requested {
-                None => (true, false),
-                Some(last_time) => {
-                    if (now - last_time) >= timeout {
-                        tracing::error!(
-                            target: "sync",
-                            %prev_hash,
-                            ?timeout,
-                            "State sync: block request timed out");
-                        (true, false)
-                    } else {
-                        (false, false)
-                    }
-                }
-            }
-        } else {
-            (false, true)
-        };
-        Ok((request_block, have_block))
-    }
-
-    /// Print the current summary.
-    fn log_summary(&mut self) {
-        let _span = tracing::debug_span!(target: "client", "log_summary").entered();
-        self.info_helper.log_summary(
-            &self.client,
-            &self.node_id,
-            &self.network_info,
-            &self.config_updater,
-        )
-    }
-
-    /// Checks if the node is syncing its State and applies special logic in that case.
-    /// A node usually ignores blocks that are too far ahead, but a node that is syncing its state is looking for 2 specific blocks:
-    /// * The first block of the new epoch
-    /// * The last block of the previous epoch
-    /// Returns whether the node is syncing its state.
-    fn maybe_receive_state_sync_blocks(&mut self, block: &Block) -> bool {
-        let SyncStatus::StateSync(StateSyncStatus { sync_hash, .. }) = self.client.sync_status
-        else {
-            return false;
-        };
-        if let Ok(header) = self.client.chain.get_block_header(&sync_hash) {
-            let block: MaybeValidated<Block> = (*block).clone().into();
-            let block_hash = *block.hash();
-            if let Err(err) = self.client.chain.validate_block(&block) {
-                byzantine_assert!(false);
-                error!(target: "client", ?err, ?block_hash, "Received an invalid block during state sync");
-            }
-            // Notice that two blocks are saved differently:
-            // * save_block() for one block.
-            // * save_orphan() for another block.
-            if &block_hash == header.prev_hash() {
-                // The last block of the previous epoch.
-                if let Err(err) = self.client.chain.save_block(block) {
-                    error!(target: "client", ?err, ?block_hash, "Failed to save a block during state sync");
-                }
-            } else if block_hash == sync_hash {
-                // The first block of the new epoch.
-                self.client.chain.save_orphan(block, Provenance::NONE, None, false);
-            }
-        }
-        true
-    }
-}
-
-impl Drop for ClientActor {
-    fn drop(&mut self) {
-        let _span = tracing::debug_span!(target: "client", "drop").entered();
-        self.state_parts_client_arbiter.stop();
-    }
-}
-
-impl Handler<WithSpanContext<ApplyStatePartsResponse>> for ClientActor {
-    type Result = ();
+impl<T> Handler<WithSpanContext<T>> for ClientActor
+where
+    T: actix::Message + Debug,
+    ClientActions: ClientActionHandler<T>,
+    T::Result: actix::dev::MessageResponse<ClientActor, WithSpanContext<T>>,
+{
+    type Result = T::Result;
 
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<ApplyStatePartsResponse>,
-        _: &mut Self::Context,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        tracing::debug!(target: "client", ?msg);
-        if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) {
-            // We are doing catchup
-            sync.set_apply_result(msg.shard_id, msg.apply_result);
-        } else {
-            self.client.state_sync.set_apply_result(msg.shard_id, msg.apply_result);
-        }
-    }
-}
-
-impl Handler<WithSpanContext<BlockCatchUpResponse>> for ClientActor {
-    type Result = ();
-
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<BlockCatchUpResponse>,
-        _: &mut Self::Context,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        tracing::debug!(target: "client", ?msg);
-        if let Some((_, _, blocks_catch_up_state)) =
-            self.client.catchup_state_syncs.get_mut(&msg.sync_hash)
-        {
-            assert!(blocks_catch_up_state.scheduled_blocks.remove(&msg.block_hash));
-            blocks_catch_up_state
-                .processed_blocks
-                .insert(msg.block_hash, msg.results.into_iter().map(|res| res.1).collect_vec());
-        } else {
-            panic!("block catch up processing result from unknown sync hash");
-        }
-    }
-}
-
-impl Handler<WithSpanContext<ReshardingResponse>> for ClientActor {
-    type Result = ();
-
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<ReshardingResponse>,
-        _: &mut Self::Context,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        tracing::debug!(target: "client", ?msg);
-        if let Some((sync, _, _)) = self.client.catchup_state_syncs.get_mut(&msg.sync_hash) {
-            // We are doing catchup
-            sync.set_resharding_result(msg.shard_id, msg.new_state_roots);
-        } else {
-            self.client.state_sync.set_resharding_result(msg.shard_id, msg.new_state_roots);
-        }
-    }
-}
-
-impl Handler<WithSpanContext<ShardsManagerResponse>> for ClientActor {
-    type Result = ();
-
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<ShardsManagerResponse>,
-        _: &mut Self::Context,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        match msg {
-            ShardsManagerResponse::ChunkCompleted { partial_chunk, shard_chunk } => {
-                self.client.on_chunk_completed(
-                    partial_chunk,
-                    shard_chunk,
-                    self.get_apply_chunks_done_callback(),
-                );
-            }
-            ShardsManagerResponse::InvalidChunk(encoded_chunk) => {
-                self.client.on_invalid_chunk(encoded_chunk);
-            }
-            ShardsManagerResponse::ChunkHeaderReadyForInclusion {
-                chunk_header,
-                chunk_producer,
-            } => {
-                self.client
-                    .chunk_inclusion_tracker
-                    .mark_chunk_header_ready_for_inclusion(chunk_header, chunk_producer);
-            }
-        }
-    }
-}
-
-impl Handler<WithSpanContext<GetClientConfig>> for ClientActor {
-    type Result = Result<ClientConfig, GetClientConfigError>;
-
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<GetClientConfig>,
-        _: &mut Context<Self>,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        tracing::debug!(target: "client", ?msg);
-
-        Ok(self.client.config.clone())
-    }
-}
-
-impl Handler<WithSpanContext<SyncMessage>> for ClientActor {
-    type Result = ();
-
-    #[perf]
-    fn handle(&mut self, msg: WithSpanContext<SyncMessage>, _: &mut Context<Self>) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        tracing::debug!(target: "client", ?msg);
-        // TODO
-        // process messages from SyncActors
+    fn handle(&mut self, msg: WithSpanContext<T>, ctx: &mut Context<Self>) -> Self::Result {
+        self.wrap(msg, ctx, std::any::type_name::<T>(), |this, msg, _| this.actions.handle(msg))
     }
 }
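The blanket impl above is the heart of this refactoring: the actix shell keeps no logic of its own and forwards every message type `T` to `ClientActions` through one generic `Handler`. A condensed, self-contained sketch of the same trick, with illustrative stand-ins for ClientActor/ClientActions/ClientActionHandler (and no span-wrapping):

use actix::{Actor, Context, Handler, Message};

struct Ping(u32);
impl Message for Ping {
    type Result = u32;
}

// One trait per message type, implemented by the logic-only struct.
trait ActionHandler<M: Message> {
    fn handle(&mut self, msg: M) -> M::Result;
}

#[derive(Default)]
struct Actions {
    pings_seen: u32,
}

impl ActionHandler<Ping> for Actions {
    fn handle(&mut self, msg: Ping) -> u32 {
        self.pings_seen += 1;
        msg.0
    }
}

// The actix shell: no logic of its own.
struct Shell {
    actions: Actions,
}

impl Actor for Shell {
    type Context = Context<Self>;
}

// One blanket impl forwards every message the actions struct can handle.
impl<M> Handler<M> for Shell
where
    M: Message,
    Actions: ActionHandler<M>,
    M::Result: actix::dev::MessageResponse<Shell, M>,
{
    type Result = M::Result;

    fn handle(&mut self, msg: M, _ctx: &mut Context<Self>) -> Self::Result {
        self.actions.handle(msg)
    }
}

fn main() {
    // The logic struct is testable on its own, with no actix runtime at all.
    let mut actions = Actions::default();
    assert_eq!(actions.handle(Ping(7)), 7);
    assert_eq!(actions.pings_seen, 1);
    let _shell = Shell { actions }; // the actix shell would be started with this
}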
 
+// This one requires the context for further scheduling of messages, so
+// we can't use the generic wrapper above.
 impl Handler<WithSpanContext<ChunkStateWitnessMessage>> for ClientActor {
     type Result = ();
 
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<ChunkStateWitnessMessage>,
-        ctx: &mut Context<Self>,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        let peer_id = msg.peer_id.clone();
-        let attempts_remaining = msg.attempts_remaining;
-        match self.client.process_chunk_state_witness(msg.witness, msg.peer_id, None) {
-            Err(err) => {
-                tracing::error!(target: "client", ?err, "Error processing chunk state witness");
-            }
-            Ok(Some(witness)) => {
-                if attempts_remaining > 0 {
-                    ctx.run_later(Duration::from_millis(100), move |_, ctx| {
-                        ctx.address().do_send(
-                            ChunkStateWitnessMessage {
-                                witness,
-                                peer_id,
-                                attempts_remaining: attempts_remaining - 1,
-                            }
-                            .with_span_context(),
-                        );
-                    });
-                } else {
-                    tracing::error!(target: "client", "Failed to process chunk state witness even after 5 tries due to missing parent block");
-                }
-            }
-            Ok(None) => {}
-        }
-    }
-}
-
-impl Handler<WithSpanContext<ChunkEndorsementMessage>> for ClientActor {
-    type Result = ();
-
-    #[perf]
-    fn handle(
-        &mut self,
-        msg: WithSpanContext<ChunkEndorsementMessage>,
-        _: &mut Context<Self>,
-    ) -> Self::Result {
-        let (_span, msg) = handler_debug_span!(target: "client", msg);
-        if let Err(err) = self.client.process_chunk_endorsement(msg.0) {
-            tracing::error!(target: "client", ?err, "Error processing chunk endorsement");
-        }
+    fn handle(&mut self, msg: WithSpanContext<ChunkStateWitnessMessage>, ctx: &mut Context<Self>) {
+        self.wrap(msg, ctx, "ChunkStateWitnessMessage", |this, msg, ctx| {
+            this.actions.handle_state_witness_message(msg, ctx)
+        })
     }
 }
 
@@ -2059,8 +170,31 @@ pub fn random_seed_from_thread() -> RngSeed {
     rng_seed
 }
 
+/// Blocks the program until the given genesis time arrives.
+fn wait_until_genesis(genesis_time: &DateTime<Utc>) {
+    loop {
+        // Get chrono::Duration::num_seconds() by deducting genesis_time from now.
+        let duration = genesis_time.signed_duration_since(StaticClock::utc());
+        let chrono_seconds = duration.num_seconds();
+        // Check if the number of seconds in the chrono::Duration is larger than zero.
+        if chrono_seconds <= 0 {
+            break;
+        }
+        tracing::info!(target: "near", "Waiting until genesis: {}d {}h {}m {}s", duration.num_days(),
+            (duration.num_hours() % 24),
+            (duration.num_minutes() % 60),
+            (duration.num_seconds() % 60));
+        let wait = std::cmp::min(
+            std::time::Duration::from_secs(10),
+            std::time::Duration::from_secs(chrono_seconds as u64),
+        );
+        std::thread::sleep(wait);
+    }
+}
+
 /// Starts client in a separate Arbiter (thread).
 pub fn start_client(
+    clock: Clock,
     client_config: ClientConfig,
     chain_genesis: ChainGenesis,
     epoch_manager: Arc<dyn EpochManagerAdapter>,
@@ -2099,8 +233,9 @@ pub fn start_client(
     let resharding_handle = client.chain.resharding_handle.clone();
     let client_addr = ClientActor::start_in_arbiter(&client_arbiter_handle, move |ctx| {
         ClientActor::new(
+            clock,
             client,
-            ctx.address(),
+            ctx.address().with_auto_span_context().into_multi_sender(),
             client_config,
             node_id,
             network_adapter,
diff --git a/chain/client/src/debug.rs b/chain/client/src/debug.rs
index fe5dfc7568e..5f12e17618e 100644
--- a/chain/client/src/debug.rs
+++ b/chain/client/src/debug.rs
@@ -16,7 +16,7 @@ use near_client_primitives::{
     types::StatusError,
 };
 use near_epoch_manager::EpochManagerAdapter;
-use near_o11y::{handler_debug_span, log_assert, OpenTelemetrySpanExt, WithSpanContext};
+use near_o11y::{handler_debug_span, log_assert, WithSpanContext};
 use near_performance_metrics_macros::perf;
 use near_primitives::state_sync::get_num_state_parts;
 use near_primitives::types::{AccountId, BlockHeight, NumShards, ShardId, ValidatorInfoIdentifier};
diff --git a/chain/client/src/lib.rs b/chain/client/src/lib.rs
index 5e92ea7b56c..af34791cae9 100644
--- a/chain/client/src/lib.rs
+++ b/chain/client/src/lib.rs
@@ -10,7 +10,7 @@ pub use near_client_primitives::types::{
 pub use crate::client::{Client, ProduceChunkResult};
 #[cfg(feature = "test_features")]
-pub use crate::client_actor::NetworkAdversarialMessage;
+pub use crate::client_actions::NetworkAdversarialMessage;
 pub use crate::client_actor::{start_client, ClientActor};
 pub use crate::config_updater::ConfigUpdater;
 pub use crate::sync::adapter::{SyncAdapter, SyncMessage};
@@ -25,6 +25,7 @@ pub mod adversarial;
 mod chunk_distribution_network;
 mod chunk_inclusion_tracker;
 mod client;
+pub mod client_actions;
 mod client_actor;
 mod config_updater;
 pub mod debug;
diff --git a/chain/client/src/sync/sync_actor.rs b/chain/client/src/sync/sync_actor.rs
index bb853868250..02b9b2dfb96 100644
--- a/chain/client/src/sync/sync_actor.rs
+++ b/chain/client/src/sync/sync_actor.rs
@@ -1,7 +1,7 @@
 use super::adapter::{SyncMessage as ClientSyncMessage, SyncShardInfo};
 use near_async::messaging::Sender;
 use near_network::types::{PeerManagerMessageRequest, StateSyncResponse};
-use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext};
+use near_o11y::{handler_debug_span, WithSpanContext};
 use near_performance_metrics_macros::perf;
 use near_primitives::hash::CryptoHash;
 use near_store::ShardUId;
diff --git a/chain/client/src/sync_jobs_actor.rs b/chain/client/src/sync_jobs_actor.rs
index 0f5c1394146..2411fc21423 100644
--- a/chain/client/src/sync_jobs_actor.rs
+++ b/chain/client/src/sync_jobs_actor.rs
@@ -8,7 +8,7 @@ use near_chain::chain::{
 };
 use near_chain::resharding::ReshardingRequest;
 use near_chain::Chain;
-use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt};
+use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt};
 use near_performance_metrics_macros::perf;
 use near_primitives::state_part::PartId;
 use near_primitives::state_sync::StatePartKey;
diff --git a/chain/client/src/test_utils/setup.rs b/chain/client/src/test_utils/setup.rs
index e405c39cb2f..a99512c3b7a 100644
--- a/chain/client/src/test_utils/setup.rs
+++ b/chain/client/src/test_utils/setup.rs
@@ -185,8 +185,9 @@ pub fn setup(
     )
     .unwrap();
     let client_actor = ClientActor::new(
+        Clock::real(), // TODO: use fake clock
         client,
-        ctx.address(),
+        ctx.address().with_auto_span_context().into_multi_sender(),
         config,
         PeerId::new(PublicKey::empty(KeyType::ED25519)),
         network_adapter,
diff --git a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs
index ef8679f352a..548e84d9742 100644
--- a/chain/client/src/view_client.rs
+++ b/chain/client/src/view_client.rs
@@ -33,7 +33,7 @@ use near_network::types::{
     NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, ReasonForBan,
     StateResponseInfo, StateResponseInfoV2,
 };
-use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt};
+use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt};
 use near_performance_metrics_macros::perf;
 use near_primitives::block::{Block, BlockHeader};
 use near_primitives::epoch_manager::epoch_info::EpochInfo;
diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs
index 1142eef245b..36f81d834c9 100644
--- a/chain/network/src/peer/peer_actor.rs
+++ b/chain/network/src/peer/peer_actor.rs
@@ -38,7 +38,7 @@ use lru::LruCache;
 use near_async::messaging::SendAsync;
 use near_async::time;
 use near_crypto::Signature;
-use near_o11y::{handler_debug_span, log_assert, OpenTelemetrySpanExt, WithSpanContext};
+use near_o11y::{handler_debug_span, log_assert, WithSpanContext};
 use near_performance_metrics_macros::perf;
 use near_primitives::hash::CryptoHash;
 use near_primitives::network::{AnnounceAccount, PeerId};
diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs
index 6b1d70afad6..cace48ae3ff 100644
--- a/chain/network/src/peer_manager/peer_manager_actor.rs
+++ b/chain/network/src/peer_manager/peer_manager_actor.rs
@@ -24,7 +24,7 @@ use actix::{Actor as _, AsyncContext as _};
 use anyhow::Context as _;
 use near_async::messaging::{SendAsync, Sender};
 use near_async::time;
-use near_o11y::{handler_debug_span, handler_trace_span, OpenTelemetrySpanExt, WithSpanContext};
+use near_o11y::{handler_debug_span, handler_trace_span, WithSpanContext};
 use near_performance_metrics_macros::perf;
 use near_primitives::block::GenesisId;
 use near_primitives::network::{AnnounceAccount, PeerId};
diff --git a/chain/network/src/test_utils.rs b/chain/network/src/test_utils.rs
index 42f070584e6..4806752517f 100644
--- a/chain/network/src/test_utils.rs
+++ b/chain/network/src/test_utils.rs
@@ -8,7 +8,7 @@ use actix::{Actor, ActorContext, Context, Handler};
 use futures::{future, Future, FutureExt};
 use near_async::messaging::{CanSend, MessageWithCallback};
 use near_crypto::{KeyType, SecretKey};
-use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext};
+use near_o11y::{handler_debug_span, WithSpanContext};
 use near_primitives::hash::hash;
 use near_primitives::network::PeerId;
 use near_primitives::types::EpochId;
diff --git a/chain/telemetry/src/lib.rs b/chain/telemetry/src/lib.rs
index 11961aaa378..7e39a154bc6 100644
--- a/chain/telemetry/src/lib.rs
+++ b/chain/telemetry/src/lib.rs
@@ -3,7 +3,7 @@ mod metrics;
 use actix::{Actor, Addr, Context, Handler};
 use awc::{Client, Connector};
 use futures::FutureExt;
-use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt};
+use near_o11y::{handler_debug_span, WithSpanContext, WithSpanContextExt};
 use near_performance_metrics_macros::perf;
 use near_primitives::static_clock::StaticClock;
 use std::ops::Sub;
diff --git a/core/async/src/examples/actix_component.rs b/core/async/src/examples/actix_component.rs
new file mode 100644
index 00000000000..5daafbd1d05
--- /dev/null
+++ b/core/async/src/examples/actix_component.rs
@@ -0,0 +1,123 @@
+use crate as near_async; // only needed because we're in this crate itself
+use crate::futures::{DelayedActionRunner, DelayedActionRunnerExt};
+use crate::messaging::{AsyncSender, SendAsync, Sender};
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use near_async_derive::{MultiSend, MultiSendMessage, MultiSenderFrom};
+use std::ops::{Deref, DerefMut};
+
+#[derive(actix::Message, Debug, Clone, PartialEq, Eq)]
+#[rtype(result = "ExampleResponse")]
+pub struct ExampleRequest {
+    pub id: u32,
+}
+
+#[derive(actix::MessageResponse, Debug, Clone, PartialEq, Eq)]
+pub struct ExampleResponse {
+    pub id: u32,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PeriodicRequest {
+    pub id: u32,
+}
+
+/// An example component that represents the backing state of an actor.
+///
+/// It supports two functionalities: processing of ExampleRequest, as well as
+/// sending a PeriodicRequest every second. We'll be testing both of these
+/// functionalities.
+pub struct ExampleComponent {
+    next_periodic_request_id: u32,
+    periodic_request_sender: Sender<PeriodicRequest>,
+}
+
+impl ExampleComponent {
+    pub fn new(periodic_request_sender: Sender<PeriodicRequest>) -> Self {
+        Self { next_periodic_request_id: 0, periodic_request_sender }
+    }
+
+    /// Example function that processes a request received by the actor.
+    pub fn process_request(&mut self, request: ExampleRequest) -> ExampleResponse {
+        ExampleResponse { id: request.id }
+    }
+
+    /// Example start function that is called at the start of the actor,
+    /// to schedule timers.
+    pub fn start(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        self.schedule_periodic_request(ctx);
+    }
+
+    fn schedule_periodic_request(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
+        ctx.run_later(std::time::Duration::from_secs(1), |component, ctx| {
+            component
+                .periodic_request_sender
+                .send(PeriodicRequest { id: component.next_periodic_request_id });
+            component.next_periodic_request_id += 1;
+            component.schedule_periodic_request(ctx);
+        });
+    }
+}
+
+/// Example actix Actor. Actors should have nothing but a shell that
+/// forwards messages to the implementing component, because in the
+/// TestLoop tests we aren't able to use this part at all.
+struct ExampleActor {
+    component: ExampleComponent,
+}
+
+// This Deref and DerefMut is used to support the DelayedActionRunner.
+impl Deref for ExampleActor {
+    type Target = ExampleComponent;
+
+    fn deref(&self) -> &Self::Target {
+        &self.component
+    }
+}
+
+impl DerefMut for ExampleActor {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.component
+    }
+}
+
+impl actix::Actor for ExampleActor {
+    type Context = actix::Context<Self>;
+
+    fn started(&mut self, ctx: &mut Self::Context) {
+        self.component.start(ctx);
+    }
+}
+
+impl actix::Handler<ExampleRequest> for ExampleActor {
+    type Result = ExampleResponse;
+
+    fn handle(&mut self, msg: ExampleRequest, _ctx: &mut Self::Context) -> Self::Result {
+        self.component.process_request(msg)
+    }
+}
+
+/// Typically an actor would handle multiple messages, and this is where
+/// multisenders come in handy.
+/// For this example we have only one message, but we still demonstrate
+/// the use of multisenders.
+#[derive(MultiSend, MultiSenderFrom, MultiSendMessage)]
+#[multi_send_message_derive(Debug)]
+pub struct ExampleComponentAdapter {
+    pub example: AsyncSender<ExampleRequest, ExampleResponse>,
+}
+
+/// Just another component that will send a request to the ExampleComponent.
+pub struct OuterComponent {
+    example: ExampleComponentAdapter,
+}
+
+impl OuterComponent {
+    pub fn new(example: ExampleComponentAdapter) -> Self {
+        Self { example }
+    }
+
+    pub fn call_example_component_for_response(&self, id: u32) -> BoxFuture<'static, u32> {
+        let response = self.example.send_async(ExampleRequest { id });
+        async move { response.await.unwrap().id }.boxed()
+    }
+}
diff --git a/core/async/src/examples/actix_component_test.rs b/core/async/src/examples/actix_component_test.rs
new file mode 100644
index 00000000000..6f2c12bd9ac
--- /dev/null
+++ b/core/async/src/examples/actix_component_test.rs
@@ -0,0 +1,102 @@
+use super::actix_component::{
+    ExampleComponent, ExampleComponentAdapterMessage, OuterComponent, PeriodicRequest,
+};
+use crate::futures::FutureSpawnerExt;
+use crate::messaging::{IntoMultiSender, IntoSender};
+use crate::test_loop::event_handler::{capture_events, LoopEventHandler};
+use crate::test_loop::futures::{
+    drive_delayed_action_runners, drive_futures, TestLoopDelayedActionEvent, TestLoopTask,
+};
+use crate::test_loop::TestLoopBuilder;
+use derive_enum_from_into::{EnumFrom, EnumTryInto};
+use std::sync::Arc;
+use time::Duration;
+
+#[derive(derive_more::AsMut, derive_more::AsRef)]
+struct ExampleComponentTestData {
+    dummy: (),
+    example: ExampleComponent,
+    outer: OuterComponent,
+    periodic_requests_captured: Vec<PeriodicRequest>,
+}
+
+#[derive(Debug, EnumTryInto, EnumFrom)]
+enum ExampleComponentTestEvent {
+    PeriodicRequest(PeriodicRequest),
+    ExampleRequest(ExampleComponentAdapterMessage),
+    // Needed to support DelayedActionRunner on the ExampleComponent.
+    DelayedAction(TestLoopDelayedActionEvent<ExampleComponent>),
+    // Arc is needed to support futures.
+    Task(Arc<TestLoopTask>),
+}
+
+fn example_handler() -> LoopEventHandler<ExampleComponent, ExampleComponentAdapterMessage> {
+    LoopEventHandler::new_simple(
+        |event: ExampleComponentAdapterMessage, data: &mut ExampleComponent| match event {
+            ExampleComponentAdapterMessage::_example(request) => {
+                let response = data.process_request(request.message);
+                (request.callback)(Ok(response));
+            }
+        },
+    )
+}
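To make the derives above less magical: the following is roughly what `MultiSendMessage` is assumed to generate for this adapter — one enum variant per sender field, each carrying the message plus a response callback. `MessageWithCallback` is re-sketched inline (with the error type simplified) so the snippet stands alone:

pub struct MessageWithCallback<M, R> {
    pub message: M,
    pub callback: Box<dyn FnOnce(Result<R, ()>) + Send>, // error type simplified
}

pub struct ExampleRequest {
    pub id: u32,
}

pub struct ExampleResponse {
    pub id: u32,
}

// One variant per sender field of ExampleComponentAdapter.
#[allow(non_camel_case_types)]
pub enum ExampleComponentAdapterMessage {
    _example(MessageWithCallback<ExampleRequest, ExampleResponse>),
}

fn main() {
    // A handler pattern-matches the variant, computes the response, and
    // completes the callback -- exactly what `example_handler` above does.
    let msg = ExampleComponentAdapterMessage::_example(MessageWithCallback {
        message: ExampleRequest { id: 5 },
        callback: Box::new(|r| assert!(matches!(r, Ok(ExampleResponse { id: 5 })))),
    });
    match msg {
        ExampleComponentAdapterMessage::_example(request) => {
            let response = ExampleResponse { id: request.message.id };
            (request.callback)(Ok(response));
        }
    }
}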
+#[test]
+fn test_actix_component() {
+    let builder = TestLoopBuilder::<ExampleComponentTestEvent>::new();
+    let sender = builder.sender();
+    let mut test = builder.build(ExampleComponentTestData {
+        dummy: (),
+        example: ExampleComponent::new(sender.clone().into_sender()),
+        outer: OuterComponent::new(
+            // We need to specify the explicit type here because this is a
+            // three-step conversion. First, we're converting the
+            // `DelaySender<ExampleComponentTestEvent>` into a
+            // `Sender<ExampleComponentAdapterMessage>`, which is a single
+            // sender that accepts an enum (generated by MultiSendMessage)
+            // that contains every type of message that can be sent to
+            // ExampleComponent. Then, we break it apart so that it becomes
+            // something that can send any of the messages that the enum
+            // contains, and finally we convert it to a multisender
+            // ExampleComponentAdapter.
+            IntoSender::<ExampleComponentAdapterMessage>::into_sender(sender)
+                .break_apart()
+                .into_multi_sender(),
+        ),
+        periodic_requests_captured: vec![],
+    });
+    // This is to allow futures to be used in the test even though the
+    // test itself is synchronous.
+    test.register_handler(drive_futures().widen());
+    // This is to allow the ExampleComponent to run delayed actions (timers).
+    test.register_handler(drive_delayed_action_runners::<ExampleComponent>().widen());
+    // This is to capture the periodic requests sent by the ExampleComponent
+    // so we can assert against it.
+    test.register_handler(capture_events::<PeriodicRequest>().widen());
+    // This is to handle the ExampleComponentAdapterMessage events by
+    // forwarding them to the ExampleComponent.
+    test.register_handler(example_handler().widen());
+
+    // We need to redo whatever the ExampleActor does in its `started` method.
+    test.data.example.start(&mut test.delayed_action_runner());
+    // Send some requests; this can be done in the asynchronous context.
+    test.future_spawner().spawn("wait for 5", {
+        let res = test.data.outer.call_example_component_for_response(5);
+        async move {
+            assert_eq!(res.await, 5);
+        }
+    });
+    test.future_spawner().spawn("wait for 6", {
+        let res = test.data.outer.call_example_component_for_response(6);
+        async move {
+            assert_eq!(res.await, 6);
+        }
+    });
+    // Run for 3 seconds (not real time, but in the test loop time).
+    // It should result in sending 3 periodic requests.
+    test.run_for(Duration::seconds(3));
+    assert_eq!(
+        test.data.periodic_requests_captured,
+        vec![PeriodicRequest { id: 0 }, PeriodicRequest { id: 1 }, PeriodicRequest { id: 2 },]
+    );
+}
diff --git a/core/async/src/examples/async_component.rs b/core/async/src/examples/async_component.rs
index 1ebd03766d3..542957b775a 100644
--- a/core/async/src/examples/async_component.rs
+++ b/core/async/src/examples/async_component.rs
@@ -1,5 +1,5 @@
 use crate::{
-    futures::FutureSpawner,
+    futures::{FutureSpawner, FutureSpawnerExt},
     messaging::{AsyncSender, Sender},
 };
 
diff --git a/core/async/src/examples/mod.rs b/core/async/src/examples/mod.rs
index e48d526ab19..710c4525f93 100644
--- a/core/async/src/examples/mod.rs
+++ b/core/async/src/examples/mod.rs
@@ -1,3 +1,5 @@
+mod actix_component;
+mod actix_component_test;
 mod async_component;
 mod async_component_test;
 mod multi_instance_test;
diff --git a/core/async/src/futures.rs b/core/async/src/futures.rs
index fc936b2d5c6..4fd55ef920d 100644
--- a/core/async/src/futures.rs
+++ b/core/async/src/futures.rs
@@ -1,5 +1,7 @@
+use actix::Actor;
 pub use futures::future::BoxFuture; // pub for macros
 use futures::FutureExt;
+use std::ops::DerefMut;
 
 /// Abstraction for something that can drive futures.
 ///
@@ -14,13 +16,27 @@ pub trait FutureSpawner {
     fn spawn_boxed(&self, description: &'static str, f: BoxFuture<'static, ()>);
 }
 
-impl<'a> dyn FutureSpawner + 'a {
-    /// Spawns a future by automatically boxing it.
-    pub fn spawn<F>(&self, description: &'static str, f: F)
+pub trait FutureSpawnerExt {
+    fn spawn<F>(&self, description: &'static str, f: F)
+    where
+        F: futures::Future<Output = ()> + Send + 'static;
+}
+
+impl<T: FutureSpawner> FutureSpawnerExt for T {
+    fn spawn<F>(&self, description: &'static str, f: F)
     where
         F: futures::Future<Output = ()> + Send + 'static,
     {
-        self.spawn_boxed(description, f.boxed())
+        self.spawn_boxed(description, f.boxed());
+    }
+}
+
+impl FutureSpawnerExt for dyn FutureSpawner + '_ {
+    fn spawn<F>(&self, description: &'static str, f: F)
+    where
+        F: futures::Future<Output = ()> + Send + 'static,
+    {
+        self.spawn_boxed(description, f.boxed());
     }
 }
 
@@ -32,3 +48,68 @@ impl FutureSpawner for ActixFutureSpawner {
         near_performance_metrics::actix::spawn(description, f);
     }
 }
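The change above moves `spawn` from an inherent impl on `dyn FutureSpawner` into an extension trait with a blanket impl, so the generic helper is callable on concrete spawner types as well as through a trait object. A self-contained sketch of the pattern, with a toy spawner that just polls the future on the spot:

use futures::future::BoxFuture;
use futures::FutureExt;

pub trait FutureSpawner {
    fn spawn_boxed(&self, description: &'static str, f: BoxFuture<'static, ()>);
}

pub trait FutureSpawnerExt {
    fn spawn<F>(&self, description: &'static str, f: F)
    where
        F: futures::Future<Output = ()> + Send + 'static;
}

// Blanket impl: every concrete spawner gets `spawn` for free.
impl<T: FutureSpawner> FutureSpawnerExt for T {
    fn spawn<F>(&self, description: &'static str, f: F)
    where
        F: futures::Future<Output = ()> + Send + 'static,
    {
        self.spawn_boxed(description, f.boxed());
    }
}

struct InlineSpawner;

impl FutureSpawner for InlineSpawner {
    fn spawn_boxed(&self, _description: &'static str, f: BoxFuture<'static, ()>) {
        futures::executor::block_on(f); // toy behavior: run it right here
    }
}

fn main() {
    InlineSpawner.spawn("demo", async { println!("spawned!") });
}

Note that the blanket impl only covers sized types (`T` is implicitly `Sized`), which is why the real code still needs the separate `impl FutureSpawnerExt for dyn FutureSpawner + '_`.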
+
+/// Abstraction for something that can schedule something to run after.
+/// This isn't the same as just delaying a closure. Rather, it has the
+/// additional power of providing the closure a mutable reference to some
+/// object. With the Actix framework, for example, the object (of type `T`)
+/// would be the actor, and the `DelayedActionRunner<T>` is implemented by
+/// the actix `Context`.
+pub trait DelayedActionRunner<T> {
+    fn run_later_boxed(
+        &mut self,
+        dur: std::time::Duration,
+        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+    );
+}
+
+pub trait DelayedActionRunnerExt<T> {
+    fn run_later(
+        &mut self,
+        dur: std::time::Duration,
+        f: impl FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static,
+    );
+}
+
+impl<T, Runner> DelayedActionRunnerExt<T> for Runner
+where
+    Runner: DelayedActionRunner<T>,
+{
+    fn run_later(
+        &mut self,
+        dur: std::time::Duration,
+        f: impl FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static,
+    ) {
+        self.run_later_boxed(dur, Box::new(f));
+    }
+}
+
+impl<T> DelayedActionRunnerExt<T> for dyn DelayedActionRunner<T> + '_ {
+    fn run_later(
+        &mut self,
+        dur: std::time::Duration,
+        f: impl FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static,
+    ) {
+        self.run_later_boxed(dur, Box::new(f));
+    }
+}
+
+/// Implementation of `DelayedActionRunner` for Actix. With this, any code
+/// that used to take a `&mut actix::Context` can now take a
+/// `&mut dyn DelayedActionRunner` instead, which isn't actix-specific.
+impl<T, Outer> DelayedActionRunner<T> for actix::Context<Outer>
+where
+    T: 'static,
+    Outer: DerefMut<Target = T>,
+    Outer: Actor<Context = actix::Context<Outer>>,
+{
+    fn run_later_boxed(
+        &mut self,
+        dur: std::time::Duration,
+        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+    ) {
+        near_performance_metrics::actix::run_later(self, dur, move |obj, ctx| {
+            f(obj.deref_mut(), ctx)
+        });
+    }
+}
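Anything can implement `DelayedActionRunner<T>`, not just an actix `Context`. A stand-alone sketch (the trait is copied here so the snippet compiles on its own) with a toy queue-backed runner, showing how a component that re-arms its own timer, like `schedule_periodic_request` earlier, can be driven without actix:

use std::time::Duration;

pub trait DelayedActionRunner<T> {
    fn run_later_boxed(
        &mut self,
        dur: Duration,
        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
    );
}

// A toy runner that queues actions instead of involving any framework.
struct QueueRunner<T> {
    queue: Vec<(Duration, Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send>)>,
}

impl<T> DelayedActionRunner<T> for QueueRunner<T> {
    fn run_later_boxed(
        &mut self,
        dur: Duration,
        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
    ) {
        self.queue.push((dur, f));
    }
}

// Pops queued actions in order, handing each one both the target object and
// the runner so it can re-arm itself; `steps` bounds the run.
fn drive<T>(target: &mut T, runner: &mut QueueRunner<T>, steps: usize) {
    for _ in 0..steps {
        if runner.queue.is_empty() {
            break;
        }
        let (_dur, f) = runner.queue.remove(0);
        f(target, runner);
    }
}

struct Counter {
    ticks: u32,
}

impl Counter {
    fn schedule(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) {
        ctx.run_later_boxed(
            Duration::from_secs(1),
            Box::new(|c: &mut Counter, ctx: &mut dyn DelayedActionRunner<Counter>| {
                c.ticks += 1;
                c.schedule(ctx); // re-arm, like schedule_periodic_request does
            }),
        );
    }
}

fn main() {
    let mut counter = Counter { ticks: 0 };
    let mut runner = QueueRunner { queue: Vec::new() };
    counter.schedule(&mut runner);
    drive(&mut counter, &mut runner, 3); // "fire" three timer ticks
    assert_eq!(counter.ticks, 3);
}

This is essentially what TestLoop's runner (below) does, except it turns each scheduled action into a loop event with a virtual-time delay.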
diff --git a/core/async/src/test_loop.rs b/core/async/src/test_loop.rs
index 127795e30d0..7e01ff98c2d 100644
--- a/core/async/src/test_loop.rs
+++ b/core/async/src/test_loop.rs
@@ -68,7 +68,10 @@ pub mod multi_instance;
 use self::{
     delay_sender::DelaySender,
     event_handler::LoopEventHandler,
-    futures::{TestLoopFutureSpawner, TestLoopTask},
+    futures::{
+        TestLoopDelayedActionEvent, TestLoopDelayedActionRunner, TestLoopFutureSpawner,
+        TestLoopTask,
+    },
 };
 use crate::test_loop::event_handler::LoopHandlerContext;
 use crate::time;
@@ -322,6 +325,20 @@ impl TestLoop {
     pub fn run_instant(&mut self) {
         self.run_for(time::Duration::ZERO);
     }
+
+    pub fn future_spawner(&self) -> TestLoopFutureSpawner
+    where
+        Event: From<Arc<TestLoopTask>>,
+    {
+        self.sender().narrow()
+    }
+
+    pub fn delayed_action_runner<T>(&self) -> TestLoopDelayedActionRunner<T>
+    where
+        Event: From<TestLoopDelayedActionEvent<T>>,
+    {
+        TestLoopDelayedActionRunner { sender: self.sender().narrow() }
+    }
 }
 
 impl Drop for TestLoop {
diff --git a/core/async/src/test_loop/event_handler.rs b/core/async/src/test_loop/event_handler.rs
index 37304d55591..a9730ce3525 100644
--- a/core/async/src/test_loop/event_handler.rs
+++ b/core/async/src/test_loop/event_handler.rs
@@ -44,6 +44,20 @@ impl LoopEventHandler {
         })
     }
 
+    pub fn new_with_drop(
+        handler: impl FnMut(Event, &mut Data, &LoopHandlerContext<Event>) -> Result<(), Event> + 'static,
+        ok_to_drop: impl Fn(&Event) -> bool + 'static,
+    ) -> Self {
+        Self {
+            inner: Box::new(LoopEventHandlerImplByFunction {
+                initial_event: None,
+                handler: Box::new(handler),
+                ok_to_drop: Box::new(ok_to_drop),
+                context: None,
+            }),
+        }
+    }
+
     /// Like new(), but additionally sends an initial event with an initial
     /// delay. See periodic_interval() for why this is useful.
     pub fn new_with_initial_event(
diff --git a/core/async/src/test_loop/futures.rs b/core/async/src/test_loop/futures.rs
index d64da6c992e..c36c4a237e7 100644
--- a/core/async/src/test_loop/futures.rs
+++ b/core/async/src/test_loop/futures.rs
@@ -1,4 +1,5 @@
 use super::{delay_sender::DelaySender, event_handler::LoopEventHandler};
+use crate::futures::DelayedActionRunner;
 use crate::{futures::FutureSpawner, messaging::CanSend};
 use futures::future::BoxFuture;
 use futures::task::{waker_ref, ArcWake};
@@ -95,3 +96,50 @@ impl FutureSpawner for TestLoopFutureSpawner {
         self.send(task);
     }
 }
+
+/// Represents an action that was scheduled to run later, by using
+/// `DelayedActionRunner::run_later`.
+pub struct TestLoopDelayedActionEvent<T>(
+    Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+);
+
+impl<T> Debug for TestLoopDelayedActionEvent<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_tuple("DelayedAction").finish()
+    }
+}
+
+/// An event handler that handles only `TestLoopDelayedActionEvent`s, by
+/// running the action encapsulated in the event.
+pub fn drive_delayed_action_runners<T>() -> LoopEventHandler<T, TestLoopDelayedActionEvent<T>> {
+    LoopEventHandler::new_with_drop(
+        |event, data, ctx| {
+            let mut runner = TestLoopDelayedActionRunner { sender: ctx.sender.clone() };
+            (event.0)(data, &mut runner);
+            Ok(())
+        },
+        |_| {
+            // Delayed actions are usually used for timers, so let's just say
+            // it's OK to drop them at the end of the test. It would be hard
+            // to distinguish what sort of delayed action was being scheduled
+            // anyways.
+            true
+        },
+    )
+}
+
+/// `DelayedActionRunner` that schedules the action to be run later by the
+/// TestLoop event loop.
+pub struct TestLoopDelayedActionRunner<T> {
+    pub(crate) sender: DelaySender<TestLoopDelayedActionEvent<T>>,
+}
+
+impl<T> DelayedActionRunner<T> for TestLoopDelayedActionRunner<T> {
+    fn run_later_boxed(
+        &mut self,
+        dur: std::time::Duration,
+        f: Box<dyn FnOnce(&mut T, &mut dyn DelayedActionRunner<T>) + Send + 'static>,
+    ) {
+        self.sender.send_with_delay(TestLoopDelayedActionEvent(f), dur.try_into().unwrap());
+    }
+}
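Handlers like `drive_delayed_action_runners` are registered in tests via `.widen()`, which lifts a handler written against one component onto the whole test-data struct through `AsMut`. A simplified sketch of that idea using plain closures (the real `LoopEventHandler::widen` also converts event types via `TryInto`/`From`, which is omitted here):

struct Component {
    next_id: u32,
}

struct TestData {
    component: Component,
}

impl AsMut<Component> for TestData {
    fn as_mut(&mut self) -> &mut Component {
        &mut self.component
    }
}

// A "narrow" handler that only knows about Component...
fn narrow_handler(data: &mut Component, event: u32) {
    data.next_id += event;
}

// ...lifted so it can be registered against the whole TestData.
fn widen<D, N, F>(mut f: F) -> impl FnMut(&mut D, u32)
where
    D: AsMut<N>,
    F: FnMut(&mut N, u32),
{
    move |data: &mut D, event| f(data.as_mut(), event)
}

fn main() {
    let mut data = TestData { component: Component { next_id: 0 } };
    let mut handler = widen(narrow_handler);
    handler(&mut data, 5);
    assert_eq!(data.component.next_id, 5);
}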
diff --git a/core/o11y/src/macros.rs b/core/o11y/src/macros.rs
index 64b06abd89c..8082abadad1 100644
--- a/core/o11y/src/macros.rs
+++ b/core/o11y/src/macros.rs
@@ -11,7 +11,7 @@ macro_rules! handler_span {
             actor = near_o11y::macros::last_component_of_name(std::any::type_name::<Self>()),
             $($extra_fields)*)
         .entered();
-        span.set_parent(context);
+        <tracing::Span as near_o11y::OpenTelemetrySpanExt>::set_parent(&span, context);
         (span, msg)
     }};
 }
diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs
index 787bbe3cd15..eeabf869356 100644
--- a/integration-tests/src/tests/network/runner.rs
+++ b/integration-tests/src/tests/network/runner.rs
@@ -2,7 +2,7 @@ use actix::{Actor, Addr};
 use anyhow::{anyhow, bail, Context};
 use near_async::actix::AddrWithAutoSpanContextExt;
 use near_async::messaging::{noop, IntoMultiSender, IntoSender, LateBoundSender};
-use near_async::time;
+use near_async::time::{self, Clock};
 use near_chain::test_utils::{KeyValueRuntime, MockEpochManager, ValidatorSchedule};
 use near_chain::types::RuntimeAdapter;
 use near_chain::{Chain, ChainGenesis};
@@ -77,6 +77,7 @@ fn setup_network_node(
     let state_sync_adapter =
         Arc::new(RwLock::new(SyncAdapter::new(noop().into_sender(), noop().into_sender())));
     let client_actor = start_client(
+        Clock::real(),
         client_config.clone(),
         chain_genesis.clone(),
         epoch_manager.clone(),
diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs
index e4ff9c8629f..771c7ad2c8b 100644
--- a/nearcore/src/lib.rs
+++ b/nearcore/src/lib.rs
@@ -11,7 +11,7 @@ use anyhow::Context;
 use cold_storage::ColdStoreLoopHandle;
 use near_async::actix::AddrWithAutoSpanContextExt;
 use near_async::messaging::{IntoMultiSender, IntoSender, LateBoundSender};
-use near_async::time;
+use near_async::time::{self, Clock};
 use near_chain::state_snapshot_actor::{
     get_delete_snapshot_callback, get_make_snapshot_callback, SnapshotCallbacks, StateSnapshotActor,
 };
@@ -335,6 +335,7 @@ pub fn start_with_config_and_synchronization(
     let snapshot_callbacks = SnapshotCallbacks { make_snapshot_callback, delete_snapshot_callback };
     let (client_actor, client_arbiter_handle, resharding_handle) = start_client(
+        Clock::real(),
         config.client_config.clone(),
         chain_genesis.clone(),
         epoch_manager.clone(),