diff --git a/node/Cargo.lock b/node/Cargo.lock index 346c19ad..d632c84f 100644 --- a/node/Cargo.lock +++ b/node/Cargo.lock @@ -2643,7 +2643,6 @@ name = "zksync_consensus_executor" version = "0.1.0" dependencies = [ "anyhow", - "prost", "rand 0.8.5", "test-casing", "tokio", @@ -2657,8 +2656,6 @@ dependencies = [ "zksync_consensus_storage", "zksync_consensus_sync_blocks", "zksync_consensus_utils", - "zksync_protobuf", - "zksync_protobuf_build", ] [[package]] @@ -2715,12 +2712,12 @@ dependencies = [ "async-trait", "prost", "rand 0.8.5", - "rocksdb", "tempfile", "test-casing", "thiserror", "tokio", "tracing", + "vise", "zksync_concurrency", "zksync_consensus_roles", "zksync_protobuf", @@ -2751,9 +2748,13 @@ name = "zksync_consensus_tools" version = "0.1.0" dependencies = [ "anyhow", + "async-trait", "clap", + "prost", "rand 0.8.5", + "rocksdb", "serde_json", + "tempfile", "tokio", "tracing", "tracing-subscriber", @@ -2766,6 +2767,7 @@ dependencies = [ "zksync_consensus_storage", "zksync_consensus_utils", "zksync_protobuf", + "zksync_protobuf_build", ] [[package]] diff --git a/node/Cargo.toml b/node/Cargo.toml index 9d7a65a3..81f70c23 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -150,3 +150,4 @@ wildcard_dependencies = "warn" # Produces too many false positives. redundant_locals = "allow" needless_pass_by_ref_mut = "allow" +box_default = "allow" diff --git a/node/actors/bft/src/inner.rs b/node/actors/bft/src/config.rs similarity index 62% rename from node/actors/bft/src/inner.rs rename to node/actors/bft/src/config.rs index 368553b7..beeb32ea 100644 --- a/node/actors/bft/src/inner.rs +++ b/node/actors/bft/src/config.rs @@ -1,23 +1,26 @@ //! The inner data of the consensus state machine. This is shared between the different roles. - -use crate::{io::OutputMessage, misc}; +use crate::{misc, PayloadManager}; +use std::sync::Arc; use tracing::instrument; -use zksync_concurrency::ctx::channel; use zksync_consensus_roles::validator; +use zksync_consensus_storage as storage; -/// The ConsensusInner struct, it contains data to be shared with the state machines. This is never supposed -/// to be modified, except by the Consensus struct. +/// Configuration of the bft actor. #[derive(Debug)] -pub(crate) struct ConsensusInner { - /// The communication pipe. This is used to send outputs. - pub(crate) pipe: channel::UnboundedSender, +pub struct Config { /// The validator's secret key. - pub(crate) secret_key: validator::SecretKey, + pub secret_key: validator::SecretKey, /// A vector of public keys for all the validators in the network. - pub(crate) validator_set: validator::ValidatorSet, + pub validator_set: validator::ValidatorSet, + /// Block store. + pub block_store: Arc, + /// Replica store. + pub replica_store: Box, + /// Payload manager. + pub payload_manager: Box, } -impl ConsensusInner { +impl Config { /// The maximum size of the payload of a block, in bytes. We will /// reject blocks with payloads larger than this. pub(crate) const PAYLOAD_MAX_SIZE: usize = 500 * zksync_protobuf::kB; @@ -33,16 +36,12 @@ impl ConsensusInner { /// for a given number of replicas. #[instrument(level = "trace", ret)] pub fn threshold(&self) -> usize { - let num_validators = self.validator_set.len(); - - misc::consensus_threshold(num_validators) + misc::consensus_threshold(self.validator_set.len()) } /// Calculate the maximum number of faulty replicas, for a given number of replicas. 
#[instrument(level = "trace", ret)] pub fn faulty_replicas(&self) -> usize { - let num_validators = self.validator_set.len(); - - misc::faulty_replicas(num_validators) + misc::faulty_replicas(self.validator_set.len()) } } diff --git a/node/actors/bft/src/leader/replica_commit.rs b/node/actors/bft/src/leader/replica_commit.rs index 36e5a026..b8a56ab4 100644 --- a/node/actors/bft/src/leader/replica_commit.rs +++ b/node/actors/bft/src/leader/replica_commit.rs @@ -68,7 +68,7 @@ impl StateMachine { // Check that the message signer is in the validator set. let validator_index = - self.inner + self.config .validator_set .index(author) .ok_or(Error::NonValidatorSigner { @@ -84,7 +84,7 @@ impl StateMachine { } // If the message is for a view when we are not a leader, we discard it. - if self.inner.view_leader(message.view) != self.inner.secret_key.public() { + if self.config.view_leader(message.view) != self.config.secret_key.public() { return Err(Error::NotLeaderInView); } @@ -109,7 +109,7 @@ impl StateMachine { // We add the message to the incrementally-constructed QC. self.commit_qcs .entry(message.view) - .or_insert(CommitQC::new(message, &self.inner.validator_set)) + .or_insert(CommitQC::new(message, &self.config.validator_set)) .add(&signed_message.sig, validator_index); // We store the message in our cache. @@ -123,11 +123,11 @@ impl StateMachine { } let Some((_, replica_messages)) = by_proposal .into_iter() - .find(|(_, v)| v.len() >= self.inner.threshold()) + .find(|(_, v)| v.len() >= self.config.threshold()) else { return Ok(()); }; - debug_assert_eq!(replica_messages.len(), self.inner.threshold()); + debug_assert_eq!(replica_messages.len(), self.config.threshold()); // ----------- Update the state machine -------------- @@ -151,7 +151,7 @@ impl StateMachine { // Broadcast the leader commit message to all replicas (ourselves included). let output_message = ConsensusInputMessage { message: self - .inner + .config .secret_key .sign_msg(validator::ConsensusMsg::LeaderCommit( validator::LeaderCommit { @@ -161,7 +161,7 @@ impl StateMachine { )), recipient: Target::Broadcast, }; - self.inner.pipe.send(output_message.into()); + self.pipe.send(output_message.into()); // Clean the caches. self.prepare_message_cache.retain(|k, _| k >= &self.view); diff --git a/node/actors/bft/src/leader/replica_prepare.rs b/node/actors/bft/src/leader/replica_prepare.rs index 3f458cad..b83798aa 100644 --- a/node/actors/bft/src/leader/replica_prepare.rs +++ b/node/actors/bft/src/leader/replica_prepare.rs @@ -93,7 +93,7 @@ impl StateMachine { // Check that the message signer is in the validator set. let validator_index = - self.inner + self.config .validator_set .index(author) .ok_or(Error::NonValidatorSigner { @@ -109,7 +109,7 @@ impl StateMachine { } // If the message is for a view when we are not a leader, we discard it. - if self.inner.view_leader(message.view) != self.inner.secret_key.public() { + if self.config.view_leader(message.view) != self.config.secret_key.public() { return Err(Error::NotLeaderInView); } @@ -134,7 +134,7 @@ impl StateMachine { // Verify the high QC. message .high_qc - .verify(&self.inner.validator_set, self.inner.threshold()) + .verify(&self.config.validator_set, self.config.threshold()) .map_err(Error::InvalidHighQC)?; // If the high QC is for a future view, we discard the message. 
@@ -153,7 +153,7 @@ impl StateMachine { self.prepare_qcs.entry(message.view).or_default().add( &signed_message, validator_index, - &self.inner.validator_set, + &self.config.validator_set, ); // We store the message in our cache. @@ -165,7 +165,7 @@ impl StateMachine { // Now we check if we have enough messages to continue. let num_messages = self.prepare_message_cache.get(&message.view).unwrap().len(); - if num_messages < self.inner.threshold() { + if num_messages < self.config.threshold() { return Ok(()); } @@ -173,7 +173,7 @@ impl StateMachine { // for this same view if we receive another replica prepare message after this. self.prepare_message_cache.remove(&message.view); - debug_assert_eq!(num_messages, self.inner.threshold()); + debug_assert_eq!(num_messages, self.config.threshold()); // ----------- Update the state machine -------------- diff --git a/node/actors/bft/src/leader/state_machine.rs b/node/actors/bft/src/leader/state_machine.rs index 53d93836..c59003e3 100644 --- a/node/actors/bft/src/leader/state_machine.rs +++ b/node/actors/bft/src/leader/state_machine.rs @@ -1,4 +1,4 @@ -use crate::{metrics, ConsensusInner, PayloadSource}; +use crate::{metrics, Config, OutputSender}; use std::{ collections::{BTreeMap, HashMap}, sync::Arc, @@ -7,14 +7,16 @@ use std::{ use tracing::instrument; use zksync_concurrency::{ctx, error::Wrap as _, metrics::LatencyHistogramExt as _, sync, time}; use zksync_consensus_network::io::{ConsensusInputMessage, Target}; -use zksync_consensus_roles::validator::{self, CommitQC, PrepareQC}; +use zksync_consensus_roles::validator; /// The StateMachine struct contains the state of the leader. This is a simple state machine. We just store /// replica messages and produce leader messages (including proposing blocks) when we reach the threshold for /// those messages. When participating in consensus we are not the leader most of the time. pub(crate) struct StateMachine { /// Consensus configuration and output channel. - pub(crate) inner: Arc, + pub(crate) config: Arc, + /// Pipe through with leader sends network messages. + pub(crate) pipe: OutputSender, /// The current view number. This might not match the replica's view number, we only have this here /// to make the leader advance monotonically in time and stop it from accepting messages from the past. pub(crate) view: validator::ViewNumber, @@ -29,24 +31,25 @@ pub(crate) struct StateMachine { HashMap>, >, /// Prepare QCs indexed by view number. - pub(crate) prepare_qcs: BTreeMap, + pub(crate) prepare_qcs: BTreeMap, /// Newest prepare QC composed from the `ReplicaPrepare` messages. - pub(crate) prepare_qc: sync::watch::Sender>, + pub(crate) prepare_qc: sync::watch::Sender>, /// A cache of replica commit messages indexed by view number and validator. pub(crate) commit_message_cache: BTreeMap< validator::ViewNumber, HashMap>, >, /// Commit QCs indexed by view number. - pub(crate) commit_qcs: BTreeMap, + pub(crate) commit_qcs: BTreeMap, } impl StateMachine { /// Creates a new StateMachine struct. #[instrument(level = "trace")] - pub fn new(ctx: &ctx::Ctx, inner: Arc) -> Self { + pub fn new(ctx: &ctx::Ctx, config: Arc, pipe: OutputSender) -> Self { StateMachine { - inner, + config, + pipe, view: validator::ViewNumber(0), phase: validator::Phase::Prepare, phase_start: ctx.now(), @@ -106,9 +109,9 @@ impl StateMachine { /// that the validator doesn't spend time on generating payloads for already expired views. 
pub(crate) async fn run_proposer( ctx: &ctx::Ctx, - inner: &ConsensusInner, - payload_source: &dyn PayloadSource, - mut prepare_qc: sync::watch::Receiver>, + config: &Config, + mut prepare_qc: sync::watch::Receiver>, + pipe: &OutputSender, ) -> ctx::Result<()> { let mut next_view = validator::ViewNumber(0); loop { @@ -119,7 +122,7 @@ impl StateMachine { continue; }; next_view = prepare_qc.view().next(); - Self::propose(ctx, inner, payload_source, prepare_qc).await?; + Self::propose(ctx, config, prepare_qc, pipe).await?; } } @@ -127,9 +130,9 @@ impl StateMachine { /// Uses `payload_source` to generate a payload if needed. pub(crate) async fn propose( ctx: &ctx::Ctx, - inner: &ConsensusInner, - payload_source: &dyn PayloadSource, - justification: PrepareQC, + cfg: &Config, + justification: validator::PrepareQC, + pipe: &OutputSender, ) -> ctx::Result<()> { // Get the highest block voted for and check if there's a quorum of votes for it. To have a quorum // in this situation, we require 2*f+1 votes, where f is the maximum number of faulty replicas. @@ -141,11 +144,11 @@ impl StateMachine { let highest_vote: Option = count .iter() // We only take one value from the iterator because there can only be at most one block with a quorum of 2f+1 votes. - .find_map(|(h, v)| (*v > 2 * inner.faulty_replicas()).then_some(h)) + .find_map(|(h, v)| (*v > 2 * cfg.faulty_replicas()).then_some(h)) .cloned(); - // Get the highest CommitQC. - let highest_qc: &CommitQC = justification + // Get the highest validator::CommitQC. + let highest_qc: &validator::CommitQC = justification .map .keys() .map(|s| &s.high_qc) @@ -162,8 +165,13 @@ impl StateMachine { Some(proposal) if proposal != highest_qc.message.proposal => (proposal, None), // The previous block was finalized, so we can propose a new block. _ => { - let payload = payload_source - .propose(ctx, highest_qc.message.proposal.number.next()) + // Defensively assume that PayloadManager cannot propose until the previous block is stored. + cfg.block_store + .wait_until_persisted(ctx, highest_qc.header().number) + .await?; + let payload = cfg + .payload_manager + .propose(ctx, highest_qc.header().number.next()) .await?; metrics::METRICS .leader_proposal_payload_size @@ -177,7 +185,7 @@ impl StateMachine { // ----------- Prepare our message and send it -------------- // Broadcast the leader prepare message to all replicas (ourselves included). 
- let msg = inner + let msg = cfg .secret_key .sign_msg(validator::ConsensusMsg::LeaderPrepare( validator::LeaderPrepare { @@ -188,7 +196,7 @@ impl StateMachine { justification, }, )); - inner.pipe.send( + pipe.send( ConsensusInputMessage { message: msg, recipient: Target::Broadcast, diff --git a/node/actors/bft/src/leader/tests.rs b/node/actors/bft/src/leader/tests.rs index 9df45048..41f1ee6f 100644 --- a/node/actors/bft/src/leader/tests.rs +++ b/node/actors/bft/src/leader/tests.rs @@ -5,431 +5,586 @@ use crate::testonly::ut_harness::UTHarness; use assert_matches::assert_matches; use pretty_assertions::assert_eq; use rand::Rng; -use zksync_concurrency::ctx; +use zksync_concurrency::{ctx, scope}; use zksync_consensus_roles::validator::{self, LeaderCommit, Phase, ViewNumber}; #[tokio::test] async fn replica_prepare_sanity() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - util.new_leader_prepare(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_leader_prepare(ctx).await; + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_sanity_yield_leader_prepare() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let replica_prepare = util.new_replica_prepare(|_| {}); - let leader_prepare = util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await - .unwrap() - .unwrap(); - assert_eq!( - leader_prepare.msg.protocol_version, - replica_prepare.msg.protocol_version - ); - assert_eq!(leader_prepare.msg.view, replica_prepare.msg.view); - assert_eq!( - leader_prepare.msg.proposal.parent, - replica_prepare.msg.high_vote.proposal.hash() - ); - assert_eq!( - leader_prepare.msg.justification, - util.new_prepare_qc(|msg| *msg = replica_prepare.msg) - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); + let leader_prepare = util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await + .unwrap() + .unwrap(); + assert_eq!( + leader_prepare.msg.protocol_version, + replica_prepare.msg.protocol_version + ); + assert_eq!(leader_prepare.msg.view, replica_prepare.msg.view); + assert_eq!( + leader_prepare.msg.proposal.parent, + replica_prepare.msg.high_vote.proposal.hash() + ); + assert_eq!( + leader_prepare.msg.justification, + util.new_prepare_qc(|msg| *msg = replica_prepare.msg) + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_sanity_yield_leader_prepare_reproposal() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - util.new_replica_commit(ctx).await; - util.process_replica_timeout(ctx).await; - let replica_prepare = util.new_replica_prepare(|_| {}).msg; - let leader_prepare = util - .process_replica_prepare_all(ctx, replica_prepare.clone()) - .await; - - assert_eq!( - leader_prepare.msg.protocol_version, - replica_prepare.protocol_version - ); - assert_eq!(leader_prepare.msg.view, replica_prepare.view); - assert_eq!( - leader_prepare.msg.proposal, - replica_prepare.high_vote.proposal - ); - assert_eq!(leader_prepare.msg.proposal_payload, None); - let map = leader_prepare.msg.justification.map; - assert_eq!(map.len(), 1); - 
assert_eq!(*map.first_key_value().unwrap().0, replica_prepare); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_replica_commit(ctx).await; + util.process_replica_timeout(ctx).await; + let replica_prepare = util.new_replica_prepare(|_| {}).msg; + let leader_prepare = util + .process_replica_prepare_all(ctx, replica_prepare.clone()) + .await; + + assert_eq!( + leader_prepare.msg.protocol_version, + replica_prepare.protocol_version + ); + assert_eq!(leader_prepare.msg.view, replica_prepare.view); + assert_eq!( + leader_prepare.msg.proposal, + replica_prepare.high_vote.proposal + ); + assert_eq!(leader_prepare.msg.proposal_payload, None); + let map = leader_prepare.msg.justification.map; + assert_eq!(map.len(), 1); + assert_eq!(*map.first_key_value().unwrap().0, replica_prepare); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_incompatible_protocol_version() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let incompatible_protocol_version = util.incompatible_protocol_version(); - let replica_prepare = util.new_replica_prepare(|msg| { - msg.protocol_version = incompatible_protocol_version; - }); - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!( - res, - Err(ReplicaPrepareError::IncompatibleProtocolVersion { message_version, local_version }) => { - assert_eq!(message_version, incompatible_protocol_version); - assert_eq!(local_version, util.protocol_version()); - } - ) + scope::run!(ctx, |ctx,s| async { + let (mut util,runner) = UTHarness::new(ctx,1).await; + s.spawn_bg(runner.run(ctx)); + + let incompatible_protocol_version = util.incompatible_protocol_version(); + let replica_prepare = util.new_replica_prepare(|msg| { + msg.protocol_version = incompatible_protocol_version; + }); + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!( + res, + Err(ReplicaPrepareError::IncompatibleProtocolVersion { message_version, local_version }) => { + assert_eq!(message_version, incompatible_protocol_version); + assert_eq!(local_version, util.protocol_version()); + } + ); + Ok(()) + }).await.unwrap(); } #[tokio::test] async fn replica_prepare_non_validator_signer() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let replica_prepare = util.new_replica_prepare(|_| {}).msg; - let non_validator_key: validator::SecretKey = ctx.rng().gen(); - let res = util - .process_replica_prepare(ctx, non_validator_key.sign_msg(replica_prepare)) - .await; - assert_matches!( - res, - Err(ReplicaPrepareError::NonValidatorSigner { signer }) => { - assert_eq!(signer, non_validator_key.public()); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}).msg; + let non_validator_key: validator::SecretKey = ctx.rng().gen(); + let res = util + .process_replica_prepare(ctx, non_validator_key.sign_msg(replica_prepare)) + .await; + assert_matches!( + res, + Err(ReplicaPrepareError::NonValidatorSigner { signer }) => { + assert_eq!(signer, non_validator_key.public()); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_old_view() { zksync_concurrency::testonly::abort_on_panic(); let ctx = 
&ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let replica_prepare = util.new_replica_prepare(|_| {}); - util.leader.view = util.replica.view.next(); - util.leader.phase = Phase::Prepare; - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!( - res, - Err(ReplicaPrepareError::Old { - current_view: ViewNumber(2), - current_phase: Phase::Prepare, - }) - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); + util.leader.view = util.replica.view.next(); + util.leader.phase = Phase::Prepare; + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!( + res, + Err(ReplicaPrepareError::Old { + current_view: ViewNumber(2), + current_phase: Phase::Prepare, + }) + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_during_commit() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let replica_prepare = util.new_replica_prepare(|_| {}); - util.leader.view = util.replica.view; - util.leader.phase = Phase::Commit; - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!( - res, - Err(ReplicaPrepareError::Old { - current_view, - current_phase: Phase::Commit, - }) => { - assert_eq!(current_view, util.replica.view); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); + util.leader.view = util.replica.view; + util.leader.phase = Phase::Commit; + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!( + res, + Err(ReplicaPrepareError::Old { + current_view, + current_phase: Phase::Commit, + }) => { + assert_eq!(current_view, util.replica.view); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_not_leader_in_view() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - let replica_prepare = util.new_replica_prepare(|msg| { - // Moving to the next view changes the leader. - msg.view = msg.view.next(); - }); - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!(res, Err(ReplicaPrepareError::NotLeaderInView)); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|msg| { + // Moving to the next view changes the leader. 
+ msg.view = msg.view.next(); + }); + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!(res, Err(ReplicaPrepareError::NotLeaderInView)); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_already_exists() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - - util.set_owner_as_view_leader(); - let replica_prepare = util.new_replica_prepare(|_| {}); - assert!(util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await - .unwrap() - .is_none()); - let res = util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await; - assert_matches!( - res, - Err(ReplicaPrepareError::Exists { existing_message }) => { - assert_eq!(existing_message, replica_prepare.msg); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + util.set_owner_as_view_leader(); + let replica_prepare = util.new_replica_prepare(|_| {}); + assert!(util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await + .unwrap() + .is_none()); + let res = util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await; + assert_matches!( + res, + Err(ReplicaPrepareError::Exists { existing_message }) => { + assert_eq!(existing_message, replica_prepare.msg); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_num_received_below_threshold() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - - util.set_owner_as_view_leader(); - let replica_prepare = util.new_replica_prepare(|_| {}); - assert!(util - .process_replica_prepare(ctx, replica_prepare) - .await - .unwrap() - .is_none()); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + util.set_owner_as_view_leader(); + let replica_prepare = util.new_replica_prepare(|_| {}); + assert!(util + .process_replica_prepare(ctx, replica_prepare) + .await + .unwrap() + .is_none()); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_invalid_sig() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut replica_prepare = util.new_replica_prepare(|_| {}); - replica_prepare.sig = ctx.rng().gen(); - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!(res, Err(ReplicaPrepareError::InvalidSignature(_))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut replica_prepare = util.new_replica_prepare(|_| {}); + replica_prepare.sig = ctx.rng().gen(); + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!(res, Err(ReplicaPrepareError::InvalidSignature(_))); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_invalid_commit_qc() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let replica_prepare = util.new_replica_prepare(|msg| msg.high_qc = ctx.rng().gen()); - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!(res, Err(ReplicaPrepareError::InvalidHighQC(..))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = 
UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|msg| msg.high_qc = ctx.rng().gen()); + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!(res, Err(ReplicaPrepareError::InvalidHighQC(..))); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_high_qc_of_current_view() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let view = ViewNumber(1); - let qc_view = ViewNumber(1); - util.set_view(view); - let qc = util.new_commit_qc(|msg| msg.view = qc_view); - let replica_prepare = util.new_replica_prepare(|msg| msg.high_qc = qc); - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!( - res, - Err(ReplicaPrepareError::HighQCOfFutureView { high_qc_view, current_view }) => { - assert_eq!(high_qc_view, qc_view); - assert_eq!(current_view, view); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let view = ViewNumber(1); + let qc_view = ViewNumber(1); + util.set_view(view); + let qc = util.new_commit_qc(|msg| msg.view = qc_view); + let replica_prepare = util.new_replica_prepare(|msg| msg.high_qc = qc); + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!( + res, + Err(ReplicaPrepareError::HighQCOfFutureView { high_qc_view, current_view }) => { + assert_eq!(high_qc_view, qc_view); + assert_eq!(current_view, view); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_prepare_high_qc_of_future_view() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let view = ViewNumber(1); - let qc_view = ViewNumber(2); - util.set_view(view); - let qc = util.new_commit_qc(|msg| msg.view = qc_view); - let replica_prepare = util.new_replica_prepare(|msg| msg.high_qc = qc); - let res = util.process_replica_prepare(ctx, replica_prepare).await; - assert_matches!( - res, - Err(ReplicaPrepareError::HighQCOfFutureView{ high_qc_view, current_view }) => { - assert_eq!(high_qc_view, qc_view); - assert_eq!(current_view, view); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let view = ViewNumber(1); + let qc_view = ViewNumber(2); + util.set_view(view); + let qc = util.new_commit_qc(|msg| msg.view = qc_view); + let replica_prepare = util.new_replica_prepare(|msg| msg.high_qc = qc); + let res = util.process_replica_prepare(ctx, replica_prepare).await; + assert_matches!( + res, + Err(ReplicaPrepareError::HighQCOfFutureView{ high_qc_view, current_view }) => { + assert_eq!(high_qc_view, qc_view); + assert_eq!(current_view, view); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_sanity() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - util.new_leader_commit(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_leader_commit(ctx).await; + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_sanity_yield_leader_commit() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let 
mut util = UTHarness::new(ctx, 1).await; - let replica_commit = util.new_replica_commit(ctx).await; - let leader_commit = util - .process_replica_commit(ctx, replica_commit.clone()) - .await - .unwrap() - .unwrap(); - assert_matches!( - leader_commit.msg, - LeaderCommit { - protocol_version, - justification, - } => { - assert_eq!(protocol_version, replica_commit.msg.protocol_version); - assert_eq!(justification, util.new_commit_qc(|msg| *msg = replica_commit.msg)); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_commit = util.new_replica_commit(ctx).await; + let leader_commit = util + .process_replica_commit(ctx, replica_commit.clone()) + .await + .unwrap() + .unwrap(); + assert_matches!( + leader_commit.msg, + LeaderCommit { + protocol_version, + justification, + } => { + assert_eq!(protocol_version, replica_commit.msg.protocol_version); + assert_eq!(justification, util.new_commit_qc(|msg| *msg = replica_commit.msg)); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_incompatible_protocol_version() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let incompatible_protocol_version = util.incompatible_protocol_version(); - let mut replica_commit = util.new_replica_commit(ctx).await.msg; - replica_commit.protocol_version = incompatible_protocol_version; - let res = util - .process_replica_commit(ctx, util.owner_key().sign_msg(replica_commit)) - .await; - assert_matches!( - res, - Err(ReplicaCommitError::IncompatibleProtocolVersion { message_version, local_version }) => { - assert_eq!(message_version, incompatible_protocol_version); - assert_eq!(local_version, util.protocol_version()); - } - ) + scope::run!(ctx, |ctx,s| async { + let (mut util,runner) = UTHarness::new(ctx,1).await; + s.spawn_bg(runner.run(ctx)); + + let incompatible_protocol_version = util.incompatible_protocol_version(); + let mut replica_commit = util.new_replica_commit(ctx).await.msg; + replica_commit.protocol_version = incompatible_protocol_version; + let res = util + .process_replica_commit(ctx, util.owner_key().sign_msg(replica_commit)) + .await; + assert_matches!( + res, + Err(ReplicaCommitError::IncompatibleProtocolVersion { message_version, local_version }) => { + assert_eq!(message_version, incompatible_protocol_version); + assert_eq!(local_version, util.protocol_version()); + } + ); + Ok(()) + }).await.unwrap(); } #[tokio::test] async fn replica_commit_non_validator_signer() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let replica_commit = util.new_replica_commit(ctx).await.msg; - let non_validator_key: validator::SecretKey = ctx.rng().gen(); - let res = util - .process_replica_commit(ctx, non_validator_key.sign_msg(replica_commit)) - .await; - assert_matches!( - res, - Err(ReplicaCommitError::NonValidatorSigner { signer }) => { - assert_eq!(signer, non_validator_key.public()); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_commit = util.new_replica_commit(ctx).await.msg; + let non_validator_key: validator::SecretKey = ctx.rng().gen(); + let res = util + .process_replica_commit(ctx, non_validator_key.sign_msg(replica_commit)) + .await; + assert_matches!( + res, + 
Err(ReplicaCommitError::NonValidatorSigner { signer }) => { + assert_eq!(signer, non_validator_key.public()); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_old() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut replica_commit = util.new_replica_commit(ctx).await.msg; - replica_commit.view = util.replica.view.prev(); - let replica_commit = util.owner_key().sign_msg(replica_commit); - let res = util.process_replica_commit(ctx, replica_commit).await; - assert_matches!( - res, - Err(ReplicaCommitError::Old { current_view, current_phase }) => { - assert_eq!(current_view, util.replica.view); - assert_eq!(current_phase, util.replica.phase); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut replica_commit = util.new_replica_commit(ctx).await.msg; + replica_commit.view = util.replica.view.prev(); + let replica_commit = util.owner_key().sign_msg(replica_commit); + let res = util.process_replica_commit(ctx, replica_commit).await; + assert_matches!( + res, + Err(ReplicaCommitError::Old { current_view, current_phase }) => { + assert_eq!(current_view, util.replica.view); + assert_eq!(current_phase, util.replica.phase); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_not_leader_in_view() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - - let current_view_leader = util.view_leader(util.replica.view); - assert_ne!(current_view_leader, util.owner_key().public()); - - let replica_commit = util.new_current_replica_commit(|_| {}); - let res = util.process_replica_commit(ctx, replica_commit).await; - assert_matches!(res, Err(ReplicaCommitError::NotLeaderInView)); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + let current_view_leader = util.view_leader(util.replica.view); + assert_ne!(current_view_leader, util.owner_key().public()); + + let replica_commit = util.new_current_replica_commit(|_| {}); + let res = util.process_replica_commit(ctx, replica_commit).await; + assert_matches!(res, Err(ReplicaCommitError::NotLeaderInView)); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_already_exists() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - let replica_commit = util.new_replica_commit(ctx).await; - assert!(util - .process_replica_commit(ctx, replica_commit.clone()) - .await - .unwrap() - .is_none()); - let res = util - .process_replica_commit(ctx, replica_commit.clone()) - .await; - assert_matches!( - res, - Err(ReplicaCommitError::DuplicateMessage { existing_message }) => { - assert_eq!(existing_message, replica_commit.msg) - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + let replica_commit = util.new_replica_commit(ctx).await; + assert!(util + .process_replica_commit(ctx, replica_commit.clone()) + .await + .unwrap() + .is_none()); + let res = util + .process_replica_commit(ctx, replica_commit.clone()) + .await; + assert_matches!( + res, + Err(ReplicaCommitError::DuplicateMessage { existing_message }) => { + assert_eq!(existing_message, replica_commit.msg) + } 
+ ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_num_received_below_threshold() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - - let replica_prepare = util.new_replica_prepare(|_| {}); - assert!(util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await - .unwrap() - .is_none()); - let replica_prepare = util.keys[1].sign_msg(replica_prepare.msg); - let leader_prepare = util - .process_replica_prepare(ctx, replica_prepare) - .await - .unwrap() - .unwrap(); - let replica_commit = util - .process_leader_prepare(ctx, leader_prepare) - .await - .unwrap(); - util.process_replica_commit(ctx, replica_commit.clone()) - .await - .unwrap(); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); + assert!(util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await + .unwrap() + .is_none()); + let replica_prepare = util.keys[1].sign_msg(replica_prepare.msg); + let leader_prepare = util + .process_replica_prepare(ctx, replica_prepare) + .await + .unwrap() + .unwrap(); + let replica_commit = util + .process_leader_prepare(ctx, leader_prepare) + .await + .unwrap(); + util.process_replica_commit(ctx, replica_commit.clone()) + .await + .unwrap(); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_invalid_sig() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut replica_commit = util.new_current_replica_commit(|_| {}); - replica_commit.sig = ctx.rng().gen(); - let res = util.process_replica_commit(ctx, replica_commit).await; - assert_matches!(res, Err(ReplicaCommitError::InvalidSignature(..))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut replica_commit = util.new_current_replica_commit(|_| {}); + replica_commit.sig = ctx.rng().gen(); + let res = util.process_replica_commit(ctx, replica_commit).await; + assert_matches!(res, Err(ReplicaCommitError::InvalidSignature(..))); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn replica_commit_unexpected_proposal() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let replica_commit = util.new_current_replica_commit(|_| {}); - util.process_replica_commit(ctx, replica_commit) - .await - .unwrap(); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_commit = util.new_current_replica_commit(|_| {}); + util.process_replica_commit(ctx, replica_commit) + .await + .unwrap(); + Ok(()) + }) + .await + .unwrap(); } diff --git a/node/actors/bft/src/lib.rs b/node/actors/bft/src/lib.rs index 1548e0bb..a4309c9c 100644 --- a/node/actors/bft/src/lib.rs +++ b/node/actors/bft/src/lib.rs @@ -15,14 +15,12 @@ //! - [Blog post comparing several consensus algorithms](https://decentralizedthoughts.github.io/2023-04-01-hotstuff-2/) //! 
- Blog posts explaining [safety](https://seafooler.com/2022/01/24/understanding-safety-hotstuff/) and [responsiveness](https://seafooler.com/2022/04/02/understanding-responsiveness-hotstuff/) use crate::io::{InputMessage, OutputMessage}; -use inner::ConsensusInner; use std::sync::Arc; use zksync_concurrency::{ctx, scope}; use zksync_consensus_roles::validator; -use zksync_consensus_storage::ReplicaStore; use zksync_consensus_utils::pipe::ActorPipe; -mod inner; +mod config; pub mod io; mod leader; mod metrics; @@ -32,86 +30,94 @@ pub mod testonly; #[cfg(test)] mod tests; -/// Payload provider for the new blocks. +pub use config::Config; + +/// Protocol version of this BFT implementation. +pub const PROTOCOL_VERSION: validator::ProtocolVersion = validator::ProtocolVersion::EARLIEST; + +/// Payload proposal and verification trait. #[async_trait::async_trait] -pub trait PayloadSource: Send + Sync + 'static { - /// Propose a payload for the block `block_number`. +pub trait PayloadManager: std::fmt::Debug + Send + Sync { + /// Used by leader to propose a payload for the next block. async fn propose( &self, ctx: &ctx::Ctx, - block_number: validator::BlockNumber, + number: validator::BlockNumber, ) -> ctx::Result; + /// Used by replica to verify a payload for the next block proposed by the leader. + async fn verify( + &self, + ctx: &ctx::Ctx, + number: validator::BlockNumber, + payload: &validator::Payload, + ) -> ctx::Result<()>; } -/// Protocol version of this BFT implementation. -pub const PROTOCOL_VERSION: validator::ProtocolVersion = validator::ProtocolVersion::EARLIEST; +/// Channel through which bft actor sends network messages. +pub(crate) type OutputSender = ctx::channel::UnboundedSender; -/// Starts the Consensus actor. It will start running, processing incoming messages and -/// sending output messages. This is a blocking method. -pub async fn run( - ctx: &ctx::Ctx, - mut pipe: ActorPipe, - secret_key: validator::SecretKey, - validator_set: validator::ValidatorSet, - storage: ReplicaStore, - payload_source: &dyn PayloadSource, -) -> anyhow::Result<()> { - let inner = Arc::new(ConsensusInner { - pipe: pipe.send, - secret_key, - validator_set, - }); - let res = scope::run!(ctx, |ctx, s| async { - let mut replica = replica::StateMachine::start(ctx, inner.clone(), storage).await?; - let mut leader = leader::StateMachine::new(ctx, inner.clone()); +impl Config { + /// Starts the bft actor. It will start running, processing incoming messages and + /// sending output messages. + pub async fn run( + self, + ctx: &ctx::Ctx, + mut pipe: ActorPipe, + ) -> anyhow::Result<()> { + let cfg = Arc::new(self); + let res = scope::run!(ctx, |ctx, s| async { + let mut replica = + replica::StateMachine::start(ctx, cfg.clone(), pipe.send.clone()).await?; + let mut leader = leader::StateMachine::new(ctx, cfg.clone(), pipe.send.clone()); - s.spawn_bg(leader::StateMachine::run_proposer( - ctx, - &inner, - payload_source, - leader.prepare_qc.subscribe(), - )); + s.spawn_bg(leader::StateMachine::run_proposer( + ctx, + &cfg, + leader.prepare_qc.subscribe(), + &pipe.send, + )); - tracing::info!("Starting consensus actor {:?}", inner.secret_key.public()); + tracing::info!("Starting consensus actor {:?}", cfg.secret_key.public()); - // This is the infinite loop where the consensus actually runs. The validator waits for either - // a message from the network or for a timeout, and processes each accordingly. 
- loop { - let input = pipe - .recv - .recv(&ctx.with_deadline(replica.timeout_deadline)) - .await - .ok(); + // This is the infinite loop where the consensus actually runs. The validator waits for either + // a message from the network or for a timeout, and processes each accordingly. + loop { + let input = pipe + .recv + .recv(&ctx.with_deadline(replica.timeout_deadline)) + .await + .ok(); - // We check if the context is active before processing the input. If the context is not active, - // we stop. - if !ctx.is_active() { - return Ok(()); - } + // We check if the context is active before processing the input. If the context is not active, + // we stop. + if !ctx.is_active() { + return Ok(()); + } - let Some(InputMessage::Network(req)) = input else { - replica.start_new_view(ctx).await?; - continue; - }; + let Some(InputMessage::Network(req)) = input else { + replica.start_new_view(ctx).await?; + continue; + }; - use validator::ConsensusMsg as Msg; - let res = match &req.msg.msg { - Msg::ReplicaPrepare(_) | Msg::ReplicaCommit(_) => { - leader.process_input(ctx, req.msg).await - } - Msg::LeaderPrepare(_) | Msg::LeaderCommit(_) => { - replica.process_input(ctx, req.msg).await - } - }; - // Notify network actor that the message has been processed. - // Ignore sending error. - let _ = req.ack.send(()); - res?; + use validator::ConsensusMsg as Msg; + let res = match &req.msg.msg { + Msg::ReplicaPrepare(_) | Msg::ReplicaCommit(_) => { + leader.process_input(ctx, req.msg).await + } + Msg::LeaderPrepare(_) | Msg::LeaderCommit(_) => { + replica.process_input(ctx, req.msg).await + } + }; + // Notify network actor that the message has been processed. + // Ignore sending error. + let _ = req.ack.send(()); + res?; + } + }) + .await; + match res { + Ok(()) | Err(ctx::Error::Canceled(_)) => Ok(()), + Err(ctx::Error::Internal(err)) => Err(err), } - }) - .await; - match res { - Ok(()) | Err(ctx::Error::Canceled(_)) => Ok(()), - Err(ctx::Error::Internal(err)) => Err(err), } } diff --git a/node/actors/bft/src/replica/block.rs b/node/actors/bft/src/replica/block.rs index 6f524e83..fd91780b 100644 --- a/node/actors/bft/src/replica/block.rs +++ b/node/actors/bft/src/replica/block.rs @@ -1,5 +1,4 @@ use super::StateMachine; -use anyhow::Context as _; use tracing::{info, instrument}; use zksync_concurrency::ctx; use zksync_consensus_roles::validator; @@ -8,7 +7,7 @@ impl StateMachine { /// Tries to build a finalized block from the given CommitQC. We simply search our /// block proposal cache for the matching block, and if we find it we build the block. /// If this method succeeds, it sends the finalized block to the executor. - #[instrument(level = "trace", ret)] + #[instrument(level = "debug", skip(self), ret)] pub(crate) async fn save_block( &mut self, ctx: &ctx::Ctx, @@ -27,23 +26,27 @@ impl StateMachine { return Ok(()); }; let block = validator::FinalBlock { - header: commit_qc.message.proposal, payload: payload.clone(), justification: commit_qc.clone(), }; info!( "Finalized a block!\nFinal block: {:#?}", - block.header.hash() + block.header().hash() ); - self.storage - .put_block(ctx, &block) - .await - .context("store.put_block()")?; + self.config + .block_store + .queue_block(ctx, block.clone()) + .await?; + // For availability, replica should not proceed until it stores the block persistently. 
+ self.config + .block_store + .wait_until_persisted(ctx, block.header().number) + .await?; let number_metric = &crate::metrics::METRICS.finalized_block_number; let current_number = number_metric.get(); - number_metric.set(current_number.max(block.header.number.0)); + number_metric.set(current_number.max(block.header().number.0)); Ok(()) } } diff --git a/node/actors/bft/src/replica/leader_commit.rs b/node/actors/bft/src/replica/leader_commit.rs index 9f70793b..a7e99e9e 100644 --- a/node/actors/bft/src/replica/leader_commit.rs +++ b/node/actors/bft/src/replica/leader_commit.rs @@ -80,9 +80,9 @@ impl StateMachine { } // Check that it comes from the correct leader. - if author != &self.inner.view_leader(view) { + if author != &self.config.view_leader(view) { return Err(Error::InvalidLeader { - correct_leader: self.inner.view_leader(view), + correct_leader: self.config.view_leader(view), received_leader: author.clone(), }); } @@ -105,7 +105,7 @@ impl StateMachine { // Verify the QuorumCertificate. message .justification - .verify(&self.inner.validator_set, self.inner.threshold()) + .verify(&self.config.validator_set, self.config.threshold()) .map_err(Error::InvalidJustification)?; // ----------- All checks finished. Now we process the message. -------------- diff --git a/node/actors/bft/src/replica/leader_prepare.rs b/node/actors/bft/src/replica/leader_prepare.rs index f25a0dde..0c143e01 100644 --- a/node/actors/bft/src/replica/leader_prepare.rs +++ b/node/actors/bft/src/replica/leader_prepare.rs @@ -1,5 +1,5 @@ use super::StateMachine; -use crate::inner::ConsensusInner; +use crate::Config; use std::collections::HashMap; use tracing::instrument; use zksync_concurrency::{ctx, error::Wrap}; @@ -151,9 +151,9 @@ impl StateMachine { } // Check that it comes from the correct leader. - if author != &self.inner.view_leader(view) { + if author != &self.config.view_leader(view) { return Err(Error::InvalidLeader { - correct_leader: self.inner.view_leader(view), + correct_leader: self.config.view_leader(view), received_leader: author.clone(), }); } @@ -175,7 +175,7 @@ impl StateMachine { // Verify the PrepareQC. message .justification - .verify(view, &self.inner.validator_set, self.inner.threshold()) + .verify(view, &self.config.validator_set, self.config.threshold()) .map_err(Error::InvalidPrepareQC)?; // Get the highest block voted and check if there's a quorum of votes for it. To have a quorum @@ -189,7 +189,7 @@ impl StateMachine { let highest_vote: Option = vote_count .into_iter() // We only take one value from the iterator because there can only be at most one block with a quorum of 2f+1 votes. - .find(|(_, v)| *v > 2 * self.inner.faulty_replicas()) + .find(|(_, v)| *v > 2 * self.config.faulty_replicas()) .map(|(h, _)| h); // Get the highest CommitQC and verify it. @@ -203,7 +203,7 @@ impl StateMachine { .clone(); highest_qc - .verify(&self.inner.validator_set, self.inner.threshold()) + .verify(&self.config.validator_set, self.config.threshold()) .map_err(Error::InvalidHighQC)?; // If the high QC is for a future view, we discard the message. @@ -229,7 +229,7 @@ impl StateMachine { // The leader proposed a new block. Some(payload) => { // Check that the payload doesn't exceed the maximum size. - if payload.0.len() > ConsensusInner::PAYLOAD_MAX_SIZE { + if payload.0.len() > Config::PAYLOAD_MAX_SIZE { return Err(Error::ProposalOversizedPayload { payload_size: payload.0.len(), header: message.proposal, @@ -267,9 +267,16 @@ impl StateMachine { } // Payload should be valid. 
+ // Defensively assume that PayloadManager cannot verify proposal until the previous block is stored. + self.config + .block_store + .wait_until_persisted(ctx, highest_qc.header().number) + .await + .map_err(ctx::Error::Canceled)?; if let Err(err) = self - .storage - .verify_payload(ctx, message.proposal.number, payload) + .config + .payload_manager + .verify(ctx, message.proposal.number, payload) .await { return Err(match err { @@ -324,12 +331,12 @@ impl StateMachine { // Send the replica message to the leader. let output_message = ConsensusInputMessage { message: self - .inner + .config .secret_key .sign_msg(validator::ConsensusMsg::ReplicaCommit(commit_vote)), recipient: Target::Validator(author.clone()), }; - self.inner.pipe.send(output_message.into()); + self.pipe.send(output_message.into()); Ok(()) } diff --git a/node/actors/bft/src/replica/new_view.rs b/node/actors/bft/src/replica/new_view.rs index 5ccd5f22..6495dd3d 100644 --- a/node/actors/bft/src/replica/new_view.rs +++ b/node/actors/bft/src/replica/new_view.rs @@ -26,7 +26,7 @@ impl StateMachine { // Send the replica message to the next leader. let output_message = ConsensusInputMessage { message: self - .inner + .config .secret_key .sign_msg(validator::ConsensusMsg::ReplicaPrepare( validator::ReplicaPrepare { @@ -36,9 +36,9 @@ impl StateMachine { high_qc: self.high_qc.clone(), }, )), - recipient: Target::Validator(self.inner.view_leader(next_view)), + recipient: Target::Validator(self.config.view_leader(next_view)), }; - self.inner.pipe.send(output_message.into()); + self.pipe.send(output_message.into()); // Reset the timer. self.reset_timer(ctx); diff --git a/node/actors/bft/src/replica/state_machine.rs b/node/actors/bft/src/replica/state_machine.rs index 6c9ae9bd..327cacbd 100644 --- a/node/actors/bft/src/replica/state_machine.rs +++ b/node/actors/bft/src/replica/state_machine.rs @@ -1,4 +1,4 @@ -use crate::{metrics, ConsensusInner}; +use crate::{metrics, Config, OutputSender}; use std::{ collections::{BTreeMap, HashMap}, sync::Arc, @@ -7,14 +7,15 @@ use tracing::instrument; use zksync_concurrency::{ctx, error::Wrap as _, metrics::LatencyHistogramExt as _, time}; use zksync_consensus_roles::validator; use zksync_consensus_storage as storage; -use zksync_consensus_storage::ReplicaStore; /// The StateMachine struct contains the state of the replica. This is the most complex state machine and is responsible /// for validating and voting on blocks. When participating in consensus we are always a replica. #[derive(Debug)] pub(crate) struct StateMachine { /// Consensus configuration and output channel. - pub(crate) inner: Arc, + pub(crate) config: Arc, + /// Pipe through which replica sends network messages. + pub(super) pipe: OutputSender, /// The current view number. pub(crate) view: validator::ViewNumber, /// The current phase. @@ -28,9 +29,6 @@ pub(crate) struct StateMachine { BTreeMap>, /// The deadline to receive an input message. pub(crate) timeout_deadline: time::Deadline, - /// A reference to the storage module. We use it to backup the replica state and store - /// finalized blocks. - pub(crate) storage: ReplicaStore, } impl StateMachine { @@ -38,10 +36,13 @@ impl StateMachine { /// otherwise we initialize the state machine with whatever head block we have. pub(crate) async fn start( ctx: &ctx::Ctx, - inner: Arc, - storage: ReplicaStore, + config: Arc, + pipe: OutputSender, ) -> ctx::Result { - let backup = storage.replica_state(ctx).await?; + let backup = match config.replica_store.state(ctx).await? 
{ + Some(backup) => backup, + None => config.block_store.subscribe().borrow().last.clone().into(), + }; let mut block_proposal_cache: BTreeMap<_, HashMap<_, _>> = BTreeMap::new(); for proposal in backup.proposals { block_proposal_cache @@ -51,14 +52,14 @@ impl StateMachine { } let mut this = Self { - inner, + config, + pipe, view: backup.view, phase: backup.phase, high_vote: backup.high_vote, high_qc: backup.high_qc, block_proposal_cache, timeout_deadline: time::Deadline::Infinite, - storage, }; // We need to start the replica before processing inputs. this.start_new_view(ctx).await.wrap("start_new_view()")?; @@ -128,8 +129,9 @@ impl StateMachine { high_qc: self.high_qc.clone(), proposals, }; - self.storage - .put_replica_state(ctx, &backup) + self.config + .replica_store + .set_state(ctx, &backup) .await .wrap("put_replica_state")?; Ok(()) diff --git a/node/actors/bft/src/replica/tests.rs b/node/actors/bft/src/replica/tests.rs index 4d66f4d4..107198f1 100644 --- a/node/actors/bft/src/replica/tests.rs +++ b/node/actors/bft/src/replica/tests.rs @@ -1,8 +1,8 @@ use super::{leader_commit, leader_prepare}; -use crate::{inner::ConsensusInner, testonly::ut_harness::UTHarness}; +use crate::{testonly, testonly::ut_harness::UTHarness, Config}; use assert_matches::assert_matches; use rand::Rng; -use zksync_concurrency::ctx; +use zksync_concurrency::{ctx, scope}; use zksync_consensus_roles::validator::{ self, CommitQC, Payload, PrepareQC, ReplicaCommit, ReplicaPrepare, ViewNumber, }; @@ -11,123 +11,160 @@ use zksync_consensus_roles::validator::{ async fn leader_prepare_sanity() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - let leader_prepare = util.new_leader_prepare(ctx).await; - util.process_leader_prepare(ctx, leader_prepare) - .await - .unwrap(); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + let leader_prepare = util.new_leader_prepare(ctx).await; + util.process_leader_prepare(ctx, leader_prepare) + .await + .unwrap(); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_reproposal_sanity() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - util.new_replica_commit(ctx).await; - util.process_replica_timeout(ctx).await; - let leader_prepare = util.new_leader_prepare(ctx).await; - assert!(leader_prepare.msg.proposal_payload.is_none()); - util.process_leader_prepare(ctx, leader_prepare) - .await - .unwrap(); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_replica_commit(ctx).await; + util.process_replica_timeout(ctx).await; + let leader_prepare = util.new_leader_prepare(ctx).await; + assert!(leader_prepare.msg.proposal_payload.is_none()); + util.process_leader_prepare(ctx, leader_prepare) + .await + .unwrap(); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_incompatible_protocol_version() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let incompatible_protocol_version = util.incompatible_protocol_version(); - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.protocol_version = incompatible_protocol_version; - let res = util - .process_leader_prepare(ctx, 
util.owner_key().sign_msg(leader_prepare)) - .await; - assert_matches!( - res, - Err(leader_prepare::Error::IncompatibleProtocolVersion { message_version, local_version }) => { - assert_eq!(message_version, incompatible_protocol_version); - assert_eq!(local_version, util.protocol_version()); - } - ) + scope::run!(ctx, |ctx,s| async { + let (mut util,runner) = UTHarness::new(ctx,1).await; + s.spawn_bg(runner.run(ctx)); + + let incompatible_protocol_version = util.incompatible_protocol_version(); + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.protocol_version = incompatible_protocol_version; + let res = util + .process_leader_prepare(ctx, util.owner_key().sign_msg(leader_prepare)) + .await; + assert_matches!( + res, + Err(leader_prepare::Error::IncompatibleProtocolVersion { message_version, local_version }) => { + assert_eq!(message_version, incompatible_protocol_version); + assert_eq!(local_version, util.protocol_version()); + } + ); + Ok(()) + }).await.unwrap(); } #[tokio::test] async fn leader_prepare_sanity_yield_replica_commit() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let leader_prepare = util.new_leader_prepare(ctx).await; - let replica_commit = util - .process_leader_prepare(ctx, leader_prepare.clone()) - .await - .unwrap(); - assert_eq!( - replica_commit.msg, - ReplicaCommit { - protocol_version: leader_prepare.msg.protocol_version, - view: leader_prepare.msg.view, - proposal: leader_prepare.msg.proposal, - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let leader_prepare = util.new_leader_prepare(ctx).await; + let replica_commit = util + .process_leader_prepare(ctx, leader_prepare.clone()) + .await + .unwrap(); + assert_eq!( + replica_commit.msg, + ReplicaCommit { + protocol_version: leader_prepare.msg.protocol_version, + view: leader_prepare.msg.view, + proposal: leader_prepare.msg.proposal, + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_invalid_leader() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - - let view = ViewNumber(2); - util.set_view(view); - assert_eq!(util.view_leader(view), util.keys[0].public()); - - let replica_prepare = util.new_replica_prepare(|_| {}); - assert!(util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await - .unwrap() - .is_none()); - - let replica_prepare = util.keys[1].sign_msg(replica_prepare.msg); - let mut leader_prepare = util - .process_replica_prepare(ctx, replica_prepare) - .await - .unwrap() - .unwrap() - .msg; - leader_prepare.view = leader_prepare.view.next(); - assert_ne!(util.view_leader(leader_prepare.view), util.keys[0].public()); - - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!( - res, - Err(leader_prepare::Error::InvalidLeader { correct_leader, received_leader }) => { - assert_eq!(correct_leader, util.keys[1].public()); - assert_eq!(received_leader, util.keys[0].public()); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + let view = ViewNumber(2); + util.set_view(view); + assert_eq!(util.view_leader(view), util.keys[0].public()); + + let replica_prepare = 
util.new_replica_prepare(|_| {}); + assert!(util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await + .unwrap() + .is_none()); + + let replica_prepare = util.keys[1].sign_msg(replica_prepare.msg); + let mut leader_prepare = util + .process_replica_prepare(ctx, replica_prepare) + .await + .unwrap() + .unwrap() + .msg; + leader_prepare.view = leader_prepare.view.next(); + assert_ne!(util.view_leader(leader_prepare.view), util.keys[0].public()); + + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!( + res, + Err(leader_prepare::Error::InvalidLeader { correct_leader, received_leader }) => { + assert_eq!(correct_leader, util.keys[1].public()); + assert_eq!(received_leader, util.keys[0].public()); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_old_view() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.view = util.replica.view.prev(); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!( - res, - Err(leader_prepare::Error::Old { current_view, current_phase }) => { - assert_eq!(current_view, util.replica.view); - assert_eq!(current_phase, util.replica.phase); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.view = util.replica.view.prev(); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!( + res, + Err(leader_prepare::Error::Old { current_view, current_phase }) => { + assert_eq!(current_view, util.replica.view); + assert_eq!(current_phase, util.replica.phase); + } + ); + Ok(()) + }) + .await + .unwrap(); } /// Tests that `WriteBlockStore::verify_payload` is applied before signing a vote. @@ -135,178 +172,245 @@ async fn leader_prepare_old_view() { async fn leader_prepare_invalid_payload() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let leader_prepare = util.new_leader_prepare(ctx).await; - - // Insert a finalized block to the storage. - // Default implementation of verify_payload() fails if - // head block number >= proposal block number. - let block = validator::FinalBlock { - header: leader_prepare.msg.proposal, - payload: leader_prepare.msg.proposal_payload.clone().unwrap(), - justification: CommitQC::from( - &[util.keys[0].sign_msg(ReplicaCommit { - protocol_version: util.protocol_version(), - view: util.replica.view, - proposal: leader_prepare.msg.proposal, - })], - &util.validator_set(), - ) - .unwrap(), - }; - util.replica.storage.put_block(ctx, &block).await.unwrap(); - - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::ProposalInvalidPayload(..))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = + UTHarness::new_with_payload(ctx, 1, Box::new(testonly::RejectPayload)).await; + s.spawn_bg(runner.run(ctx)); + + let leader_prepare = util.new_leader_prepare(ctx).await; + + // Insert a finalized block to the storage. 
+ let block = validator::FinalBlock { + payload: leader_prepare.msg.proposal_payload.clone().unwrap(), + justification: CommitQC::from( + &[util.keys[0].sign_msg(ReplicaCommit { + protocol_version: util.protocol_version(), + view: util.replica.view, + proposal: leader_prepare.msg.proposal, + })], + &util.validator_set(), + ) + .unwrap(), + }; + util.replica + .config + .block_store + .queue_block(ctx, block) + .await + .unwrap(); + + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::ProposalInvalidPayload(..))); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_invalid_sig() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await; - leader_prepare.sig = ctx.rng().gen(); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::InvalidSignature(..))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await; + leader_prepare.sig = ctx.rng().gen(); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::InvalidSignature(..))); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_invalid_prepare_qc() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.justification = ctx.rng().gen(); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::InvalidPrepareQC(_))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.justification = ctx.rng().gen(); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::InvalidPrepareQC(_))); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_invalid_high_qc() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.justification = util.new_prepare_qc(|msg| msg.high_qc = ctx.rng().gen()); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::InvalidHighQC(_))); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.justification = util.new_prepare_qc(|msg| msg.high_qc = ctx.rng().gen()); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::InvalidHighQC(_))); + Ok(()) + }) + 
.await + .unwrap(); } #[tokio::test] async fn leader_prepare_proposal_oversized_payload() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let payload_oversize = ConsensusInner::PAYLOAD_MAX_SIZE + 1; - let payload_vec = vec![0; payload_oversize]; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.proposal_payload = Some(Payload(payload_vec)); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util - .process_leader_prepare(ctx, leader_prepare.clone()) - .await; - assert_matches!( - res, - Err(leader_prepare::Error::ProposalOversizedPayload{ payload_size, header }) => { - assert_eq!(payload_size, payload_oversize); - assert_eq!(header, leader_prepare.msg.proposal); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let payload_oversize = Config::PAYLOAD_MAX_SIZE + 1; + let payload_vec = vec![0; payload_oversize]; + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.proposal_payload = Some(Payload(payload_vec)); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util + .process_leader_prepare(ctx, leader_prepare.clone()) + .await; + assert_matches!( + res, + Err(leader_prepare::Error::ProposalOversizedPayload{ payload_size, header }) => { + assert_eq!(payload_size, payload_oversize); + assert_eq!(header, leader_prepare.msg.proposal); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_proposal_mismatched_payload() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.proposal_payload = Some(ctx.rng().gen()); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::ProposalMismatchedPayload)); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.proposal_payload = Some(ctx.rng().gen()); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::ProposalMismatchedPayload)); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_proposal_when_previous_not_finalized() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let replica_prepare = util.new_replica_prepare(|_| {}); - let mut leader_prepare = util - .process_replica_prepare(ctx, replica_prepare) - .await - .unwrap() - .unwrap() - .msg; - leader_prepare.justification = util.new_prepare_qc(|msg| msg.high_vote = ctx.rng().gen()); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!( - res, - Err(leader_prepare::Error::ProposalWhenPreviousNotFinalized) - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); 
+ let mut leader_prepare = util + .process_replica_prepare(ctx, replica_prepare) + .await + .unwrap() + .unwrap() + .msg; + leader_prepare.justification = util.new_prepare_qc(|msg| msg.high_vote = ctx.rng().gen()); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!( + res, + Err(leader_prepare::Error::ProposalWhenPreviousNotFinalized) + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_proposal_invalid_parent_hash() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let replica_prepare = util.new_replica_prepare(|_| {}); - let mut leader_prepare = util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await - .unwrap() - .unwrap() - .msg; - leader_prepare.proposal.parent = ctx.rng().gen(); - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util - .process_leader_prepare(ctx, leader_prepare.clone()) - .await; - assert_matches!( - res, - Err(leader_prepare::Error::ProposalInvalidParentHash { - correct_parent_hash, - received_parent_hash, - header - }) => { - assert_eq!(correct_parent_hash, replica_prepare.msg.high_vote.proposal.hash()); - assert_eq!(received_parent_hash, leader_prepare.msg.proposal.parent); - assert_eq!(header, leader_prepare.msg.proposal); - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); + let mut leader_prepare = util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await + .unwrap() + .unwrap() + .msg; + leader_prepare.proposal.parent = ctx.rng().gen(); + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util + .process_leader_prepare(ctx, leader_prepare.clone()) + .await; + assert_matches!( + res, + Err(leader_prepare::Error::ProposalInvalidParentHash { + correct_parent_hash, + received_parent_hash, + header + }) => { + assert_eq!(correct_parent_hash, replica_prepare.msg.high_vote.proposal.hash()); + assert_eq!(received_parent_hash, leader_prepare.msg.proposal.parent); + assert_eq!(header, leader_prepare.msg.proposal); + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_proposal_non_sequential_number() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let replica_prepare = util.new_replica_prepare(|_| {}); - let mut leader_prepare = util - .process_replica_prepare(ctx, replica_prepare.clone()) - .await - .unwrap() - .unwrap() - .msg; - let correct_num = replica_prepare.msg.high_vote.proposal.number.next(); - assert_eq!(correct_num, leader_prepare.proposal.number); - - let non_seq_num = correct_num.next(); - leader_prepare.proposal.number = non_seq_num; - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util - .process_leader_prepare(ctx, leader_prepare.clone()) - .await; - assert_matches!( - res, - Err(leader_prepare::Error::ProposalNonSequentialNumber { correct_number, received_number, header }) => { - assert_eq!(correct_number, correct_num); - assert_eq!(received_number, non_seq_num); - assert_eq!(header, leader_prepare.msg.proposal); - } - ); + scope::run!(ctx, |ctx,s| async { + let (mut util,runner) = UTHarness::new(ctx,1).await; + s.spawn_bg(runner.run(ctx)); + + let 
replica_prepare = util.new_replica_prepare(|_| {}); + let mut leader_prepare = util + .process_replica_prepare(ctx, replica_prepare.clone()) + .await + .unwrap() + .unwrap() + .msg; + let correct_num = replica_prepare.msg.high_vote.proposal.number.next(); + assert_eq!(correct_num, leader_prepare.proposal.number); + + let non_seq_num = correct_num.next(); + leader_prepare.proposal.number = non_seq_num; + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util + .process_leader_prepare(ctx, leader_prepare.clone()) + .await; + assert_matches!( + res, + Err(leader_prepare::Error::ProposalNonSequentialNumber { correct_number, received_number, header }) => { + assert_eq!(correct_number, correct_num); + assert_eq!(received_number, non_seq_num); + assert_eq!(header, leader_prepare.msg.proposal); + } + ); + Ok(()) + }).await.unwrap(); } #[tokio::test] @@ -314,120 +418,169 @@ async fn leader_prepare_reproposal_without_quorum() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let mut util = UTHarness::new_many(ctx).await; - let replica_prepare = util.new_replica_prepare(|_| {}).msg; - let mut leader_prepare = util - .process_replica_prepare_all(ctx, replica_prepare.clone()) - .await - .msg; - - // Turn leader_prepare into an unjustified reproposal. - let replica_prepares: Vec<_> = util - .keys - .iter() - .map(|k| { - let mut msg = replica_prepare.clone(); - msg.high_vote = rng.gen(); - k.sign_msg(msg) - }) - .collect(); - leader_prepare.justification = - PrepareQC::from(&replica_prepares, &util.validator_set()).unwrap(); - leader_prepare.proposal_payload = None; - - let leader_prepare = util.keys[0].sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::ReproposalWithoutQuorum)); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}).msg; + let mut leader_prepare = util + .process_replica_prepare_all(ctx, replica_prepare.clone()) + .await + .msg; + + // Turn leader_prepare into an unjustified reproposal. 
+ let replica_prepares: Vec<_> = util + .keys + .iter() + .map(|k| { + let mut msg = replica_prepare.clone(); + msg.high_vote = rng.gen(); + k.sign_msg(msg) + }) + .collect(); + leader_prepare.justification = + PrepareQC::from(&replica_prepares, &util.validator_set()).unwrap(); + leader_prepare.proposal_payload = None; + + let leader_prepare = util.keys[0].sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::ReproposalWithoutQuorum)); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_reproposal_when_finalized() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.proposal_payload = None; - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::ReproposalWhenFinalized)); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.proposal_payload = None; + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::ReproposalWhenFinalized)); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_prepare_reproposal_invalid_block() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; - leader_prepare.justification = util.new_prepare_qc(|msg| msg.high_vote = ctx.rng().gen()); - leader_prepare.proposal_payload = None; - let leader_prepare = util.owner_key().sign_msg(leader_prepare); - let res = util.process_leader_prepare(ctx, leader_prepare).await; - assert_matches!(res, Err(leader_prepare::Error::ReproposalInvalidBlock)); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_prepare = util.new_leader_prepare(ctx).await.msg; + leader_prepare.justification = util.new_prepare_qc(|msg| msg.high_vote = ctx.rng().gen()); + leader_prepare.proposal_payload = None; + let leader_prepare = util.owner_key().sign_msg(leader_prepare); + let res = util.process_leader_prepare(ctx, leader_prepare).await; + assert_matches!(res, Err(leader_prepare::Error::ReproposalInvalidBlock)); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_commit_sanity() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - let leader_commit = util.new_leader_commit(ctx).await; - util.process_leader_commit(ctx, leader_commit) - .await - .unwrap(); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + let leader_commit = util.new_leader_commit(ctx).await; + util.process_leader_commit(ctx, leader_commit) + .await + .unwrap(); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_commit_sanity_yield_replica_prepare() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); 
- let mut util = UTHarness::new(ctx, 1).await; - let leader_commit = util.new_leader_commit(ctx).await; - let replica_prepare = util - .process_leader_commit(ctx, leader_commit.clone()) - .await - .unwrap(); - assert_eq!( - replica_prepare.msg, - ReplicaPrepare { - protocol_version: leader_commit.msg.protocol_version, - view: leader_commit.msg.justification.message.view.next(), - high_vote: leader_commit.msg.justification.message, - high_qc: leader_commit.msg.justification, - } - ); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let leader_commit = util.new_leader_commit(ctx).await; + let replica_prepare = util + .process_leader_commit(ctx, leader_commit.clone()) + .await + .unwrap(); + assert_eq!( + replica_prepare.msg, + ReplicaPrepare { + protocol_version: leader_commit.msg.protocol_version, + view: leader_commit.msg.justification.message.view.next(), + high_vote: leader_commit.msg.justification.message, + high_qc: leader_commit.msg.justification, + } + ); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_commit_incompatible_protocol_version() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 1).await; - - let incompatible_protocol_version = util.incompatible_protocol_version(); - let mut leader_commit = util.new_leader_commit(ctx).await.msg; - leader_commit.protocol_version = incompatible_protocol_version; - let res = util - .process_leader_commit(ctx, util.owner_key().sign_msg(leader_commit)) - .await; - assert_matches!( - res, - Err(leader_commit::Error::IncompatibleProtocolVersion { message_version, local_version }) => { - assert_eq!(message_version, incompatible_protocol_version); - assert_eq!(local_version, util.protocol_version()); - } - ) + scope::run!(ctx, |ctx,s| async { + let (mut util,runner) = UTHarness::new(ctx,1).await; + s.spawn_bg(runner.run(ctx)); + + let incompatible_protocol_version = util.incompatible_protocol_version(); + let mut leader_commit = util.new_leader_commit(ctx).await.msg; + leader_commit.protocol_version = incompatible_protocol_version; + let res = util + .process_leader_commit(ctx, util.owner_key().sign_msg(leader_commit)) + .await; + assert_matches!( + res, + Err(leader_commit::Error::IncompatibleProtocolVersion { message_version, local_version }) => { + assert_eq!(message_version, incompatible_protocol_version); + assert_eq!(local_version, util.protocol_version()); + } + ); + Ok(()) + }).await.unwrap(); } #[tokio::test] async fn leader_commit_invalid_leader() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new(ctx, 2).await; - let current_view_leader = util.view_leader(util.replica.view); - assert_ne!(current_view_leader, util.owner_key().public()); - - let leader_commit = util.new_leader_commit(ctx).await.msg; - let res = util - .process_leader_commit(ctx, util.keys[1].sign_msg(leader_commit)) - .await; - assert_matches!(res, Err(leader_commit::Error::InvalidLeader { .. 
})); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 2).await; + s.spawn_bg(runner.run(ctx)); + + let current_view_leader = util.view_leader(util.replica.view); + assert_ne!(current_view_leader, util.owner_key().public()); + + let leader_commit = util.new_leader_commit(ctx).await.msg; + let res = util + .process_leader_commit(ctx, util.keys[1].sign_msg(leader_commit)) + .await; + assert_matches!(res, Err(leader_commit::Error::InvalidLeader { .. })); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] @@ -435,22 +588,37 @@ async fn leader_commit_invalid_sig() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_commit = util.new_leader_commit(ctx).await; - leader_commit.sig = rng.gen(); - let res = util.process_leader_commit(ctx, leader_commit).await; - assert_matches!(res, Err(leader_commit::Error::InvalidSignature { .. })); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_commit = util.new_leader_commit(ctx).await; + leader_commit.sig = rng.gen(); + let res = util.process_leader_commit(ctx, leader_commit).await; + assert_matches!(res, Err(leader_commit::Error::InvalidSignature { .. })); + Ok(()) + }) + .await + .unwrap(); } #[tokio::test] async fn leader_commit_invalid_commit_qc() { + zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let mut util = UTHarness::new(ctx, 1).await; - let mut leader_commit = util.new_leader_commit(ctx).await.msg; - leader_commit.justification = rng.gen(); - let res = util - .process_leader_commit(ctx, util.owner_key().sign_msg(leader_commit)) - .await; - assert_matches!(res, Err(leader_commit::Error::InvalidJustification { .. })); + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new(ctx, 1).await; + s.spawn_bg(runner.run(ctx)); + + let mut leader_commit = util.new_leader_commit(ctx).await.msg; + leader_commit.justification = rng.gen(); + let res = util + .process_leader_commit(ctx, util.owner_key().sign_msg(leader_commit)) + .await; + assert_matches!(res, Err(leader_commit::Error::InvalidJustification { .. })); + Ok(()) + }) + .await + .unwrap(); } diff --git a/node/actors/bft/src/testonly/fuzz.rs b/node/actors/bft/src/testonly/fuzz.rs index ce6c155c..bfe90b56 100644 --- a/node/actors/bft/src/testonly/fuzz.rs +++ b/node/actors/bft/src/testonly/fuzz.rs @@ -155,10 +155,10 @@ impl Fuzz for validator::Signers { impl Fuzz for validator::Payload { fn mutate(&mut self, rng: &mut impl Rng) { // Push bytes into the payload until it exceeds the limit. - let num_bytes = crate::ConsensusInner::PAYLOAD_MAX_SIZE + 1 - self.0.len(); + let num_bytes = crate::Config::PAYLOAD_MAX_SIZE + 1 - self.0.len(); let bytes: Vec = (0..num_bytes).map(|_| rng.gen()).collect(); self.0.extend_from_slice(&bytes); - assert!(self.0.len() > crate::ConsensusInner::PAYLOAD_MAX_SIZE); + assert!(self.0.len() > crate::Config::PAYLOAD_MAX_SIZE); } } diff --git a/node/actors/bft/src/testonly/make.rs b/node/actors/bft/src/testonly/make.rs index 55422585..61506568 100644 --- a/node/actors/bft/src/testonly/make.rs +++ b/node/actors/bft/src/testonly/make.rs @@ -1,38 +1,81 @@ //! This module contains utilities that are only meant for testing purposes. 
-use crate::{ConsensusInner, PayloadSource}; +use crate::{Config, PayloadManager}; use rand::Rng as _; use zksync_concurrency::ctx; use zksync_consensus_roles::validator; -/// Provides payload consisting of random bytes. -pub struct RandomPayloadSource; +/// Produces random payload. +#[derive(Debug)] +pub struct RandomPayload; #[async_trait::async_trait] -impl PayloadSource for RandomPayloadSource { +impl PayloadManager for RandomPayload { async fn propose( &self, ctx: &ctx::Ctx, - _block_number: validator::BlockNumber, + _number: validator::BlockNumber, ) -> ctx::Result { - let mut payload = validator::Payload(vec![0; ConsensusInner::PAYLOAD_MAX_SIZE]); + let mut payload = validator::Payload(vec![0; Config::PAYLOAD_MAX_SIZE]); ctx.rng().fill(&mut payload.0[..]); Ok(payload) } + async fn verify( + &self, + _ctx: &ctx::Ctx, + _number: validator::BlockNumber, + _payload: &validator::Payload, + ) -> ctx::Result<()> { + Ok(()) + } } -/// Never provides a payload. -pub struct UnavailablePayloadSource; +/// propose() blocks indefinitely. +#[derive(Debug)] +pub struct PendingPayload; #[async_trait::async_trait] -impl PayloadSource for UnavailablePayloadSource { +impl PayloadManager for PendingPayload { async fn propose( &self, ctx: &ctx::Ctx, - _block_number: validator::BlockNumber, + _number: validator::BlockNumber, ) -> ctx::Result { ctx.canceled().await; Err(ctx::Canceled.into()) } + + async fn verify( + &self, + _ctx: &ctx::Ctx, + _number: validator::BlockNumber, + _payload: &validator::Payload, + ) -> ctx::Result<()> { + Ok(()) + } +} + +/// verify() doesn't accept any payload. +#[derive(Debug)] +pub struct RejectPayload; + +#[async_trait::async_trait] +impl PayloadManager for RejectPayload { + async fn propose( + &self, + _ctx: &ctx::Ctx, + _number: validator::BlockNumber, + ) -> ctx::Result { + Ok(validator::Payload(vec![])) + } + + async fn verify( + &self, + _ctx: &ctx::Ctx, + _number: validator::BlockNumber, + _payload: &validator::Payload, + ) -> ctx::Result<()> { + Err(anyhow::anyhow!("invalid payload").into()) + } } /// Creates a genesis block with the given payload @@ -55,7 +98,6 @@ pub fn make_genesis( }) .collect(); let final_block = validator::FinalBlock { - header, payload, justification: validator::CommitQC::from(&signed_messages, &validator_set).unwrap(), }; diff --git a/node/actors/bft/src/testonly/node.rs b/node/actors/bft/src/testonly/node.rs index 93aac7a5..20c08b5f 100644 --- a/node/actors/bft/src/testonly/node.rs +++ b/node/actors/bft/src/testonly/node.rs @@ -1,11 +1,11 @@ use super::Fuzz; -use crate::{io, testonly}; +use crate::{io, testonly, PayloadManager}; use rand::Rng; use std::sync::Arc; use zksync_concurrency::{ctx, scope}; use zksync_consensus_network as network; use zksync_consensus_network::io::ConsensusInputMessage; -use zksync_consensus_storage::InMemoryStorage; +use zksync_consensus_storage as storage; use zksync_consensus_utils::pipe::DispatcherPipe; /// Enum representing the behavior of the node. 
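For reference, the `PayloadManager` trait exercised in `testonly/make.rs` above pairs block production with block validation: `propose()` builds a payload for a given block number and `verify()` checks a payload received in a proposal. Below is a minimal sketch of a custom test double in the same style as `RandomPayload`/`RejectPayload`; the `FixedPayload` name is made up for illustration, and the trait bounds (`#[async_trait]`, `Debug`) are inferred from the implementations shown in this diff rather than taken from any other source.

    use zksync_concurrency::ctx;
    use zksync_consensus_roles::validator;

    /// Hypothetical test double: always proposes one fixed payload and
    /// only accepts that exact payload during verification.
    #[derive(Debug)]
    pub struct FixedPayload(pub Vec<u8>);

    #[async_trait::async_trait]
    impl crate::PayloadManager for FixedPayload {
        async fn propose(
            &self,
            _ctx: &ctx::Ctx,
            _number: validator::BlockNumber,
        ) -> ctx::Result<validator::Payload> {
            // Propose the same bytes for every block number.
            Ok(validator::Payload(self.0.clone()))
        }

        async fn verify(
            &self,
            _ctx: &ctx::Ctx,
            _number: validator::BlockNumber,
            payload: &validator::Payload,
        ) -> ctx::Result<()> {
            // Reject anything that doesn't match the fixed payload.
            if payload.0 != self.0 {
                return Err(anyhow::anyhow!("unexpected payload").into());
            }
            Ok(())
        }
    }

Such a manager would plug into `Config { payload_manager: Box::new(FixedPayload(..)), .. }` (or `Behavior::payload_manager()` below) exactly like the built-in test doubles.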
@@ -26,10 +26,10 @@ pub(crate) enum Behavior { } impl Behavior { - pub(crate) fn payload_source(&self) -> Box { + pub(crate) fn payload_manager(&self) -> Box { match self { - Self::HonestNotProposing => Box::new(testonly::UnavailablePayloadSource), - _ => Box::new(testonly::RandomPayloadSource), + Self::HonestNotProposing => Box::new(testonly::PendingPayload), + _ => Box::new(testonly::RandomPayload), } } } @@ -38,7 +38,7 @@ impl Behavior { pub(super) struct Node { pub(crate) net: network::testonly::Instance, pub(crate) behavior: Behavior, - pub(crate) storage: Arc, + pub(crate) block_store: Arc, } impl Node { diff --git a/node/actors/bft/src/testonly/run.rs b/node/actors/bft/src/testonly/run.rs index 9522b86a..5072a71d 100644 --- a/node/actors/bft/src/testonly/run.rs +++ b/node/actors/bft/src/testonly/run.rs @@ -1,12 +1,12 @@ use super::{Behavior, Node}; -use crate::testonly; +use crate::{testonly, Config}; use anyhow::Context; -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; use tracing::Instrument as _; use zksync_concurrency::{ctx, oneshot, scope, signal, sync}; use zksync_consensus_network as network; use zksync_consensus_roles::validator; -use zksync_consensus_storage::{BlockStore, InMemoryStorage, ReplicaStore}; +use zksync_consensus_storage::{testonly::in_memory, BlockStore}; use zksync_consensus_utils::pipe; #[derive(Clone, Copy)] @@ -27,22 +27,25 @@ impl Test { /// Run a test with the given parameters. pub(crate) async fn run(&self, ctx: &ctx::Ctx) -> anyhow::Result<()> { let rng = &mut ctx.rng(); - let nodes: Vec<_> = network::testonly::Instance::new(rng, self.nodes.len(), 1); - let keys: Vec<_> = nodes + let nets: Vec<_> = network::testonly::Instance::new(rng, self.nodes.len(), 1); + let keys: Vec<_> = nets .iter() .map(|node| node.consensus_config().key.clone()) .collect(); let (genesis_block, _) = testonly::make_genesis(&keys, validator::Payload(vec![]), validator::BlockNumber(0)); - let nodes: Vec<_> = nodes - .into_iter() - .enumerate() - .map(|(i, net)| Node { + let mut nodes = vec![]; + let mut store_runners = vec![]; + for (i, net) in nets.into_iter().enumerate() { + let block_store = Box::new(in_memory::BlockStore::new(genesis_block.clone())); + let (block_store, runner) = BlockStore::new(ctx, block_store).await?; + store_runners.push(runner); + nodes.push(Node { net, behavior: self.nodes[i], - storage: Arc::new(InMemoryStorage::new(genesis_block.clone())), - }) - .collect(); + block_store, + }); + } // Get only the honest replicas. let honest: Vec<_> = nodes @@ -53,17 +56,16 @@ impl Test { // Run the nodes until all honest nodes store enough finalized blocks. scope::run!(ctx, |ctx, s| async { + for runner in store_runners { + s.spawn_bg(runner.run(ctx)); + } s.spawn_bg(run_nodes(ctx, self.network, &nodes)); + let want_block = validator::BlockNumber(self.blocks_to_finalize as u64); for n in &honest { - s.spawn(async { - sync::wait_for( - ctx, - &mut n.storage.subscribe_to_block_writes(), - |block_number| block_number.0 >= self.blocks_to_finalize as u64, - ) - .await?; - Ok(()) - }); + sync::wait_for(ctx, &mut n.block_store.subscribe(), |state| { + state.next() > want_block + }) + .await?; } Ok(()) }) @@ -72,9 +74,9 @@ impl Test { // Check that the stored blocks are consistent. for i in 0..self.blocks_to_finalize as u64 + 1 { let i = validator::BlockNumber(i); - let want = honest[0].storage.block(ctx, i).await?; + let want = honest[0].block_store.block(ctx, i).await?; for n in &honest[1..] 
{ - assert_eq!(want, n.storage.block(ctx, i).await?); + assert_eq!(want, n.block_store.block(ctx, i).await?); } } Ok(()) @@ -98,18 +100,17 @@ async fn run_nodes(ctx: &ctx::Ctx, network: Network, nodes: &[Node]) -> anyhow:: network_pipes.insert(validator_key.public(), network_actor_pipe); s.spawn( async { - let storage = ReplicaStore::from_store(node.storage.clone()); scope::run!(ctx, |ctx, s| async { network_ready.recv(ctx).await?; s.spawn(async { - crate::run( - ctx, - consensus_actor_pipe, - node.net.consensus_config().key.clone(), + Config { + secret_key: validator_key, validator_set, - storage, - &*node.behavior.payload_source(), - ) + block_store: node.block_store.clone(), + replica_store: Box::new(in_memory::ReplicaStore::default()), + payload_manager: node.behavior.payload_manager(), + } + .run(ctx, consensus_actor_pipe) .await .context("consensus.run()") }); diff --git a/node/actors/bft/src/testonly/ut_harness.rs b/node/actors/bft/src/testonly/ut_harness.rs index a97c306c..e39f9086 100644 --- a/node/actors/bft/src/testonly/ut_harness.rs +++ b/node/actors/bft/src/testonly/ut_harness.rs @@ -4,8 +4,7 @@ use crate::{ leader::{ReplicaCommitError, ReplicaPrepareError}, replica, replica::{LeaderCommitError, LeaderPrepareError}, - testonly::RandomPayloadSource, - ConsensusInner, + testonly, Config, PayloadManager, }; use assert_matches::assert_matches; use rand::Rng; @@ -16,7 +15,7 @@ use zksync_consensus_roles::validator::{ self, CommitQC, LeaderCommit, LeaderPrepare, Payload, Phase, PrepareQC, ReplicaCommit, ReplicaPrepare, SecretKey, Signed, ViewNumber, }; -use zksync_consensus_storage::{InMemoryStorage, ReplicaStore}; +use zksync_consensus_storage::{testonly::in_memory, BlockStore, BlockStoreRunner}; use zksync_consensus_utils::enum_util::Variant; /// `UTHarness` provides various utilities for unit tests. @@ -34,30 +33,40 @@ pub(crate) struct UTHarness { impl UTHarness { /// Creates a new `UTHarness` with the specified validator set size. - pub(crate) async fn new(ctx: &ctx::Ctx, num_validators: usize) -> UTHarness { + pub(crate) async fn new( + ctx: &ctx::Ctx, + num_validators: usize, + ) -> (UTHarness, BlockStoreRunner) { + Self::new_with_payload(ctx, num_validators, Box::new(testonly::RandomPayload)).await + } + + pub(crate) async fn new_with_payload( + ctx: &ctx::Ctx, + num_validators: usize, + payload_manager: Box, + ) -> (UTHarness, BlockStoreRunner) { let mut rng = ctx.rng(); let keys: Vec<_> = (0..num_validators).map(|_| rng.gen()).collect(); let (genesis, validator_set) = crate::testonly::make_genesis(&keys, Payload(vec![]), validator::BlockNumber(0)); // Initialize the storage. - let storage = InMemoryStorage::new(genesis); + let block_store = Box::new(in_memory::BlockStore::new(genesis)); + let (block_store, runner) = BlockStore::new(ctx, block_store).await.unwrap(); // Create the pipe. 
let (send, recv) = ctx::channel::unbounded(); - let inner = Arc::new(ConsensusInner { - pipe: send, + let cfg = Arc::new(Config { secret_key: keys[0].clone(), validator_set, + block_store: block_store.clone(), + replica_store: Box::new(in_memory::ReplicaStore::default()), + payload_manager, }); - let leader = leader::StateMachine::new(ctx, inner.clone()); - let replica = replica::StateMachine::start( - ctx, - inner.clone(), - ReplicaStore::from_store(Arc::new(storage)), - ) - .await - .unwrap(); + let leader = leader::StateMachine::new(ctx, cfg.clone(), send.clone()); + let replica = replica::StateMachine::start(ctx, cfg.clone(), send.clone()) + .await + .unwrap(); let mut this = UTHarness { leader, replica, @@ -65,11 +74,11 @@ impl UTHarness { keys, }; let _: Signed = this.try_recv().unwrap(); - this + (this, runner) } /// Creates a new `UTHarness` with minimally-significant validator set size. - pub(crate) async fn new_many(ctx: &ctx::Ctx) -> UTHarness { + pub(crate) async fn new_many(ctx: &ctx::Ctx) -> (UTHarness, BlockStoreRunner) { let num_validators = 6; assert!(crate::misc::faulty_replicas(num_validators) > 0); UTHarness::new(ctx, num_validators).await @@ -103,7 +112,7 @@ impl UTHarness { } pub(crate) fn owner_key(&self) -> &SecretKey { - &self.replica.inner.secret_key + &self.replica.config.secret_key } pub(crate) fn set_owner_as_view_leader(&mut self) { @@ -200,14 +209,10 @@ impl UTHarness { let prepare_qc = self.leader.prepare_qc.subscribe(); self.leader.process_replica_prepare(ctx, msg).await?; if prepare_qc.has_changed().unwrap() { - leader::StateMachine::propose( - ctx, - &self.leader.inner, - &RandomPayloadSource, - prepare_qc.borrow().clone().unwrap(), - ) - .await - .unwrap(); + let prepare_qc = prepare_qc.borrow().clone().unwrap(); + leader::StateMachine::propose(ctx, &self.leader.config, prepare_qc, &self.leader.pipe) + .await + .unwrap(); } Ok(self.try_recv()) } @@ -217,7 +222,7 @@ impl UTHarness { ctx: &ctx::Ctx, msg: ReplicaPrepare, ) -> Signed { - let want_threshold = self.replica.inner.threshold(); + let want_threshold = self.replica.config.threshold(); let mut leader_prepare = None; let msgs: Vec<_> = self.keys.iter().map(|k| k.sign_msg(msg.clone())).collect(); for (i, msg) in msgs.into_iter().enumerate() { @@ -247,7 +252,7 @@ impl UTHarness { ) -> Signed { for (i, key) in self.keys.iter().enumerate() { let res = self.leader.process_replica_commit(ctx, key.sign_msg(msg)); - let want_threshold = self.replica.inner.threshold(); + let want_threshold = self.replica.config.threshold(); match (i + 1).cmp(&want_threshold) { Ordering::Equal => res.unwrap(), Ordering::Less => res.unwrap(), @@ -278,7 +283,7 @@ impl UTHarness { } pub(crate) fn view_leader(&self, view: ViewNumber) -> validator::PublicKey { - self.replica.inner.view_leader(view) + self.replica.config.view_leader(view) } pub(crate) fn validator_set(&self) -> validator::ValidatorSet { diff --git a/node/actors/bft/src/tests.rs b/node/actors/bft/src/tests.rs index 37dbd7bd..0ca95e7f 100644 --- a/node/actors/bft/src/tests.rs +++ b/node/actors/bft/src/tests.rs @@ -2,7 +2,7 @@ use crate::{ misc::consensus_threshold, testonly::{ut_harness::UTHarness, Behavior, Network, Test}, }; -use zksync_concurrency::ctx; +use zksync_concurrency::{ctx, scope}; use zksync_consensus_roles::validator::Phase; async fn run_test(behavior: Behavior, network: Network) { @@ -69,10 +69,16 @@ async fn byzantine_real_network() { async fn timeout_leader_no_prepares() { zksync_concurrency::testonly::abort_on_panic(); let ctx = 
&ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - - util.new_replica_prepare(|_| {}); - util.produce_block_after_timeout(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_replica_prepare(|_| {}); + util.produce_block_after_timeout(ctx).await; + Ok(()) + }) + .await + .unwrap(); } /// Testing liveness after the network becomes idle with leader having some cached prepare messages for the current view. @@ -80,15 +86,21 @@ async fn timeout_leader_no_prepares() { async fn timeout_leader_some_prepares() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - - let replica_prepare = util.new_replica_prepare(|_| {}); - assert!(util - .process_replica_prepare(ctx, replica_prepare) - .await - .unwrap() - .is_none()); - util.produce_block_after_timeout(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + let replica_prepare = util.new_replica_prepare(|_| {}); + assert!(util + .process_replica_prepare(ctx, replica_prepare) + .await + .unwrap() + .is_none()); + util.produce_block_after_timeout(ctx).await; + Ok(()) + }) + .await + .unwrap(); } /// Testing liveness after the network becomes idle with leader in commit phase. @@ -96,12 +108,18 @@ async fn timeout_leader_some_prepares() { async fn timeout_leader_in_commit() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - - util.new_leader_prepare(ctx).await; - // Leader is in `Phase::Commit`, but should still accept prepares from newer views. - assert_eq!(util.leader.phase, Phase::Commit); - util.produce_block_after_timeout(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_leader_prepare(ctx).await; + // Leader is in `Phase::Commit`, but should still accept prepares from newer views. + assert_eq!(util.leader.phase, Phase::Commit); + util.produce_block_after_timeout(ctx).await; + Ok(()) + }) + .await + .unwrap(); } /// Testing liveness after the network becomes idle with replica in commit phase. @@ -109,12 +127,18 @@ async fn timeout_leader_in_commit() { async fn timeout_replica_in_commit() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - - util.new_replica_commit(ctx).await; - // Leader is in `Phase::Commit`, but should still accept prepares from newer views. - assert_eq!(util.leader.phase, Phase::Commit); - util.produce_block_after_timeout(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_replica_commit(ctx).await; + // Leader is in `Phase::Commit`, but should still accept prepares from newer views. + assert_eq!(util.leader.phase, Phase::Commit); + util.produce_block_after_timeout(ctx).await; + Ok(()) + }) + .await + .unwrap(); } /// Testing liveness after the network becomes idle with leader having some cached commit messages for the current view. 
@@ -122,17 +146,23 @@ async fn timeout_replica_in_commit() { async fn timeout_leader_some_commits() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - - let replica_commit = util.new_replica_commit(ctx).await; - assert!(util - .process_replica_commit(ctx, replica_commit) - .await - .unwrap() - .is_none()); - // Leader is in `Phase::Commit`, but should still accept prepares from newer views. - assert_eq!(util.leader_phase(), Phase::Commit); - util.produce_block_after_timeout(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + let replica_commit = util.new_replica_commit(ctx).await; + assert!(util + .process_replica_commit(ctx, replica_commit) + .await + .unwrap() + .is_none()); + // Leader is in `Phase::Commit`, but should still accept prepares from newer views. + assert_eq!(util.leader_phase(), Phase::Commit); + util.produce_block_after_timeout(ctx).await; + Ok(()) + }) + .await + .unwrap(); } /// Testing liveness after the network becomes idle with leader in a consecutive prepare phase. @@ -140,10 +170,16 @@ async fn timeout_leader_some_commits() { async fn timeout_leader_in_consecutive_prepare() { zksync_concurrency::testonly::abort_on_panic(); let ctx = &ctx::test_root(&ctx::RealClock); - let mut util = UTHarness::new_many(ctx).await; - - util.new_leader_commit(ctx).await; - util.produce_block_after_timeout(ctx).await; + scope::run!(ctx, |ctx, s| async { + let (mut util, runner) = UTHarness::new_many(ctx).await; + s.spawn_bg(runner.run(ctx)); + + util.new_leader_commit(ctx).await; + util.produce_block_after_timeout(ctx).await; + Ok(()) + }) + .await + .unwrap(); } /// Not being able to propose a block shouldn't cause a deadlock. diff --git a/node/actors/executor/Cargo.toml b/node/actors/executor/Cargo.toml index 5f6a3f49..2746734e 100644 --- a/node/actors/executor/Cargo.toml +++ b/node/actors/executor/Cargo.toml @@ -15,10 +15,8 @@ zksync_consensus_roles.workspace = true zksync_consensus_storage.workspace = true zksync_consensus_sync_blocks.workspace = true zksync_consensus_utils.workspace = true -zksync_protobuf.workspace = true anyhow.workspace = true -prost.workspace = true rand.workspace = true tracing.workspace = true vise.workspace = true @@ -27,8 +25,5 @@ vise.workspace = true test-casing.workspace = true tokio.workspace = true -[build-dependencies] -zksync_protobuf_build.workspace = true - [lints] -workspace = true \ No newline at end of file +workspace = true diff --git a/node/actors/executor/src/config/mod.rs b/node/actors/executor/src/config/mod.rs deleted file mode 100644 index 600d8c5d..00000000 --- a/node/actors/executor/src/config/mod.rs +++ /dev/null @@ -1,172 +0,0 @@ -//! Module to create the configuration for the consensus node. -use anyhow::Context as _; -use std::{ - collections::{HashMap, HashSet}, - net, -}; -use zksync_consensus_bft::misc::consensus_threshold; -use zksync_consensus_crypto::{read_required_text, Text, TextFmt}; -use zksync_consensus_network::gossip; -use zksync_consensus_roles::{node, validator}; -use zksync_protobuf::{read_required, required, ProtoFmt}; - -pub mod proto; -#[cfg(test)] -mod tests; - -/// Consensus network config. See `network::ConsensusConfig`. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ConsensusConfig { - /// Validator key of this node. - /// It should match the secret key provided in the `validator_key` file. 
- pub key: validator::PublicKey, - /// Public TCP address that other validators are expected to connect to. - /// It is announced over gossip network. - pub public_addr: net::SocketAddr, -} - -impl ProtoFmt for ConsensusConfig { - type Proto = proto::ConsensusConfig; - - fn read(proto: &Self::Proto) -> anyhow::Result { - Ok(Self { - key: read_required_text(&proto.key).context("key")?, - public_addr: read_required_text(&proto.public_addr).context("public_addr")?, - }) - } - - fn build(&self) -> Self::Proto { - Self::Proto { - key: Some(self.key.encode()), - public_addr: Some(self.public_addr.encode()), - } - } -} - -/// Gossip network config. See `network::GossipConfig`. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct GossipConfig { - /// Key of this node. It uniquely identifies the node. - /// It should match the secret key provided in the `node_key` file. - pub key: node::PublicKey, - /// Limit on the number of inbound connections outside - /// of the `static_inbound` set. - pub dynamic_inbound_limit: u64, - /// Inbound connections that should be unconditionally accepted. - pub static_inbound: HashSet, - /// Outbound connections that the node should actively try to - /// establish and maintain. - pub static_outbound: HashMap, -} - -impl From for GossipConfig { - fn from(config: gossip::Config) -> Self { - Self { - key: config.key.public(), - dynamic_inbound_limit: config.dynamic_inbound_limit, - static_inbound: config.static_inbound, - static_outbound: config.static_outbound, - } - } -} - -impl ProtoFmt for GossipConfig { - type Proto = proto::GossipConfig; - - fn read(r: &Self::Proto) -> anyhow::Result { - let mut static_inbound = HashSet::new(); - for (i, v) in r.static_inbound.iter().enumerate() { - static_inbound.insert( - Text::new(v) - .decode() - .with_context(|| format!("static_inbound[{i}]"))?, - ); - } - let mut static_outbound = HashMap::new(); - for (i, e) in r.static_outbound.iter().enumerate() { - let key = - read_required_text(&e.key).with_context(|| format!("static_outbound[{i}].key"))?; - let addr = read_required_text(&e.addr) - .with_context(|| format!("static_outbound[{i}].addr"))?; - static_outbound.insert(key, addr); - } - Ok(Self { - key: read_required_text(&r.key).context("key")?, - dynamic_inbound_limit: *required(&r.dynamic_inbound_limit) - .context("dynamic_inbound_limit")?, - static_inbound, - static_outbound, - }) - } - - fn build(&self) -> Self::Proto { - Self::Proto { - key: Some(self.key.encode()), - dynamic_inbound_limit: Some(self.dynamic_inbound_limit), - static_inbound: self.static_inbound.iter().map(TextFmt::encode).collect(), - static_outbound: self - .static_outbound - .iter() - .map(|(key, addr)| proto::NodeAddr { - key: Some(TextFmt::encode(key)), - addr: Some(TextFmt::encode(addr)), - }) - .collect(), - } - } -} - -/// Config of the node executor. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ExecutorConfig { - /// IP:port to listen on, for incoming TCP connections. - /// Use `0.0.0.0:` to listen on all network interfaces (i.e. on all IPs exposed by this VM). - pub server_addr: net::SocketAddr, - /// Gossip network config. - pub gossip: GossipConfig, - /// Specifies the genesis block of the blockchain. - pub genesis_block: validator::FinalBlock, - /// Static specification of validators for Proof of Authority. Should be deprecated once we move - /// to Proof of Stake. - pub validators: validator::ValidatorSet, -} - -impl ExecutorConfig { - /// Validates internal consistency of this config. 
- pub(crate) fn validate(&self) -> anyhow::Result<()> { - let consensus_threshold = consensus_threshold(self.validators.len()); - self.genesis_block - .validate(&self.validators, consensus_threshold)?; - Ok(()) - } -} - -impl ProtoFmt for ExecutorConfig { - type Proto = proto::ExecutorConfig; - - fn read(r: &Self::Proto) -> anyhow::Result { - let validators = r.validators.iter().enumerate().map(|(i, v)| { - Text::new(v) - .decode() - .with_context(|| format!("validators[{i}]")) - }); - let validators: anyhow::Result> = validators.collect(); - let validators = validator::ValidatorSet::new(validators?).context("validators")?; - - Ok(Self { - server_addr: read_required_text(&r.server_addr).context("server_addr")?, - gossip: read_required(&r.gossip).context("gossip")?, - genesis_block: read_required_text(&r.genesis_block).context("genesis_block")?, - validators, - }) - } - - fn build(&self) -> Self::Proto { - Self::Proto { - server_addr: Some(TextFmt::encode(&self.server_addr)), - gossip: Some(self.gossip.build()), - genesis_block: Some(TextFmt::encode(&self.genesis_block)), - validators: self.validators.iter().map(|v| v.encode()).collect(), - } - } -} diff --git a/node/actors/executor/src/config/proto/mod.rs b/node/actors/executor/src/config/proto/mod.rs deleted file mode 100644 index 2ea69ab4..00000000 --- a/node/actors/executor/src/config/proto/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -#![allow(warnings)] -include!(concat!(env!("OUT_DIR"), "/src/config/proto/gen.rs")); diff --git a/node/actors/executor/src/config/tests.rs b/node/actors/executor/src/config/tests.rs deleted file mode 100644 index 3c18f072..00000000 --- a/node/actors/executor/src/config/tests.rs +++ /dev/null @@ -1,56 +0,0 @@ -use super::{ConsensusConfig, ExecutorConfig, GossipConfig}; -use rand::{ - distributions::{Distribution, Standard}, - Rng, -}; -use zksync_concurrency::ctx; -use zksync_consensus_roles::{node, validator}; -use zksync_protobuf::testonly::test_encode_random; - -fn make_addr(rng: &mut R) -> std::net::SocketAddr { - std::net::SocketAddr::new(std::net::IpAddr::from(rng.gen::<[u8; 16]>()), rng.gen()) -} - -impl Distribution for Standard { - fn sample(&self, rng: &mut R) -> ConsensusConfig { - ConsensusConfig { - key: rng.gen::().public(), - public_addr: make_addr(rng), - } - } -} - -impl Distribution for Standard { - fn sample(&self, rng: &mut R) -> GossipConfig { - GossipConfig { - key: rng.gen::().public(), - dynamic_inbound_limit: rng.gen(), - static_inbound: (0..5) - .map(|_| rng.gen::().public()) - .collect(), - static_outbound: (0..6) - .map(|_| (rng.gen::().public(), make_addr(rng))) - .collect(), - } - } -} - -impl Distribution for Standard { - fn sample(&self, rng: &mut R) -> ExecutorConfig { - ExecutorConfig { - server_addr: make_addr(rng), - gossip: rng.gen(), - genesis_block: rng.gen(), - validators: rng.gen(), - } - } -} - -#[test] -fn test_schema_encoding() { - let ctx = ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - test_encode_random::<_, ConsensusConfig>(rng); - test_encode_random::<_, GossipConfig>(rng); - test_encode_random::<_, ExecutorConfig>(rng); -} diff --git a/node/actors/executor/src/lib.rs b/node/actors/executor/src/lib.rs index 9856e9de..04d8a294 100644 --- a/node/actors/executor/src/lib.rs +++ b/node/actors/executor/src/lib.rs @@ -1,36 +1,37 @@ //! Library files for the executor. We have it separate from the binary so that we can use these files in the tools crate. 
use crate::io::Dispatcher; use anyhow::Context as _; -use std::{any, fmt, sync::Arc}; -use zksync_concurrency::{ctx, net, scope}; -use zksync_consensus_bft::{misc::consensus_threshold, PayloadSource}; +use std::{ + collections::{HashMap, HashSet}, + fmt, + sync::Arc, +}; +use zksync_concurrency::{ctx, net, scope, sync}; +use zksync_consensus_bft as bft; use zksync_consensus_network as network; use zksync_consensus_roles::{node, validator}; -use zksync_consensus_storage::{ReplicaStateStore, ReplicaStore, WriteBlockStore}; -use zksync_consensus_sync_blocks::SyncBlocks; +use zksync_consensus_storage::{BlockStore, BlockStoreState, ReplicaStore}; +use zksync_consensus_sync_blocks as sync_blocks; use zksync_consensus_utils::pipe; -mod config; mod io; pub mod testonly; #[cfg(test)] mod tests; -pub use self::config::{proto, ConsensusConfig, ExecutorConfig, GossipConfig}; +pub use network::consensus::Config as ValidatorConfig; /// Validator-related part of [`Executor`]. -struct ValidatorExecutor { +pub struct Validator { /// Consensus network configuration. - config: ConsensusConfig, - /// Validator key. - key: validator::SecretKey, + pub config: ValidatorConfig, /// Store for replica state. - replica_state_store: Arc, - /// Payload proposer for new blocks. - payload_source: Arc, + pub replica_store: Box, + /// Payload manager. + pub payload_manager: Box, } -impl fmt::Debug for ValidatorExecutor { +impl fmt::Debug for Validator { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("ValidatorExecutor") .field("config", &self.config) @@ -38,128 +39,90 @@ impl fmt::Debug for ValidatorExecutor { } } -impl ValidatorExecutor { - /// Returns consensus network configuration. - fn consensus_config(&self) -> network::consensus::Config { - network::consensus::Config { - // Consistency of the validator key has been verified in constructor. - key: self.key.clone(), - public_addr: self.config.public_addr, +/// Config of the node executor. +#[derive(Clone, Debug)] +pub struct Config { + /// IP:port to listen on, for incoming TCP connections. + /// Use `0.0.0.0:` to listen on all network interfaces (i.e. on all IPs exposed by this VM). + pub server_addr: std::net::SocketAddr, + /// Static specification of validators for Proof of Authority. Should be deprecated once we move + /// to Proof of Stake. + pub validators: validator::ValidatorSet, + + /// Key of this node. It uniquely identifies the node. + /// It should match the secret key provided in the `node_key` file. + pub node_key: node::SecretKey, + /// Limit on the number of inbound connections outside + /// of the `static_inbound` set. + pub gossip_dynamic_inbound_limit: u64, + /// Inbound connections that should be unconditionally accepted. + pub gossip_static_inbound: HashSet, + /// Outbound connections that the node should actively try to + /// establish and maintain. + pub gossip_static_outbound: HashMap, +} + +impl Config { + /// Returns gossip network configuration. + pub(crate) fn gossip(&self) -> network::gossip::Config { + network::gossip::Config { + key: self.node_key.clone(), + dynamic_inbound_limit: self.gossip_dynamic_inbound_limit, + static_inbound: self.gossip_static_inbound.clone(), + static_outbound: self.gossip_static_outbound.clone(), + enable_pings: true, } } } /// Executor allowing to spin up all actors necessary for a consensus node. #[derive(Debug)] -pub struct Executor { +pub struct Executor { /// General-purpose executor configuration. - executor_config: ExecutorConfig, - /// Secret key of the node. 
- node_key: node::SecretKey, - /// Block and replica state storage used by the node. - storage: Arc, + pub config: Config, + /// Block storage used by the node. + pub block_store: Arc, /// Validator-specific node data. - validator: Option, + pub validator: Option, } -impl Executor { - /// Creates a new executor with the specified parameters. - pub async fn new( - ctx: &ctx::Ctx, - node_config: ExecutorConfig, - node_key: node::SecretKey, - storage: Arc, - ) -> anyhow::Result { - node_config.validate()?; - anyhow::ensure!( - node_config.gossip.key == node_key.public(), - "config.gossip.key = {:?} doesn't match the secret key {:?}", - node_config.gossip.key, - node_key - ); - - // While justifications may differ among nodes for an arbitrary block, we assume that - // the genesis block has a hardcoded justification. - let first_block = storage.first_block(ctx).await.context("first_block")?; - anyhow::ensure!( - first_block == node_config.genesis_block, - "First stored block {first_block:?} in `{}` is not equal to the configured genesis block {:?}", - any::type_name::(), - node_config.genesis_block - ); - - Ok(Self { - executor_config: node_config, - node_key, - storage, - validator: None, - }) - } - - /// Sets validator-related data for the executor. - pub fn set_validator( - &mut self, - config: ConsensusConfig, - key: validator::SecretKey, - replica_state_store: Arc, - payload_source: Arc, - ) -> anyhow::Result<()> { - let public = &config.key; - anyhow::ensure!( - *public == key.public(), - "config.consensus.key = {public:?} doesn't match the secret key {key:?}" - ); - - // TODO: this logic must be refactored once dynamic validator sets are implemented - let is_validator = self - .executor_config - .validators - .iter() - .any(|validator_key| validator_key == public); - if is_validator { - self.validator = Some(ValidatorExecutor { - config, - key, - replica_state_store, - payload_source, - }); - } else { - tracing::info!( - "Key {public:?} is not a validator per validator set {:?}; the executor will not \ - run consensus", - self.executor_config.validators - ); - } - Ok(()) - } - - /// Returns gossip network configuration. - fn gossip_config(&self) -> network::gossip::Config { - let gossip = &self.executor_config.gossip; - network::gossip::Config { - key: self.node_key.clone(), - dynamic_inbound_limit: gossip.dynamic_inbound_limit, - static_inbound: gossip.static_inbound.clone(), - static_outbound: gossip.static_outbound.clone(), - enable_pings: true, - } +/// Converts BlockStoreState to isomorphic network::io::SyncState. +fn to_sync_state(state: BlockStoreState) -> network::io::SyncState { + network::io::SyncState { + first_stored_block: state.first, + last_stored_block: state.last, } +} +impl Executor { /// Extracts a network crate config. fn network_config(&self) -> network::Config { network::Config { - server_addr: net::tcp::ListenerAddr::new(self.executor_config.server_addr), - validators: self.executor_config.validators.clone(), - gossip: self.gossip_config(), - consensus: self - .validator - .as_ref() - .map(ValidatorExecutor::consensus_config), + server_addr: net::tcp::ListenerAddr::new(self.config.server_addr), + validators: self.config.validators.clone(), + gossip: self.config.gossip(), + consensus: self.validator.as_ref().map(|v| v.config.clone()), } } + /// Verifies correctness of the Executor. 
+ fn verify(&self) -> anyhow::Result<()> { + if let Some(validator) = self.validator.as_ref() { + if !self + .config + .validators + .iter() + .any(|key| key == &validator.config.key.public()) + { + anyhow::bail!("this validator doesn't belong to the consensus"); + } + } + Ok(()) + } + /// Runs this executor to completion. This should be spawned on a separate task. pub async fn run(self, ctx: &ctx::Ctx) -> anyhow::Result<()> { + self.verify().context("verify()")?; let network_config = self.network_config(); // Generate the communication pipes. We have one for each actor. @@ -174,27 +137,24 @@ impl Executor { ); // Create each of the actors. - let validator_set = &self.executor_config.validators; - let sync_blocks_config = zksync_consensus_sync_blocks::Config::new( - validator_set.clone(), - consensus_threshold(validator_set.len()), - )?; - let sync_blocks = SyncBlocks::new( - ctx, - sync_blocks_actor_pipe, - self.storage.clone(), - sync_blocks_config, - ) - .await - .context("sync_blocks")?; - - let sync_blocks_subscriber = sync_blocks.subscribe_to_state_updates(); + let validator_set = self.config.validators; + let mut block_store_state = self.block_store.subscribe(); + let sync_state = sync::watch::channel(to_sync_state(block_store_state.borrow().clone())).0; tracing::debug!("Starting actors in separate threads."); scope::run!(ctx, |ctx, s| async { + s.spawn_bg(async { + // Task forwarding changes from block_store_state to sync_state. + // Alternatively we can make network depend on the storage directly. + while let Ok(state) = sync::changed(ctx, &mut block_store_state).await { + sync_state.send_replace(to_sync_state(state.clone())); + } + Ok(()) + }); + s.spawn_blocking(|| dispatcher.run(ctx).context("IO Dispatcher stopped")); s.spawn(async { - let state = network::State::new(network_config, None, Some(sync_blocks_subscriber)) + let state = network::State::new(network_config, None, Some(sync_state.subscribe())) .context("Invalid network config")?; state.register_metrics(); network::run_network(ctx, state, network_actor_pipe) @@ -204,21 +164,25 @@ impl Executor { if let Some(validator) = self.validator { s.spawn(async { let validator = validator; - let consensus_storage = - ReplicaStore::new(validator.replica_state_store, self.storage.clone()); - zksync_consensus_bft::run( - ctx, - consensus_actor_pipe, - validator.key.clone(), - validator_set.clone(), - consensus_storage, - &*validator.payload_source, - ) + bft::Config { + secret_key: validator.config.key.clone(), + validator_set: validator_set.clone(), + block_store: self.block_store.clone(), + replica_store: validator.replica_store, + payload_manager: validator.payload_manager, + } + .run(ctx, consensus_actor_pipe) .await .context("Consensus stopped") }); } - sync_blocks.run(ctx).await.context("Syncing blocks stopped") + sync_blocks::Config::new( + validator_set.clone(), + bft::misc::consensus_threshold(validator_set.len()), + )? + .run(ctx, sync_blocks_actor_pipe, self.block_store.clone()) + .await + .context("Syncing blocks stopped") }) .await } diff --git a/node/actors/executor/src/testonly.rs b/node/actors/executor/src/testonly.rs index a57393fb..e40b4d69 100644 --- a/node/actors/executor/src/testonly.rs +++ b/node/actors/executor/src/testonly.rs @@ -1,111 +1,46 @@ //! Testing extensions for node executor. 
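Before the test-helper changes: with `Executor::new()`/`set_validator()` gone, wiring a node is now plain struct construction plus `run()`. A hedged sketch of the expected call pattern, mirroring the tests later in this diff (the `runner` here is assumed to be the `BlockStoreRunner` returned by `BlockStore::new`):

    // Sketch; assumes `config`, the storage pair and an optional `validator`
    // were built as shown elsewhere in this PR.
    async fn run_node(
        ctx: &ctx::Ctx,
        config: Config,
        block_store: Arc<BlockStore>,
        runner: BlockStoreRunner,
        validator: Option<Validator>,
    ) -> anyhow::Result<()> {
        let executor = Executor { config, block_store, validator };
        scope::run!(ctx, |ctx, s| async {
            s.spawn_bg(runner.run(ctx)); // storage background task
            executor.run(ctx).await      // the node itself
        })
        .await
    }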
-use crate::config::{ConsensusConfig, ExecutorConfig, GossipConfig}; +use crate::{Config, ValidatorConfig}; use rand::Rng; -use std::collections::HashMap; use zksync_concurrency::net; -use zksync_consensus_bft::testonly::make_genesis; -use zksync_consensus_network::{consensus, testonly::Instance}; -use zksync_consensus_roles::{node, validator}; - -impl ConsensusConfig { - fn from_network_config(src: consensus::Config) -> Self { - Self { - key: src.key.public(), - public_addr: src.public_addr, - } - } -} +use zksync_consensus_network::testonly::Instance; +use zksync_consensus_roles::validator; /// Full validator configuration. -#[derive(Debug)] +#[derive(Debug, Clone)] #[non_exhaustive] -pub struct FullValidatorConfig { - /// Executor configuration. - pub node_config: ExecutorConfig, - /// Secret key of the node used for identification in the gossip network. - pub node_key: node::SecretKey, +pub struct ValidatorNode { + /// Full node configuration. + pub node: Config, /// Consensus configuration of the validator. - pub consensus_config: ConsensusConfig, - /// Secret key for consensus. - pub validator_key: validator::SecretKey, -} - -impl FullValidatorConfig { - /// Generates a validator config for a network with a single validator. - pub fn for_single_validator( - rng: &mut impl Rng, - genesis_block_payload: validator::Payload, - genesis_block_number: validator::BlockNumber, - ) -> Self { - let mut net_configs = Instance::new_configs(rng, 1, 0); - assert_eq!(net_configs.len(), 1); - let net_config = net_configs.pop().unwrap(); - let consensus_config = net_config.consensus.unwrap(); - let validator_key = consensus_config.key.clone(); - let consensus_config = ConsensusConfig::from_network_config(consensus_config); - - let (genesis_block, validators) = make_genesis( - &[validator_key.clone()], - genesis_block_payload, - genesis_block_number, - ); - let node_key = net_config.gossip.key.clone(); - let node_config = ExecutorConfig { - server_addr: *net_config.server_addr, - gossip: net_config.gossip.into(), - genesis_block, - validators, - }; - - Self { - node_config, - node_key, - consensus_config, - validator_key, - } - } - - /// Creates a new full node and configures this validator to accept incoming connections from it. - pub fn connect_full_node(&mut self, rng: &mut impl Rng) -> FullNodeConfig { - let full_node_config = FullNodeConfig::new(rng, self); - self.node_config - .gossip - .static_inbound - .insert(full_node_config.node_key.public()); - full_node_config - } + pub validator: ValidatorConfig, } -/// Configuration for a full non-validator node. -#[derive(Debug)] -#[non_exhaustive] -pub struct FullNodeConfig { - /// Executor configuration. - pub node_config: ExecutorConfig, - /// Secret key of the node used for identification in the gossip network. - pub node_key: node::SecretKey, +/// Creates a new full node and configures this validator to accept incoming connections from it. 
+pub fn connect_full_node(rng: &mut impl Rng, node: &mut Config) -> Config { + let mut new = node.clone(); + new.server_addr = *net::tcp::testonly::reserve_listener(); + new.node_key = rng.gen(); + new.gossip_static_outbound = [(node.node_key.public(), node.server_addr)].into(); + node.gossip_static_inbound.insert(new.node_key.public()); + new } -impl FullNodeConfig { - fn new(rng: &mut impl Rng, validator: &FullValidatorConfig) -> Self { - let node_key: node::SecretKey = rng.gen(); - let full_node_addr = net::tcp::testonly::reserve_listener(); - let node_config = ExecutorConfig { - server_addr: *full_node_addr, - gossip: GossipConfig { - key: node_key.public(), - static_outbound: HashMap::from([( - validator.node_key.public(), - validator.node_config.server_addr, - )]), - ..validator.node_config.gossip.clone() - }, - ..validator.node_config.clone() - }; - +impl ValidatorNode { + /// Generates a validator config for a network with a single validator. + pub fn for_single_validator(rng: &mut impl Rng) -> Self { + let net_config = Instance::new_configs(rng, 1, 0).pop().unwrap(); + let validator = net_config.consensus.unwrap(); + let gossip = net_config.gossip; Self { - node_config, - node_key, + node: Config { + server_addr: *net_config.server_addr, + validators: validator::ValidatorSet::new([validator.key.public()]).unwrap(), + node_key: gossip.key, + gossip_dynamic_inbound_limit: gossip.dynamic_inbound_limit, + gossip_static_inbound: gossip.static_inbound, + gossip_static_outbound: gossip.static_outbound, + }, + validator, } } } diff --git a/node/actors/executor/src/tests.rs b/node/actors/executor/src/tests.rs index f561abe6..ac3baf3b 100644 --- a/node/actors/executor/src/tests.rs +++ b/node/actors/executor/src/tests.rs @@ -1,126 +1,85 @@ //! High-level tests for `Executor`. 
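Typical usage in the updated tests (shown later in this diff) becomes:

    // Sketch: one validator plus one attached full node.
    let rng = &mut ctx.rng();
    let mut validator = ValidatorNode::for_single_validator(rng);
    let full_node_config = connect_full_node(rng, &mut validator.node);
    // `validator.node.gossip_static_inbound` now contains the full node's key,
    // and the full node's `gossip_static_outbound` points back at the validator.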
use super::*; -use crate::testonly::FullValidatorConfig; -use rand::{thread_rng, Rng}; +use crate::testonly::{connect_full_node, ValidatorNode}; +use rand::Rng; use std::iter; use test_casing::test_casing; use zksync_concurrency::{sync, testonly::abort_on_panic, time}; -use zksync_consensus_bft::{testonly::RandomPayloadSource, PROTOCOL_VERSION}; +use zksync_consensus_bft::{testonly, PROTOCOL_VERSION}; use zksync_consensus_roles::validator::{BlockNumber, FinalBlock, Payload}; -use zksync_consensus_storage::{BlockStore, InMemoryStorage}; +use zksync_consensus_storage::{testonly::in_memory, BlockStore, BlockStoreRunner}; -impl FullValidatorConfig { - fn gen_blocks(&self, rng: &mut impl Rng, count: usize) -> Vec { - let genesis_block = self.node_config.genesis_block.clone(); - let validators = &self.node_config.validators; - let blocks = iter::successors(Some(genesis_block), |parent| { +async fn make_store(ctx: &ctx::Ctx, genesis: FinalBlock) -> (Arc, BlockStoreRunner) { + BlockStore::new(ctx, Box::new(in_memory::BlockStore::new(genesis))) + .await + .unwrap() +} + +impl Config { + fn into_executor(self, block_store: Arc) -> Executor { + Executor { + config: self, + block_store, + validator: None, + } + } +} + +impl ValidatorNode { + fn gen_blocks<'a>(&'a self, rng: &'a mut impl Rng) -> impl 'a + Iterator { + let (genesis_block, _) = testonly::make_genesis( + &[self.validator.key.clone()], + Payload(vec![]), + BlockNumber(0), + ); + let validators = &self.node.validators; + iter::successors(Some(genesis_block), |parent| { let payload: Payload = rng.gen(); let header = validator::BlockHeader { - parent: parent.header.hash(), - number: parent.header.number.next(), + parent: parent.header().hash(), + number: parent.header().number.next(), payload: payload.hash(), }; - let commit = self.validator_key.sign_msg(validator::ReplicaCommit { + let commit = self.validator.key.sign_msg(validator::ReplicaCommit { protocol_version: PROTOCOL_VERSION, view: validator::ViewNumber(header.number.0), proposal: header, }); let justification = validator::CommitQC::from(&[commit], validators).unwrap(); - Some(FinalBlock::new(header, payload, justification)) - }); - blocks.skip(1).take(count).collect() + Some(FinalBlock::new(payload, justification)) + }) } - async fn into_executor( - self, - ctx: &ctx::Ctx, - storage: Arc, - ) -> Executor { - let mut executor = Executor::new(ctx, self.node_config, self.node_key, storage.clone()) - .await - .unwrap(); - executor - .set_validator( - self.consensus_config, - self.validator_key, - storage, - Arc::new(RandomPayloadSource), - ) - .unwrap(); - executor + fn into_executor(self, block_store: Arc) -> Executor { + Executor { + config: self.node, + block_store, + validator: Some(Validator { + config: self.validator, + replica_store: Box::new(in_memory::ReplicaStore::default()), + payload_manager: Box::new(testonly::RandomPayload), + }), + } } } -type BlockMutation = (&'static str, fn(&mut FinalBlock)); -const BLOCK_MUTATIONS: [BlockMutation; 3] = [ - ("number", |block| { - block.header.number = BlockNumber(1); - }), - ("payload", |block| { - block.payload = Payload(b"test".to_vec()); - }), - ("justification", |block| { - block.justification = thread_rng().gen(); - }), -]; - -#[test_casing(3, BLOCK_MUTATIONS)] -#[tokio::test] -async fn executor_misconfiguration(name: &str, mutation: fn(&mut FinalBlock)) { - abort_on_panic(); - let _span = tracing::info_span!("executor_misconfiguration", name).entered(); - let ctx = &ctx::root(); - let rng = &mut ctx.rng(); - - let mut validator = 
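Worth flagging: `FinalBlock::new` now takes only the payload and the commit QC, and the header is read back from the QC's proposal. A hedged illustration of the invariant this relies on (assumes `CommitQC` is cloneable and that `FinalBlock::header()` returns the proposal header, as the surrounding code suggests):

    // Sketch: the payload must hash to `justification.message.proposal.payload`.
    let block = FinalBlock::new(payload.clone(), justification.clone());
    assert_eq!(block.header().number, justification.message.proposal.number);
    assert_eq!(block.header().payload, payload.hash());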
- FullValidatorConfig::for_single_validator(rng, Payload(vec![]), BlockNumber(0)); - let genesis_block = &mut validator.node_config.genesis_block; - mutation(genesis_block); - let storage = Arc::new(InMemoryStorage::new(genesis_block.clone())); - let err = Executor::new(ctx, validator.node_config, validator.node_key, storage) - .await - .err() - .unwrap(); - tracing::info!(%err, "received expected validation error"); -} - -#[tokio::test] -async fn genesis_block_mismatch() { - abort_on_panic(); - let ctx = &ctx::root(); - let rng = &mut ctx.rng(); - - let validator = FullValidatorConfig::for_single_validator(rng, Payload(vec![]), BlockNumber(0)); - let mut genesis_block = validator.node_config.genesis_block.clone(); - genesis_block.header.number = BlockNumber(1); - let storage = Arc::new(InMemoryStorage::new(genesis_block.clone())); - let err = Executor::new(ctx, validator.node_config, validator.node_key, storage) - .await - .err() - .unwrap(); - tracing::info!(%err, "received expected validation error"); -} - #[tokio::test] async fn executing_single_validator() { abort_on_panic(); let ctx = &ctx::root(); let rng = &mut ctx.rng(); - let validator = FullValidatorConfig::for_single_validator(rng, Payload(vec![]), BlockNumber(0)); - let genesis_block = &validator.node_config.genesis_block; - let storage = InMemoryStorage::new(genesis_block.clone()); - let storage = Arc::new(storage); - let executor = validator.into_executor(ctx, storage.clone()).await; + let validator = ValidatorNode::for_single_validator(rng); + let genesis_block = validator.gen_blocks(rng).next().unwrap(); + let (storage, runner) = make_store(ctx, genesis_block.clone()).await; + let executor = validator.into_executor(storage.clone()); scope::run!(ctx, |ctx, s| async { + s.spawn_bg(runner.run(ctx)); s.spawn_bg(executor.run(ctx)); let want = BlockNumber(5); - sync::wait_for(ctx, &mut storage.subscribe_to_block_writes(), |n| { - n >= &want - }) - .await?; + sync::wait_for(ctx, &mut storage.subscribe(), |state| state.next() > want).await?; Ok(()) }) .await @@ -133,37 +92,27 @@ async fn executing_validator_and_full_node() { let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); let rng = &mut ctx.rng(); - let mut validator = - FullValidatorConfig::for_single_validator(rng, Payload(vec![]), BlockNumber(0)); - let full_node = validator.connect_full_node(rng); + let mut validator = ValidatorNode::for_single_validator(rng); + let full_node = connect_full_node(rng, &mut validator.node); - let genesis_block = &validator.node_config.genesis_block; - let validator_storage = InMemoryStorage::new(genesis_block.clone()); - let validator_storage = Arc::new(validator_storage); - let full_node_storage = InMemoryStorage::new(genesis_block.clone()); - let full_node_storage = Arc::new(full_node_storage); - let mut full_node_subscriber = full_node_storage.subscribe_to_block_writes(); + let genesis_block = validator.gen_blocks(rng).next().unwrap(); + let (validator_storage, validator_runner) = make_store(ctx, genesis_block.clone()).await; + let (full_node_storage, full_node_runner) = make_store(ctx, genesis_block.clone()).await; - let validator = validator - .into_executor(ctx, validator_storage.clone()) - .await; - let full_node = Executor::new( - ctx, - full_node.node_config, - full_node.node_key, - full_node_storage.clone(), - ) - .await - .unwrap(); + let validator = validator.into_executor(validator_storage.clone()); + let full_node = full_node.into_executor(full_node_storage.clone()); scope::run!(ctx, |ctx, s| async { + 
s.spawn_bg(validator_runner.run(ctx)); + s.spawn_bg(full_node_runner.run(ctx)); s.spawn_bg(validator.run(ctx)); s.spawn_bg(full_node.run(ctx)); - for _ in 0..5 { - let number = *sync::changed(ctx, &mut full_node_subscriber).await?; - tracing::trace!(%number, "Full node received block"); - } - anyhow::Ok(()) + let want = BlockNumber(5); + sync::wait_for(ctx, &mut full_node_storage.subscribe(), |state| { + state.next() > want + }) + .await?; + Ok(()) }) .await .unwrap(); @@ -176,73 +125,55 @@ async fn syncing_full_node_from_snapshot(delay_block_storage: bool) { let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); let rng = &mut ctx.rng(); - let mut validator = - FullValidatorConfig::for_single_validator(rng, Payload(vec![]), BlockNumber(0)); - let mut full_node = validator.connect_full_node(rng); + let mut validator = ValidatorNode::for_single_validator(rng); + let full_node = connect_full_node(rng, &mut validator.node); - let genesis_block = &validator.node_config.genesis_block; - let blocks = validator.gen_blocks(rng, 10); - let validator_storage = InMemoryStorage::new(genesis_block.clone()); - let validator_storage = Arc::new(validator_storage); - if !delay_block_storage { - // Instead of running consensus on the validator, add the generated blocks manually. - for block in &blocks { - validator_storage.put_block(ctx, block).await.unwrap(); - } - } - let validator = Executor::new( - ctx, - validator.node_config, - validator.node_key, - validator_storage.clone(), - ) - .await - .unwrap(); + let blocks: Vec<_> = validator.gen_blocks(rng).take(11).collect(); + let (validator_storage, validator_runner) = make_store(ctx, blocks[0].clone()).await; + let validator = validator.node.into_executor(validator_storage.clone()); // Start a full node from a snapshot. - full_node.node_config.genesis_block = blocks[3].clone(); - let full_node_storage = InMemoryStorage::new(blocks[3].clone()); - let full_node_storage = Arc::new(full_node_storage); - let mut full_node_subscriber = full_node_storage.subscribe_to_block_writes(); + let (full_node_storage, full_node_runner) = make_store(ctx, blocks[4].clone()).await; - let full_node = Executor::new( - ctx, - full_node.node_config, - full_node.node_key, - full_node_storage.clone(), - ) - .await - .unwrap(); + let full_node = Executor { + config: full_node, + block_store: full_node_storage.clone(), + validator: None, + }; scope::run!(ctx, |ctx, s| async { + s.spawn_bg(validator_runner.run(ctx)); + s.spawn_bg(full_node_runner.run(ctx)); + if !delay_block_storage { + // Instead of running consensus on the validator, add the generated blocks manually. + for block in &blocks { + validator_storage + .queue_block(ctx, block.clone()) + .await + .unwrap(); + } + } s.spawn_bg(validator.run(ctx)); s.spawn_bg(full_node.run(ctx)); if delay_block_storage { // Emulate the validator gradually adding new blocks to the storage. s.spawn_bg(async { - for block in &blocks { + for block in &blocks[1..] { ctx.sleep(time::Duration::milliseconds(500)).await?; - validator_storage.put_block(ctx, block).await?; + validator_storage.queue_block(ctx, block.clone()).await?; } Ok(()) }); } - loop { - let last_contiguous_full_node_block = - full_node_storage.last_contiguous_block_number(ctx).await?; - tracing::trace!( - %last_contiguous_full_node_block, - "Full node updated last contiguous block" - ); - if last_contiguous_full_node_block == BlockNumber(10) { - break; // The full node has received all blocks! - } - // Wait until the node storage is updated. 
- let number = *sync::changed(ctx, &mut full_node_subscriber).await?; - tracing::trace!(%number, "Full node received block"); - } + sync::wait_for(ctx, &mut full_node_storage.subscribe(), |state| { + let last = state.last.header().number; + tracing::trace!(%last, "Full node updated last block"); + last >= BlockNumber(10) + }) + .await + .unwrap(); // Check that the node didn't receive any blocks with number lesser than the initial snapshot block. for lesser_block_number in 0..3 { diff --git a/node/actors/network/Cargo.toml b/node/actors/network/Cargo.toml index 13bec3d9..3f9bb95c 100644 --- a/node/actors/network/Cargo.toml +++ b/node/actors/network/Cargo.toml @@ -34,4 +34,4 @@ tokio.workspace = true zksync_protobuf_build.workspace = true [lints] -workspace = true \ No newline at end of file +workspace = true diff --git a/node/actors/network/src/io.rs b/node/actors/network/src/io.rs index 1f7a3c7e..336d665c 100644 --- a/node/actors/network/src/io.rs +++ b/node/actors/network/src/io.rs @@ -57,7 +57,6 @@ pub struct ConsensusReq { #[derive(Debug, Clone, PartialEq)] pub struct SyncState { pub first_stored_block: validator::CommitQC, - pub last_contiguous_stored_block: validator::CommitQC, pub last_stored_block: validator::CommitQC, } @@ -65,7 +64,6 @@ pub struct SyncState { #[derive(Debug, Clone, Copy)] pub struct SyncStateNumbers { pub first_stored_block: validator::BlockNumber, - pub last_contiguous_stored_block: validator::BlockNumber, pub last_stored_block: validator::BlockNumber, } @@ -74,7 +72,6 @@ impl SyncState { pub fn numbers(&self) -> SyncStateNumbers { SyncStateNumbers { first_stored_block: self.first_stored_block.message.proposal.number, - last_contiguous_stored_block: self.last_contiguous_stored_block.message.proposal.number, last_stored_block: self.last_stored_block.message.proposal.number, } } diff --git a/node/actors/network/src/proto/gossip.proto b/node/actors/network/src/proto/gossip.proto index fce8b6a5..850191d1 100644 --- a/node/actors/network/src/proto/gossip.proto +++ b/node/actors/network/src/proto/gossip.proto @@ -18,17 +18,14 @@ message SyncValidatorAddrsResp { repeated roles.validator.Signed net_addresses = 1; } -// Current block sync state of a node periodically sent by a node. +// State of the local block store. +// A node is expected to store a continuous range of blocks at all times +// and actively fetch newest blocks. message SyncState { // First L2 block that the node has locally. - // Will be always 0 until state pruning is introduced. optional roles.validator.CommitQC first_stored_block = 1; - // The upper bound (inclusive) of the contiguous L2 block range - // starting from first_stored_block_number. The node has all L2 blocks - // in this range, but misses some L2 blocks beyond it. - optional roles.validator.CommitQC last_contiguous_stored_block = 2; - // Certified header of the last L2 block that the node has locally. - optional roles.validator.CommitQC last_stored_block = 3; + // Last L2 block that the node has locally. + optional roles.validator.CommitQC last_stored_block = 2; } // Response to `SyncState` acknowledging its processing. 
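With only the two endpoints on the wire, the receiver has to validate the advertised range itself. A sketch of the checks, mirroring what `PeerStates::update` does further down in this diff (`BlockStoreState` is what the sync_blocks actor converts the network `SyncState` into):

    use anyhow::Context as _;

    // Sketch: reject a malformed range and verify the QC of the newest block.
    fn check_range(
        state: &BlockStoreState,
        validators: &validator::ValidatorSet,
        threshold: usize,
    ) -> anyhow::Result<()> {
        anyhow::ensure!(state.first.header().number <= state.last.header().number);
        state.last.verify(validators, threshold).context("state.last.verify()")?;
        Ok(())
    }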
diff --git a/node/actors/network/src/rpc/sync_blocks.rs b/node/actors/network/src/rpc/sync_blocks.rs index 69ba9501..827eac50 100644 --- a/node/actors/network/src/rpc/sync_blocks.rs +++ b/node/actors/network/src/rpc/sync_blocks.rs @@ -29,8 +29,6 @@ impl ProtoFmt for io::SyncState { Ok(Self { first_stored_block: read_required(&message.first_stored_block) .context("first_stored_block")?, - last_contiguous_stored_block: read_required(&message.last_contiguous_stored_block) - .context("last_contiguous_stored_block")?, last_stored_block: read_required(&message.last_stored_block) .context("last_stored_block")?, }) @@ -39,7 +37,6 @@ impl ProtoFmt for io::SyncState { fn build(&self) -> Self::Proto { Self::Proto { first_stored_block: Some(self.first_stored_block.build()), - last_contiguous_stored_block: Some(self.last_contiguous_stored_block.build()), last_stored_block: Some(self.last_stored_block.build()), } } diff --git a/node/actors/network/src/testonly.rs b/node/actors/network/src/testonly.rs index d913e142..d666feab 100644 --- a/node/actors/network/src/testonly.rs +++ b/node/actors/network/src/testonly.rs @@ -224,7 +224,6 @@ impl SyncState { pub(crate) fn gen(rng: &mut impl Rng, number: validator::BlockNumber) -> Self { let mut this = Self { first_stored_block: rng.gen(), - last_contiguous_stored_block: rng.gen(), last_stored_block: rng.gen(), }; this.last_stored_block.message.proposal.number = number; diff --git a/node/actors/sync_blocks/src/config.rs b/node/actors/sync_blocks/src/config.rs index 3bc44839..d524cd72 100644 --- a/node/actors/sync_blocks/src/config.rs +++ b/node/actors/sync_blocks/src/config.rs @@ -36,7 +36,7 @@ impl Config { Ok(Self { validator_set, consensus_threshold, - max_concurrent_blocks: 10, + max_concurrent_blocks: 20, max_concurrent_blocks_per_peer: 5, sleep_interval_for_get_block: time::Duration::seconds(10), }) diff --git a/node/actors/sync_blocks/src/lib.rs b/node/actors/sync_blocks/src/lib.rs index 95e1be8f..cda9050c 100644 --- a/node/actors/sync_blocks/src/lib.rs +++ b/node/actors/sync_blocks/src/lib.rs @@ -2,23 +2,15 @@ //! //! This crate contains an actor implementing block syncing among nodes, which is tied to the gossip //! network RPCs. -use crate::{ - io::{InputMessage, OutputMessage}, - message_handler::SyncBlocksMessageHandler, -}; +use crate::io::{InputMessage, OutputMessage}; use std::sync::Arc; -use tracing::instrument; -use zksync_concurrency::{ - ctx, scope, - sync::{self, watch}, -}; -use zksync_consensus_network::io::SyncState; -use zksync_consensus_storage::WriteBlockStore; +use zksync_concurrency::{ctx, error::Wrap as _, scope}; +use zksync_consensus_network::io::{GetBlockError, SyncBlocksRequest}; +use zksync_consensus_storage::{BlockStore, BlockStoreState}; use zksync_consensus_utils::pipe::ActorPipe; mod config; pub mod io; -mod message_handler; mod peers; #[cfg(test)] mod tests; @@ -26,53 +18,52 @@ mod tests; pub use crate::config::Config; use crate::peers::PeerStates; -/// Block syncing actor responsible for synchronizing L2 blocks with other nodes. -#[derive(Debug)] -pub struct SyncBlocks { - /// Part of the actor responsible for handling inbound messages. - pub(crate) message_handler: SyncBlocksMessageHandler, - /// Peer states. - pub(crate) peer_states: PeerStates, - /// Sender of `SyncState` updates. - pub(crate) state_sender: watch::Sender, -} - -impl SyncBlocks { - /// Creates a new actor. - pub async fn new( +impl Config { + /// Runs the sync_blocks actor. 
+ pub async fn run( + self, ctx: &ctx::Ctx, - pipe: ActorPipe, - storage: Arc, - config: Config, - ) -> anyhow::Result { - let (state_sender, _) = watch::channel(Self::get_sync_state(ctx, storage.as_ref()).await?); - let (peer_states, peer_states_handle) = PeerStates::new(pipe.send, storage.clone(), config); - let inner = SyncBlocksMessageHandler { - message_receiver: pipe.recv, - storage, - peer_states_handle, - }; - Ok(Self { - message_handler: inner, - peer_states, - state_sender, - }) - } - - /// Subscribes to `SyncState` updates emitted by the actor. - pub fn subscribe_to_state_updates(&self) -> watch::Receiver { - self.state_sender.subscribe() - } - - /// Runs the actor processing incoming requests until `ctx` is canceled. - #[instrument(level = "trace", skip_all, err)] - pub async fn run(self, ctx: &ctx::Ctx) -> anyhow::Result<()> { - let storage = self.message_handler.storage.clone(); - - let result = scope::run!(ctx, |ctx, s| async { - s.spawn_bg(Self::emit_state_updates(ctx, storage, &self.state_sender)); - s.spawn_bg(self.peer_states.run(ctx)); - self.message_handler.process_messages(ctx).await + mut pipe: ActorPipe, + storage: Arc, + ) -> anyhow::Result<()> { + let peer_states = PeerStates::new(self, storage.clone(), pipe.send); + let result: ctx::Result<()> = scope::run!(ctx, |ctx, s| async { + s.spawn_bg(async { Ok(peer_states.run_block_fetcher(ctx).await?) }); + loop { + match pipe.recv.recv(ctx).await? { + InputMessage::Network(SyncBlocksRequest::UpdatePeerSyncState { + peer, + state, + response, + }) => { + let res = peer_states.update( + &peer, + BlockStoreState { + first: state.first_stored_block, + last: state.last_stored_block, + }, + ); + if let Err(err) = res { + tracing::info!(%err, ?peer, "peer_states.update()"); + } + response.send(()).ok(); + } + InputMessage::Network(SyncBlocksRequest::GetBlock { + block_number, + response, + }) => { + response + .send( + storage + .block(ctx, block_number) + .await + .wrap("storage.block()")? + .ok_or(GetBlockError::NotSynced), + ) + .ok(); + } + } + } }) .await; @@ -83,42 +74,4 @@ impl SyncBlocks { ctx::Error::Internal(err) => Err(err), }) } - - #[instrument(level = "trace", skip_all, err)] - async fn emit_state_updates( - ctx: &ctx::Ctx, - storage: Arc, - state_sender: &watch::Sender, - ) -> ctx::Result<()> { - let mut storage_subscriber = storage.subscribe_to_block_writes(); - loop { - let state = Self::get_sync_state(ctx, storage.as_ref()).await?; - if state_sender.send(state).is_err() { - tracing::info!("`SyncState` subscriber dropped; exiting"); - return Ok(()); - } - - let block_number = *sync::changed(ctx, &mut storage_subscriber).await?; - tracing::trace!(%block_number, "Received block write update"); - } - } - - /// Gets the current sync state of this node based on information from the storage. - #[instrument(level = "trace", skip_all)] - async fn get_sync_state( - ctx: &ctx::Ctx, - storage: &dyn WriteBlockStore, - ) -> ctx::Result { - let last_contiguous_block_number = storage.last_contiguous_block_number(ctx).await?; - let last_contiguous_stored_block = storage - .block(ctx, last_contiguous_block_number) - .await? 
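For reference, the executor changes earlier in this diff show the expected wiring of this new entry point; roughly (names reused from the executor's `run()`):

    // Sketch of the caller side.
    sync_blocks::Config::new(
        validator_set.clone(),
        bft::misc::consensus_threshold(validator_set.len()),
    )?
    .run(ctx, sync_blocks_actor_pipe, block_store.clone())
    .await
    .context("Syncing blocks stopped")?;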
- .expect("`last_contiguous_stored_block` disappeared"); - - Ok(SyncState { - first_stored_block: storage.first_block(ctx).await?.justification, - last_contiguous_stored_block: last_contiguous_stored_block.justification, - last_stored_block: storage.head_block(ctx).await?.justification, - }) - } } diff --git a/node/actors/sync_blocks/src/message_handler.rs b/node/actors/sync_blocks/src/message_handler.rs deleted file mode 100644 index dfd4f28d..00000000 --- a/node/actors/sync_blocks/src/message_handler.rs +++ /dev/null @@ -1,62 +0,0 @@ -//! Inner details of `SyncBlocks` actor. - -use crate::{io::InputMessage, peers::PeerStatesHandle}; -use std::sync::Arc; -use tracing::instrument; -use zksync_concurrency::ctx::{self, channel}; -use zksync_consensus_network::io::{GetBlockError, GetBlockResponse, SyncBlocksRequest}; -use zksync_consensus_roles::validator::BlockNumber; -use zksync_consensus_storage::WriteBlockStore; - -/// Inner details of `SyncBlocks` actor allowing to process messages. -#[derive(Debug)] -pub(crate) struct SyncBlocksMessageHandler { - /// Pipe using which the actor sends / receives messages. - pub(crate) message_receiver: channel::UnboundedReceiver, - /// Persistent storage for blocks. - pub(crate) storage: Arc, - /// Set of validators authoring blocks. - pub(crate) peer_states_handle: PeerStatesHandle, -} - -impl SyncBlocksMessageHandler { - /// Implements the message processing loop. - #[instrument(level = "trace", skip_all, err)] - pub(crate) async fn process_messages(mut self, ctx: &ctx::Ctx) -> ctx::Result<()> { - while let Ok(input_message) = self.message_receiver.recv(ctx).await { - match input_message { - InputMessage::Network(SyncBlocksRequest::UpdatePeerSyncState { - peer, - state, - response, - }) => { - self.peer_states_handle.update(peer, *state); - response.send(()).ok(); - } - InputMessage::Network(SyncBlocksRequest::GetBlock { - block_number, - response, - }) => { - response.send(self.get_block(ctx, block_number).await?).ok(); - } - } - } - Ok(()) - } - - /// Gets a block with the specified `number` from the storage. - /// - /// **This method is blocking.** - #[instrument(level = "trace", skip(self, ctx), err)] - async fn get_block( - &self, - ctx: &ctx::Ctx, - number: BlockNumber, - ) -> ctx::Result { - Ok(self - .storage - .block(ctx, number) - .await? - .ok_or(GetBlockError::NotSynced)) - } -} diff --git a/node/actors/sync_blocks/src/peers/events.rs b/node/actors/sync_blocks/src/peers/events.rs index 0a4409ff..5f7ee949 100644 --- a/node/actors/sync_blocks/src/peers/events.rs +++ b/node/actors/sync_blocks/src/peers/events.rs @@ -8,17 +8,11 @@ use zksync_consensus_roles::{node, validator::BlockNumber}; pub(super) enum PeerStateEvent { /// Node has successfully downloaded the specified block. GotBlock(BlockNumber), - /// Block retrieval was canceled due to block getting persisted using other means. - CanceledBlock(BlockNumber), /// Received an invalid block from the peer. - GotInvalidBlock { + RpcFailed { peer_key: node::PublicKey, block_number: BlockNumber, }, - /// Peer state was updated. Includes creating a state for a newly connected peer. - PeerUpdated(node::PublicKey), - /// Received invalid `SyncState` from a peer. - InvalidPeerUpdate(node::PublicKey), /// Peer was disconnected (i.e., it has dropped a request). 
- PeerDisconnected(node::PublicKey), + PeerDropped(node::PublicKey), } diff --git a/node/actors/sync_blocks/src/peers/mod.rs b/node/actors/sync_blocks/src/peers/mod.rs index 5bf3a9fa..ae4e3f1c 100644 --- a/node/actors/sync_blocks/src/peers/mod.rs +++ b/node/actors/sync_blocks/src/peers/mod.rs @@ -3,376 +3,217 @@ use self::events::PeerStateEvent; use crate::{io, Config}; use anyhow::Context as _; -use std::{collections::HashMap, ops, sync::Arc}; -use tracing::instrument; +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, +}; use zksync_concurrency::{ ctx::{self, channel}, - oneshot, scope, - sync::{self, watch, Mutex, Semaphore}, + oneshot, scope, sync, }; -use zksync_consensus_network::io::{SyncBlocksInputMessage, SyncState}; +use zksync_consensus_network::io::SyncBlocksInputMessage; use zksync_consensus_roles::{ node, - validator::{BlockNumber, BlockValidationError, FinalBlock}, + validator::{BlockNumber, FinalBlock}, }; -use zksync_consensus_storage::WriteBlockStore; +use zksync_consensus_storage::{BlockStore, BlockStoreState}; +use zksync_consensus_utils::no_copy::NoCopy; mod events; #[cfg(test)] mod tests; -type PeerStateUpdate = (node::PublicKey, SyncState); - #[derive(Debug)] struct PeerState { - first_stored_block: BlockNumber, - last_contiguous_stored_block: BlockNumber, - get_block_semaphore: Arc, -} - -impl PeerState { - fn has_block(&self, number: BlockNumber) -> bool { - let range = self.first_stored_block..=self.last_contiguous_stored_block; - range.contains(&number) - } + state: BlockStoreState, + get_block_semaphore: Arc, } /// Handle for [`PeerStates`] allowing to send updates to it. -#[derive(Debug, Clone)] -pub(crate) struct PeerStatesHandle { - updates_sender: channel::UnboundedSender, -} - -impl PeerStatesHandle { - /// Notifies [`PeerStates`] about an updated [`SyncState`] of a peer. - pub(crate) fn update(&self, peer_key: node::PublicKey, sync_state: SyncState) { - self.updates_sender.send((peer_key, sync_state)); - } -} - -type PendingBlocks = HashMap>; - -/// View of peers (or more precisely, connections with peers) w.r.t. block syncing. #[derive(Debug)] pub(crate) struct PeerStates { - updates_receiver: Option>, - events_sender: Option>, - peers: Mutex>, - pending_blocks: Mutex, - message_sender: channel::UnboundedSender, - storage: Arc, config: Config, + storage: Arc, + message_sender: channel::UnboundedSender, + + peers: Mutex>, + highest_peer_block: sync::watch::Sender, + events_sender: Option>, } impl PeerStates { /// Creates a new instance together with a handle. pub(crate) fn new( - message_sender: channel::UnboundedSender, - storage: Arc, config: Config, - ) -> (Self, PeerStatesHandle) { - let (updates_sender, updates_receiver) = channel::unbounded(); - let this = Self { - updates_receiver: Some(updates_receiver), - events_sender: None, - peers: Mutex::default(), - pending_blocks: Mutex::default(), - message_sender, - storage, + storage: Arc, + message_sender: channel::UnboundedSender, + ) -> Self { + Self { config, - }; - let handle = PeerStatesHandle { updates_sender }; - (this, handle) - } - - /// Runs the sub-actor. This will: - /// - /// 1. Get information about missing blocks from the storage. - /// 2. Spawn a task processing `SyncState`s from peers. - /// 3. Spawn a task to get each missing block. 
- pub(crate) async fn run(mut self, ctx: &ctx::Ctx) -> ctx::Result<()> { - let updates_receiver = self.updates_receiver.take().unwrap(); - let storage = self.storage.as_ref(); - let blocks_subscriber = storage.subscribe_to_block_writes(); - let get_block_semaphore = Semaphore::new(self.config.max_concurrent_blocks); - let (new_blocks_sender, mut new_blocks_subscriber) = watch::channel(BlockNumber(0)); - - scope::run!(ctx, |ctx, s| async { - let start_number = storage.last_contiguous_block_number(ctx).await?; - let mut last_block_number = storage.head_block(ctx).await?.header.number; - let missing_blocks = storage - .missing_block_numbers(ctx, start_number..last_block_number) - .await?; - new_blocks_sender.send_replace(last_block_number); - - s.spawn_bg(self.run_updates(ctx, updates_receiver, new_blocks_sender)); - s.spawn_bg(self.cancel_received_block_tasks(ctx, blocks_subscriber)); - - for block_number in missing_blocks { - let get_block_permit = sync::acquire(ctx, &get_block_semaphore).await?; - s.spawn(self.get_and_save_block(ctx, block_number, get_block_permit, storage)); - } - - loop { - let new_last_block_number = *sync::changed(ctx, &mut new_blocks_subscriber).await?; - let new_block_numbers = last_block_number.next()..new_last_block_number.next(); - if new_block_numbers.is_empty() { - continue; - } - tracing::trace!( - ?new_block_numbers, - "Filtering block numbers as per storage availability" - ); - - let missing_blocks = storage - .missing_block_numbers(ctx, new_block_numbers) - .await?; - if missing_blocks.is_empty() { - continue; - } - tracing::trace!( - ?missing_blocks, - "Enqueuing requests for getting blocks from peers" - ); - - for block_number in missing_blocks { - let get_block_permit = sync::acquire(ctx, &get_block_semaphore).await?; - s.spawn(self.get_and_save_block(ctx, block_number, get_block_permit, storage)); - } - last_block_number = new_last_block_number; - } - }) - .await - } + storage, + message_sender, - async fn run_updates( - &self, - ctx: &ctx::Ctx, - mut updates_receiver: channel::UnboundedReceiver, - new_blocks_sender: watch::Sender, - ) -> ctx::Result<()> { - loop { - let (peer_key, sync_state) = updates_receiver.recv(ctx).await?; - let new_last_block_number = self - .update_peer_sync_state(ctx, peer_key, sync_state) - .await?; - new_blocks_sender.send_if_modified(|number| { - if *number < new_last_block_number { - *number = new_last_block_number; - return true; - } - false - }); + peers: Mutex::default(), + highest_peer_block: sync::watch::channel(BlockNumber(0)).0, + events_sender: None, } } - /// Cancels pending block retrieval for blocks that appear in the storage using other means - /// (e.g., thanks to the consensus algorithm). This works at best-effort basis; it's not guaranteed - /// that this method will timely cancel all block retrievals. - #[instrument(level = "trace", skip_all, err)] - async fn cancel_received_block_tasks( + /// Updates the known `BlockStore` state of the given peer. + /// This information is used to decide from which peer to fetch + /// a given block from. + pub(crate) fn update( &self, - ctx: &ctx::Ctx, - mut subscriber: watch::Receiver, - ) -> ctx::Result<()> { - loop { - let block_number = *sync::changed(ctx, &mut subscriber).await?; - if sync::lock(ctx, &self.pending_blocks) - .await? - .remove(&block_number) - .is_some() - { - tracing::trace!( - %block_number, - "Block persisted using other means; canceling its retrieval" - ); - // Retrieval is canceled by dropping the corresponding `oneshot::Sender`. 
- } - } - } + peer: &node::PublicKey, + state: BlockStoreState, + ) -> anyhow::Result<()> { + use std::collections::hash_map::Entry; - /// Returns the last trusted block number stored by the peer. - #[instrument( - level = "trace", - err, - skip(self, ctx, state), - fields(state = ?state.numbers()) - )] - async fn update_peer_sync_state( - &self, - ctx: &ctx::Ctx, - peer_key: node::PublicKey, - state: SyncState, - ) -> ctx::OrCanceled { - let numbers = match self.validate_sync_state(state) { - Ok(numbers) => numbers, - Err(err) => { - tracing::warn!(%err, "Invalid `SyncState` received from peer"); - if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::InvalidPeerUpdate(peer_key)); - } - return Ok(BlockNumber(0)); - // TODO: ban peer etc. + let last = state.last.header().number; + anyhow::ensure!(state.first.header().number <= state.last.header().number); + state + .last + .verify(&self.config.validator_set, self.config.consensus_threshold) + .context("state.last.verify()")?; + let mut peers = self.peers.lock().unwrap(); + match peers.entry(peer.clone()) { + Entry::Occupied(mut e) => e.get_mut().state = state, + Entry::Vacant(e) => { + let permits = self.config.max_concurrent_blocks_per_peer; + e.insert(PeerState { + state, + get_block_semaphore: Arc::new(sync::Semaphore::new(permits)), + }); } - }; - let first_stored_block = *numbers.start(); - let last_contiguous_stored_block = *numbers.end(); - - let mut peers = sync::lock(ctx, &self.peers).await?; - let permits = self.config.max_concurrent_blocks_per_peer; - let peer_state = peers.entry(peer_key.clone()).or_insert_with(|| PeerState { - first_stored_block, - last_contiguous_stored_block, - get_block_semaphore: Arc::new(Semaphore::new(permits)), - }); - let prev_contiguous_stored_block = peer_state.last_contiguous_stored_block; - if last_contiguous_stored_block < prev_contiguous_stored_block { - tracing::warn!( - %last_contiguous_stored_block, - %prev_contiguous_stored_block, - "Bogus state update from peer: new `last_contiguous_stored_block` value \ - ({last_contiguous_stored_block}) is lesser than the old value ({prev_contiguous_stored_block})" - ); - } - - peer_state.first_stored_block = first_stored_block; - // If `first_stored_block` increases, we could cancel getting pruned blocks from the peer here. - // However, the peer will respond such requests with a "missing block" error anyway, - // and new requests won't be routed to it because of updated `PeerState`, - // so having no special handling is fine. - // Likewise, no specialized handling is required for decreasing `first_stored_block`; - // if this leads to an ability to fetch some of the pending blocks, it'll be discovered - // after `sleep_interval_for_get_block` (i.e., soon enough). 
- - tracing::trace!( - %prev_contiguous_stored_block, - %last_contiguous_stored_block, - "Updating last contiguous stored block for peer" - ); - peer_state.last_contiguous_stored_block = last_contiguous_stored_block; - drop(peers); - - if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::PeerUpdated(peer_key)); } - Ok(last_contiguous_stored_block) + self.highest_peer_block + .send_if_modified(|highest_peer_block| { + if *highest_peer_block >= last { + return false; + } + *highest_peer_block = last; + true + }); + Ok(()) } - fn validate_sync_state( - &self, - state: SyncState, - ) -> anyhow::Result> { - let numbers = state.numbers(); - anyhow::ensure!( - numbers.first_stored_block <= numbers.last_contiguous_stored_block, - "Invariant violated: numbers.first_stored_block <= numbers.last_contiguous_stored_block" - ); - anyhow::ensure!( - numbers.last_contiguous_stored_block <= numbers.last_stored_block, - "Invariant violated: numbers.last_contiguous_stored_block <= numbers.last_stored_block" - ); - - state - .last_contiguous_stored_block - .verify(&self.config.validator_set, self.config.consensus_threshold) - .context("Failed verifying `last_contiguous_stored_block`")?; - // We don't verify QCs for the last stored block since it is not used - // in the following logic. The first stored block is not verified as well since it doesn't - // extend the set of blocks a peer should have. To reflect this, the method consumes `SyncState` - // and returns the validated block numbers. - Ok(numbers.first_stored_block..=numbers.last_contiguous_stored_block) + /// Task fetching blocks from peers which are not present in storage. + pub(crate) async fn run_block_fetcher(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<()> { + let sem = sync::Semaphore::new(self.config.max_concurrent_blocks); + scope::run!(ctx, |ctx, s| async { + let mut next = self.storage.subscribe().borrow().next(); + let mut highest_peer_block = self.highest_peer_block.subscribe(); + loop { + sync::wait_for(ctx, &mut highest_peer_block, |highest_peer_block| { + highest_peer_block >= &next + }) + .await?; + let permit = sync::acquire(ctx, &sem).await?; + let block_number = NoCopy::from(next); + next = next.next(); + s.spawn(async { + let _permit = permit; + self.fetch_block(ctx, block_number.into_inner()).await + }); + } + }) + .await } - async fn get_and_save_block( - &self, - ctx: &ctx::Ctx, - block_number: BlockNumber, - get_block_permit: sync::SemaphorePermit<'_>, - storage: &dyn WriteBlockStore, - ) -> ctx::Result<()> { - let (stop_sender, stop_receiver) = oneshot::channel(); - sync::lock(ctx, &self.pending_blocks) - .await? - .insert(block_number, stop_sender); - - let block_result = scope::run!(ctx, |ctx, s| async { + /// Fetches the block from peers and puts it to storage. + /// Early exits if the block appeared in storage from other source. + async fn fetch_block(&self, ctx: &ctx::Ctx, block_number: BlockNumber) -> ctx::OrCanceled<()> { + let _ = scope::run!(ctx, |ctx, s| async { s.spawn_bg(async { - // Cancel the scope in either of these events: - // - The parent scope is canceled. - // - The `stop_sender` is dropped. - stop_receiver.recv_or_disconnected(ctx).await.ok(); - s.cancel(); - Ok(()) + let block = self.fetch_block_from_peers(ctx, block_number).await?; + self.storage.queue_block(ctx, block).await }); - self.get_block(ctx, block_number).await + // Cancel fetching as soon as block is queued for storage. 
+ self.storage.wait_until_queued(ctx, block_number).await }) .await; - - drop(get_block_permit); - sync::lock(ctx, &self.pending_blocks) - .await? - .remove(&block_number); - - if let Ok(block) = block_result { - if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::GotBlock(block_number)); - } - storage.put_block(ctx, &block).await?; - } else { - tracing::trace!(%block_number, "Getting block canceled"); - if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::CanceledBlock(block_number)); - } - } - Ok(()) + self.storage.wait_until_persisted(ctx, block_number).await } - #[instrument(level = "trace", skip(self, ctx))] - async fn get_block( + /// Fetches the block from peers. + async fn fetch_block_from_peers( &self, ctx: &ctx::Ctx, - block_number: BlockNumber, + number: BlockNumber, ) -> ctx::OrCanceled { - loop { - let Some((peer_key, _permit)) = - Self::acquire_peer_permit(&*sync::lock(ctx, &self.peers).await?, block_number) - else { + while ctx.is_active() { + let Some((peer, permit)) = self.try_acquire_peer_permit(number) else { let sleep_interval = self.config.sleep_interval_for_get_block; ctx.sleep(sleep_interval).await?; continue; }; - - let block = self - .get_block_from_peer(ctx, peer_key.clone(), block_number) - .await?; - let Some(block) = block else { continue }; - - if let Err(err) = self.validate_block(block_number, &block) { - tracing::warn!( - %err, ?peer_key, %block_number, - "Received invalid block #{block_number} from peer {peer_key:?}" - ); - // TODO: ban peer etc. - if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::GotInvalidBlock { - peer_key, - block_number, - }); + let res = self.fetch_block_from_peer(ctx, &peer, number).await; + drop(permit); + match res { + Ok(block) => { + if let Some(send) = &self.events_sender { + send.send(PeerStateEvent::GotBlock(number)); + } + return Ok(block); + } + Err(ctx::Error::Canceled(_)) => { + tracing::info!(%number, ?peer, "get_block() call canceled"); + } + Err(err) => { + tracing::info!(%err, %number, ?peer, "get_block() failed"); + if let Some(send) = &self.events_sender { + send.send(PeerStateEvent::RpcFailed { + peer_key: peer.clone(), + block_number: number, + }); + } + self.drop_peer(&peer); } - } else { - return Ok(block); } } + Err(ctx::Canceled) } - // It's important to keep this method sync; we don't want to hold `peers` lock across wait points. - fn acquire_peer_permit( - peers: &HashMap, + /// Fetches a block from the specified peer. + async fn fetch_block_from_peer( + &self, + ctx: &ctx::Ctx, + peer: &node::PublicKey, + number: BlockNumber, + ) -> ctx::Result { + let (response, response_receiver) = oneshot::channel(); + let message = SyncBlocksInputMessage::GetBlock { + recipient: peer.clone(), + number, + response, + }; + self.message_sender.send(message.into()); + let block = response_receiver + .recv_or_disconnected(ctx) + .await? + .context("no response")? 
+ .context("RPC error")?; + if block.header().number != number { + return Err(anyhow::anyhow!( + "block does not have requested number (requested: {number}, got: {})", + block.header().number + ) + .into()); + } + block + .validate(&self.config.validator_set, self.config.consensus_threshold) + .context("block.validate()")?; + Ok(block) + } + + fn try_acquire_peer_permit( + &self, block_number: BlockNumber, ) -> Option<(node::PublicKey, sync::OwnedSemaphorePermit)> { + let peers = self.peers.lock().unwrap(); let mut peers_with_no_permits = vec![]; let eligible_peers_info = peers.iter().filter(|(peer_key, state)| { - if !state.has_block(block_number) { + if !state.state.contains(block_number) { return false; } let available_permits = state.get_block_semaphore.available_permits(); @@ -397,7 +238,6 @@ impl PeerStates { Some((peer_key.clone(), permit)) } else { tracing::debug!( - %block_number, ?peers_with_no_permits, "No peers to query block #{block_number}" ); @@ -405,67 +245,14 @@ impl PeerStates { } } - #[instrument(level = "trace", skip(self, ctx), err)] - async fn get_block_from_peer( - &self, - ctx: &ctx::Ctx, - recipient: node::PublicKey, - number: BlockNumber, - ) -> ctx::OrCanceled> { - let (response, response_receiver) = oneshot::channel(); - let message = SyncBlocksInputMessage::GetBlock { - recipient: recipient.clone(), - number, - response, - }; - self.message_sender.send(message.into()); - tracing::trace!("Requested block from peer"); - - let response = response_receiver.recv_or_disconnected(ctx).await?; - match response { - Ok(Ok(block)) => return Ok(Some(block)), - Ok(Err(rpc_err)) => { - tracing::warn!( - err = %rpc_err, - "get_block({number}) returned an error" - ); - } - Err(_) => { - tracing::info!("get_block({number}) request was dropped by network"); - self.disconnect_peer(ctx, &recipient).await?; - } - } - Ok(None) - } - - fn validate_block( - &self, - block_number: BlockNumber, - block: &FinalBlock, - ) -> Result<(), BlockValidationError> { - if block.header.number != block_number { - let err = anyhow::anyhow!( - "block does not have requested number (requested: {block_number}, got: {})", - block.header.number - ); - return Err(BlockValidationError::Other(err)); - } - block.validate(&self.config.validator_set, self.config.consensus_threshold) - } - - #[instrument(level = "trace", skip(self, ctx))] - async fn disconnect_peer( - &self, - ctx: &ctx::Ctx, - peer_key: &node::PublicKey, - ) -> ctx::OrCanceled<()> { - let mut peers = sync::lock(ctx, &self.peers).await?; - if let Some(state) = peers.remove(peer_key) { - tracing::trace!(?state, "Dropping peer connection state"); + /// Drops peer state. + fn drop_peer(&self, peer: &node::PublicKey) { + if self.peers.lock().unwrap().remove(peer).is_none() { + return; } + tracing::debug!(?peer, "Dropping peer state"); if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::PeerDisconnected(peer_key.clone())); + events_sender.send(PeerStateEvent::PeerDropped(peer.clone())); } - Ok(()) } } diff --git a/node/actors/sync_blocks/src/peers/tests/basics.rs b/node/actors/sync_blocks/src/peers/tests/basics.rs index 984ec1d9..dea7e712 100644 --- a/node/actors/sync_blocks/src/peers/tests/basics.rs +++ b/node/actors/sync_blocks/src/peers/tests/basics.rs @@ -1,6 +1,7 @@ //! Basic tests. 
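The rewritten tests below drive this logic directly through `PeerStates`; the common pattern is roughly:

    // Sketch of the test-side flow (see the cases below for the real assertions).
    let peer_key = rng.gen::<node::SecretKey>().public();
    peer_states.update(&peer_key, test_validators.sync_state(1)).unwrap();
    // run_block_fetcher() then issues a GetBlock request for block #1 to that peer;
    // a dropped response sender counts as a failed RPC and the peer is dropped
    // (PeerStateEvent::PeerDropped).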
use super::*; +use crate::{io, tests::wait_for_stored_block}; #[derive(Debug)] struct UpdatingPeerStateWithSingleBlock; @@ -11,20 +12,19 @@ impl Test for UpdatingPeerStateWithSingleBlock { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, .. } = handles; - let mut storage_subscriber = storage.subscribe_to_block_writes(); + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(1)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(1)) + .unwrap(); // Check that the actor has sent a `get_block` request to the peer let message = message_receiver.recv(ctx).await?; @@ -43,8 +43,10 @@ impl Test for UpdatingPeerStateWithSingleBlock { assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(1))); // Check that the block has been saved locally. - let saved_block = *sync::changed(ctx, &mut storage_subscriber).await?; - assert_eq!(saved_block, BlockNumber(1)); + sync::wait_for(ctx, &mut storage.subscribe(), |state| { + state.contains(BlockNumber(1)) + }) + .await?; Ok(()) } } @@ -63,34 +65,30 @@ impl Test for CancelingBlockRetrieval { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, - mut events_receiver, .. } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(1)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(1)) + .unwrap(); // Check that the actor has sent a `get_block` request to the peer - let message = message_receiver.recv(ctx).await?; - assert_matches!( - message, - io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { .. }) - ); + let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { mut response, .. }) = + message_receiver.recv(ctx).await?; // Emulate receiving block using external means. storage - .put_block(ctx, &test_validators.final_blocks[1]) + .queue_block(ctx, test_validators.final_blocks[1].clone()) .await?; + // Retrieval of the block must be canceled. - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::CanceledBlock(BlockNumber(1))); + response.closed().await; Ok(()) } } @@ -109,24 +107,23 @@ impl Test for FilteringBlockRetrieval { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, - mut events_receiver, .. } = handles; // Emulate receiving block using external means. 
storage - .put_block(ctx, &test_validators.final_blocks[1]) + .queue_block(ctx, test_validators.final_blocks[1].clone()) .await?; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(2)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(2)) + .unwrap(); // Check that the actor has sent `get_block` request to the peer, but only for block #2. let message = message_receiver.recv(ctx).await?; @@ -166,21 +163,21 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update( - peer_key.clone(), - test_validators.sync_state(Self::BLOCK_COUNT - 1), - ); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update( + &peer_key, + test_validators.sync_state(Self::BLOCK_COUNT - 1).clone(), + ) + .unwrap(); let mut requested_blocks = HashMap::with_capacity(Self::MAX_CONCURRENT_BLOCKS); for _ in 1..Self::BLOCK_COUNT { @@ -188,7 +185,7 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { recipient, number, response, - }) = message_receiver.recv(ctx).await?; + }) = message_receiver.recv(ctx).await.unwrap(); tracing::trace!("Received request for block #{number}"); assert_eq!(recipient, peer_key); @@ -199,7 +196,7 @@ impl Test for UpdatingPeerStateWithMultipleBlocks { if requested_blocks.len() == Self::MAX_CONCURRENT_BLOCKS || rng.gen() { // Answer a random request. - let number = *requested_blocks.keys().choose(&mut rng).unwrap(); + let number = *requested_blocks.keys().choose(rng).unwrap(); let response = requested_blocks.remove(&number).unwrap(); test_validators.send_block(number, response); @@ -241,33 +238,48 @@ impl Test for DisconnectingPeer { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(1)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(1)) + .unwrap(); // Drop the response sender emulating peer disconnect. - message_receiver.recv(ctx).await?; + let msg = message_receiver.recv(ctx).await?; + { + let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { + recipient, + number, + .. + }) = &msg; + assert_eq!(recipient, &peer_key); + assert_eq!(number, &BlockNumber(1)); + } + drop(msg); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerDisconnected(key) if key == peer_key); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev, PeerStateEvent::PeerDropped(key) if key == peer_key), + ) + .await + .context("wait for PeerDropped")?; // Check that no new requests are sent (there are no peers to send them to). 
clock.advance(BLOCK_SLEEP_INTERVAL); assert_matches!(message_receiver.try_recv(), None); // Re-connect the peer with an updated state. - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(2)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(2)) + .unwrap(); // Ensure that blocks are re-requested. clock.advance(BLOCK_SLEEP_INTERVAL); @@ -289,20 +301,26 @@ impl Test for DisconnectingPeer { let response = responses.remove(&2).unwrap(); test_validators.send_block(BlockNumber(2), response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(2))); + wait_for_event(ctx, &mut events_receiver, |ev| { + matches!(ev, PeerStateEvent::GotBlock(BlockNumber(2))) + }) + .await?; drop(responses); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerDisconnected(key) if key == peer_key); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev, PeerStateEvent::PeerDropped(key) if key == peer_key), + ) + .await?; // Check that no new requests are sent (there are no peers to send them to). clock.advance(BLOCK_SLEEP_INTERVAL); assert_matches!(message_receiver.try_recv(), None); // Re-connect the peer with the same state. - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(2)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(2)) + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); let message = message_receiver.recv(ctx).await?; @@ -361,73 +379,68 @@ impl Test for DownloadingBlocksInGaps { config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; } - async fn initialize_storage( - &self, - ctx: &ctx::Ctx, - storage: &dyn WriteBlockStore, - test_validators: &TestValidators, - ) { - for &block_number in &self.local_block_numbers { - storage - .put_block(ctx, &test_validators.final_blocks[block_number]) - .await - .unwrap(); - } - } - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, - mut events_receiver, + .. } = handles; - let peer_key = rng.gen::().public(); - let mut last_peer_block_number = if self.increase_peer_block_number_during_test { - rng.gen_range(1..Self::BLOCK_COUNT) - } else { - Self::BLOCK_COUNT - 1 - }; - peer_states_handle.update( - peer_key.clone(), - test_validators.sync_state(last_peer_block_number), - ); - wait_for_peer_update(ctx, &mut events_receiver, &peer_key).await?; - clock.advance(BLOCK_SLEEP_INTERVAL); - - let expected_block_numbers = - (1..Self::BLOCK_COUNT).filter(|number| !self.local_block_numbers.contains(number)); - - // Check that all missing blocks are requested. - for expected_number in expected_block_numbers { - if expected_number > last_peer_block_number { - last_peer_block_number = rng.gen_range(expected_number..Self::BLOCK_COUNT); - peer_states_handle.update( - peer_key.clone(), - test_validators.sync_state(last_peer_block_number), + scope::run!(ctx, |ctx, s| async { + for &block_number in &self.local_block_numbers { + s.spawn( + storage.queue_block(ctx, test_validators.final_blocks[block_number].clone()), ); - // Wait until the update is processed. 
- wait_for_peer_update(ctx, &mut events_receiver, &peer_key).await?; + } + let rng = &mut ctx.rng(); + let peer_key = rng.gen::().public(); + let mut last_peer_block_number = if self.increase_peer_block_number_during_test { + rng.gen_range(1..Self::BLOCK_COUNT) + } else { + Self::BLOCK_COUNT - 1 + }; + peer_states + .update( + &peer_key, + test_validators.sync_state(last_peer_block_number), + ) + .unwrap(); + clock.advance(BLOCK_SLEEP_INTERVAL); + let expected_block_numbers = + (1..Self::BLOCK_COUNT).filter(|number| !self.local_block_numbers.contains(number)); + + // Check that all missing blocks are requested. + for expected_number in expected_block_numbers { + if expected_number > last_peer_block_number { + last_peer_block_number = rng.gen_range(expected_number..Self::BLOCK_COUNT); + peer_states + .update( + &peer_key, + test_validators.sync_state(last_peer_block_number), + ) + .unwrap(); + clock.advance(BLOCK_SLEEP_INTERVAL); + } + + let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { + recipient, + number, + response, + }) = message_receiver.recv(ctx).await?; + + assert_eq!(recipient, peer_key); + assert!(number.0 <= last_peer_block_number as u64); + test_validators.send_block(number, response); + wait_for_stored_block(ctx, storage.as_ref(), number).await?; clock.advance(BLOCK_SLEEP_INTERVAL); } - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message_receiver.recv(ctx).await?; - - assert_eq!(recipient, peer_key); - assert_eq!(number.0 as usize, expected_number); - test_validators.send_block(number, response); - wait_for_stored_block(ctx, storage.as_ref(), number).await?; - clock.advance(BLOCK_SLEEP_INTERVAL); - } + Ok(()) + }) + .await?; Ok(()) } } @@ -458,22 +471,17 @@ impl Test for LimitingGetBlockConcurrency { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, - mut events_receiver, .. } = handles; - let mut storage_subscriber = storage.subscribe_to_block_writes(); - + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update( - peer_key.clone(), - test_validators.sync_state(Self::BLOCK_COUNT - 1), - ); - wait_for_peer_update(ctx, &mut events_receiver, &peer_key).await?; + peer_states + .update(&peer_key, test_validators.sync_state(Self::BLOCK_COUNT - 1)) + .unwrap(); // The actor should request 3 new blocks it's now aware of from the only peer it's currently // aware of. Note that blocks may be queried in any order. @@ -492,13 +500,12 @@ impl Test for LimitingGetBlockConcurrency { message_responses.keys().copied().collect::>(), HashSet::from([1, 2, 3]) ); + tracing::info!("blocks requested"); - // Send a correct response out of order. - let response = message_responses.remove(&3).unwrap(); - test_validators.send_block(BlockNumber(3), response); - - let saved_block = *sync::changed(ctx, &mut storage_subscriber).await?; - assert_eq!(saved_block, BlockNumber(3)); + // Send a correct response. + let response = message_responses.remove(&1).unwrap(); + test_validators.send_block(BlockNumber(1), response); + wait_for_stored_block(ctx, storage.as_ref(), BlockNumber(1)).await?; // The actor should now request another block. 
let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { diff --git a/node/actors/sync_blocks/src/peers/tests/fakes.rs b/node/actors/sync_blocks/src/peers/tests/fakes.rs index 0913f05b..e0b5cdd6 100644 --- a/node/actors/sync_blocks/src/peers/tests/fakes.rs +++ b/node/actors/sync_blocks/src/peers/tests/fakes.rs @@ -1,78 +1,30 @@ //! Tests focused on handling peers providing fake information to the node. use super::*; +use zksync_consensus_roles::validator; #[tokio::test] async fn processing_invalid_sync_states() { let ctx = &ctx::test_root(&ctx::RealClock); let rng = &mut ctx.rng(); - let test_validators = TestValidators::new(4, 3, rng); - let storage = InMemoryStorage::new(test_validators.final_blocks[0].clone()); - let storage = Arc::new(storage); + let test_validators = TestValidators::new(rng, 4, 3); + let (storage, _runner) = make_store(ctx, test_validators.final_blocks[0].clone()).await; let (message_sender, _) = channel::unbounded(); - let (peer_states, _) = PeerStates::new(message_sender, storage, test_validators.test_config()); + let peer_states = PeerStates::new(test_validators.test_config(), storage, message_sender); + let peer = &rng.gen::().public(); let mut invalid_sync_state = test_validators.sync_state(1); - invalid_sync_state.first_stored_block = test_validators.final_blocks[2].justification.clone(); - assert!(peer_states.validate_sync_state(invalid_sync_state).is_err()); + invalid_sync_state.first = test_validators.final_blocks[2].justification.clone(); + assert!(peer_states.update(peer, invalid_sync_state).is_err()); let mut invalid_sync_state = test_validators.sync_state(1); - invalid_sync_state.last_contiguous_stored_block = - test_validators.final_blocks[2].justification.clone(); - assert!(peer_states.validate_sync_state(invalid_sync_state).is_err()); + invalid_sync_state.last.message.proposal.number = BlockNumber(5); + assert!(peer_states.update(peer, invalid_sync_state).is_err()); - let mut invalid_sync_state = test_validators.sync_state(1); - invalid_sync_state - .last_contiguous_stored_block - .message - .proposal - .number = BlockNumber(5); - invalid_sync_state.last_stored_block.message.proposal.number = BlockNumber(5); - assert!(peer_states.validate_sync_state(invalid_sync_state).is_err()); - - let other_network = TestValidators::new(4, 2, rng); + let other_network = TestValidators::new(rng, 4, 2); let invalid_sync_state = other_network.sync_state(1); - assert!(peer_states.validate_sync_state(invalid_sync_state).is_err()); -} - -#[tokio::test] -async fn processing_invalid_blocks() { - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let test_validators = TestValidators::new(4, 3, rng); - let storage = InMemoryStorage::new(test_validators.final_blocks[0].clone()); - let storage = Arc::new(storage); - - let (message_sender, _) = channel::unbounded(); - let (peer_states, _) = PeerStates::new(message_sender, storage, test_validators.test_config()); - - let invalid_block = &test_validators.final_blocks[0]; - let err = peer_states - .validate_block(BlockNumber(1), invalid_block) - .unwrap_err(); - assert_matches!(err, BlockValidationError::Other(_)); - - let mut invalid_block = test_validators.final_blocks[1].clone(); - invalid_block.justification = test_validators.final_blocks[0].justification.clone(); - let err = peer_states - .validate_block(BlockNumber(1), &invalid_block) - .unwrap_err(); - assert_matches!(err, BlockValidationError::ProposalMismatch { .. 
}); - - let mut invalid_block = test_validators.final_blocks[1].clone(); - invalid_block.payload = validator::Payload(b"invalid".to_vec()); - let err = peer_states - .validate_block(BlockNumber(1), &invalid_block) - .unwrap_err(); - assert_matches!(err, BlockValidationError::HashMismatch { .. }); - - let other_network = TestValidators::new(4, 2, rng); - let invalid_block = &other_network.final_blocks[1]; - let err = peer_states - .validate_block(BlockNumber(1), invalid_block) - .unwrap_err(); - assert_matches!(err, BlockValidationError::Justification(_)); + assert!(peer_states.update(peer, invalid_sync_state).is_err()); } #[derive(Debug)] @@ -85,23 +37,17 @@ impl Test for PeerWithFakeSyncState { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, mut events_receiver, .. } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); let mut fake_sync_state = test_validators.sync_state(1); - fake_sync_state - .last_contiguous_stored_block - .message - .proposal - .number = BlockNumber(42); - peer_states_handle.update(peer_key.clone(), fake_sync_state); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::InvalidPeerUpdate(key) if key == peer_key); + fake_sync_state.last.message.proposal.number = BlockNumber(42); + assert!(peer_states.update(&peer_key, fake_sync_state).is_err()); clock.advance(BLOCK_SLEEP_INTERVAL); assert_matches!(events_receiver.try_recv(), None); @@ -121,55 +67,62 @@ struct PeerWithFakeBlock; impl Test for PeerWithFakeBlock { const BLOCK_COUNT: usize = 10; + fn tweak_config(&self, cfg: &mut Config) { + cfg.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; + } + async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, } = handles; - let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(1)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, peer_key); - assert_eq!(number, BlockNumber(1)); - - let mut fake_block = test_validators.final_blocks[2].clone(); - fake_block.header.number = BlockNumber(1); - response.send(Ok(fake_block)).unwrap(); - - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!( - peer_event, - PeerStateEvent::GotInvalidBlock { - block_number: BlockNumber(1), - peer_key: key, - } if key == peer_key - ); - clock.advance(BLOCK_SLEEP_INTERVAL); + let rng = &mut ctx.rng(); + + for fake_block in [ + // other block than requested + test_validators.final_blocks[0].clone(), + // block with wrong validator set + TestValidators::new(rng, 4, 2).final_blocks[1].clone(), + // block with mismatching payload, + { + let mut block = test_validators.final_blocks[1].clone(); + block.payload = validator::Payload(b"invalid".to_vec()); + block + }, + ] { + let peer_key = rng.gen::().public(); + peer_states + .update(&peer_key, test_validators.sync_state(1)) + .unwrap(); + clock.advance(BLOCK_SLEEP_INTERVAL); + + let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { + recipient, + number, + response, 
+ }) = message_receiver.recv(ctx).await?; + assert_eq!(recipient, peer_key); + assert_eq!(number, BlockNumber(1)); + response.send(Ok(fake_block)).unwrap(); + + wait_for_event(ctx, &mut events_receiver, |ev| { + matches!(ev, + PeerStateEvent::RpcFailed { + block_number: BlockNumber(1), + peer_key: key, + } if key == peer_key + ) + }) + .await?; + } // The invalid block must not be saved. - assert_matches!(events_receiver.try_recv(), None); assert!(storage.block(ctx, BlockNumber(1)).await?.is_none()); - - // Since we don't ban misbehaving peers, the node will send a request to the same peer again. - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, number, .. - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, peer_key); - assert_eq!(number, BlockNumber(1)); - Ok(()) } } diff --git a/node/actors/sync_blocks/src/peers/tests/mod.rs b/node/actors/sync_blocks/src/peers/tests/mod.rs index cf902834..3bdab70d 100644 --- a/node/actors/sync_blocks/src/peers/tests/mod.rs +++ b/node/actors/sync_blocks/src/peers/tests/mod.rs @@ -1,13 +1,12 @@ use super::*; -use crate::tests::TestValidators; +use crate::tests::{make_store, TestValidators}; use assert_matches::assert_matches; use async_trait::async_trait; -use rand::{rngs::StdRng, seq::IteratorRandom, Rng}; +use rand::{seq::IteratorRandom, Rng}; use std::{collections::HashSet, fmt}; use test_casing::{test_casing, Product}; +use tracing::instrument; use zksync_concurrency::{testonly::abort_on_panic, time}; -use zksync_consensus_roles::validator; -use zksync_consensus_storage::InMemoryStorage; mod basics; mod fakes; @@ -17,13 +16,21 @@ mod snapshots; const TEST_TIMEOUT: time::Duration = time::Duration::seconds(5); const BLOCK_SLEEP_INTERVAL: time::Duration = time::Duration::milliseconds(5); +async fn wait_for_event( + ctx: &ctx::Ctx, + events: &mut channel::UnboundedReceiver, + pred: impl Fn(PeerStateEvent) -> bool, +) -> ctx::OrCanceled<()> { + while !pred(events.recv(ctx).await?) 
{} + Ok(()) +} + #[derive(Debug)] struct TestHandles { clock: ctx::ManualClock, - rng: StdRng, test_validators: TestValidators, - peer_states_handle: PeerStatesHandle, - storage: Arc, + peer_states: Arc, + storage: Arc, message_receiver: channel::UnboundedReceiver, events_receiver: channel::UnboundedReceiver, } @@ -40,7 +47,7 @@ trait Test: fmt::Debug + Send + Sync { async fn initialize_storage( &self, _ctx: &ctx::Ctx, - _storage: &dyn WriteBlockStore, + _storage: &BlockStore, _test_validators: &TestValidators, ) { // Does nothing by default @@ -49,45 +56,6 @@ trait Test: fmt::Debug + Send + Sync { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()>; } -#[instrument(level = "trace", skip(ctx, storage), err)] -async fn wait_for_stored_block( - ctx: &ctx::Ctx, - storage: &dyn WriteBlockStore, - expected_block_number: BlockNumber, -) -> ctx::OrCanceled<()> { - tracing::trace!("Started waiting for stored block"); - let mut subscriber = storage.subscribe_to_block_writes(); - let mut got_block = storage.last_contiguous_block_number(ctx).await.unwrap(); - - while got_block < expected_block_number { - sync::changed(ctx, &mut subscriber).await?; - got_block = storage.last_contiguous_block_number(ctx).await.unwrap(); - } - Ok(()) -} - -#[instrument(level = "trace", skip(ctx, events_receiver))] -async fn wait_for_peer_update( - ctx: &ctx::Ctx, - events_receiver: &mut channel::UnboundedReceiver, - expected_peer: &node::PublicKey, -) -> ctx::OrCanceled<()> { - loop { - let peer_event = events_receiver.recv(ctx).await?; - tracing::trace!(?peer_event, "received peer event"); - match peer_event { - PeerStateEvent::PeerUpdated(key) => { - assert_eq!(key, *expected_peer); - return Ok(()); - } - PeerStateEvent::PeerDisconnected(_) | PeerStateEvent::GotBlock(_) => { - // Skip update - } - _ => panic!("Received unexpected peer event: {peer_event:?}"), - } - } -} - #[instrument(level = "trace")] async fn test_peer_states(test: T) { abort_on_panic(); @@ -95,37 +63,36 @@ async fn test_peer_states(test: T) { let ctx = &ctx::test_root(&ctx::RealClock).with_timeout(TEST_TIMEOUT); let clock = ctx::ManualClock::new(); let ctx = &ctx::test_with_clock(ctx, &clock); - let mut rng = ctx.rng(); - let test_validators = TestValidators::new(4, T::BLOCK_COUNT, &mut rng); - let storage = - InMemoryStorage::new(test_validators.final_blocks[T::GENESIS_BLOCK_NUMBER].clone()); - let storage = Arc::new(storage); - test.initialize_storage(ctx, storage.as_ref(), &test_validators) + let test_validators = TestValidators::new(&mut ctx.rng(), 4, T::BLOCK_COUNT); + let (store, store_run) = make_store( + ctx, + test_validators.final_blocks[T::GENESIS_BLOCK_NUMBER].clone(), + ) + .await; + test.initialize_storage(ctx, store.as_ref(), &test_validators) .await; let (message_sender, message_receiver) = channel::unbounded(); let (events_sender, events_receiver) = channel::unbounded(); let mut config = test_validators.test_config(); test.tweak_config(&mut config); - let (mut peer_states, peer_states_handle) = - PeerStates::new(message_sender, storage.clone(), config); + let mut peer_states = PeerStates::new(config, store.clone(), message_sender); peer_states.events_sender = Some(events_sender); + let peer_states = Arc::new(peer_states); let test_handles = TestHandles { clock, - rng, test_validators, - peer_states_handle, - storage, + peer_states: peer_states.clone(), + storage: store.clone(), message_receiver, events_receiver, }; scope::run!(ctx, |ctx, s| async { + s.spawn_bg(store_run.run(ctx)); s.spawn_bg(async { - 
peer_states.run(ctx).await.or_else(|err| match err { - ctx::Error::Canceled(_) => Ok(()), // Swallow cancellation errors after the test is finished - ctx::Error::Internal(err) => Err(err), - }) + peer_states.run_block_fetcher(ctx).await.ok(); + Ok(()) }); test.test(ctx, test_handles).await }) diff --git a/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs b/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs index 74bf5480..f665c5a5 100644 --- a/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs +++ b/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs @@ -1,6 +1,7 @@ //! Tests focused on interaction with multiple peers. use super::*; +use crate::tests::wait_for_stored_block; #[derive(Debug)] struct RequestingBlocksFromTwoPeers; @@ -11,25 +12,25 @@ impl Test for RequestingBlocksFromTwoPeers { fn tweak_config(&self, config: &mut Config) { config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - config.max_concurrent_blocks = 2; + config.max_concurrent_blocks = 5; config.max_concurrent_blocks_per_peer = 1; - // ^ Necessary for blocks numbers in tests to be deterministic } async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, } = handles; + let rng = &mut ctx.rng(); let first_peer = rng.gen::().public(); - peer_states_handle.update(first_peer.clone(), test_validators.sync_state(2)); - wait_for_peer_update(ctx, &mut events_receiver, &first_peer).await?; + peer_states + .update(&first_peer, test_validators.sync_state(2)) + .unwrap(); let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { recipient, @@ -40,10 +41,12 @@ impl Test for RequestingBlocksFromTwoPeers { assert!( first_peer_block_number == BlockNumber(1) || first_peer_block_number == BlockNumber(2) ); + tracing::info!(%first_peer_block_number, "received request"); let second_peer = rng.gen::().public(); - peer_states_handle.update(second_peer.clone(), test_validators.sync_state(4)); - wait_for_peer_update(ctx, &mut events_receiver, &second_peer).await?; + peer_states + .update(&second_peer, test_validators.sync_state(4)) + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { @@ -56,17 +59,24 @@ impl Test for RequestingBlocksFromTwoPeers { assert!( second_peer_block_number == BlockNumber(1) || second_peer_block_number == BlockNumber(2) ); + tracing::info!(%second_peer_block_number, "received request"); test_validators.send_block(first_peer_block_number, first_peer_response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(num) if num == first_peer_block_number); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == first_peer_block_number), + ) + .await + .unwrap(); // The node shouldn't send more requests to the first peer since it would be beyond // its known latest block number (2). clock.advance(BLOCK_SLEEP_INTERVAL); assert_matches!(message_receiver.try_recv(), None); - peer_states_handle.update(first_peer.clone(), test_validators.sync_state(4)); - wait_for_peer_update(ctx, &mut events_receiver, &first_peer).await?; + peer_states + .update(&first_peer, test_validators.sync_state(4)) + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); // Now the actor can get block #3 from the peer. 
@@ -79,10 +89,16 @@ impl Test for RequestingBlocksFromTwoPeers { assert!( first_peer_block_number == BlockNumber(3) || first_peer_block_number == BlockNumber(4) ); + tracing::info!(%first_peer_block_number, "received request"); test_validators.send_block(first_peer_block_number, first_peer_response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(num) if num == first_peer_block_number); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == first_peer_block_number), + ) + .await + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { @@ -94,13 +110,24 @@ impl Test for RequestingBlocksFromTwoPeers { assert!( first_peer_block_number == BlockNumber(3) || first_peer_block_number == BlockNumber(4) ); + tracing::info!(%first_peer_block_number, "received request"); test_validators.send_block(second_peer_block_number, second_peer_response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(num) if num == second_peer_block_number); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == second_peer_block_number), + ) + .await + .unwrap(); test_validators.send_block(first_peer_block_number, first_peer_response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(num) if num == first_peer_block_number); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == first_peer_block_number), + ) + .await + .unwrap(); // No more blocks should be requested from peers. clock.advance(BLOCK_SLEEP_INTERVAL); assert_matches!(message_receiver.try_recv(), None); @@ -175,21 +202,21 @@ impl Test for RequestingBlocksFromMultiplePeers { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { clock, - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, } = handles; - let peers = &self.create_peers(&mut rng); + let rng = &mut ctx.rng(); + let peers = &self.create_peers(rng); scope::run!(ctx, |ctx, s| async { // Announce peer states. for (peer_key, peer) in peers { let last_block = peer.last_block.0 as usize; - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(last_block)); + peer_states.update(peer_key, test_validators.sync_state(last_block)).unwrap(); } s.spawn_bg(async { @@ -260,11 +287,7 @@ impl Test for RequestingBlocksFromMultiplePeers { ); clock.advance(BLOCK_SLEEP_INTERVAL); } - PeerStateEvent::PeerUpdated(_) => { - clock.advance(BLOCK_SLEEP_INTERVAL); - } - PeerStateEvent::PeerDisconnected(_) => { /* Do nothing */ } - _ => panic!("Unexpected peer event: {peer_event:?}"), + PeerStateEvent::RpcFailed{..} | PeerStateEvent::PeerDropped(_) => { /* Do nothing */ } } } diff --git a/node/actors/sync_blocks/src/peers/tests/snapshots.rs b/node/actors/sync_blocks/src/peers/tests/snapshots.rs index c95e8fb2..e92db55e 100644 --- a/node/actors/sync_blocks/src/peers/tests/snapshots.rs +++ b/node/actors/sync_blocks/src/peers/tests/snapshots.rs @@ -1,6 +1,7 @@ //! Tests related to snapshot storage. 
use super::*; +use crate::tests::wait_for_stored_block; use zksync_consensus_network::io::GetBlockError; #[derive(Debug)] @@ -17,24 +18,19 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, storage, mut message_receiver, mut events_receiver, clock, } = handles; - let mut storage_subscriber = storage.subscribe_to_block_writes(); - + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); for stale_block_number in [1, 2] { - peer_states_handle.update( - peer_key.clone(), - test_validators.sync_state(stale_block_number), - ); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(stale_block_number)) + .unwrap(); // No new block requests should be issued. clock.advance(BLOCK_SLEEP_INTERVAL); @@ -42,9 +38,9 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { assert!(message_receiver.try_recv().is_none()); } - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(3)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(3)) + .unwrap(); // Check that the actor has sent a `get_block` request to the peer let message = message_receiver.recv(ctx).await?; @@ -59,12 +55,14 @@ impl Test for UpdatingPeerStateWithStorageSnapshot { // Emulate the peer sending a correct response. test_validators.send_block(BlockNumber(3), response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(3))); + wait_for_event(ctx, &mut events_receiver, |ev| { + matches!(ev, PeerStateEvent::GotBlock(BlockNumber(3))) + }) + .await + .unwrap(); // Check that the block has been saved locally. - let saved_block = *sync::changed(ctx, &mut storage_subscriber).await?; - assert_eq!(saved_block, BlockNumber(3)); + wait_for_stored_block(ctx, &storage, BlockNumber(3)).await?; Ok(()) } } @@ -87,19 +85,19 @@ impl Test for FilteringRequestsForSnapshotPeer { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, mut message_receiver, mut events_receiver, clock, .. } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.snapshot_sync_state(2..=2)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.snapshot_sync_state(2..=2)) + .unwrap(); // The peer should only be queried for blocks that it actually has (#2 in this case). let message = message_receiver.recv(ctx).await?; @@ -113,8 +111,11 @@ impl Test for FilteringRequestsForSnapshotPeer { // Emulate the peer sending a correct response. test_validators.send_block(BlockNumber(2), response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(2))); + wait_for_event(ctx, &mut events_receiver, |ev| { + matches!(ev, PeerStateEvent::GotBlock(BlockNumber(2))) + }) + .await + .unwrap(); // No further requests should be made. 
clock.advance(BLOCK_SLEEP_INTERVAL); @@ -122,9 +123,9 @@ impl Test for FilteringRequestsForSnapshotPeer { assert!(message_receiver.try_recv().is_none()); // Emulate peer receiving / producing a new block. - peer_states_handle.update(peer_key.clone(), test_validators.snapshot_sync_state(2..=3)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.snapshot_sync_state(2..=3)) + .unwrap(); let message = message_receiver.recv(ctx).await?; let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { @@ -137,10 +138,9 @@ impl Test for FilteringRequestsForSnapshotPeer { // Emulate another peer with full history. let full_peer_key = rng.gen::().public(); - peer_states_handle.update(full_peer_key.clone(), test_validators.sync_state(3)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == full_peer_key); - + peer_states + .update(&full_peer_key, test_validators.sync_state(3)) + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); // A node should only request block #1 from the peer; block #3 is already requested, @@ -155,12 +155,20 @@ impl Test for FilteringRequestsForSnapshotPeer { assert_eq!(number, BlockNumber(1)); test_validators.send_block(BlockNumber(1), response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(1))); + wait_for_event(ctx, &mut events_receiver, |ev| { + matches!(ev, PeerStateEvent::GotBlock(BlockNumber(1))) + }) + .await + .unwrap(); drop(block3_response); // Emulate first peer disconnecting. - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerDisconnected(key) if key == peer_key); + wait_for_event( + ctx, + &mut events_receiver, + |ev| matches!(ev,PeerStateEvent::PeerDropped(key) if key == peer_key), + ) + .await + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); // Now, block #3 will be requested from the peer with full history. @@ -193,19 +201,19 @@ impl Test for PruningPeerHistory { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, mut message_receiver, mut events_receiver, clock, .. } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(1)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.sync_state(1)) + .unwrap(); let message = message_receiver.recv(ctx).await?; let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { @@ -217,9 +225,9 @@ impl Test for PruningPeerHistory { assert_eq!(number, BlockNumber(1)); // Emulate peer pruning blocks. 
- peer_states_handle.update(peer_key.clone(), test_validators.snapshot_sync_state(3..=3)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.snapshot_sync_state(3..=3)) + .unwrap(); let message = message_receiver.recv(ctx).await?; let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { @@ -231,8 +239,11 @@ impl Test for PruningPeerHistory { assert_eq!(number, BlockNumber(3)); test_validators.send_block(BlockNumber(3), response); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(BlockNumber(3))); + wait_for_event(ctx, &mut events_receiver, |ev| { + matches!(ev, PeerStateEvent::GotBlock(BlockNumber(3))) + }) + .await + .unwrap(); // No new blocks should be requested (the peer has no block #2). clock.advance(BLOCK_SLEEP_INTERVAL); @@ -267,19 +278,18 @@ impl Test for BackfillingPeerHistory { async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { let TestHandles { - mut rng, test_validators, - peer_states_handle, + peer_states, mut message_receiver, - mut events_receiver, clock, .. } = handles; + let rng = &mut ctx.rng(); let peer_key = rng.gen::().public(); - peer_states_handle.update(peer_key.clone(), test_validators.snapshot_sync_state(3..=3)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); + peer_states + .update(&peer_key, test_validators.snapshot_sync_state(3..=3)) + .unwrap(); let message = message_receiver.recv(ctx).await?; let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { @@ -288,10 +298,9 @@ impl Test for BackfillingPeerHistory { assert_eq!(recipient, peer_key); assert_eq!(number, BlockNumber(3)); - peer_states_handle.update(peer_key.clone(), test_validators.sync_state(3)); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::PeerUpdated(key) if key == peer_key); - + peer_states + .update(&peer_key, test_validators.sync_state(3)) + .unwrap(); clock.advance(BLOCK_SLEEP_INTERVAL); let mut new_requested_numbers = HashSet::new(); for _ in 0..2 { diff --git a/node/actors/sync_blocks/src/tests/end_to_end.rs b/node/actors/sync_blocks/src/tests/end_to_end.rs index 610834a3..283798ff 100644 --- a/node/actors/sync_blocks/src/tests/end_to_end.rs +++ b/node/actors/sync_blocks/src/tests/end_to_end.rs @@ -5,132 +5,130 @@ use async_trait::async_trait; use rand::seq::SliceRandom; use std::fmt; use test_casing::test_casing; -use tracing::Instrument; -use zksync_concurrency::{ctx::channel, testonly::abort_on_panic}; +use tracing::{instrument, Instrument}; +use zksync_concurrency::{ctx, scope, sync, testonly::abort_on_panic}; use zksync_consensus_network as network; -use zksync_consensus_network::testonly::Instance as NetworkInstance; +use zksync_consensus_network::{io::SyncState, testonly::Instance as NetworkInstance}; use zksync_consensus_roles::node; -use zksync_consensus_storage::InMemoryStorage; +use zksync_consensus_utils::no_copy::NoCopy; type NetworkDispatcherPipe = pipe::DispatcherPipe; #[derive(Debug)] -struct NodeHandle { - create_block_sender: channel::UnboundedSender, - sync_state_subscriber: watch::Receiver, +struct Node { + store: Arc, + test_validators: Arc, switch_on_sender: Option>, _switch_off_sender: oneshot::Sender<()>, } -impl NodeHandle { +impl Node { + async fn new_network( + ctx: &ctx::Ctx, + 
node_count: usize, + gossip_peers: usize, + ) -> (Vec, Vec) { + let rng = &mut ctx.rng(); + let test_validators = Arc::new(TestValidators::new(rng, 4, 20)); + let mut nodes = vec![]; + let mut runners = vec![]; + for net in NetworkInstance::new(rng, node_count, gossip_peers) { + let (n, r) = Node::new(ctx, net, test_validators.clone()).await; + nodes.push(n); + runners.push(r); + } + (nodes, runners) + } + + async fn new( + ctx: &ctx::Ctx, + mut network: NetworkInstance, + test_validators: Arc, + ) -> (Self, NodeRunner) { + let (store, store_runner) = make_store(ctx, test_validators.final_blocks[0].clone()).await; + let (switch_on_sender, switch_on_receiver) = oneshot::channel(); + let (switch_off_sender, switch_off_receiver) = oneshot::channel(); + + network.disable_gossip_pings(); + + let runner = NodeRunner { + network, + store: store.clone(), + store_runner, + test_validators: test_validators.clone(), + switch_on_receiver, + switch_off_receiver, + }; + let this = Self { + store, + test_validators, + switch_on_sender: Some(switch_on_sender), + _switch_off_sender: switch_off_sender, + }; + (this, runner) + } + fn switch_on(&mut self) { self.switch_on_sender.take(); } -} -#[derive(Debug)] -struct InitialNodeHandle { - create_block_sender: channel::UnboundedSender, - sync_state_subscriber_receiver: oneshot::Receiver>, - switch_on_sender: oneshot::Sender<()>, - _switch_off_sender: oneshot::Sender<()>, -} - -impl InitialNodeHandle { - async fn wait(self, ctx: &ctx::Ctx) -> anyhow::Result { - let sync_state_subscriber = self - .sync_state_subscriber_receiver - .recv_or_disconnected(ctx) - .await??; - Ok(NodeHandle { - create_block_sender: self.create_block_sender, - sync_state_subscriber, - switch_on_sender: Some(self.switch_on_sender), - _switch_off_sender: self._switch_off_sender, - }) + async fn put_block(&self, ctx: &ctx::Ctx, block_number: BlockNumber) { + tracing::trace!(%block_number, "Storing new block"); + let block = &self.test_validators.final_blocks[block_number.0 as usize]; + self.store.queue_block(ctx, block.clone()).await.unwrap(); } } -struct Node { +#[must_use] +struct NodeRunner { network: NetworkInstance, - /// Receiver to command a node to push a block with the specified number to its storage. 
- create_block_receiver: channel::UnboundedReceiver, - sync_state_subscriber_sender: oneshot::Sender>, + store: Arc, + store_runner: BlockStoreRunner, + test_validators: Arc, switch_on_receiver: oneshot::Receiver<()>, switch_off_receiver: oneshot::Receiver<()>, } -impl fmt::Debug for Node { +impl fmt::Debug for NodeRunner { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter - .debug_struct("Node") + .debug_struct("NodeRunner") .field("key", &self.key()) .finish() } } -impl Node { - fn new(mut network: NetworkInstance) -> (Self, InitialNodeHandle) { - let (create_block_sender, create_block_receiver) = channel::unbounded(); - let (sync_state_subscriber_sender, sync_state_subscriber_receiver) = oneshot::channel(); - let (switch_on_sender, switch_on_receiver) = oneshot::channel(); - let (switch_off_sender, switch_off_receiver) = oneshot::channel(); - - network.disable_gossip_pings(); - - let this = Self { - network, - create_block_receiver, - sync_state_subscriber_sender, - switch_on_receiver, - switch_off_receiver, - }; - let handle = InitialNodeHandle { - create_block_sender, - sync_state_subscriber_receiver, - switch_on_sender, - _switch_off_sender: switch_off_sender, - }; - (this, handle) +fn to_sync_state(state: BlockStoreState) -> SyncState { + SyncState { + first_stored_block: state.first, + last_stored_block: state.last, } +} +impl NodeRunner { fn key(&self) -> node::PublicKey { self.network.gossip_config().key.public() } - #[instrument(level = "trace", skip(ctx, test_validators), err)] - async fn run(mut self, ctx: &ctx::Ctx, test_validators: &TestValidators) -> anyhow::Result<()> { + async fn run(mut self, ctx: &ctx::Ctx) -> anyhow::Result<()> { let key = self.key(); let (sync_blocks_actor_pipe, sync_blocks_dispatcher_pipe) = pipe::new(); let (network_actor_pipe, network_dispatcher_pipe) = pipe::new(); - let storage = InMemoryStorage::new(test_validators.final_blocks[0].clone()); - let storage = Arc::new(storage); - - let sync_blocks_config = test_validators.test_config(); - let sync_blocks = SyncBlocks::new( - ctx, - sync_blocks_actor_pipe, - storage.clone(), - sync_blocks_config, - ) - .await - .expect("Failed initializing `sync_blocks` actor"); - - let sync_states_subscriber = sync_blocks.subscribe_to_state_updates(); + let mut store_state = self.store.subscribe(); + let sync_state = sync::watch::channel(to_sync_state(store_state.borrow().clone())).0; self.network - .set_sync_state_subscriber(sync_states_subscriber.clone()); + .set_sync_state_subscriber(sync_state.subscribe()); + let sync_blocks_config = self.test_validators.test_config(); scope::run!(ctx, |ctx, s| async { + s.spawn_bg(self.store_runner.run(ctx)); s.spawn_bg(async { - while let Ok(block_number) = self.create_block_receiver.recv(ctx).await { - tracing::trace!(?key, %block_number, "Storing new block"); - let block = &test_validators.final_blocks[block_number.0 as usize]; - storage.put_block(ctx, block).await.unwrap(); + while let Ok(state) = sync::changed(ctx, &mut store_state).await { + sync_state.send_replace(to_sync_state(state.clone())); } Ok(()) }); - s.spawn_bg(async { network::run_network(ctx, self.network.state().clone(), network_actor_pipe) .instrument(tracing::trace_span!("network", ?key)) @@ -139,9 +137,6 @@ impl Node { }); self.network.wait_for_gossip_connections().await; tracing::trace!("Node connected to peers"); - self.sync_state_subscriber_sender - .send(sync_states_subscriber) - .ok(); self.switch_on_receiver .recv_or_disconnected(ctx) @@ -153,8 +148,8 @@ impl Node { .await 
.with_context(|| format!("executor for {key:?}")) }); - s.spawn_bg(sync_blocks.run(ctx)); - tracing::trace!("Node is fully started"); + s.spawn_bg(sync_blocks_config.run(ctx, sync_blocks_actor_pipe, self.store.clone())); + tracing::info!("Node is fully started"); self.switch_off_receiver .recv_or_disconnected(ctx) @@ -162,7 +157,7 @@ impl Node { .ok(); // ^ Unlike with `switch_on_receiver`, the context may get canceled before the receiver // is dropped, so we swallow both cancellation and disconnect errors here. - tracing::trace!("Node stopped"); + tracing::info!("Node stopped"); Ok(()) }) .await @@ -174,7 +169,7 @@ impl Node { mut network_dispatcher_pipe: NetworkDispatcherPipe, ) -> anyhow::Result<()> { scope::run!(ctx, |ctx, s| async { - let network_task = async { + s.spawn(async { while let Ok(message) = network_dispatcher_pipe.recv.recv(ctx).await { tracing::trace!(?message, "Received network message"); match message { @@ -185,8 +180,7 @@ impl Node { } } Ok(()) - }; - s.spawn(network_task.instrument(tracing::Span::current())); + }); while let Ok(message) = sync_blocks_dispatcher_pipe.recv.recv(ctx).await { let OutputMessage::Network(message) = message; @@ -199,31 +193,11 @@ impl Node { } } -#[derive(Debug)] -struct GossipNetwork { - test_validators: TestValidators, - node_handles: Vec, -} - -impl GossipNetwork { - fn new(rng: &mut impl Rng, node_count: usize, gossip_peers: usize) -> (Self, Vec) { - let test_validators = TestValidators::new(4, 20, rng); - let nodes = NetworkInstance::new(rng, node_count, gossip_peers); - let (nodes, node_handles) = nodes.into_iter().map(Node::new).unzip(); - let this = Self { - test_validators, - node_handles, - }; - (this, nodes) - } -} - #[async_trait] trait GossipNetworkTest: fmt::Debug + Send { /// Returns the number of nodes in the gossip network and number of peers for each node. 
fn network_params(&self) -> (usize, usize); - - async fn test(self, ctx: &ctx::Ctx, network: GossipNetwork) -> anyhow::Result<()>; + async fn test(self, ctx: &ctx::Ctx, network: Vec) -> anyhow::Result<()>; } #[instrument(level = "trace")] @@ -235,30 +209,12 @@ async fn test_sync_blocks(test: T) { let ctx = &ctx::test_root(&ctx::AffineClock::new(CLOCK_SPEEDUP as f64)) .with_timeout(TEST_TIMEOUT * CLOCK_SPEEDUP); let (node_count, gossip_peers) = test.network_params(); - let (network, nodes) = GossipNetwork::new(&mut ctx.rng(), node_count, gossip_peers); + let (nodes, runners) = Node::new_network(ctx, node_count, gossip_peers).await; scope::run!(ctx, |ctx, s| async { - for node in nodes { - let test_validators = network.test_validators.clone(); - s.spawn_bg(async { - let test_validators = test_validators; - let key = node.key(); - node.run(ctx, &test_validators).await?; - tracing::trace!(?key, "Node task completed"); - Ok(()) - }); - } - - let mut node_handles = Vec::with_capacity(network.node_handles.len()); - for node_handle in network.node_handles { - node_handles.push(node_handle.wait(ctx).await?); + for (i, runner) in runners.into_iter().enumerate() { + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); } - tracing::trace!("Finished preparations for test"); - - let network = GossipNetwork { - test_validators: network.test_validators, - node_handles, - }; - test.test(ctx, network).await + test.test(ctx, nodes).await }) .await .unwrap(); @@ -276,52 +232,46 @@ impl GossipNetworkTest for BasicSynchronization { (self.node_count, self.gossip_peers) } - async fn test(self, ctx: &ctx::Ctx, network: GossipNetwork) -> anyhow::Result<()> { - let GossipNetwork { - mut node_handles, .. - } = network; + async fn test(self, ctx: &ctx::Ctx, mut node_handles: Vec) -> anyhow::Result<()> { let rng = &mut ctx.rng(); // Check initial node states. for node_handle in &mut node_handles { node_handle.switch_on(); - let block_numbers = node_handle.sync_state_subscriber.borrow().numbers(); - assert_eq!(block_numbers.first_stored_block, BlockNumber(0)); - assert_eq!(block_numbers.last_stored_block, BlockNumber(0)); - assert_eq!(block_numbers.last_stored_block, BlockNumber(0)); + let state = node_handle.store.subscribe().borrow().clone(); + assert_eq!(state.first.header().number, BlockNumber(0)); + assert_eq!(state.last.header().number, BlockNumber(0)); } - for block_number in 1..5 { - let block_number = BlockNumber(block_number); + for block_number in (1..5).map(BlockNumber) { let sending_node = node_handles.choose(rng).unwrap(); - sending_node.create_block_sender.send(block_number); + sending_node.put_block(ctx, block_number).await; // Wait until all nodes get this block. for node_handle in &mut node_handles { - sync::wait_for(ctx, &mut node_handle.sync_state_subscriber, |state| { - state.numbers().last_contiguous_stored_block == block_number - }) - .await?; + wait_for_stored_block(ctx, &node_handle.store, block_number).await?; } tracing::trace!("All nodes received block #{block_number}"); } - // Add blocks in the opposite order, so that other nodes will start downloading all blocks - // in batch. let sending_node = node_handles.choose(rng).unwrap(); - for block_number in (5..10).rev() { - let block_number = BlockNumber(block_number); - sending_node.create_block_sender.send(block_number); - } + scope::run!(ctx, |ctx, s| async { + // Add a batch of blocks. 
+ for block_number in (5..10).rev().map(BlockNumber) { + let block_number = NoCopy::from(block_number); + s.spawn_bg(async { + sending_node.put_block(ctx, block_number.into_inner()).await; + Ok(()) + }); + } - // Wait until nodes get all new blocks. - for node_handle in &mut node_handles { - sync::wait_for(ctx, &mut node_handle.sync_state_subscriber, |state| { - state.numbers().last_contiguous_stored_block == BlockNumber(9) - }) - .await?; - } - Ok(()) + // Wait until nodes get all new blocks. + for node_handle in &node_handles { + wait_for_stored_block(ctx, &node_handle.store, BlockNumber(9)).await?; + } + Ok(()) + }) + .await } } @@ -357,10 +307,7 @@ impl GossipNetworkTest for SwitchingOffNodes { (self.node_count, self.node_count / 2) } - async fn test(self, ctx: &ctx::Ctx, network: GossipNetwork) -> anyhow::Result<()> { - let GossipNetwork { - mut node_handles, .. - } = network; + async fn test(self, ctx: &ctx::Ctx, mut node_handles: Vec) -> anyhow::Result<()> { let rng = &mut ctx.rng(); for node_handle in &mut node_handles { @@ -368,23 +315,25 @@ impl GossipNetworkTest for SwitchingOffNodes { } let mut block_number = BlockNumber(1); - while node_handles.len() > 1 { - // Switch off a random node by dropping its handle. - let node_index_to_remove = rng.gen_range(0..node_handles.len()); - node_handles.swap_remove(node_index_to_remove); + while !node_handles.is_empty() { + tracing::info!("{} nodes left", node_handles.len()); let sending_node = node_handles.choose(rng).unwrap(); - sending_node.create_block_sender.send(block_number); + sending_node.put_block(ctx, block_number).await; + tracing::info!("block {block_number} inserted"); // Wait until all remaining nodes get the new block. - for node_handle in &mut node_handles { - sync::wait_for(ctx, &mut node_handle.sync_state_subscriber, |state| { - state.numbers().last_contiguous_stored_block == block_number - }) - .await?; + for node_handle in &node_handles { + wait_for_stored_block(ctx, &node_handle.store, block_number).await?; } tracing::trace!("All nodes received block #{block_number}"); block_number = block_number.next(); + + // Switch off a random node by dropping its handle. + // We start switching off only after the first round, to make sure all nodes are fully + // started. + let node_index_to_remove = rng.gen_range(0..node_handles.len()); + node_handles.swap_remove(node_index_to_remove); } Ok(()) } @@ -407,10 +356,7 @@ impl GossipNetworkTest for SwitchingOnNodes { (self.node_count, self.node_count / 2) } - async fn test(self, ctx: &ctx::Ctx, network: GossipNetwork) -> anyhow::Result<()> { - let GossipNetwork { - mut node_handles, .. - } = network; + async fn test(self, ctx: &ctx::Ctx, mut node_handles: Vec) -> anyhow::Result<()> { let rng = &mut ctx.rng(); let mut switched_on_nodes = Vec::with_capacity(self.node_count); @@ -423,14 +369,11 @@ impl GossipNetworkTest for SwitchingOnNodes { switched_on_nodes.push(node_handle); let sending_node = switched_on_nodes.choose(rng).unwrap(); - sending_node.create_block_sender.send(block_number); + sending_node.put_block(ctx, block_number).await; // Wait until all switched on nodes get the new block. 
for node_handle in &mut switched_on_nodes { - sync::wait_for(ctx, &mut node_handle.sync_state_subscriber, |state| { - state.numbers().last_contiguous_stored_block == block_number - }) - .await?; + wait_for_stored_block(ctx, &node_handle.store, block_number).await?; } tracing::trace!("All nodes received block #{block_number}"); block_number = block_number.next(); diff --git a/node/actors/sync_blocks/src/tests/mod.rs b/node/actors/sync_blocks/src/tests/mod.rs index f88007b2..deb1891c 100644 --- a/node/actors/sync_blocks/src/tests/mod.rs +++ b/node/actors/sync_blocks/src/tests/mod.rs @@ -5,20 +5,41 @@ use rand::{ Rng, }; use std::{iter, ops}; -use zksync_concurrency::{oneshot, testonly::abort_on_panic, time}; +use zksync_concurrency::{oneshot, sync, testonly::abort_on_panic, time}; use zksync_consensus_network::io::{GetBlockError, GetBlockResponse, SyncBlocksRequest}; use zksync_consensus_roles::validator::{ self, testonly::{make_block, make_genesis_block}, BlockHeader, BlockNumber, CommitQC, FinalBlock, Payload, ValidatorSet, }; -use zksync_consensus_storage::InMemoryStorage; +use zksync_consensus_storage::{testonly::in_memory, BlockStore, BlockStoreRunner}; use zksync_consensus_utils::pipe; mod end_to_end; const TEST_TIMEOUT: time::Duration = time::Duration::seconds(20); +pub(crate) async fn make_store( + ctx: &ctx::Ctx, + genesis: FinalBlock, +) -> (Arc, BlockStoreRunner) { + let storage = in_memory::BlockStore::new(genesis); + BlockStore::new(ctx, Box::new(storage)).await.unwrap() +} + +pub(crate) async fn wait_for_stored_block( + ctx: &ctx::Ctx, + storage: &BlockStore, + block_number: BlockNumber, +) -> ctx::OrCanceled<()> { + tracing::trace!("Started waiting for stored block"); + sync::wait_for(ctx, &mut storage.subscribe(), |state| { + state.next() > block_number + }) + .await?; + Ok(()) +} + impl Distribution for Standard { fn sample(&self, rng: &mut R) -> Config { let validator_set: ValidatorSet = rng.gen(); @@ -35,7 +56,7 @@ pub(crate) struct TestValidators { } impl TestValidators { - pub(crate) fn new(validator_count: usize, block_count: usize, rng: &mut impl Rng) -> Self { + pub(crate) fn new(rng: &mut impl Rng, validator_count: usize, block_count: usize) -> Self { let validator_secret_keys: Vec = (0..validator_count).map(|_| rng.gen()).collect(); let validator_set = validator_secret_keys.iter().map(|sk| sk.public()); @@ -51,7 +72,6 @@ impl TestValidators { let mut latest_block = BlockHeader::genesis(payload.hash(), BlockNumber(0)); let final_blocks = (0..block_count).map(|_| { let final_block = FinalBlock { - header: latest_block, payload: payload.clone(), justification: this.certify_block(&latest_block), }; @@ -80,26 +100,22 @@ impl TestValidators { CommitQC::from(&signed_messages, &self.validator_set).unwrap() } - pub(crate) fn sync_state(&self, last_block_number: usize) -> SyncState { + pub(crate) fn sync_state(&self, last_block_number: usize) -> BlockStoreState { self.snapshot_sync_state(1..=last_block_number) } pub(crate) fn snapshot_sync_state( &self, block_numbers: ops::RangeInclusive, - ) -> SyncState { + ) -> BlockStoreState { assert!(!block_numbers.is_empty()); - - let first_block = self.final_blocks[*block_numbers.start()] - .justification - .clone(); - let last_block = self.final_blocks[*block_numbers.end()] - .justification - .clone(); - SyncState { - first_stored_block: first_block, - last_contiguous_stored_block: last_block.clone(), - last_stored_block: last_block, + BlockStoreState { + first: self.final_blocks[*block_numbers.start()] + .justification + .clone(), + 
last: self.final_blocks[*block_numbers.end()] + .justification + .clone(), } } @@ -109,8 +125,10 @@ impl TestValidators { response: oneshot::Sender, ) { let final_block = self.final_blocks[number.0 as usize].clone(); - response.send(Ok(final_block)).unwrap(); - tracing::trace!("Responded to get_block({number})"); + match response.send(Ok(final_block)) { + Ok(()) => tracing::info!(?number, "responded to get_block()"), + Err(_) => tracing::info!(?number, "failed to respond to get_block()"), + } } } @@ -122,68 +140,34 @@ async fn subscribing_to_state_updates() { let rng = &mut ctx.rng(); let protocol_version = validator::ProtocolVersion::EARLIEST; let genesis_block = make_genesis_block(rng, protocol_version); - let block_1 = make_block(rng, &genesis_block.header, protocol_version); - let block_2 = make_block(rng, &block_1.header, protocol_version); - let block_3 = make_block(rng, &block_2.header, protocol_version); + let block_1 = make_block(rng, genesis_block.header(), protocol_version); - let storage = InMemoryStorage::new(genesis_block.clone()); - let storage = &Arc::new(storage); + let (storage, runner) = make_store(ctx, genesis_block.clone()).await; let (actor_pipe, _dispatcher_pipe) = pipe::new(); - let actor = SyncBlocks::new(ctx, actor_pipe, storage.clone(), rng.gen()) - .await - .unwrap(); - let mut state_subscriber = actor.subscribe_to_state_updates(); + let mut state_subscriber = storage.subscribe(); + let cfg: Config = rng.gen(); scope::run!(ctx, |ctx, s| async { - s.spawn_bg(async { - actor.run(ctx).await.or_else(|err| { - if err.root_cause().is::() { - Ok(()) // Swallow cancellation errors after the test is finished - } else { - Err(err) - } - }) - }); + s.spawn_bg(runner.run(ctx)); + s.spawn_bg(cfg.run(ctx, actor_pipe, storage.clone())); s.spawn_bg(async { assert!(ctx.sleep(TEST_TIMEOUT).await.is_err(), "Test timed out"); anyhow::Ok(()) }); - { - let initial_state = state_subscriber.borrow_and_update(); - assert_eq!( - initial_state.first_stored_block, - genesis_block.justification - ); - assert_eq!( - initial_state.last_contiguous_stored_block, - genesis_block.justification - ); - assert_eq!(initial_state.last_stored_block, genesis_block.justification); - } - - storage.put_block(ctx, &block_1).await.unwrap(); - - { - let new_state = sync::changed(ctx, &mut state_subscriber).await?; - assert_eq!(new_state.first_stored_block, genesis_block.justification); - assert_eq!( - new_state.last_contiguous_stored_block, - block_1.justification - ); - assert_eq!(new_state.last_stored_block, block_1.justification); - } - - storage.put_block(ctx, &block_3).await.unwrap(); - - let new_state = sync::changed(ctx, &mut state_subscriber).await?; - assert_eq!(new_state.first_stored_block, genesis_block.justification); - assert_eq!( - new_state.last_contiguous_stored_block, - block_1.justification - ); - assert_eq!(new_state.last_stored_block, block_3.justification); + let state = state_subscriber.borrow().clone(); + assert_eq!(state.first, genesis_block.justification); + assert_eq!(state.last, genesis_block.justification); + storage.queue_block(ctx, block_1.clone()).await.unwrap(); + let state = sync::wait_for(ctx, &mut state_subscriber, |state| { + state.next() > block_1.header().number + }) + .await + .unwrap() + .clone(); + assert_eq!(state.first, genesis_block.justification); + assert_eq!(state.last, block_1.justification); Ok(()) }) .await @@ -199,31 +183,20 @@ async fn getting_blocks() { let protocol_version = validator::ProtocolVersion::EARLIEST; let genesis_block = make_genesis_block(rng, 
protocol_version); - let storage = InMemoryStorage::new(genesis_block.clone()); - let storage = Arc::new(storage); - let blocks = iter::successors(Some(genesis_block), |parent| { - Some(make_block(rng, &parent.header, protocol_version)) - }); - let blocks: Vec<_> = blocks.take(5).collect(); - for block in &blocks { - storage.put_block(ctx, block).await.unwrap(); - } - + let (storage, runner) = make_store(ctx, genesis_block.clone()).await; let (actor_pipe, dispatcher_pipe) = pipe::new(); - let actor = SyncBlocks::new(ctx, actor_pipe, storage.clone(), rng.gen()) - .await - .unwrap(); + let cfg: Config = rng.gen(); scope::run!(ctx, |ctx, s| async { - s.spawn_bg(async { - actor.run(ctx).await.or_else(|err| { - if err.root_cause().is::() { - Ok(()) // Swallow cancellation errors after the test is finished - } else { - Err(err) - } - }) + s.spawn_bg(runner.run(ctx)); + let blocks = iter::successors(Some(genesis_block), |parent| { + Some(make_block(rng, parent.header(), protocol_version)) }); + let blocks: Vec<_> = blocks.take(5).collect(); + for block in &blocks { + storage.queue_block(ctx, block.clone()).await.unwrap(); + } + s.spawn_bg(cfg.run(ctx, actor_pipe, storage.clone())); s.spawn_bg(async { assert!(ctx.sleep(TEST_TIMEOUT).await.is_err(), "Test timed out"); anyhow::Ok(()) diff --git a/node/libs/concurrency/src/sync/mod.rs b/node/libs/concurrency/src/sync/mod.rs index 9e920965..9960c6d7 100644 --- a/node/libs/concurrency/src/sync/mod.rs +++ b/node/libs/concurrency/src/sync/mod.rs @@ -48,7 +48,8 @@ impl ops::DerefMut for LocalMutexGuard<'_, T> { } } -/// Locks a mutex. +/// Locks a mutex, returning a guard which is NOT Send +/// (useful for ensuring that mutex is not held across await point). /// Note that depending on a use case you might /// want to wait unconditionally for a mutex to be locked /// (when a mutex is guaranteed to be unlocked fast). diff --git a/node/libs/protobuf/src/testonly.rs b/node/libs/protobuf/src/testonly.rs index 9cf15ae0..15272cd8 100644 --- a/node/libs/protobuf/src/testonly.rs +++ b/node/libs/protobuf/src/testonly.rs @@ -9,7 +9,7 @@ use rand::{ /// Test encoding and canonical encoding properties. #[track_caller] -pub fn test_encode(rng: &mut R, x: &T) { +pub fn test_encode(rng: &mut R, x: &T) { let x_encode = encode(x); let x_canonical = canonical(x); let x_shuffled = encode_shuffled(rng, x); @@ -26,7 +26,7 @@ pub fn test_encode(rng: &mut R, x: & /// Syntax sugar for `test_encode`, /// because `test_encode(rng,&rng::gen())` doesn't compile. 
#[track_caller] -pub fn test_encode_random(rng: &mut R) +pub fn test_encode_random(rng: &mut R) where Standard: Distribution, { diff --git a/node/libs/roles/src/proto/validator.proto b/node/libs/roles/src/proto/validator.proto index 40f7563c..9393369b 100644 --- a/node/libs/roles/src/proto/validator.proto +++ b/node/libs/roles/src/proto/validator.proto @@ -22,9 +22,8 @@ message BlockHeader { } message FinalBlock { - optional BlockHeader header = 1; // required - optional bytes payload = 2; // required - optional CommitQC justification = 3; // required + optional bytes payload = 1; // required + optional CommitQC justification = 2; // required } message ConsensusMsg { diff --git a/node/libs/roles/src/validator/conv.rs b/node/libs/roles/src/validator/conv.rs index b7df925f..33748b43 100644 --- a/node/libs/roles/src/validator/conv.rs +++ b/node/libs/roles/src/validator/conv.rs @@ -57,7 +57,6 @@ impl ProtoFmt for FinalBlock { type Proto = proto::FinalBlock; fn read(r: &Self::Proto) -> anyhow::Result { Ok(Self { - header: read_required(&r.header).context("header")?, payload: Payload(required(&r.payload).context("payload")?.clone()), justification: read_required(&r.justification).context("justification")?, }) @@ -65,7 +64,6 @@ impl ProtoFmt for FinalBlock { fn build(&self) -> Self::Proto { Self::Proto { - header: Some(self.header.build()), payload: Some(self.payload.0.clone()), justification: Some(self.justification.build()), } diff --git a/node/libs/roles/src/validator/messages/block.rs b/node/libs/roles/src/validator/messages/block.rs index adc3fe7b..5ea7f486 100644 --- a/node/libs/roles/src/validator/messages/block.rs +++ b/node/libs/roles/src/validator/messages/block.rs @@ -52,8 +52,8 @@ impl Payload { } /// Sequential number of the block. -/// Genesis block has number 0. -/// For other blocks: block.number = block.parent.number + 1. +/// Genesis block can have an arbitrary block number. +/// For blocks other than genesis: block.number = block.parent.number + 1. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct BlockNumber(pub u64); @@ -80,6 +80,11 @@ impl fmt::Display for BlockNumber { pub struct BlockHeaderHash(pub(crate) Keccak256); impl BlockHeaderHash { + /// Constant that the parent of the genesis block should be set to. + pub fn genesis_parent() -> Self { + Self(Keccak256::default()) + } + /// Interprets the specified `bytes` as a block header hash digest (i.e., a reverse operation to [`Self::as_bytes()`]). /// It is caller's responsibility to ensure that `bytes` are actually a block header hash digest. pub fn from_bytes(bytes: [u8; 32]) -> Self { @@ -133,7 +138,7 @@ impl BlockHeader { /// Creates a genesis block. pub fn genesis(payload: PayloadHash, number: BlockNumber) -> Self { Self { - parent: BlockHeaderHash(Keccak256::default()), + parent: BlockHeaderHash::genesis_parent(), number, payload, } @@ -152,8 +157,6 @@ impl BlockHeader { /// A block that has been finalized by the consensus protocol. #[derive(Clone, Debug, PartialEq, Eq)] pub struct FinalBlock { - /// Header of the block. - pub header: BlockHeader, /// Payload of the block. Should match `header.payload` hash. pub payload: Payload, /// Justification for the block. What guarantees that the block is final. @@ -162,16 +165,19 @@ pub struct FinalBlock { impl FinalBlock { /// Creates a new finalized block. 
- pub fn new(header: BlockHeader, payload: Payload, justification: CommitQC) -> Self { - assert_eq!(header.payload, payload.hash()); - assert_eq!(header, justification.message.proposal); + pub fn new(payload: Payload, justification: CommitQC) -> Self { + assert_eq!(justification.message.proposal.payload, payload.hash()); Self { - header, payload, justification, } } + /// Header of the block. + pub fn header(&self) -> &BlockHeader { + &self.justification.message.proposal + } + /// Validates internal consistency of this block. pub fn validate( &self, @@ -179,19 +185,12 @@ impl FinalBlock { consensus_threshold: usize, ) -> Result<(), BlockValidationError> { let payload_hash = self.payload.hash(); - if payload_hash != self.header.payload { + if payload_hash != self.header().payload { return Err(BlockValidationError::HashMismatch { - header_hash: self.header.payload, + header_hash: self.header().payload, payload_hash, }); } - if self.header != self.justification.message.proposal { - return Err(BlockValidationError::ProposalMismatch { - block_header: Box::new(self.header), - qc_header: Box::new(self.justification.message.proposal), - }); - } - self.justification .verify(validators, consensus_threshold) .map_err(BlockValidationError::Justification) @@ -233,21 +232,7 @@ pub enum BlockValidationError { /// Hash of the payload. payload_hash: PayloadHash, }, - /// Quorum certificate proposal doesn't match the block header. - #[error( - "quorum certificate proposal doesn't match the block header (block header: {block_header:?}, \ - header in QC: {qc_header:?})" - )] - ProposalMismatch { - /// Block header field. - block_header: Box<BlockHeader>, - /// Block header from the quorum certificate. - qc_header: Box<BlockHeader>, - }, /// Failed verifying quorum certificate. #[error("failed verifying quorum certificate: {0:#?}")] Justification(#[source] anyhow::Error), - /// Application-specific error. - #[error(transparent)] - Other(anyhow::Error), } diff --git a/node/libs/roles/src/validator/messages/consensus.rs b/node/libs/roles/src/validator/messages/consensus.rs index 8f6004ee..d7a1d9ba 100644 --- a/node/libs/roles/src/validator/messages/consensus.rs +++ b/node/libs/roles/src/validator/messages/consensus.rs @@ -285,6 +285,11 @@ pub struct CommitQC { } impl CommitQC { + /// Header of the certified block. + pub fn header(&self) -> &BlockHeader { + &self.message.proposal + } + /// Create a new empty instance for a given `ReplicaCommit` message and a validator set size.
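Illustrative sketch (not part of the patch): with the `header` field gone, a `FinalBlock`'s header is read from its certificate. Assuming a `payload: Payload` and a matching `justification: CommitQC` are already in scope:

// The constructor asserts that the certificate commits to this payload,
// so both accessors below agree by construction.
let block = validator::FinalBlock::new(payload, justification);
assert_eq!(block.header(), block.justification.header());
assert_eq!(block.header().payload, block.payload.hash());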
pub fn new(message: ReplicaCommit, validator_set: &ValidatorSet) -> Self { Self { diff --git a/node/libs/roles/src/validator/testonly.rs b/node/libs/roles/src/validator/testonly.rs index f8effd9c..3f5a6832 100644 --- a/node/libs/roles/src/validator/testonly.rs +++ b/node/libs/roles/src/validator/testonly.rs @@ -40,7 +40,6 @@ pub fn make_genesis_block(rng: &mut R, protocol_version: ProtocolVersion let header = BlockHeader::genesis(payload.hash(), BlockNumber(0)); let justification = make_justification(rng, &header, protocol_version); FinalBlock { - header, payload, justification, } @@ -57,7 +56,6 @@ pub fn make_block( let header = BlockHeader::new(parent, payload.hash()); let justification = make_justification(rng, &header, protocol_version); FinalBlock { - header, payload, justification, } @@ -205,7 +203,6 @@ impl Distribution for Standard { impl Distribution for Standard { fn sample(&self, rng: &mut R) -> FinalBlock { FinalBlock { - header: rng.gen(), payload: rng.gen(), justification: rng.gen(), } diff --git a/node/libs/storage/Cargo.toml b/node/libs/storage/Cargo.toml index f107fe73..e1cca9e2 100644 --- a/node/libs/storage/Cargo.toml +++ b/node/libs/storage/Cargo.toml @@ -15,9 +15,9 @@ anyhow.workspace = true async-trait.workspace = true prost.workspace = true rand.workspace = true -rocksdb = { workspace = true, optional = true } thiserror.workspace = true tracing.workspace = true +vise.workspace = true [dev-dependencies] assert_matches.workspace = true @@ -28,10 +28,5 @@ tokio.workspace = true [build-dependencies] zksync_protobuf_build.workspace = true -[features] -default = [] -# Enables RocksDB-based storage. -rocksdb = ["dep:rocksdb"] - [lints] -workspace = true \ No newline at end of file +workspace = true diff --git a/node/libs/storage/src/block_store/metrics.rs b/node/libs/storage/src/block_store/metrics.rs new file mode 100644 index 00000000..32dae966 --- /dev/null +++ b/node/libs/storage/src/block_store/metrics.rs @@ -0,0 +1,28 @@ +//! Storage metrics. +use std::time; + +#[derive(Debug, vise::Metrics)] +#[metrics(prefix = "zksync_consensus_storage_persistent_block_store")] +pub(super) struct PersistentBlockStore { + /// Latency of a successful `state()` call. + #[metrics(unit = vise::Unit::Seconds, buckets = vise::Buckets::LATENCIES)] + pub(super) state_latency: vise::Histogram, + /// Latency of a successful `block()` call. + #[metrics(unit = vise::Unit::Seconds, buckets = vise::Buckets::LATENCIES)] + pub(super) block_latency: vise::Histogram, + /// Latency of a successful `store_next_block()` call. + #[metrics(unit = vise::Unit::Seconds, buckets = vise::Buckets::LATENCIES)] + pub(super) store_next_block_latency: vise::Histogram, +} + +#[vise::register] +pub(super) static PERSISTENT_BLOCK_STORE: vise::Global = vise::Global::new(); + +#[derive(Debug, vise::Metrics)] +#[metrics(prefix = "zksync_consensus_storage_block_store")] +pub(super) struct BlockStore { + /// BlockNumber of the last queued block. + pub(super) last_queued_block: vise::Gauge, + /// BlockNumber of the last persisted block. + pub(super) last_persisted_block: vise::Gauge, +} diff --git a/node/libs/storage/src/block_store/mod.rs b/node/libs/storage/src/block_store/mod.rs new file mode 100644 index 00000000..716a66f1 --- /dev/null +++ b/node/libs/storage/src/block_store/mod.rs @@ -0,0 +1,251 @@ +//! Defines storage layer for finalized blocks. 
+use std::{collections::VecDeque, fmt, sync::Arc}; +use zksync_concurrency::{ctx, error::Wrap as _, sync}; +use zksync_consensus_roles::validator; + +mod metrics; + +/// State of the `BlockStore`: continuous range of blocks. +#[derive(Debug, Clone)] +pub struct BlockStoreState { + /// Stored block with the lowest number. + pub first: validator::CommitQC, + /// Stored block with the highest number. + pub last: validator::CommitQC, +} + +impl BlockStoreState { + /// Checks whether a block with the given number is stored in the `BlockStore`. + pub fn contains(&self, number: validator::BlockNumber) -> bool { + self.first.header().number <= number && number <= self.last.header().number + } + + /// Number of the next block that can be stored in the `BlockStore`. + /// (i.e. `last` + 1). + pub fn next(&self) -> validator::BlockNumber { + self.last.header().number.next() + } +} + +/// Storage of a continuous range of L2 blocks. +/// +/// Implementations **must** propagate context cancellation using [`ctx::Error::Canceled`]. +#[async_trait::async_trait] +pub trait PersistentBlockStore: fmt::Debug + Send + Sync { + /// Range of blocks available in storage. + /// PersistentBlockStore is expected to always contain at least 1 block, + /// and be append-only storage (never delete blocks). + /// Consensus code calls this method only once and then tracks the + /// range of available blocks internally. + async fn state(&self, ctx: &ctx::Ctx) -> ctx::Result<BlockStoreState>; + + /// Gets a block by its number. + /// Returns an error if the block is missing. + /// Caller is expected to know the state (by calling `state()`) + /// and only request the blocks contained in the state. + async fn block( + &self, + ctx: &ctx::Ctx, + number: validator::BlockNumber, + ) -> ctx::Result<validator::FinalBlock>; + + /// Persistently store a block. + /// Implementations are only required to accept a block directly after the current last block, + /// so that the stored blocks always constitute a continuous range. + /// Implementation should return only after the block is stored PERSISTENTLY - + /// consensus liveness property depends on this behavior. + async fn store_next_block( + &self, + ctx: &ctx::Ctx, + block: &validator::FinalBlock, + ) -> ctx::Result<()>; +} + +#[derive(Debug)] +struct Inner { + queued_state: sync::watch::Sender<BlockStoreState>, + persisted_state: BlockStoreState, + queue: VecDeque<validator::FinalBlock>, +} + +/// A wrapper around a PersistentBlockStore which adds in-memory caching of blocks +/// and other useful utilities. +#[derive(Debug)] +pub struct BlockStore { + inner: sync::watch::Sender<Inner>, + persistent: Box<dyn PersistentBlockStore>, +} + +/// Runner of the BlockStore background tasks. +#[must_use] +pub struct BlockStoreRunner(Arc<BlockStore>); + +impl BlockStoreRunner { + /// Runs the background tasks of the BlockStore. + pub async fn run(self, ctx: &ctx::Ctx) -> anyhow::Result<()> { + #[vise::register] + static COLLECTOR: vise::Collector<Option<metrics::BlockStore>> = vise::Collector::new(); + let store_ref = Arc::downgrade(&self.0); + let _ = COLLECTOR.before_scrape(move || Some(store_ref.upgrade()?.scrape_metrics())); + + let res = async { + let inner = &mut self.0.inner.subscribe(); + loop { + let block = sync::wait_for(ctx, inner, |inner| !inner.queue.is_empty()) + .await? + .queue[0] + .clone(); + + // TODO: monitor errors as well.
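// (Annotation, not part of the patch.) This persistence loop drains the in-memory queue
// in order: it waits until `queue` is non-empty, writes the front block via
// `PersistentBlockStore::store_next_block()`, and only then advances `persisted_state`
// and pops the entry, which keeps the persisted range contiguous.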
+ let t = metrics::PERSISTENT_BLOCK_STORE + .store_next_block_latency + .start(); + self.0.persistent.store_next_block(ctx, &block).await?; + t.observe(); + + self.0.inner.send_modify(|inner| { + debug_assert_eq!(inner.persisted_state.next(), block.header().number); + inner.persisted_state.last = block.justification.clone(); + inner.queue.pop_front(); + }); + } + } + .await; + match res { + Ok(()) | Err(ctx::Error::Canceled(_)) => Ok(()), + Err(ctx::Error::Internal(err)) => Err(err), + } + } +} + +impl BlockStore { + /// Constructs a BlockStore. + /// BlockStore takes ownership of the passed PersistentBlockStore, + /// i.e. caller should modify the underlying persistent storage + /// ONLY through the constructed BlockStore. + pub async fn new( + ctx: &ctx::Ctx, + persistent: Box<dyn PersistentBlockStore>, + ) -> ctx::Result<(Arc<Self>, BlockStoreRunner)> { + let t = metrics::PERSISTENT_BLOCK_STORE.state_latency.start(); + let state = persistent.state(ctx).await.wrap("persistent.state()")?; + t.observe(); + if state.first.header().number > state.last.header().number { + return Err(anyhow::anyhow!("invalid state").into()); + } + let this = Arc::new(Self { + persistent, + inner: sync::watch::channel(Inner { + queued_state: sync::watch::channel(state.clone()).0, + persisted_state: state, + queue: VecDeque::new(), + }) + .0, + }); + Ok((this.clone(), BlockStoreRunner(this))) + } + + /// Fetches a block (from queue or persistent storage). + pub async fn block( + &self, + ctx: &ctx::Ctx, + number: validator::BlockNumber, + ) -> ctx::Result<Option<validator::FinalBlock>> { + { + let inner = self.inner.borrow(); + if !inner.queued_state.borrow().contains(number) { + return Ok(None); + } + if !inner.persisted_state.contains(number) { + // Subtraction is safe, because we know that the block + // is in inner.queue at this point. + let idx = number.0 - inner.persisted_state.next().0; + return Ok(inner.queue.get(idx as usize).cloned()); + } + } + let t = metrics::PERSISTENT_BLOCK_STORE.block_latency.start(); + let block = self + .persistent + .block(ctx, number) + .await + .wrap("persistent.block()")?; + t.observe(); + Ok(Some(block)) + } + + /// Inserts a block into the queue, to be persisted eventually. + /// Since persisting a block may take a significant amount of time, + /// BlockStore contains a queue of blocks waiting to be persisted. + /// `queue_block()` adds a block to the queue as soon as all intermediate + /// blocks are queued as well. The queue is unbounded, so it is the caller's + /// responsibility to manage the queue size. + pub async fn queue_block( + &self, + ctx: &ctx::Ctx, + block: validator::FinalBlock, + ) -> ctx::OrCanceled<()> { + let number = block.header().number; + sync::wait_for(ctx, &mut self.subscribe(), |queued_state| { + queued_state.next() >= number + }) + .await?; + self.inner.send_if_modified(|inner| { + let modified = inner.queued_state.send_if_modified(|queued_state| { + // It may happen that the same block is queued by 2 calls. + if queued_state.next() != number { + return false; + } + queued_state.last = block.justification.clone(); + true + }); + if !modified { + return false; + } + inner.queue.push_back(block); + true + }); + Ok(()) + } + + /// Waits until the given block is queued to be stored. + pub async fn wait_until_queued( + &self, + ctx: &ctx::Ctx, + number: validator::BlockNumber, + ) -> ctx::OrCanceled<()> { + sync::wait_for(ctx, &mut self.subscribe(), |queued_state| { + queued_state.contains(number) + }) + .await?; + Ok(()) + } + + /// Waits until the given block is stored persistently.
+ pub async fn wait_until_persisted( + &self, + ctx: &ctx::Ctx, + number: validator::BlockNumber, + ) -> ctx::OrCanceled<()> { + sync::wait_for(ctx, &mut self.inner.subscribe(), |inner| { + inner.persisted_state.contains(number) + }) + .await?; + Ok(()) + } + + /// Subscribes to the `BlockStoreState` changes. + /// Note that this state includes both queue AND stored blocks. + pub fn subscribe(&self) -> sync::watch::Receiver { + self.inner.borrow().queued_state.subscribe() + } + + fn scrape_metrics(&self) -> metrics::BlockStore { + let m = metrics::BlockStore::default(); + let inner = self.inner.borrow(); + m.last_queued_block + .set(inner.queued_state.borrow().last.header().number.0); + m.last_persisted_block + .set(inner.persisted_state.last.header().number.0); + m + } +} diff --git a/node/libs/storage/src/in_memory.rs b/node/libs/storage/src/in_memory.rs deleted file mode 100644 index 7bbb1394..00000000 --- a/node/libs/storage/src/in_memory.rs +++ /dev/null @@ -1,171 +0,0 @@ -//! In-memory storage implementation. - -use crate::{ - traits::{BlockStore, ReplicaStateStore, WriteBlockStore}, - types::{MissingBlockNumbers, ReplicaState}, -}; -use async_trait::async_trait; -use std::{collections::BTreeMap, ops}; -use zksync_concurrency::{ - ctx, - sync::{watch, Mutex}, -}; -use zksync_consensus_roles::validator; - -#[derive(Debug)] -struct BlocksInMemoryStore { - blocks: BTreeMap, - last_contiguous_block_number: validator::BlockNumber, -} - -impl BlocksInMemoryStore { - fn head_block(&self) -> &validator::FinalBlock { - self.blocks.values().next_back().unwrap() - // ^ `unwrap()` is safe by construction; the storage contains at least the genesis block - } - - fn first_block(&self) -> &validator::FinalBlock { - self.blocks.values().next().unwrap() - // ^ `unwrap()` is safe by construction; the storage contains at least the genesis block - } - - fn block(&self, number: validator::BlockNumber) -> Option<&validator::FinalBlock> { - self.blocks.get(&number) - } - - fn missing_block_numbers( - &self, - range: ops::Range, - ) -> Vec { - let existing_numbers = self - .blocks - .range(range.clone()) - .map(|(&number, _)| Ok(number)); - MissingBlockNumbers::new(range, existing_numbers) - .map(Result::unwrap) - .collect() - } - - fn put_block(&mut self, block: validator::FinalBlock) { - let block_number = block.header.number; - tracing::debug!("Inserting block #{block_number} into database"); - if let Some(prev_block) = self.blocks.insert(block_number, block) { - tracing::debug!(?prev_block, "Block #{block_number} is overwritten"); - } else { - for (&number, _) in self - .blocks - .range(self.last_contiguous_block_number.next()..) - { - let expected_block_number = self.last_contiguous_block_number.next(); - if number == expected_block_number { - self.last_contiguous_block_number = expected_block_number; - } else { - return; - } - } - } - } -} - -/// In-memory store. -#[derive(Debug)] -pub struct InMemoryStorage { - blocks: Mutex, - replica_state: Mutex>, - blocks_sender: watch::Sender, -} - -impl InMemoryStorage { - /// Creates a new store containing only the specified `genesis_block`. 
- pub fn new(genesis_block: validator::FinalBlock) -> Self { - let genesis_block_number = genesis_block.header.number; - Self { - blocks: Mutex::new(BlocksInMemoryStore { - blocks: BTreeMap::from([(genesis_block_number, genesis_block)]), - last_contiguous_block_number: genesis_block_number, - }), - replica_state: Mutex::default(), - blocks_sender: watch::channel(genesis_block_number).0, - } - } -} - -#[async_trait] -impl BlockStore for InMemoryStorage { - async fn head_block(&self, _ctx: &ctx::Ctx) -> ctx::Result { - Ok(self.blocks.lock().await.head_block().clone()) - } - - async fn first_block(&self, _ctx: &ctx::Ctx) -> ctx::Result { - Ok(self.blocks.lock().await.first_block().clone()) - } - - async fn last_contiguous_block_number( - &self, - _ctx: &ctx::Ctx, - ) -> ctx::Result { - Ok(self.blocks.lock().await.last_contiguous_block_number) - } - - async fn block( - &self, - _ctx: &ctx::Ctx, - number: validator::BlockNumber, - ) -> ctx::Result> { - Ok(self.blocks.lock().await.block(number).cloned()) - } - - async fn missing_block_numbers( - &self, - _ctx: &ctx::Ctx, - range: ops::Range, - ) -> ctx::Result> { - Ok(self.blocks.lock().await.missing_block_numbers(range)) - } - - fn subscribe_to_block_writes(&self) -> watch::Receiver { - self.blocks_sender.subscribe() - } -} - -#[async_trait] -impl WriteBlockStore for InMemoryStorage { - /// Just verifies that the payload is for the successor of the current head. - async fn verify_payload( - &self, - ctx: &ctx::Ctx, - block_number: validator::BlockNumber, - _payload: &validator::Payload, - ) -> ctx::Result<()> { - let head_number = self.head_block(ctx).await?.header.number; - if head_number >= block_number { - return Err(anyhow::anyhow!( - "received proposal for block {block_number:?}, while head is at {head_number:?}" - ) - .into()); - } - Ok(()) - } - - async fn put_block(&self, _ctx: &ctx::Ctx, block: &validator::FinalBlock) -> ctx::Result<()> { - self.blocks.lock().await.put_block(block.clone()); - self.blocks_sender.send_replace(block.header.number); - Ok(()) - } -} - -#[async_trait] -impl ReplicaStateStore for InMemoryStorage { - async fn replica_state(&self, _ctx: &ctx::Ctx) -> ctx::Result> { - Ok(self.replica_state.lock().await.clone()) - } - - async fn put_replica_state( - &self, - _ctx: &ctx::Ctx, - replica_state: &ReplicaState, - ) -> ctx::Result<()> { - *self.replica_state.lock().await = Some(replica_state.clone()); - Ok(()) - } -} diff --git a/node/libs/storage/src/lib.rs b/node/libs/storage/src/lib.rs index e59adcc3..ee017752 100644 --- a/node/libs/storage/src/lib.rs +++ b/node/libs/storage/src/lib.rs @@ -1,22 +1,13 @@ -//! This module is responsible for persistent data storage, it provides schema-aware type-safe database access. Currently we use RocksDB, -//! but this crate only exposes an abstraction of a database, so we can easily switch to a different storage engine in the future. - -mod in_memory; +//! Abstraction for persistent data storage. +//! It provides schema-aware type-safe database access. 
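A usage sketch (not part of the patch), wiring the re-exported types together with the `testonly` helpers added further below; `demo` is a hypothetical function name, and error handling is collapsed into `unwrap()` the way the updated tests do:

use zksync_concurrency::{ctx, scope};
use zksync_consensus_storage::{testonly, BlockStore};

async fn demo(ctx: &ctx::Ctx) -> anyhow::Result<()> {
    // Seed an in-memory PersistentBlockStore with a random genesis block.
    let mut blocks = testonly::random_blocks(ctx);
    let persistent = testonly::in_memory::BlockStore::new(blocks.next().unwrap());
    // Wrap it in the queueing/caching BlockStore and obtain its background runner.
    let (store, runner) = BlockStore::new(ctx, Box::new(persistent)).await.unwrap();
    scope::run!(ctx, |ctx, s| async {
        s.spawn_bg(runner.run(ctx));
        // Queue a block, then wait until the runner has written it persistently.
        let block = blocks.next().unwrap();
        store.queue_block(ctx, block.clone()).await.unwrap();
        store
            .wait_until_persisted(ctx, block.header().number)
            .await
            .unwrap();
        anyhow::Ok(())
    })
    .await
}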
+mod block_store; pub mod proto; -mod replica_state; -#[cfg(feature = "rocksdb")] -mod rocksdb; -mod testonly; +mod replica_store; +pub mod testonly; #[cfg(test)] mod tests; -mod traits; -mod types; -#[cfg(feature = "rocksdb")] -pub use crate::rocksdb::RocksdbStorage; pub use crate::{ - in_memory::InMemoryStorage, - replica_state::ReplicaStore, - traits::{BlockStore, ReplicaStateStore, WriteBlockStore}, - types::{Proposal, ReplicaState}, + block_store::{BlockStore, BlockStoreRunner, BlockStoreState, PersistentBlockStore}, + replica_store::{Proposal, ReplicaState, ReplicaStore}, }; diff --git a/node/libs/storage/src/replica_state.rs b/node/libs/storage/src/replica_state.rs index 5daf2a82..634118dc 100644 --- a/node/libs/storage/src/replica_state.rs +++ b/node/libs/storage/src/replica_state.rs @@ -1,85 +1,3 @@ //! `FallbackReplicaStateStore` type. -use crate::{ - traits::{ReplicaStateStore, WriteBlockStore}, - types::ReplicaState, -}; -use std::sync::Arc; -use zksync_concurrency::ctx; -use zksync_consensus_roles::validator; -impl From for ReplicaState { - fn from(certificate: validator::CommitQC) -> Self { - Self { - view: certificate.message.view, - phase: validator::Phase::Prepare, - high_vote: certificate.message, - high_qc: certificate, - proposals: vec![], - } - } -} - -/// Storage combining [`ReplicaStateStore`] and [`WriteBlockStore`]. -#[derive(Debug, Clone)] -pub struct ReplicaStore { - state: Arc, - blocks: Arc, -} - -impl ReplicaStore { - /// Creates a store from a type implementing both replica state and block storage. - pub fn from_store(store: Arc) -> Self - where - S: ReplicaStateStore + WriteBlockStore + 'static, - { - Self { - state: store.clone(), - blocks: store, - } - } - - /// Creates a new replica state store with a fallback. - pub fn new(state: Arc, blocks: Arc) -> Self { - Self { state, blocks } - } - - /// Gets the replica state. If it's not present, falls back to recover it from the fallback block store. - pub async fn replica_state(&self, ctx: &ctx::Ctx) -> ctx::Result { - let replica_state = self.state.replica_state(ctx).await?; - if let Some(replica_state) = replica_state { - Ok(replica_state) - } else { - let head_block = self.blocks.head_block(ctx).await?; - Ok(ReplicaState::from(head_block.justification)) - } - } - - /// Stores the given replica state into the database. This just proxies to the base replica store. - pub async fn put_replica_state( - &self, - ctx: &ctx::Ctx, - replica_state: &ReplicaState, - ) -> ctx::Result<()> { - self.state.put_replica_state(ctx, replica_state).await - } - - /// Verify that `payload` is a correct proposal for the block `block_number`. - pub async fn verify_payload( - &self, - ctx: &ctx::Ctx, - block_number: validator::BlockNumber, - payload: &validator::Payload, - ) -> ctx::Result<()> { - self.blocks.verify_payload(ctx, block_number, payload).await - } - - /// Puts a block into this storage. - pub async fn put_block( - &self, - ctx: &ctx::Ctx, - block: &validator::FinalBlock, - ) -> ctx::Result<()> { - self.blocks.put_block(ctx, block).await - } -} diff --git a/node/libs/storage/src/replica_store.rs b/node/libs/storage/src/replica_store.rs new file mode 100644 index 00000000..6243cc72 --- /dev/null +++ b/node/libs/storage/src/replica_store.rs @@ -0,0 +1,106 @@ +//! Defines storage layer for persistent replica state. 
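A minimal sketch (not part of the patch) of how a caller can reproduce the fallback that the removed wrapper above provided, assuming `store: &dyn ReplicaStore`, a `ctx: &ctx::Ctx`, and the highest known certificate `high_qc: validator::CommitQC` are in scope:

// If no replica state was persisted yet, derive a fresh one from the certificate.
let state = match store.state(ctx).await? {
    Some(state) => state,
    None => ReplicaState::from(high_qc),
};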
+use crate::proto; +use anyhow::Context as _; +use std::fmt; +use zksync_concurrency::ctx; +use zksync_consensus_roles::validator; +use zksync_protobuf::{read_required, required, ProtoFmt}; + +/// Storage for [`ReplicaState`]. +/// +/// Implementations **must** propagate context cancellation using [`StorageError::Canceled`]. +#[async_trait::async_trait] +pub trait ReplicaStore: fmt::Debug + Send + Sync { + /// Gets the replica state, if it is contained in the database. + async fn state(&self, ctx: &ctx::Ctx) -> ctx::Result>; + + /// Stores the given replica state into the database. + async fn set_state(&self, ctx: &ctx::Ctx, state: &ReplicaState) -> ctx::Result<()>; +} + +/// A payload of a proposed block which is not known to be finalized yet. +/// Replicas have to persist such proposed payloads for liveness: +/// consensus may finalize a block without knowing a payload in case of reproposals. +/// Currently we do not store the BlockHeader, because it is always +/// available in the LeaderPrepare message. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Proposal { + /// Number of a block for which this payload has been proposed. + pub number: validator::BlockNumber, + /// Proposed payload. + pub payload: validator::Payload, +} + +/// The struct that contains the replica state to be persisted. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ReplicaState { + /// The current view number. + pub view: validator::ViewNumber, + /// The current phase. + pub phase: validator::Phase, + /// The highest block proposal that the replica has committed to. + pub high_vote: validator::ReplicaCommit, + /// The highest commit quorum certificate known to the replica. + pub high_qc: validator::CommitQC, + /// A cache of the received block proposals. + pub proposals: Vec, +} + +impl From for ReplicaState { + fn from(certificate: validator::CommitQC) -> Self { + Self { + view: certificate.message.view, + phase: validator::Phase::Prepare, + high_vote: certificate.message, + high_qc: certificate, + proposals: vec![], + } + } +} + +impl ProtoFmt for Proposal { + type Proto = proto::Proposal; + + fn read(r: &Self::Proto) -> anyhow::Result { + Ok(Self { + number: validator::BlockNumber(*required(&r.number).context("number")?), + payload: validator::Payload(required(&r.payload).context("payload")?.clone()), + }) + } + + fn build(&self) -> Self::Proto { + Self::Proto { + number: Some(self.number.0), + payload: Some(self.payload.0.clone()), + } + } +} + +impl ProtoFmt for ReplicaState { + type Proto = proto::ReplicaState; + + fn read(r: &Self::Proto) -> anyhow::Result { + Ok(Self { + view: validator::ViewNumber(r.view.context("view_number")?), + phase: read_required(&r.phase).context("phase")?, + high_vote: read_required(&r.high_vote).context("high_vote")?, + high_qc: read_required(&r.high_qc).context("high_qc")?, + proposals: r + .proposals + .iter() + .map(ProtoFmt::read) + .collect::>() + .context("proposals")?, + }) + } + + fn build(&self) -> Self::Proto { + Self::Proto { + view: Some(self.view.0), + phase: Some(self.phase.build()), + high_vote: Some(self.high_vote.build()), + high_qc: Some(self.high_qc.build()), + proposals: self.proposals.iter().map(|p| p.build()).collect(), + } + } +} diff --git a/node/libs/storage/src/rocksdb.rs b/node/libs/storage/src/rocksdb.rs deleted file mode 100644 index 6366e313..00000000 --- a/node/libs/storage/src/rocksdb.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! This module contains the methods to handle an append-only database of finalized blocks. 
Since we only store finalized blocks, this forms a -//! chain of blocks, not a tree (assuming we have all blocks and not have any gap). It allows for basic functionality like inserting a block, -//! getting a block, checking if a block is contained in the DB. We also store the head of the chain. Storing it explicitly allows us to fetch -//! the current head quickly. -use crate::{ - traits::{BlockStore, ReplicaStateStore, WriteBlockStore}, - types::{MissingBlockNumbers, ReplicaState}, -}; -use anyhow::Context as _; -use async_trait::async_trait; -use rocksdb::{Direction, IteratorMode, ReadOptions}; -use std::{ - fmt, ops, - path::Path, - sync::{ - atomic::{AtomicU64, Ordering}, - RwLock, - }, -}; -use zksync_concurrency::{ctx, scope, sync::watch}; -use zksync_consensus_roles::validator; - -/// Enum used to represent a key in the database. It also acts as a separator between different stores. -#[derive(Debug, Clone, PartialEq, Eq)] -enum DatabaseKey { - /// Key used to store the replica state. - /// ReplicaState -> ReplicaState - ReplicaState, - /// Key used to store the finalized blocks. - /// Block(validator::BlockNumber) -> validator::FinalBlock - Block(validator::BlockNumber), -} - -impl DatabaseKey { - /// Starting database key for blocks indexed by number. All other keys in the default column family - /// are lower than this value. - pub(crate) const BLOCKS_START_KEY: &'static [u8] = &u64::MIN.to_be_bytes(); - - /// Iterator mode for the head block (i.e., a block with the greatest number). - pub(crate) const BLOCK_HEAD_ITERATOR: IteratorMode<'static> = - IteratorMode::From(&u64::MAX.to_be_bytes(), Direction::Reverse); - - /// Encodes this key for usage as a RocksDB key. - /// - /// # Implementation note - /// - /// This logic is maintainable only while the amount of non-block keys remains small. - /// If more keys are added (especially if their number is not known statically), prefer using - /// separate column families for them. - pub(crate) fn encode_key(&self) -> Vec { - match self { - // Keys for non-block entries must be smaller than all block keys. - Self::ReplicaState => vec![0], - // Number encoding that monotonically increases with the number - Self::Block(number) => number.0.to_be_bytes().to_vec(), - } - } - - /// Parses the specified bytes as a `Self::Block(_)` key. - pub(crate) fn parse_block_key(raw_key: &[u8]) -> anyhow::Result { - let raw_key = raw_key - .try_into() - .context("Invalid encoding for block key")?; - Ok(validator::BlockNumber(u64::from_be_bytes(raw_key))) - } -} - -/// Main struct for the Storage module, it just contains the database. Provides a set of high-level -/// atomic operations on the database. It "contains" the following data: -/// -/// - An append-only database of finalized blocks. -/// - A backup of the consensus replica state. -pub struct RocksdbStorage { - /// Wrapped RocksDB instance. We don't need `RwLock` for synchronization *per se*, just to ensure - /// that writes to the DB are linearized. - inner: RwLock, - /// In-memory cache for the last contiguous block number stored in the DB. The cache is used - /// and updated by `Self::get_last_contiguous_block_number()`. Caching is based on the assumption - /// that blocks are never removed from the DB. - cached_last_contiguous_block_number: AtomicU64, - /// Sender of numbers of written blocks. - block_writes_sender: watch::Sender, -} - -impl RocksdbStorage { - /// Create a new Storage. It first tries to open an existing database, and if that fails it just creates a - /// a new one. 
We need the genesis block of the chain as input. - // TODO(bruno): we want to eventually start pruning old blocks, so having the genesis - // block might be unnecessary. - pub async fn new( - ctx: &ctx::Ctx, - genesis_block: &validator::FinalBlock, - path: &Path, - ) -> ctx::Result { - let mut options = rocksdb::Options::default(); - options.create_missing_column_families(true); - options.create_if_missing(true); - - let db = scope::wait_blocking(|| { - rocksdb::DB::open(&options, path).context("Failed opening RocksDB") - }) - .await?; - - let this = Self { - inner: RwLock::new(db), - cached_last_contiguous_block_number: AtomicU64::new(genesis_block.header.number.0), - block_writes_sender: watch::channel(genesis_block.header.number).0, - }; - if let Some(stored_genesis_block) = this.block(ctx, genesis_block.header.number).await? { - if stored_genesis_block.header != genesis_block.header { - let err = anyhow::anyhow!("Mismatch between stored and expected genesis block"); - return Err(err.into()); - } - } else { - tracing::debug!( - "Genesis block not present in RocksDB at `{path}`; saving {genesis_block:?}", - path = path.display() - ); - this.put_block(ctx, genesis_block).await?; - } - Ok(this) - } - - /// Acquires a read lock on the underlying DB. - fn read(&self) -> impl ops::Deref + '_ { - self.inner.read().expect("DB lock is poisoned") - } - - /// Acquires a write lock on the underlying DB. - fn write(&self) -> impl ops::Deref + '_ { - self.inner.write().expect("DB lock is poisoned") - } - - fn head_block_blocking(&self) -> anyhow::Result { - let db = self.read(); - - let mut options = ReadOptions::default(); - options.set_iterate_range(DatabaseKey::BLOCKS_START_KEY..); - let mut iter = db.iterator_opt(DatabaseKey::BLOCK_HEAD_ITERATOR, options); - let (_, head_block) = iter - .next() - .context("Head block not found")? - .context("RocksDB error reading head block")?; - zksync_protobuf::decode(&head_block).context("Failed decoding head block bytes") - } - - /// Returns a block with the least number stored in this database. - fn first_block_blocking(&self) -> anyhow::Result { - let db = self.read(); - - let mut options = ReadOptions::default(); - options.set_iterate_range(DatabaseKey::BLOCKS_START_KEY..); - let mut iter = db.iterator_opt(IteratorMode::Start, options); - let (_, first_block) = iter - .next() - .context("First stored block not found")? - .context("RocksDB error reading first stored block")?; - zksync_protobuf::decode(&first_block).context("Failed decoding first stored block bytes") - } - - fn last_contiguous_block_number_blocking(&self) -> anyhow::Result { - let last_contiguous_block_number = self - .cached_last_contiguous_block_number - .load(Ordering::Relaxed); - let last_contiguous_block_number = validator::BlockNumber(last_contiguous_block_number); - - let last_contiguous_block_number = - self.last_contiguous_block_number_impl(last_contiguous_block_number)?; - - // The cached value may have been updated by the other thread. Fortunately, we have a simple - // protection against such "edit conflicts": the greater cached value is always valid and - // should win. - self.cached_last_contiguous_block_number - .fetch_max(last_contiguous_block_number.0, Ordering::Relaxed); - Ok(last_contiguous_block_number) - } - - // Implementation that is not aware of caching specifics. The only requirement for the method correctness - // is for the `cached_last_contiguous_block_number` to be present in the database. 
- fn last_contiguous_block_number_impl( - &self, - cached_last_contiguous_block_number: validator::BlockNumber, - ) -> anyhow::Result { - let db = self.read(); - - let mut options = ReadOptions::default(); - let start_key = DatabaseKey::Block(cached_last_contiguous_block_number).encode_key(); - options.set_iterate_range(start_key..); - let iter = db.iterator_opt(IteratorMode::Start, options); - let iter = iter - .map(|bytes| { - let (key, _) = bytes.context("RocksDB error iterating over block numbers")?; - DatabaseKey::parse_block_key(&key) - }) - .fuse(); - - let mut prev_block_number = cached_last_contiguous_block_number; - for block_number in iter { - let block_number = block_number?; - if block_number > prev_block_number.next() { - return Ok(prev_block_number); - } - prev_block_number = block_number; - } - Ok(prev_block_number) - } - - /// Gets a block by its number. - fn block_blocking( - &self, - number: validator::BlockNumber, - ) -> anyhow::Result> { - let db = self.read(); - - let Some(raw_block) = db - .get(DatabaseKey::Block(number).encode_key()) - .with_context(|| format!("RocksDB error reading block #{number}"))? - else { - return Ok(None); - }; - let block = zksync_protobuf::decode(&raw_block) - .with_context(|| format!("Failed decoding block #{number}"))?; - Ok(Some(block)) - } - - /// Iterates over block numbers in the specified `range` that the DB *does not* have. - fn missing_block_numbers_blocking( - &self, - range: ops::Range, - ) -> anyhow::Result> { - let db = self.read(); - - let mut options = ReadOptions::default(); - let start_key = DatabaseKey::Block(range.start).encode_key(); - let end_key = DatabaseKey::Block(range.end).encode_key(); - options.set_iterate_range(start_key..end_key); - - let iter = db.iterator_opt(IteratorMode::Start, options); - let iter = iter - .map(|bytes| { - let (key, _) = bytes.context("RocksDB error iterating over block numbers")?; - DatabaseKey::parse_block_key(&key) - }) - .fuse(); - - MissingBlockNumbers::new(range, iter).collect() - } - - // ---------------- Write methods ---------------- - - /// Insert a new block into the database. - fn put_block_blocking(&self, finalized_block: &validator::FinalBlock) -> anyhow::Result<()> { - let db = self.write(); - let block_number = finalized_block.header.number; - tracing::debug!("Inserting new block #{block_number} into the database."); - - let mut write_batch = rocksdb::WriteBatch::default(); - write_batch.put( - DatabaseKey::Block(block_number).encode_key(), - zksync_protobuf::encode(finalized_block), - ); - // Commit the transaction. - db.write(write_batch) - .context("Failed writing block to database")?; - drop(db); - - self.block_writes_sender.send_replace(block_number); - Ok(()) - } - - fn replica_state_blocking(&self) -> anyhow::Result> { - let Some(raw_state) = self - .read() - .get(DatabaseKey::ReplicaState.encode_key()) - .context("Failed to get ReplicaState from RocksDB")? 
- else { - return Ok(None); - }; - zksync_protobuf::decode(&raw_state) - .map(Some) - .context("Failed to decode replica state!") - } - - fn put_replica_state_blocking(&self, replica_state: &ReplicaState) -> anyhow::Result<()> { - self.write() - .put( - DatabaseKey::ReplicaState.encode_key(), - zksync_protobuf::encode(replica_state), - ) - .context("Failed putting ReplicaState to RocksDB") - } -} - -impl fmt::Debug for RocksdbStorage { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter.write_str("RocksdbStorage") - } -} - -#[async_trait] -impl BlockStore for RocksdbStorage { - async fn head_block(&self, _ctx: &ctx::Ctx) -> ctx::Result { - Ok(scope::wait_blocking(|| self.head_block_blocking()).await?) - } - - async fn first_block(&self, _ctx: &ctx::Ctx) -> ctx::Result { - Ok(scope::wait_blocking(|| self.first_block_blocking()).await?) - } - - async fn last_contiguous_block_number( - &self, - _ctx: &ctx::Ctx, - ) -> ctx::Result { - Ok(scope::wait_blocking(|| self.last_contiguous_block_number_blocking()).await?) - } - - async fn block( - &self, - _ctx: &ctx::Ctx, - number: validator::BlockNumber, - ) -> ctx::Result> { - Ok(scope::wait_blocking(|| self.block_blocking(number)).await?) - } - - async fn missing_block_numbers( - &self, - _ctx: &ctx::Ctx, - range: ops::Range, - ) -> ctx::Result> { - Ok(scope::wait_blocking(|| self.missing_block_numbers_blocking(range)).await?) - } - - fn subscribe_to_block_writes(&self) -> watch::Receiver { - self.block_writes_sender.subscribe() - } -} - -#[async_trait] -impl WriteBlockStore for RocksdbStorage { - /// Just verifies that the payload is for the successor of the current head. - async fn verify_payload( - &self, - ctx: &ctx::Ctx, - block_number: validator::BlockNumber, - _payload: &validator::Payload, - ) -> ctx::Result<()> { - let head_number = self.head_block(ctx).await?.header.number; - if head_number >= block_number { - return Err(anyhow::anyhow!( - "received proposal for block {block_number:?}, while head is at {head_number:?}" - ) - .into()); - } - Ok(()) - } - - async fn put_block(&self, _ctx: &ctx::Ctx, block: &validator::FinalBlock) -> ctx::Result<()> { - Ok(scope::wait_blocking(|| self.put_block_blocking(block)).await?) - } -} - -#[async_trait] -impl ReplicaStateStore for RocksdbStorage { - async fn replica_state(&self, _ctx: &ctx::Ctx) -> ctx::Result> { - Ok(scope::wait_blocking(|| self.replica_state_blocking()).await?) - } - - async fn put_replica_state( - &self, - _ctx: &ctx::Ctx, - replica_state: &ReplicaState, - ) -> ctx::Result<()> { - Ok(scope::wait_blocking(|| self.put_replica_state_blocking(replica_state)).await?) - } -} diff --git a/node/libs/storage/src/testonly.rs b/node/libs/storage/src/testonly.rs deleted file mode 100644 index cd079d81..00000000 --- a/node/libs/storage/src/testonly.rs +++ /dev/null @@ -1,25 +0,0 @@ -//! Test-only utilities. 
- -use crate::types::{Proposal, ReplicaState}; -use rand::{distributions::Standard, prelude::Distribution, Rng}; - -impl Distribution for Standard { - fn sample(&self, rng: &mut R) -> Proposal { - Proposal { - number: rng.gen(), - payload: rng.gen(), - } - } -} - -impl Distribution for Standard { - fn sample(&self, rng: &mut R) -> ReplicaState { - ReplicaState { - view: rng.gen(), - phase: rng.gen(), - high_vote: rng.gen(), - high_qc: rng.gen(), - proposals: (0..rng.gen_range(1..11)).map(|_| rng.gen()).collect(), - } - } -} diff --git a/node/libs/storage/src/testonly/in_memory.rs b/node/libs/storage/src/testonly/in_memory.rs new file mode 100644 index 00000000..d34f09cd --- /dev/null +++ b/node/libs/storage/src/testonly/in_memory.rs @@ -0,0 +1,78 @@ +//! In-memory storage implementation. +use crate::{BlockStoreState, PersistentBlockStore, ReplicaState}; +use anyhow::Context as _; +use std::{collections::VecDeque, sync::Mutex}; +use zksync_concurrency::ctx; +use zksync_consensus_roles::validator; + +/// In-memory block store. +#[derive(Debug, Default)] +pub struct BlockStore(Mutex>); + +/// In-memory replica store. +#[derive(Debug, Default)] +pub struct ReplicaStore(Mutex>); + +impl BlockStore { + /// Creates a new store containing only the specified `genesis_block`. + pub fn new(genesis: validator::FinalBlock) -> Self { + Self(Mutex::new([genesis].into())) + } +} + +#[async_trait::async_trait] +impl PersistentBlockStore for BlockStore { + async fn state(&self, _ctx: &ctx::Ctx) -> ctx::Result { + let blocks = self.0.lock().unwrap(); + if blocks.is_empty() { + return Err(anyhow::anyhow!("store is empty").into()); + } + Ok(BlockStoreState { + first: blocks.front().unwrap().justification.clone(), + last: blocks.back().unwrap().justification.clone(), + }) + } + + async fn block( + &self, + _ctx: &ctx::Ctx, + number: validator::BlockNumber, + ) -> ctx::Result { + let blocks = self.0.lock().unwrap(); + let front = blocks.front().context("not found")?; + let idx = number + .0 + .checked_sub(front.header().number.0) + .context("not found")?; + Ok(blocks.get(idx as usize).context("not found")?.clone()) + } + + async fn store_next_block( + &self, + _ctx: &ctx::Ctx, + block: &validator::FinalBlock, + ) -> ctx::Result<()> { + let mut blocks = self.0.lock().unwrap(); + let got = block.header().number; + if let Some(last) = blocks.back() { + let want = last.header().number.next(); + if got != want { + return Err(anyhow::anyhow!("got block {got:?}, while expected {want:?}").into()); + } + } + blocks.push_back(block.clone()); + Ok(()) + } +} + +#[async_trait::async_trait] +impl crate::ReplicaStore for ReplicaStore { + async fn state(&self, _ctx: &ctx::Ctx) -> ctx::Result> { + Ok(self.0.lock().unwrap().clone()) + } + + async fn set_state(&self, _ctx: &ctx::Ctx, state: &ReplicaState) -> ctx::Result<()> { + *self.0.lock().unwrap() = Some(state.clone()); + Ok(()) + } +} diff --git a/node/libs/storage/src/testonly/mod.rs b/node/libs/storage/src/testonly/mod.rs new file mode 100644 index 00000000..a4b1481d --- /dev/null +++ b/node/libs/storage/src/testonly/mod.rs @@ -0,0 +1,58 @@ +//! Test-only utilities. 
+use crate::{PersistentBlockStore, Proposal, ReplicaState}; +use rand::{distributions::Standard, prelude::Distribution, Rng}; +use zksync_concurrency::ctx; +use zksync_consensus_roles::validator; + +pub mod in_memory; + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Proposal { + Proposal { + number: rng.gen(), + payload: rng.gen(), + } + } +} + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> ReplicaState { + ReplicaState { + view: rng.gen(), + phase: rng.gen(), + high_vote: rng.gen(), + high_qc: rng.gen(), + proposals: (0..rng.gen_range(1..11)).map(|_| rng.gen()).collect(), + } + } +} + +/// Dumps all the blocks stored in `store`. +pub async fn dump(ctx: &ctx::Ctx, store: &dyn PersistentBlockStore) -> Vec { + let range = store.state(ctx).await.unwrap(); + let mut blocks = vec![]; + for n in range.first.header().number.0..range.next().0 { + let n = validator::BlockNumber(n); + let block = store.block(ctx, n).await.unwrap(); + assert_eq!(block.header().number, n); + blocks.push(block); + } + assert!(store.block(ctx, range.next()).await.is_err()); + blocks +} + +/// A generator of consecutive blocks with random payload, starting with a genesis blocks. +pub fn random_blocks(ctx: &ctx::Ctx) -> impl Iterator { + let mut rng = ctx.rng(); + let v = validator::ProtocolVersion::EARLIEST; + std::iter::successors( + Some(validator::testonly::make_genesis_block(&mut rng, v)), + move |parent| { + Some(validator::testonly::make_block( + &mut rng, + parent.header(), + v, + )) + }, + ) +} diff --git a/node/libs/storage/src/tests.rs b/node/libs/storage/src/tests.rs new file mode 100644 index 00000000..fa94acdb --- /dev/null +++ b/node/libs/storage/src/tests.rs @@ -0,0 +1,22 @@ +use super::*; +use crate::ReplicaState; +use zksync_concurrency::ctx; + +#[tokio::test] +async fn test_inmemory_block_store() { + let ctx = &ctx::test_root(&ctx::RealClock); + let store = &testonly::in_memory::BlockStore::default(); + let mut want = vec![]; + for block in testonly::random_blocks(ctx).take(5) { + store.store_next_block(ctx, &block).await.unwrap(); + want.push(block); + assert_eq!(want, testonly::dump(ctx, store).await); + } +} + +#[test] +fn test_schema_encode_decode() { + let ctx = ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + zksync_protobuf::testonly::test_encode_random::<_, ReplicaState>(rng); +} diff --git a/node/libs/storage/src/tests/mod.rs b/node/libs/storage/src/tests/mod.rs deleted file mode 100644 index ffcb4744..00000000 --- a/node/libs/storage/src/tests/mod.rs +++ /dev/null @@ -1,151 +0,0 @@ -use super::*; -use crate::types::ReplicaState; -use async_trait::async_trait; -use rand::{seq::SliceRandom, Rng}; -use std::iter; -use test_casing::test_casing; -use zksync_concurrency::ctx; -use zksync_consensus_roles::validator::{ - testonly::make_block, BlockHeader, BlockNumber, FinalBlock, Payload, ProtocolVersion, -}; - -#[cfg(feature = "rocksdb")] -mod rocksdb; - -#[async_trait] -trait InitStore { - type Store: WriteBlockStore + ReplicaStateStore; - - async fn init_store(&self, ctx: &ctx::Ctx, genesis_block: &FinalBlock) -> Self::Store; -} - -#[async_trait] -impl InitStore for () { - type Store = InMemoryStorage; - - async fn init_store(&self, _ctx: &ctx::Ctx, genesis_block: &FinalBlock) -> Self::Store { - InMemoryStorage::new(genesis_block.clone()) - } -} - -fn genesis_block(rng: &mut impl Rng) -> FinalBlock { - let payload = Payload(vec![]); - FinalBlock { - header: BlockHeader::genesis(payload.hash(), BlockNumber(0)), - payload, - justification: 
rng.gen(), - } -} - -fn gen_blocks(rng: &mut impl Rng, genesis_block: FinalBlock, count: usize) -> Vec { - let blocks = iter::successors(Some(genesis_block), |parent| { - Some(make_block(rng, &parent.header, ProtocolVersion::EARLIEST)) - }); - blocks.skip(1).take(count).collect() -} - -async fn test_put_block(store_factory: &impl InitStore) { - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let genesis_block = genesis_block(rng); - let block_store = store_factory.init_store(ctx, &genesis_block).await; - - assert_eq!(block_store.first_block(ctx).await.unwrap(), genesis_block); - assert_eq!(block_store.head_block(ctx).await.unwrap(), genesis_block); - - let mut block_subscriber = block_store.subscribe_to_block_writes(); - assert_eq!(*block_subscriber.borrow_and_update(), BlockNumber(0)); - - // Test inserting a block with a valid parent. - let block_1 = make_block(rng, &genesis_block.header, ProtocolVersion::EARLIEST); - block_store.put_block(ctx, &block_1).await.unwrap(); - - assert_eq!(block_store.first_block(ctx).await.unwrap(), genesis_block); - assert_eq!(block_store.head_block(ctx).await.unwrap(), block_1); - assert_eq!(*block_subscriber.borrow_and_update(), block_1.header.number); - - // Test inserting a block with a valid parent that is not the genesis. - let block_2 = make_block(rng, &block_1.header, ProtocolVersion::EARLIEST); - block_store.put_block(ctx, &block_2).await.unwrap(); - - assert_eq!(block_store.first_block(ctx).await.unwrap(), genesis_block); - assert_eq!(block_store.head_block(ctx).await.unwrap(), block_2); - assert_eq!(*block_subscriber.borrow_and_update(), block_2.header.number); -} - -#[tokio::test] -async fn putting_block_for_in_memory_store() { - test_put_block(&()).await; -} - -async fn test_get_missing_block_numbers(store_factory: &impl InitStore, skip_count: usize) { - assert!(skip_count < 100); - - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let mut genesis_block = genesis_block(rng); - let mut blocks = gen_blocks(rng, genesis_block.clone(), 100); - if skip_count > 0 { - genesis_block = blocks[skip_count - 1].clone(); - blocks = blocks[skip_count..].to_vec(); - } - let block_range = BlockNumber(skip_count as u64)..BlockNumber(101); - - let block_store = store_factory.init_store(ctx, &genesis_block).await; - blocks.shuffle(rng); - - assert!(block_store - .missing_block_numbers(ctx, block_range.clone()) - .await - .unwrap() - .into_iter() - .map(|number| number.0) - .eq(skip_count as u64 + 1..101)); - - for (i, block) in blocks.iter().enumerate() { - block_store.put_block(ctx, block).await.unwrap(); - let missing_block_numbers = block_store - .missing_block_numbers(ctx, block_range.clone()) - .await - .unwrap(); - let last_contiguous_block_number = - block_store.last_contiguous_block_number(ctx).await.unwrap(); - - let mut expected_block_numbers: Vec<_> = - blocks[(i + 1)..].iter().map(|b| b.header.number).collect(); - expected_block_numbers.sort_unstable(); - - assert_eq!(missing_block_numbers, expected_block_numbers); - if let Some(&first_missing_block_number) = expected_block_numbers.first() { - assert_eq!( - last_contiguous_block_number.next(), - first_missing_block_number - ); - } else { - assert_eq!(last_contiguous_block_number, BlockNumber(100)); - } - } -} - -#[tokio::test] -async fn getting_missing_block_numbers_for_in_memory_store() { - test_get_missing_block_numbers(&(), 0).await; -} - -#[test_casing(4, [1, 10, 23, 42])] -#[tokio::test] -async fn 
getting_missing_block_numbers_for_snapshot(skip_count: usize) { - test_get_missing_block_numbers(&(), skip_count).await; -} - -#[test] -fn test_schema_encode_decode() { - let ctx = ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - - let replica = rng.gen::(); - assert_eq!( - replica, - zksync_protobuf::decode(&zksync_protobuf::encode(&replica)).unwrap() - ); -} diff --git a/node/libs/storage/src/tests/rocksdb.rs b/node/libs/storage/src/tests/rocksdb.rs deleted file mode 100644 index 123bc750..00000000 --- a/node/libs/storage/src/tests/rocksdb.rs +++ /dev/null @@ -1,49 +0,0 @@ -use super::*; -use tempfile::TempDir; - -#[async_trait] -impl InitStore for TempDir { - type Store = RocksdbStorage; - - async fn init_store(&self, ctx: &ctx::Ctx, genesis_block: &FinalBlock) -> Self::Store { - RocksdbStorage::new(ctx, genesis_block, self.path()) - .await - .expect("Failed initializing RocksDB") - } -} - -#[tokio::test] -async fn initializing_store_twice() { - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let genesis_block = genesis_block(rng); - let temp_dir = TempDir::new().unwrap(); - let block_store = temp_dir.init_store(ctx, &genesis_block).await; - let block_1 = make_block(rng, &genesis_block.header, ProtocolVersion::EARLIEST); - block_store.put_block(ctx, &block_1).await.unwrap(); - - assert_eq!(block_store.first_block(ctx).await.unwrap(), genesis_block); - assert_eq!(block_store.head_block(ctx).await.unwrap(), block_1); - - drop(block_store); - let block_store = temp_dir.init_store(ctx, &genesis_block).await; - - assert_eq!(block_store.first_block(ctx).await.unwrap(), genesis_block); - assert_eq!(block_store.head_block(ctx).await.unwrap(), block_1); -} - -#[tokio::test] -async fn putting_block_for_rocksdb_store() { - test_put_block(&TempDir::new().unwrap()).await; -} - -#[tokio::test] -async fn getting_missing_block_numbers_for_rocksdb_store() { - test_get_missing_block_numbers(&TempDir::new().unwrap(), 0).await; -} - -#[test_casing(4, [1, 10, 23, 42])] -#[tokio::test] -async fn getting_missing_block_numbers_for_rocksdb_snapshot(skip_count: usize) { - test_get_missing_block_numbers(&TempDir::new().unwrap(), skip_count).await; -} diff --git a/node/libs/storage/src/traits.rs b/node/libs/storage/src/traits.rs deleted file mode 100644 index 83e069f6..00000000 --- a/node/libs/storage/src/traits.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Traits for storage. -use crate::types::ReplicaState; -use async_trait::async_trait; -use std::{fmt, ops}; -use zksync_concurrency::{ctx, sync::watch}; -use zksync_consensus_roles::validator::{BlockNumber, FinalBlock, Payload}; - -/// Storage of L2 blocks. -/// -/// Implementations **must** propagate context cancellation using [`StorageError::Canceled`]. -#[async_trait] -pub trait BlockStore: fmt::Debug + Send + Sync { - /// Gets the head block. - async fn head_block(&self, ctx: &ctx::Ctx) -> ctx::Result; - - /// Returns a block with the least number stored in this database. - async fn first_block(&self, ctx: &ctx::Ctx) -> ctx::Result; - - /// Returns the number of the last block in the first contiguous range of blocks stored in this DB. - /// If there are no missing blocks, this is equal to the number of [`Self::get_head_block()`], - /// if there *are* missing blocks, the returned number will be lower. - /// - /// The returned number cannot underflow the [first block](Self::first_block()) stored in the DB; - /// all blocks preceding the first block are ignored when computing this number. 
For example, - /// if the storage contains blocks #5, 6 and 9, this method will return 6. - async fn last_contiguous_block_number(&self, ctx: &ctx::Ctx) -> ctx::Result; - - /// Gets a block by its number. - async fn block(&self, ctx: &ctx::Ctx, number: BlockNumber) -> ctx::Result>; - - /// Iterates over block numbers in the specified `range` that the DB *does not* have. - // TODO(slowli): We might want to limit the length of the vec returned - async fn missing_block_numbers( - &self, - ctx: &ctx::Ctx, - range: ops::Range, - ) -> ctx::Result>; - - /// Subscribes to block write operations performed using this `Storage`. Note that since - /// updates are passed using a `watch` channel, only the latest written [`BlockNumber`] - /// will be available; intermediate updates may be dropped. - /// - /// If no blocks were written during the `Storage` lifetime, the channel contains the number - /// of the genesis block. - fn subscribe_to_block_writes(&self) -> watch::Receiver; -} - -/// Mutable storage of L2 blocks. -/// -/// Implementations **must** propagate context cancellation using [`ctx::Error::Canceled`]. -#[async_trait] -pub trait WriteBlockStore: BlockStore { - /// Verify that `payload` is a correct proposal for the block `block_number`. - async fn verify_payload( - &self, - ctx: &ctx::Ctx, - block_number: BlockNumber, - _payload: &Payload, - ) -> ctx::Result<()>; - - /// Puts a block into this storage. - async fn put_block(&self, ctx: &ctx::Ctx, block: &FinalBlock) -> ctx::Result<()>; -} - -/// Storage for [`ReplicaState`]. -/// -/// Implementations **must** propagate context cancellation using [`StorageError::Canceled`]. -#[async_trait] -pub trait ReplicaStateStore: fmt::Debug + Send + Sync { - /// Gets the replica state, if it is contained in the database. - async fn replica_state(&self, ctx: &ctx::Ctx) -> ctx::Result>; - - /// Stores the given replica state into the database. - async fn put_replica_state( - &self, - ctx: &ctx::Ctx, - replica_state: &ReplicaState, - ) -> ctx::Result<()>; -} diff --git a/node/libs/storage/src/types.rs b/node/libs/storage/src/types.rs index 173478c0..8b137891 100644 --- a/node/libs/storage/src/types.rs +++ b/node/libs/storage/src/types.rs @@ -1,132 +1 @@ -//! Defines the schema of the database. -use crate::proto; -use anyhow::Context as _; -use std::{iter, ops}; -use zksync_consensus_roles::validator::{self, BlockNumber}; -use zksync_protobuf::{read_required, required, ProtoFmt}; -/// A payload of a proposed block which is not known to be finalized yet. -/// Replicas have to persist such proposed payloads for liveness: -/// consensus may finalize a block without knowing a payload in case of reproposals. -/// Currently we do not store the BlockHeader, because it is always -/// available in the LeaderPrepare message. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Proposal { - /// Number of a block for which this payload has been proposed. - pub number: BlockNumber, - /// Proposed payload. - pub payload: validator::Payload, -} - -/// The struct that contains the replica state to be persisted. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ReplicaState { - /// The current view number. - pub view: validator::ViewNumber, - /// The current phase. - pub phase: validator::Phase, - /// The highest block proposal that the replica has committed to. - pub high_vote: validator::ReplicaCommit, - /// The highest commit quorum certificate known to the replica. - pub high_qc: validator::CommitQC, - /// A cache of the received block proposals. 
- pub proposals: Vec, -} - -impl ProtoFmt for Proposal { - type Proto = proto::Proposal; - - fn read(r: &Self::Proto) -> anyhow::Result { - Ok(Self { - number: BlockNumber(*required(&r.number).context("number")?), - payload: validator::Payload(required(&r.payload).context("payload")?.clone()), - }) - } - - fn build(&self) -> Self::Proto { - Self::Proto { - number: Some(self.number.0), - payload: Some(self.payload.0.clone()), - } - } -} - -impl ProtoFmt for ReplicaState { - type Proto = proto::ReplicaState; - - fn read(r: &Self::Proto) -> anyhow::Result { - Ok(Self { - view: validator::ViewNumber(r.view.context("view_number")?), - phase: read_required(&r.phase).context("phase")?, - high_vote: read_required(&r.high_vote).context("high_vote")?, - high_qc: read_required(&r.high_qc).context("high_qc")?, - proposals: r - .proposals - .iter() - .map(ProtoFmt::read) - .collect::>() - .context("proposals")?, - }) - } - - fn build(&self) -> Self::Proto { - Self::Proto { - view: Some(self.view.0), - phase: Some(self.phase.build()), - high_vote: Some(self.high_vote.build()), - high_qc: Some(self.high_qc.build()), - proposals: self.proposals.iter().map(|p| p.build()).collect(), - } - } -} - -/// Iterator over missing block numbers. -pub(crate) struct MissingBlockNumbers { - range: ops::Range, - existing_numbers: iter::Peekable, -} - -impl MissingBlockNumbers -where - I: Iterator>, -{ - /// Creates a new iterator based on the provided params. - pub(crate) fn new(range: ops::Range, existing_numbers: I) -> Self { - Self { - range, - existing_numbers: existing_numbers.peekable(), - } - } -} - -impl Iterator for MissingBlockNumbers -where - I: Iterator>, -{ - type Item = anyhow::Result; - - fn next(&mut self) -> Option { - // Loop while existing numbers match the starting numbers from the range. The check - // that the range is non-empty is redundant given how `existing_numbers` are constructed - // (they are guaranteed to be lesser than the upper range bound); we add it just to be safe. - while !self.range.is_empty() - && matches!(self.existing_numbers.peek(), Some(&Ok(num)) if num == self.range.start) - { - self.range.start = self.range.start.next(); - self.existing_numbers.next(); // Advance to the next number - } - - if matches!(self.existing_numbers.peek(), Some(&Err(_))) { - let err = self.existing_numbers.next().unwrap().unwrap_err(); - // ^ Both unwraps are safe due to the check above. - return Some(Err(err)); - } - - if self.range.is_empty() { - return None; - } - let next_number = self.range.start; - self.range.start = self.range.start.next(); - Some(Ok(next_number)) - } -} diff --git a/node/libs/utils/src/no_copy.rs b/node/libs/utils/src/no_copy.rs index db3ff6f9..ebef1b33 100644 --- a/node/libs/utils/src/no_copy.rs +++ b/node/libs/utils/src/no_copy.rs @@ -3,7 +3,7 @@ use std::ops; /// No-copy wrapper allowing to carry a `Copy` type into a closure or an `async` block. 
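A minimal usage sketch, not part of this diff: `NoCopy::from` and the `zksync_consensus_utils::no_copy::NoCopy` path are taken from `main.rs` further below, everything else is illustrative.

use zksync_consensus_utils::no_copy::NoCopy;

fn spawn_with_addr(addr: std::net::SocketAddr) {
    // Wrap the Copy value; the wrapper itself is not Copy.
    let addr = NoCopy::from(addr);
    tokio::spawn(async {
        // Rebinding moves the wrapper into the future, so the outer `addr`
        // cannot be reused by accident after the task has been spawned.
        let addr = addr;
        let _ = addr;
    });
}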
-#[derive(Debug)] +#[derive(Clone, Debug)] pub struct NoCopy(T); impl NoCopy { diff --git a/node/tools/Cargo.toml b/node/tools/Cargo.toml index f589811b..c2a67f0a 100644 --- a/node/tools/Cargo.toml +++ b/node/tools/Cargo.toml @@ -13,19 +13,28 @@ zksync_consensus_bft.workspace = true zksync_consensus_crypto.workspace = true zksync_consensus_executor.workspace = true zksync_consensus_roles.workspace = true -zksync_consensus_storage = { workspace = true, features = ["rocksdb"] } +zksync_consensus_storage.workspace = true zksync_consensus_utils.workspace = true zksync_protobuf.workspace = true anyhow.workspace = true +async-trait.workspace = true clap.workspace = true +prost.workspace = true rand.workspace = true +rocksdb.workspace = true serde_json.workspace = true tokio.workspace = true tracing.workspace = true tracing-subscriber.workspace = true vise-exporter.workspace = true +[dev-dependencies] +tempfile.workspace = true + +[build-dependencies] +zksync_protobuf_build.workspace = true + [lints] workspace = true diff --git a/node/actors/executor/build.rs b/node/tools/build.rs similarity index 63% rename from node/actors/executor/build.rs rename to node/tools/build.rs index 816fd2e8..e4bba2bd 100644 --- a/node/actors/executor/build.rs +++ b/node/tools/build.rs @@ -1,12 +1,12 @@ //! Generates rust code from protobufs. fn main() { zksync_protobuf_build::Config { - input_root: "src/config/proto".into(), - proto_root: "zksync/executor/config".into(), + input_root: "src/proto".into(), + proto_root: "zksync/tools".into(), dependencies: vec![], protobuf_crate: "::zksync_protobuf".parse().unwrap(), is_public: false, } .generate() - .expect("generate(config)"); + .unwrap(); } diff --git a/node/tools/src/bin/localnet_config.rs b/node/tools/src/bin/localnet_config.rs index 81b80c86..839e727d 100644 --- a/node/tools/src/bin/localnet_config.rs +++ b/node/tools/src/bin/localnet_config.rs @@ -5,9 +5,8 @@ use rand::Rng; use std::{fs, net::SocketAddr, path::PathBuf}; use zksync_consensus_bft::testonly; use zksync_consensus_crypto::TextFmt; -use zksync_consensus_executor::{ConsensusConfig, ExecutorConfig, GossipConfig}; use zksync_consensus_roles::{node, validator}; -use zksync_consensus_tools::NodeConfig; +use zksync_consensus_tools::AppConfig; /// Encodes a generated proto message to json for arbitrary ProtoFmt. 
fn encode_json(x: &T) -> String { @@ -76,13 +75,18 @@ fn main() -> anyhow::Result<()> { let nodes = addrs.len(); let peers = 2; - let mut gossip_cfgs: Vec<_> = node_keys - .iter() - .map(|k| GossipConfig { - key: k.public(), - dynamic_inbound_limit: 0, - static_inbound: [].into(), - static_outbound: [].into(), + let mut cfgs: Vec<_> = (0..nodes) + .map(|i| AppConfig { + server_addr: with_unspecified_ip(addrs[i]), + public_addr: addrs[i], + metrics_server_addr, + + validators: validator_set.clone(), + genesis_block: genesis.clone(), + + gossip_dynamic_inbound_limit: 0, + gossip_static_inbound: [].into(), + gossip_static_outbound: [].into(), }) .collect(); @@ -90,37 +94,22 @@ fn main() -> anyhow::Result<()> { for i in 0..nodes { for j in 0..peers { let next = (i * peers + j + 1) % nodes; - gossip_cfgs[i] - .static_outbound + cfgs[i] + .gossip_static_outbound .insert(node_keys[next].public(), addrs[next]); - gossip_cfgs[next] - .static_inbound + cfgs[next] + .gossip_static_inbound .insert(node_keys[i].public()); } } - for (i, gossip) in gossip_cfgs.into_iter().enumerate() { - let executor_cfg = ExecutorConfig { - gossip, - server_addr: with_unspecified_ip(addrs[i]), - genesis_block: genesis.clone(), - validators: validator_set.clone(), - }; - let node_cfg = NodeConfig { - executor: executor_cfg, - metrics_server_addr, - consensus: Some(ConsensusConfig { - key: validator_keys[i].public(), - public_addr: addrs[i], - }), - }; - + for (i, cfg) in cfgs.into_iter().enumerate() { // Recreate the directory for the node's config. let root = args.output_dir.join(addrs[i].to_string()); let _ = fs::remove_dir_all(&root); fs::create_dir_all(&root).with_context(|| format!("create_dir_all({:?})", root))?; - fs::write(root.join("config.json"), encode_json(&node_cfg)).context("fs::write()")?; + fs::write(root.join("config.json"), encode_json(&cfg)).context("fs::write()")?; fs::write( root.join("validator_key"), &TextFmt::encode(&validator_keys[i]), diff --git a/node/tools/src/config.rs b/node/tools/src/config.rs index 14d922bc..d281e69e 100644 --- a/node/tools/src/config.rs +++ b/node/tools/src/config.rs @@ -1,10 +1,18 @@ //! Node configuration. +use crate::{proto, store}; use anyhow::Context as _; -use std::{fs, net, path::Path}; -use zksync_consensus_crypto::{read_optional_text, Text, TextFmt}; -use zksync_consensus_executor::{proto, ConsensusConfig, ExecutorConfig}; +use std::{ + collections::{HashMap, HashSet}, + fs, + path::{Path, PathBuf}, +}; +use zksync_concurrency::ctx; +use zksync_consensus_bft as bft; +use zksync_consensus_crypto::{read_optional_text, read_required_text, Text, TextFmt}; +use zksync_consensus_executor as executor; use zksync_consensus_roles::{node, validator}; -use zksync_protobuf::{read_optional, read_required, ProtoFmt}; +use zksync_consensus_storage::{BlockStore, BlockStoreRunner, PersistentBlockStore}; +use zksync_protobuf::{required, ProtoFmt}; /// Decodes a proto message from json for arbitrary ProtoFmt. fn decode_json(json: &str) -> anyhow::Result { @@ -14,119 +22,180 @@ fn decode_json(json: &str) -> anyhow::Result { Ok(p) } -/// This struct holds the file path to each of the config files. -#[derive(Debug)] -pub struct ConfigPaths<'a> { - /// Path to a JSON file with node configuration. - pub config: &'a Path, - /// Path to a validator key file. - pub validator_key: Option<&'a Path>, - /// Path to a node key file. 
- pub node_key: &'a Path, -} - /// Node configuration including executor configuration, optional validator configuration, /// and application-specific settings (e.g. metrics scraping). -#[derive(Debug)] -pub struct NodeConfig { - /// Executor configuration. - pub executor: ExecutorConfig, - /// IP:port to serve metrics data for scraping. - pub metrics_server_addr: Option, - /// Consensus network config. - pub consensus: Option, +#[derive(Debug, PartialEq)] +pub struct AppConfig { + pub server_addr: std::net::SocketAddr, + pub public_addr: std::net::SocketAddr, + pub metrics_server_addr: Option, + + pub validators: validator::ValidatorSet, + pub genesis_block: validator::FinalBlock, + + pub gossip_dynamic_inbound_limit: u64, + pub gossip_static_inbound: HashSet, + pub gossip_static_outbound: HashMap, } -impl ProtoFmt for NodeConfig { - type Proto = proto::NodeConfig; +impl ProtoFmt for AppConfig { + type Proto = proto::AppConfig; fn read(r: &Self::Proto) -> anyhow::Result { + let validators = r.validators.iter().enumerate().map(|(i, v)| { + Text::new(v) + .decode() + .with_context(|| format!("validators[{i}]")) + }); + let validators: anyhow::Result> = validators.collect(); + let validators = validator::ValidatorSet::new(validators?).context("validators")?; + + let mut gossip_static_inbound = HashSet::new(); + for (i, v) in r.gossip_static_inbound.iter().enumerate() { + gossip_static_inbound.insert( + Text::new(v) + .decode() + .with_context(|| format!("gossip_static_inbound[{i}]"))?, + ); + } + + let mut gossip_static_outbound = HashMap::new(); + for (i, e) in r.gossip_static_outbound.iter().enumerate() { + let key = read_required_text(&e.key) + .with_context(|| format!("gossip_static_outbound[{i}].key"))?; + let addr = read_required_text(&e.addr) + .with_context(|| format!("gossip_static_outbound[{i}].addr"))?; + gossip_static_outbound.insert(key, addr); + } Ok(Self { - executor: read_required(&r.executor).context("executor")?, + server_addr: read_required_text(&r.server_addr).context("server_addr")?, + public_addr: read_required_text(&r.public_addr).context("public_addr")?, metrics_server_addr: read_optional_text(&r.metrics_server_addr) .context("metrics_server_addr")?, - consensus: read_optional(&r.consensus).context("consensus")?, + + validators, + genesis_block: read_required_text(&r.genesis_block).context("genesis_block")?, + + gossip_dynamic_inbound_limit: *required(&r.gossip_dynamic_inbound_limit) + .context("gossip_dynamic_inbound_limit")?, + gossip_static_inbound, + gossip_static_outbound, }) } fn build(&self) -> Self::Proto { Self::Proto { - executor: Some(self.executor.build()), + server_addr: Some(self.server_addr.encode()), + public_addr: Some(self.public_addr.encode()), metrics_server_addr: self.metrics_server_addr.as_ref().map(TextFmt::encode), - consensus: self.consensus.as_ref().map(ProtoFmt::build), + + validators: self.validators.iter().map(TextFmt::encode).collect(), + genesis_block: Some(self.genesis_block.encode()), + + gossip_dynamic_inbound_limit: Some(self.gossip_dynamic_inbound_limit), + gossip_static_inbound: self + .gossip_static_inbound + .iter() + .map(TextFmt::encode) + .collect(), + gossip_static_outbound: self + .gossip_static_outbound + .iter() + .map(|(key, addr)| proto::NodeAddr { + key: Some(TextFmt::encode(key)), + addr: Some(TextFmt::encode(addr)), + }) + .collect(), } } } -/// Main struct that holds the config options for the node. +/// This struct holds the file path to each of the config files. 
#[derive(Debug)] +pub struct ConfigPaths<'a> { + /// Path to a JSON file with node configuration. + pub app: &'a Path, + /// Path to a validator key file. + pub validator_key: Option<&'a Path>, + /// Path to a node key file. + pub node_key: &'a Path, + /// Path to the rocksdb database. + pub database: &'a Path, +} + pub struct Configs { - /// Executor configuration of the node. - pub executor: ExecutorConfig, - /// IP:port to serve metrics data for scraping. - pub metrics_server_addr: Option, - /// Consensus-specific config extensions. Only set for validators. - pub consensus: Option<(ConsensusConfig, validator::SecretKey)>, - /// The validator secret key for this node. - /// The node secret key. This key is used by both full nodes and validators to identify themselves - /// in the P2P network. + pub app: AppConfig, + pub validator_key: Option, pub node_key: node::SecretKey, + pub database: PathBuf, } -impl Configs { - /// Method to fetch the node config. - #[tracing::instrument(level = "trace", ret)] - pub fn read(args: ConfigPaths<'_>) -> anyhow::Result { - let node_config = fs::read_to_string(args.config).with_context(|| { - format!( - "failed reading node config from `{}`", - args.config.display() - ) - })?; - let node_config: NodeConfig = decode_json(&node_config).with_context(|| { - format!( - "failed decoding JSON node config at `{}`", - args.config.display() - ) - })?; - - let validator_key: Option = args - .validator_key - .as_ref() - .map(|validator_key| { - let read_key = fs::read_to_string(validator_key).with_context(|| { - format!( - "failed reading validator key from `{}`", - validator_key.display() - ) - })?; - Text::new(&read_key).decode().with_context(|| { - format!( - "failed decoding validator key at `{}`", - validator_key.display() - ) +impl<'a> ConfigPaths<'a> { + // Loads configs from the file system. 
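The `load` implementation just below wraps each fallible step in an immediately-invoked closure so that the file path can be attached as context to whichever step failed; a standalone sketch of that idiom (the function name and return type here are illustrative, only the `anyhow` calls mirror the diff):

use anyhow::Context as _;

fn read_config_file(path: &std::path::Path) -> anyhow::Result<String> {
    (|| {
        let raw = std::fs::read_to_string(path).context("failed reading file")?;
        Ok::<_, anyhow::Error>(raw)
    })()
    .with_context(|| path.display().to_string())
}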
+ pub fn load(self) -> anyhow::Result { + Ok(Configs { + app: (|| { + let app = fs::read_to_string(self.app).context("failed reading file")?; + decode_json(&app).context("failed decoding JSON") + })() + .with_context(|| self.app.display().to_string())?, + + validator_key: self + .validator_key + .as_ref() + .map(|file| { + (|| { + let key = fs::read_to_string(file).context("failed reading file")?; + Text::new(&key).decode().context("failed decoding key") + })() + .with_context(|| file.display().to_string()) }) - }) - .transpose()?; - let read_key = fs::read_to_string(args.node_key).with_context(|| { - format!("failed reading node key from `{}`", args.node_key.display()) - })?; - let node_key = Text::new(&read_key).decode().with_context(|| { - format!("failed decoding node key at `{}`", args.node_key.display()) - })?; - - anyhow::ensure!( - validator_key.is_some() == node_config.consensus.is_some(), - "Validator key and consensus config must be specified at the same time" - ); - let consensus = validator_key.and_then(|key| Some((node_config.consensus?, key))); - - let cfg = Configs { - executor: node_config.executor, - metrics_server_addr: node_config.metrics_server_addr, - consensus, - node_key, + .transpose()?, + + node_key: (|| { + let key = fs::read_to_string(self.node_key).context("failed reading file")?; + Text::new(&key).decode().context("failed decoding key") + })() + .with_context(|| self.node_key.display().to_string())?, + + database: self.database.into(), + }) + } +} + +impl Configs { + pub async fn make_executor( + &self, + ctx: &ctx::Ctx, + ) -> ctx::Result<(executor::Executor, BlockStoreRunner)> { + let store = store::RocksDB::open(&self.database).await?; + // Store genesis if db is empty. + if store.is_empty().await? { + store + .store_next_block(ctx, &self.app.genesis_block) + .await + .context("store_next_block()")?; + } + let (block_store, runner) = BlockStore::new(ctx, Box::new(store.clone())).await?; + let e = executor::Executor { + config: executor::Config { + server_addr: self.app.server_addr, + validators: self.app.validators.clone(), + node_key: self.node_key.clone(), + gossip_dynamic_inbound_limit: self.app.gossip_dynamic_inbound_limit, + gossip_static_inbound: self.app.gossip_static_inbound.clone(), + gossip_static_outbound: self.app.gossip_static_outbound.clone(), + }, + block_store, + validator: self.validator_key.as_ref().map(|key| executor::Validator { + config: executor::ValidatorConfig { + key: key.clone(), + public_addr: self.app.public_addr, + }, + replica_store: Box::new(store), + payload_manager: Box::new(bft::testonly::RandomPayload), + }), }; - Ok(cfg) + Ok((e, runner)) } } diff --git a/node/tools/src/lib.rs b/node/tools/src/lib.rs index 6b818859..62ee3cc1 100644 --- a/node/tools/src/lib.rs +++ b/node/tools/src/lib.rs @@ -1,5 +1,10 @@ //! CLI tools for the consensus node. - +#![allow(missing_docs)] mod config; +mod proto; +mod store; + +#[cfg(test)] +mod tests; -pub use self::config::{ConfigPaths, Configs, NodeConfig}; +pub use config::{AppConfig, ConfigPaths}; diff --git a/node/tools/src/main.rs b/node/tools/src/main.rs index b869f613..3701b64d 100644 --- a/node/tools/src/main.rs +++ b/node/tools/src/main.rs @@ -2,50 +2,41 @@ //! manages communication between the actors. It is the main executable in this workspace. 
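`make_executor` above hands back a `BlockStoreRunner` next to the `Executor`, and `main` below has to keep that runner polling in the background; a minimal sketch of that contract, assuming only the `zksync_concurrency` and `zksync_consensus_storage` APIs already visible in this diff:

use zksync_concurrency::{ctx, scope};
use zksync_consensus_storage::{BlockStore, PersistentBlockStore};

async fn with_block_store(
    ctx: &ctx::Ctx,
    persistent: Box<dyn PersistentBlockStore>,
) -> ctx::Result<()> {
    scope::run!(ctx, |ctx, s| async {
        let (block_store, runner) = BlockStore::new(ctx, persistent).await?;
        // The store only makes progress while its runner is polled in the background.
        s.spawn_bg(runner.run(ctx));
        // ... hand `block_store` to the executor here ...
        drop(block_store);
        Ok(())
    })
    .await
}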
use anyhow::Context as _; use clap::Parser; -use std::{ - fs, - io::IsTerminal as _, - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{fs, io::IsTerminal as _, path::PathBuf}; use tracing::metadata::LevelFilter; use tracing_subscriber::{prelude::*, Registry}; use vise_exporter::MetricsExporter; -use zksync_concurrency::{ctx, scope, time}; -use zksync_consensus_executor::Executor; -use zksync_consensus_storage::{BlockStore, RocksdbStorage}; -use zksync_consensus_tools::{ConfigPaths, Configs}; +use zksync_concurrency::{ctx, scope}; +use zksync_consensus_tools::ConfigPaths; use zksync_consensus_utils::no_copy::NoCopy; /// Command-line application launching a node executor. #[derive(Debug, Parser)] struct Args { - /// Verify configuration instead of launching a node. - #[arg(long, conflicts_with_all = ["ci_mode", "validator_key", "config_file", "node_key"])] - verify_config: bool, - /// Exit after finalizing 100 blocks. - #[arg(long)] - ci_mode: bool, /// Path to a validator key file. If set to an empty string, validator key will not be read /// (i.e., a node will be initialized as a non-validator node). - #[arg(long, default_value = "validator_key")] + #[arg(long, default_value = "./validator_key")] validator_key: PathBuf, /// Path to a JSON file with node configuration. - #[arg(long, default_value = "config.json")] + #[arg(long, default_value = "./config.json")] config_file: PathBuf, /// Path to a node key file. - #[arg(long, default_value = "node_key")] + #[arg(long, default_value = "./node_key")] node_key: PathBuf, + /// Path to the rocksdb database of the node. + #[arg(long, default_value = "./database")] + database: PathBuf, } impl Args { /// Extracts configuration paths from these args. fn config_paths(&self) -> ConfigPaths<'_> { ConfigPaths { - config: &self.config_file, + app: &self.config_file, node_key: &self.node_key, validator_key: (!self.validator_key.as_os_str().is_empty()) .then_some(&self.validator_key), + database: &self.database, } } } @@ -56,72 +47,51 @@ async fn main() -> anyhow::Result<()> { tracing::trace!(?args, "Starting node"); let ctx = &ctx::root(); - if !args.verify_config { - // Create log file. - fs::create_dir_all("logs/")?; - let log_file = fs::File::create("logs/output.log")?; + // Create log file. + fs::create_dir_all("logs/")?; + let log_file = fs::File::create("logs/output.log")?; - // Create the logger for stdout. This will produce human-readable logs for - // all events of level INFO or higher. - let stdout_log = tracing_subscriber::fmt::layer() - .pretty() - .with_ansi(std::env::var("NO_COLOR").is_err() && std::io::stdout().is_terminal()) - .with_file(false) - .with_line_number(false) - .with_filter(LevelFilter::INFO); + // Create the logger for stdout. This will produce human-readable logs for + // all events of level INFO or higher. + let stdout_log = tracing_subscriber::fmt::layer() + .pretty() + .with_ansi(std::env::var("NO_COLOR").is_err() && std::io::stdout().is_terminal()) + .with_file(false) + .with_line_number(false) + .with_filter(tracing_subscriber::EnvFilter::from_default_env()); - // Create the logger for the log file. This will produce machine-readable logs for - // all events of level DEBUG or higher. - let file_log = tracing_subscriber::fmt::layer() - .with_ansi(false) - .with_writer(log_file) - .with_filter(LevelFilter::DEBUG); + // Create the logger for the log file. This will produce machine-readable logs for + // all events of level DEBUG or higher. 
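A side effect of the stdout change above is that verbosity is now driven by `RUST_LOG` instead of a hard-coded `INFO` filter. A hedged variant, not what this diff does, that keeps an `info` default when the variable is unset:

use tracing_subscriber::EnvFilter;

fn stdout_filter() -> EnvFilter {
    // Honour RUST_LOG when present, otherwise fall back to `info`.
    EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"))
}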
+ let file_log = tracing_subscriber::fmt::layer() + .with_ansi(false) + .with_writer(log_file) + .with_filter(LevelFilter::DEBUG); - // Create the subscriber. This will combine the two loggers. - let subscriber = Registry::default().with(stdout_log).with(file_log); + // Create the subscriber. This will combine the two loggers. + let subscriber = Registry::default().with(stdout_log).with(file_log); - // Set the subscriber as the global default. This will cause all events in all threads - // to be logged by the subscriber. - tracing::subscriber::set_global_default(subscriber).unwrap(); + // Set the subscriber as the global default. This will cause all events in all threads + // to be logged by the subscriber. + tracing::subscriber::set_global_default(subscriber).unwrap(); - // Start the node. - tracing::info!("Starting node."); - } + // Start the node. + tracing::info!("Starting node."); // Load the config files. tracing::debug!("Loading config files."); - let configs = Configs::read(args.config_paths()).context("configs.read()")?; - - if args.verify_config { - tracing::info!("Configuration verified."); - return Ok(()); - } + let configs = args + .config_paths() + .load() + .context("config_paths().load()")?; - // Initialize the storage. - tracing::debug!("Initializing storage."); - - let storage = RocksdbStorage::new( - ctx, - &configs.executor.genesis_block, - Path::new("./database"), - ); - let storage = Arc::new(storage.await.context("RocksdbStorage::new()")?); - let mut executor = Executor::new(ctx, configs.executor, configs.node_key, storage.clone()) + let (executor, runner) = configs + .make_executor(ctx) .await - .context("Executor::new()")?; - if let Some((consensus_config, validator_key)) = configs.consensus { - executor - .set_validator( - consensus_config, - validator_key, - storage.clone(), - Arc::new(zksync_consensus_bft::testonly::RandomPayloadSource), - ) - .context("Executor::set_validator()")?; - } + .context("configs.into_executor()")?; + // Initialize the storage. scope::run!(ctx, |ctx, s| async { - if let Some(addr) = configs.metrics_server_addr { + if let Some(addr) = configs.app.metrics_server_addr { let addr = NoCopy::from(addr); s.spawn_bg(async { let addr = addr; @@ -132,31 +102,9 @@ async fn main() -> anyhow::Result<()> { Ok(()) }); } - + s.spawn_bg(runner.run(ctx)); s.spawn(executor.run(ctx)); - - // if we are in CI mode, we wait for the node to finalize 100 blocks and then we stop it - if args.ci_mode { - let storage = storage.clone(); - loop { - let block_finalized = storage.head_block(ctx).await.context("head_block")?; - let block_finalized = block_finalized.header.number.0; - - tracing::info!("current finalized block {}", block_finalized); - if block_finalized > 100 { - // we wait for 10 seconds to make sure that we send enough messages to other nodes - // and other nodes have enough messages to finalize 100+ blocks - ctx.sleep(time::Duration::seconds(10)).await?; - break; - } - ctx.sleep(time::Duration::seconds(1)).await?; - } - - tracing::info!("Cancel all tasks"); - s.cancel(); - } Ok(()) }) .await - .context("node stopped") } diff --git a/node/actors/executor/src/config/proto/mod.proto b/node/tools/src/proto/mod.proto similarity index 68% rename from node/actors/executor/src/config/proto/mod.proto rename to node/tools/src/proto/mod.proto index 8cfce427..e857c8eb 100644 --- a/node/actors/executor/src/config/proto/mod.proto +++ b/node/tools/src/proto/mod.proto @@ -36,7 +36,7 @@ // the validator set) or move it to a separate config file. 
syntax = "proto3"; -package zksync.executor.config; +package zksync.tools; // (public key, ip address) of a gossip network node. message NodeAddr { @@ -44,53 +44,39 @@ message NodeAddr { optional string addr = 2; // [required] IpAddr } -// Config of the consensus network. -message ConsensusConfig { - optional string key = 1; // [required] ValidatorPublicKey - optional string public_addr = 2; // [required] IpAddr -} +// Application configuration. +message AppConfig { + // Ports -// Config of the gossip network. -message GossipConfig { - // Public key of this node. It uniquely identifies the node. - // It should match the secret key provided in the `node_key` file. - optional string key = 1; // [required] NodePublicKey - // Limit on the number of inbound connections outside - // of the `static_inbound` set. - optional uint64 dynamic_inbound_limit = 2; // [required] - // Inbound connections that should be unconditionally accepted. - repeated string static_inbound = 3; // NodePublicKey - // Outbound connections that the node should actively try to - // establish and maintain. - repeated NodeAddr static_outbound = 4; -} - -// Top-level executor config. -message ExecutorConfig { // IP:port to listen on, for incoming TCP connections. // Use `0.0.0.0:` to listen on all network interfaces (i.e. on all IPs exposed by this VM). optional string server_addr = 1; // [required] IpAddr + + // Public IP:port to advertise, should forward to server_addr. + optional string public_addr = 2; // [required] IpAddr + + // IP:port to serve metrics data for scraping. + // Use `0.0.0.0:` to listen on all network interfaces. + // If not set, metrics data won't be served. + optional string metrics_server_addr = 3; // [optional] IpAddr - // Gossip network config. - optional GossipConfig gossip = 4; // [required] + // Consensus - // Genesis block of the blockchain. - optional string genesis_block = 5; // [required] FinalBlock // Public keys of all validators. - repeated string validators = 6; // [required] ValidatorPublicKey -} + repeated string validators = 5; // [required] ValidatorPublicKey -// Node configuration including executor configuration, optional validator configuration, -// and application-specific settings (e.g. metrics scraping). -message NodeConfig { - // Executor configuration. - optional ExecutorConfig executor = 1; // [required] + // Genesis block of the blockchain. + // Will be inserted to storage if not already present. + optional string genesis_block = 6; // [required] FinalBlock - // IP:port to serve metrics data for scraping. - // Use `0.0.0.0:` to listen on all network interfaces. - // If not set, metrics data won't be served. - optional string metrics_server_addr = 2; // [optional] IpAddr + // Gossip network - // Consensus network config. - optional ConsensusConfig consensus = 3; // [optional] + // Limit on the number of gossip network inbound connections outside + // of the `gossip_static_inbound` set. + optional uint64 gossip_dynamic_inbound_limit = 8; // [required] + // Inbound connections that should be unconditionally accepted on the gossip network. + repeated string gossip_static_inbound = 9; // NodePublicKey + // Outbound gossip network connections that the node should actively try to + // establish and maintain. 
+ repeated NodeAddr gossip_static_outbound = 10; } diff --git a/node/tools/src/proto/mod.rs b/node/tools/src/proto/mod.rs new file mode 100644 index 00000000..660bf4c5 --- /dev/null +++ b/node/tools/src/proto/mod.rs @@ -0,0 +1,2 @@ +#![allow(warnings)] +include!(concat!(env!("OUT_DIR"), "/src/proto/gen.rs")); diff --git a/node/tools/src/store.rs b/node/tools/src/store.rs new file mode 100644 index 00000000..189ee93e --- /dev/null +++ b/node/tools/src/store.rs @@ -0,0 +1,197 @@ +//! RocksDB-based implementation of PersistentBlockStore and ReplicaStore. +use anyhow::Context as _; +use rocksdb::{Direction, IteratorMode, ReadOptions}; +use std::{ + fmt, + path::Path, + sync::{Arc, RwLock}, +}; +use zksync_concurrency::{ctx, error::Wrap as _, scope}; +use zksync_consensus_roles::validator; +use zksync_consensus_storage::{BlockStoreState, PersistentBlockStore, ReplicaState, ReplicaStore}; + +/// Enum used to represent a key in the database. It also acts as a separator between different stores. +#[derive(Debug, Clone, PartialEq, Eq)] +enum DatabaseKey { + /// Key used to store the replica state. + /// ReplicaState -> ReplicaState + ReplicaState, + /// Key used to store the finalized blocks. + /// Block(validator::BlockNumber) -> validator::FinalBlock + Block(validator::BlockNumber), +} + +impl DatabaseKey { + /// Starting database key for blocks indexed by number. All other keys in the default column family + /// are lower than this value. + pub(crate) const BLOCKS_START_KEY: &'static [u8] = &u64::MIN.to_be_bytes(); + + /// Iterator mode for the head block (i.e., a block with the greatest number). + pub(crate) const BLOCK_HEAD_ITERATOR: IteratorMode<'static> = + IteratorMode::From(&u64::MAX.to_be_bytes(), Direction::Reverse); + + /// Encodes this key for usage as a RocksDB key. + /// + /// # Implementation note + /// + /// This logic is maintainable only while the amount of non-block keys remains small. + /// If more keys are added (especially if their number is not known statically), prefer using + /// separate column families for them. + pub(crate) fn encode_key(&self) -> Vec { + match self { + // Keys for non-block entries must be smaller than all block keys. + Self::ReplicaState => vec![0], + // Number encoding that monotonically increases with the number + Self::Block(number) => number.0.to_be_bytes().to_vec(), + } + } +} + +/// Main struct for the Storage module, it just contains the database. Provides a set of high-level +/// atomic operations on the database. It "contains" the following data: +/// +/// - An append-only database of finalized blocks. +/// - A backup of the consensus replica state. +#[derive(Clone)] +pub(crate) struct RocksDB(Arc>); + +impl RocksDB { + /// Create a new Storage. It first tries to open an existing database, and if that fails it just creates a + /// a new one. We need the genesis block of the chain as input. 
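The key layout above relies on big-endian byte strings sorting in the same order as the numbers they encode; that is what makes `BLOCKS_START_KEY` a lower bound for every block key and lets the head-block iterator seek backwards from `u64::MAX`. A standalone check of the property (plain `std`, not taken from the diff):

fn key(n: u64) -> [u8; 8] {
    n.to_be_bytes()
}

#[test]
fn big_endian_keys_sort_numerically() {
    // Lexicographic comparison of the encoded keys matches numeric comparison.
    assert!(key(1) < key(2));
    assert!(key(255) < key(256)); // little-endian bytes would order this pair the other way
    assert!(key(u64::MAX - 1) < key(u64::MAX));
    // The single-byte ReplicaState key sorts before every 8-byte block key,
    // including the key of block 0.
    assert!(vec![0u8] < key(0).to_vec());
}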
+ pub(crate) async fn open(path: &Path) -> ctx::Result { + let mut options = rocksdb::Options::default(); + options.create_missing_column_families(true); + options.create_if_missing(true); + Ok(Self(Arc::new(RwLock::new( + scope::wait_blocking(|| { + rocksdb::DB::open(&options, path).context("Failed opening RocksDB") + }) + .await?, + )))) + } + + fn state_blocking(&self) -> anyhow::Result> { + let db = self.0.read().unwrap(); + + let mut options = ReadOptions::default(); + options.set_iterate_range(DatabaseKey::BLOCKS_START_KEY..); + let Some(res) = db.iterator_opt(IteratorMode::Start, options).next() else { + return Ok(None); + }; + let (_, first) = res.context("RocksDB error reading first stored block")?; + let first: validator::FinalBlock = + zksync_protobuf::decode(&first).context("Failed decoding first stored block bytes")?; + + let mut options = ReadOptions::default(); + options.set_iterate_range(DatabaseKey::BLOCKS_START_KEY..); + let (_, last) = db + .iterator_opt(DatabaseKey::BLOCK_HEAD_ITERATOR, options) + .next() + .context("last block not found")? + .context("RocksDB error reading head block")?; + let last: validator::FinalBlock = + zksync_protobuf::decode(&last).context("Failed decoding head block bytes")?; + + Ok(Some(BlockStoreState { + first: first.justification, + last: last.justification, + })) + } + + /// Checks if BlockStore is empty. + pub(crate) async fn is_empty(&self) -> anyhow::Result { + Ok(scope::wait_blocking(|| self.state_blocking()) + .await? + .is_none()) + } +} + +impl fmt::Debug for RocksDB { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("RocksDB") + } +} + +#[async_trait::async_trait] +impl PersistentBlockStore for RocksDB { + async fn state(&self, _ctx: &ctx::Ctx) -> ctx::Result { + Ok(scope::wait_blocking(|| self.state_blocking()) + .await? + .context("storage is empty")?) + } + + async fn block( + &self, + _ctx: &ctx::Ctx, + number: validator::BlockNumber, + ) -> ctx::Result { + scope::wait_blocking(|| { + let db = self.0.read().unwrap(); + let block = db + .get(DatabaseKey::Block(number).encode_key()) + .context("RocksDB error")? + .context("not found")?; + Ok(zksync_protobuf::decode(&block).context("failed decoding block")?) + }) + .await + .wrap(number) + } + + #[tracing::instrument(level = "debug", skip(self))] + async fn store_next_block( + &self, + _ctx: &ctx::Ctx, + block: &validator::FinalBlock, + ) -> ctx::Result<()> { + scope::wait_blocking(|| { + let db = self.0.write().unwrap(); + let block_number = block.header().number; + let mut write_batch = rocksdb::WriteBatch::default(); + write_batch.put( + DatabaseKey::Block(block_number).encode_key(), + zksync_protobuf::encode(block), + ); + // Commit the transaction. + db.write(write_batch) + .context("Failed writing block to database")?; + Ok(()) + }) + .await + .wrap(block.header().number) + } +} + +#[async_trait::async_trait] +impl ReplicaStore for RocksDB { + async fn state(&self, _ctx: &ctx::Ctx) -> ctx::Result> { + Ok(scope::wait_blocking(|| { + let Some(raw_state) = self + .0 + .read() + .unwrap() + .get(DatabaseKey::ReplicaState.encode_key()) + .context("Failed to get ReplicaState from RocksDB")? + else { + return Ok(None); + }; + zksync_protobuf::decode(&raw_state) + .map(Some) + .context("Failed to decode replica state!") + }) + .await?) 
+ } + + async fn set_state(&self, _ctx: &ctx::Ctx, state: &ReplicaState) -> ctx::Result<()> { + Ok(scope::wait_blocking(|| { + self.0 + .write() + .unwrap() + .put( + DatabaseKey::ReplicaState.encode_key(), + zksync_protobuf::encode(state), + ) + .context("Failed putting ReplicaState to RocksDB") + }) + .await?) + } +} diff --git a/node/tools/src/tests.rs b/node/tools/src/tests.rs new file mode 100644 index 00000000..65760e3d --- /dev/null +++ b/node/tools/src/tests.rs @@ -0,0 +1,55 @@ +use crate::{store, AppConfig}; +use rand::{ + distributions::{Distribution, Standard}, + Rng, +}; +use tempfile::TempDir; +use zksync_concurrency::ctx; +use zksync_consensus_roles::node; +use zksync_consensus_storage::{testonly, PersistentBlockStore}; +use zksync_protobuf::testonly::test_encode_random; + +fn make_addr(rng: &mut R) -> std::net::SocketAddr { + std::net::SocketAddr::new(std::net::IpAddr::from(rng.gen::<[u8; 16]>()), rng.gen()) +} + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> AppConfig { + AppConfig { + server_addr: make_addr(rng), + public_addr: make_addr(rng), + metrics_server_addr: Some(make_addr(rng)), + + validators: rng.gen(), + genesis_block: rng.gen(), + + gossip_dynamic_inbound_limit: rng.gen(), + gossip_static_inbound: (0..5) + .map(|_| rng.gen::().public()) + .collect(), + gossip_static_outbound: (0..6) + .map(|_| (rng.gen::().public(), make_addr(rng))) + .collect(), + } + } +} + +#[test] +fn test_schema_encoding() { + let ctx = ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + test_encode_random::<_, AppConfig>(rng); +} + +#[tokio::test] +async fn test_reopen_rocksdb() { + let ctx = &ctx::test_root(&ctx::RealClock); + let dir = TempDir::new().unwrap(); + let mut want = vec![]; + for b in testonly::random_blocks(ctx).take(5) { + let store = store::RocksDB::open(dir.path()).await.unwrap(); + store.store_next_block(ctx, &b).await.unwrap(); + want.push(b); + assert_eq!(want, testonly::dump(ctx, &store).await); + } +}
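The tests above exercise the block half of the RocksDB store; below is a hedged sketch of an analogous round-trip for the `ReplicaStore` half, not part of this diff. It assumes `ReplicaState` implements `PartialEq + Debug`, that the random `ReplicaState` generator from the storage `testonly` module shown earlier is available, and it reuses the imports already present in `tests.rs` above (`ctx`, `Rng`, `TempDir`, `store`):

use zksync_consensus_storage::{ReplicaState, ReplicaStore};

#[tokio::test]
async fn test_replica_state_roundtrip() {
    let ctx = &ctx::test_root(&ctx::RealClock);
    let rng = &mut ctx.rng();
    let dir = TempDir::new().unwrap();
    let store = store::RocksDB::open(dir.path()).await.unwrap();
    // Fully qualified call: RocksDB also implements PersistentBlockStore::state.
    assert_eq!(None, ReplicaStore::state(&store, ctx).await.unwrap());
    let want: ReplicaState = rng.gen();
    store.set_state(ctx, &want).await.unwrap();
    assert_eq!(Some(want), ReplicaStore::state(&store, ctx).await.unwrap());
}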