diff --git a/Cargo.lock b/Cargo.lock index 2101134f93..ad8d5a6998 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2489,6 +2489,7 @@ version = "0.6.0" dependencies = [ "async-channel", "async-lock", + "base64 0.21.2", "blake2-rfc", "derive_more", "either", diff --git a/full-node/src/consensus_service.rs b/full-node/src/consensus_service.rs index 0b926a06a4..b9adadd561 100644 --- a/full-node/src/consensus_service.rs +++ b/full-node/src/consensus_service.rs @@ -241,6 +241,7 @@ impl ConsensusService { NonZeroU32::new(2000).unwrap() }, full_mode: true, + code_trie_node_hint: None, }); let finalized_runtime = { diff --git a/lib/src/sync/all.rs b/lib/src/sync/all.rs index 958578969b..d9780794f9 100644 --- a/lib/src/sync/all.rs +++ b/lib/src/sync/all.rs @@ -35,6 +35,7 @@ use crate::{ executor::host, header, sync::{all_forks, optimistic, warp_sync}, + trie::Nibble, verify, }; @@ -47,7 +48,9 @@ use core::{ }; pub use crate::executor::vm::ExecHint; -pub use warp_sync::{FragmentError as WarpSyncFragmentError, WarpSyncFragment}; +pub use warp_sync::{ + ConfigCodeTrieNodeHint, FragmentError as WarpSyncFragmentError, WarpSyncFragment, +}; /// Configuration for the [`AllSync`]. // TODO: review these fields @@ -109,6 +112,15 @@ pub struct Config { /// verified. // TODO: change this now that we don't verify block bodies here pub full_mode: bool, + + /// Known valid Merkle value and storage value combination for the `:code` key. + /// + /// If provided, the warp syncing algorithm will first fetch the Merkle value of `:code`, and + /// if it matches the Merkle value provided in the hint, use the storage value in the hint + /// instead of downloading it. If the hint doesn't match, an extra round-trip will be needed, + /// but if the hint matches it saves a big download. + // TODO: provide only in non-full mode? + pub code_trie_node_hint: Option, } /// Identifier for a source in the [`AllSync`]. @@ -185,6 +197,7 @@ impl AllSync { block_number_bytes: config.block_number_bytes, sources_capacity: config.sources_capacity, requests_capacity: config.sources_capacity, // TODO: ?! add as config? + code_trie_node_hint: config.code_trie_node_hint, }) { Ok(inner) => AllSyncInner::GrandpaWarpSync { inner: warp_sync::WarpSync::InProgress(inner), @@ -1306,6 +1319,8 @@ impl AllSync { finalized_block_runtime, finalized_storage_code, finalized_storage_heap_pages, + finalized_storage_code_merkle_value, + finalized_storage_code_closest_ancestor_excluding, ) = self.shared.transition_grandpa_warp_sync_all_forks(success); self.inner = AllSyncInner::AllForks(new_inner); ProcessOne::WarpSyncFinished { @@ -1313,6 +1328,8 @@ impl AllSync { finalized_block_runtime, finalized_storage_code, finalized_storage_heap_pages, + finalized_storage_code_merkle_value, + finalized_storage_code_closest_ancestor_excluding, } } AllSyncInner::AllForks(sync) => match sync.process_one() { @@ -2299,6 +2316,13 @@ pub enum ProcessOne { /// Storage value at the `:heappages` key of the finalized block. finalized_storage_heap_pages: Option>, + + /// Merkle value of the `:code` trie node of the finalized block. + finalized_storage_code_merkle_value: Option>, + + /// Closest ancestor of the `:code` trie node of the finalized block excluding `:code` + /// itself. + finalized_storage_code_closest_ancestor_excluding: Option>, }, /// Ready to start verifying a block. 
@@ -2970,6 +2994,8 @@ impl Shared { host::HostVmPrototype, Option>, Option>, + Option>, + Option>, ) { let mut all_forks = all_forks::AllForksSync::new(all_forks::Config { chain_information: grandpa.chain_information, @@ -3073,6 +3099,8 @@ impl Shared { grandpa.finalized_runtime, grandpa.finalized_storage_code, grandpa.finalized_storage_heap_pages, + grandpa.finalized_storage_code_merkle_value, + grandpa.finalized_storage_code_closest_ancestor_excluding, ) } } diff --git a/lib/src/sync/warp_sync.rs b/lib/src/sync/warp_sync.rs index 2eddfcd180..9bdb507080 100644 --- a/lib/src/sync/warp_sync.rs +++ b/lib/src/sync/warp_sync.rs @@ -102,7 +102,7 @@ use crate::{ vm::ExecHint, }, header::{self, Header}, - trie::proof_decode, + trie::{self, proof_decode}, }; use alloc::{ @@ -112,6 +112,7 @@ use alloc::{ }; use core::{iter, mem, ops}; +pub use trie::Nibble; pub use verifier::{Error as FragmentError, WarpSyncFragment}; mod verifier; @@ -143,6 +144,7 @@ pub enum Error { } /// The configuration for [`start_warp_sync()`]. +#[derive(Debug)] pub struct Config { /// The chain information of the starting point of the warp syncing. pub start_chain_information: ValidChainInformation, @@ -156,6 +158,27 @@ pub struct Config { /// The initial capacity of the list of requests. pub requests_capacity: usize, + + /// Known valid Merkle value and storage value combination for the `:code` key. + /// + /// If provided, the warp syncing algorithm will first fetch the Merkle value of `:code`, and + /// if it matches the Merkle value provided in the hint, use the storage value in the hint + /// instead of downloading it. If the hint doesn't match, an extra round-trip will be needed, + /// but if the hint matches it saves a big download. + pub code_trie_node_hint: Option, +} + +/// See [`Config::code_trie_node_hint`]. +#[derive(Debug)] +pub struct ConfigCodeTrieNodeHint { + /// Potential Merkle value of the `:code` key. + pub merkle_value: Vec, + + /// Storage value corresponding to [`ConfigCodeTrieNodeHint::merkle_value`]. + pub storage_value: Vec, + + /// Closest ancestor of the `:code` key except for `:code` itself. + pub closest_ancestor_excluding: Vec, } /// Initializes the warp sync state machine. @@ -186,6 +209,7 @@ pub fn start_warp_sync( Ok(InProgressWarpSync { start_chain_information: config.start_chain_information, + code_trie_node_hint: config.code_trie_node_hint, block_number_bytes: config.block_number_bytes, sources: slab::Slab::with_capacity(config.sources_capacity), in_progress_requests: slab::Slab::with_capacity(config.requests_capacity), @@ -236,6 +260,12 @@ pub struct Success { /// Storage value at the `:heappages` key of the finalized block. pub finalized_storage_heap_pages: Option>, + /// Merkle value of the `:code` trie node of the finalized block. + pub finalized_storage_code_merkle_value: Option>, + + /// Closest ancestor of the `:code` trie node of the finalized block excluding `:code` itself. + pub finalized_storage_code_closest_ancestor_excluding: Option>, + /// The list of sources that were added to the state machine. /// The list is ordered by [`SourceId`]. pub sources_ordered: Vec<(SourceId, TSrc)>, @@ -275,6 +305,8 @@ impl ops::IndexMut for InProgressWarpSync { pub struct InProgressWarpSync { /// See [`Phase`]. phase: Phase, + /// See [`Config::code_trie_node_hint`]. + code_trie_node_hint: Option, /// Starting point of the warp syncing, as provided to [`start_warp_sync`]. start_chain_information: ValidChainInformation, /// Number of bytes used to encode the block number in headers. 
@@ -317,6 +349,9 @@ enum Phase { /// Source we downloaded the last fragments from. Assuming that the source isn't malicious, /// it is guaranteed to have access to the storage of the finalized block. warp_sync_source_id: SourceId, + /// `true` if it is known that [`InProgressWarpSync::code_trie_node_hint`] doesn't match + /// the storage of the header we warp synced to. + hint_doesnt_match: bool, /// Merkle proof containing the runtime information, or `None` if it was not downloaded yet. downloaded_runtime: Option>, }, @@ -353,6 +388,10 @@ struct DownloadedRuntime { storage_code: Option>, /// Storage item at the `:heappages` key. `None` if there is no entry at that key. storage_heap_pages: Option>, + /// Merkle value of the `:code` trie node. `None` if there is no entry at that key. + code_merkle_value: Option>, + /// Closest ancestor of the `:code` key except for `:code` itself. + closest_ancestor_excluding: Option>, } /// See [`InProgressWarpSync::status`]. @@ -620,9 +659,24 @@ impl InProgressWarpSync { let runtime_parameters_get = if let Phase::RuntimeDownload { header, warp_sync_source_id, + hint_doesnt_match, + downloaded_runtime: None, .. } = &self.phase { + let code_key_to_request = if let (false, Some(hint)) = + (*hint_doesnt_match, self.code_trie_node_hint.as_ref()) + { + Cow::Owned( + trie::nibbles_to_bytes_truncate( + hint.closest_ancestor_excluding.iter().copied(), + ) + .collect::>(), + ) + } else { + Cow::Borrowed(&b":code"[..]) + }; + // TODO: O(n) if !self.in_progress_requests.iter().any(|(_, rq)| { rq.0 == *warp_sync_source_id @@ -631,7 +685,7 @@ impl InProgressWarpSync { block_hash: ref b, ref keys, } if *b == header.hash(self.block_number_bytes) - && keys.iter().any(|k| k == b":code") + && keys.iter().any(|k| &*k == &*code_key_to_request) && keys.iter().any(|k| k == b":heappages")) }) { Some(( @@ -640,7 +694,7 @@ impl InProgressWarpSync { DesiredRequest::StorageGetMerkleProof { block_hash: header.hash(self.block_number_bytes), state_trie_root: header.state_root, - keys: vec![b":code".to_vec(), b":heappages".to_vec()], + keys: vec![code_key_to_request.to_vec(), b":heappages".to_vec()], }, )) } else { @@ -802,7 +856,7 @@ impl InProgressWarpSync { ), Phase::RuntimeDownload { header, .. }, ) if *block_hash == header.hash(self.block_number_bytes) - && keys.iter().any(|k| k == b":code") + // TODO: doesn't check for `:code`, but in practice this doesn't really matter anyway && keys.iter().any(|k| k == b":heappages") => { user_data @@ -1133,6 +1187,7 @@ impl VerifyWarpSyncFragment { .into(), warp_sync_source_id: *downloaded_source, downloaded_runtime: None, + hint_doesnt_match: false, }; } Ok(verifier::Next::Success { @@ -1152,6 +1207,7 @@ impl VerifyWarpSyncFragment { chain_information_finality, warp_sync_source_id: *downloaded_source, downloaded_runtime: None, + hint_doesnt_match: false, }; } else { self.inner.phase = Phase::DownloadFragments { @@ -1198,6 +1254,7 @@ impl BuildRuntime { downloaded_runtime, chain_information_finality, warp_sync_source_id, + hint_doesnt_match, ..
} = &mut self.inner.phase { @@ -1221,7 +1278,75 @@ impl BuildRuntime { } }; - let finalized_storage_code = + let ( + finalized_storage_code_merkle_value, + finalized_storage_code_closest_ancestor_excluding, + ) = { + let code_nibbles = + trie::bytes_to_nibbles(b":code".iter().copied()).collect::>(); + match decoded_downloaded_runtime.closest_ancestor_in_proof( + &header.state_root, + &code_nibbles[..code_nibbles.len() - 1], + ) { + Ok(Some(closest_ancestor_key)) => { + let next_nibble = code_nibbles[closest_ancestor_key.len()]; + let merkle_value = decoded_downloaded_runtime + .trie_node_info(&header.state_root, closest_ancestor_key) + .unwrap() + .children + .child(next_nibble) + .merkle_value(); + + match merkle_value { + Some(mv) => (mv.to_owned(), closest_ancestor_key.to_vec()), + None => { + self.inner.phase = Phase::DownloadFragments { + previous_verifier_values: Some(( + header.clone(), + chain_information_finality.clone(), + )), + }; + return ( + WarpSync::InProgress(self.inner), + Some(Error::MissingCode), + ); + } + } + } + Ok(None) => { + self.inner.phase = Phase::DownloadFragments { + previous_verifier_values: Some(( + header.clone(), + chain_information_finality.clone(), + )), + }; + return (WarpSync::InProgress(self.inner), Some(Error::MissingCode)); + } + Err(proof_decode::IncompleteProofError { .. }) => { + self.inner.phase = Phase::DownloadFragments { + previous_verifier_values: Some(( + header.clone(), + chain_information_finality.clone(), + )), + }; + return ( + WarpSync::InProgress(self.inner), + Some(Error::MerkleProofEntriesMissing), + ); + } + } + }; + + let finalized_storage_code = if let (false, Some(hint)) = + (*hint_doesnt_match, self.inner.code_trie_node_hint.as_ref()) + { + if hint.merkle_value == finalized_storage_code_merkle_value { + &hint.storage_value + } else { + *hint_doesnt_match = true; + return (WarpSync::InProgress(self.inner), None); + } + } else { match decoded_downloaded_runtime.storage_value(&header.state_root, b":code") { Ok(Some((code, _))) => code, Ok(None) => { @@ -1245,7 +1370,8 @@ impl BuildRuntime { Some(Error::MerkleProofEntriesMissing), ); } - }; + } + }; let finalized_storage_heappages = match decoded_downloaded_runtime.storage_value(&header.state_root, b":heappages") { @@ -1332,6 +1458,12 @@ impl BuildRuntime { finalized_storage_code: Some(finalized_storage_code.to_owned()), finalized_storage_heap_pages: finalized_storage_heappages .map(|v| v.to_vec()), + finalized_storage_code_merkle_value: Some( + finalized_storage_code_merkle_value, + ), + finalized_storage_code_closest_ancestor_excluding: Some( + finalized_storage_code_closest_ancestor_excluding, + ), sources_ordered: mem::take(&mut self.inner.sources) .into_iter() .map(|(id, source)| (SourceId(id), source.user_data)) @@ -1377,6 +1509,10 @@ impl BuildRuntime { downloaded_runtime: Some(DownloadedRuntime { storage_code: Some(finalized_storage_code.to_vec()), storage_heap_pages: finalized_storage_heappages.map(|v| v.to_vec()), + code_merkle_value: Some(finalized_storage_code_merkle_value), + closest_ancestor_excluding: Some( + finalized_storage_code_closest_ancestor_excluding, + ), }), chain_info_builder: Some(chain_info_builder), calls, @@ -1482,6 +1618,10 @@ impl BuildChainInformation { finalized_storage_code: downloaded_runtime.storage_code, finalized_storage_heap_pages: downloaded_runtime .storage_heap_pages, + finalized_storage_code_merkle_value: downloaded_runtime + .code_merkle_value, + finalized_storage_code_closest_ancestor_excluding: + 
downloaded_runtime.closest_ancestor_excluding, sources_ordered: mem::take(&mut self.inner.sources) .into_iter() .map(|(id, source)| (SourceId(id), source.user_data)) diff --git a/lib/src/trie/nibble.rs b/lib/src/trie/nibble.rs index eb27f763f3..a7cdfa4583 100644 --- a/lib/src/trie/nibble.rs +++ b/lib/src/trie/nibble.rs @@ -41,6 +41,21 @@ impl Nibble { } Some(Nibble(new_nibble)) } + + /// Converts an ASCII hexadecimal digit (i.e. `0..9`, `a..f`, `A..F`) into a nibble. + /// + /// Returns `None` if `digit` is out of range. + pub fn from_ascii_hex_digit(digit: u8) -> Option { + if digit.is_ascii_digit() { + Some(Nibble(digit - b'0')) + } else if (b'a'..=b'f').contains(&digit) { + Some(Nibble(10 + digit - b'a')) + } else if (b'A'..=b'F').contains(&digit) { + Some(Nibble(10 + digit - b'A')) + } else { + None + } + } } impl TryFrom for Nibble { @@ -323,6 +338,20 @@ mod tests { )); } + #[test] + fn from_ascii_hex_digit_works() { + assert_eq!(u8::from(Nibble::from_ascii_hex_digit(b'0').unwrap()), 0); + assert_eq!(u8::from(Nibble::from_ascii_hex_digit(b'9').unwrap()), 9); + assert_eq!(u8::from(Nibble::from_ascii_hex_digit(b'a').unwrap()), 10); + assert_eq!(u8::from(Nibble::from_ascii_hex_digit(b'f').unwrap()), 15); + assert_eq!(u8::from(Nibble::from_ascii_hex_digit(b'A').unwrap()), 10); + assert_eq!(u8::from(Nibble::from_ascii_hex_digit(b'F').unwrap()), 15); + assert!(Nibble::from_ascii_hex_digit(b'j').is_none()); + assert!(Nibble::from_ascii_hex_digit(b' ').is_none()); + assert!(Nibble::from_ascii_hex_digit(0).is_none()); + assert!(Nibble::from_ascii_hex_digit(255).is_none()); + } + #[test] fn bytes_to_nibbles_works() { assert_eq!( diff --git a/lib/src/trie/proof_decode.rs b/lib/src/trie/proof_decode.rs index ba7c633db5..5b4fb75f1b 100644 --- a/lib/src/trie/proof_decode.rs +++ b/lib/src/trie/proof_decode.rs @@ -594,7 +594,8 @@ impl> DecodedTrieProof { Children { children } } - /// Returns the closest ancestor to the given key. If `key` is in the proof, returns `key`. + /// Returns the closest ancestor to the given key that can be found in the proof. If `key` is + /// in the proof, returns `key`. fn closest_ancestor<'a>( &'a self, trie_root_merkle_value: &[u8; 32], @@ -666,6 +667,18 @@ impl> DecodedTrieProof { } } + /// Returns the key of the closest ancestor to the given key that can be found in the proof. + /// If `key` is in the proof, returns `key`. + pub fn closest_ancestor_in_proof<'a>( + &'a self, + trie_root_merkle_value: &[u8; 32], + key: &[nibble::Nibble], + ) -> Result, IncompleteProofError> { + Ok(self + .closest_ancestor(trie_root_merkle_value, key)? + .map(|(key, _)| key)) + } + /// Returns information about a trie node. /// /// Returns an error if the proof doesn't contain enough information about this trie node. @@ -1138,6 +1151,17 @@ pub enum Child<'a> { NoChild, } +impl<'a> Child<'a> { + /// Returns the Merkle value of this child. `None` if the child doesn't exist. + pub fn merkle_value(&self) -> Option<&'a [u8]> { + match self { + Child::InProof { merkle_value, .. } => Some(merkle_value), + Child::AbsentFromProof { merkle_value } => Some(merkle_value), + Child::NoChild => None, + } + } +} + impl<'a> Children<'a> { /// Returns `true` if a child in the direction of the given nibble is present.
pub fn has_child(&self, nibble: nibble::Nibble) -> bool { diff --git a/light-base/Cargo.toml b/light-base/Cargo.toml index d8d1c66943..08f3a2c42b 100644 --- a/light-base/Cargo.toml +++ b/light-base/Cargo.toml @@ -14,6 +14,7 @@ required-features = ["std"] [dependencies] async-channel = { version = "1.8.0", default-features = false } # TODO: no-std-ize; this is has been done and is just waiting for a release: https://github.com/smol-rs/event-listener/pull/34 async-lock = { version = "2.7.0", default-features = false } # TODO: no-std-ize; this is has been done and is just waiting for a release: https://github.com/smol-rs/event-listener/pull/34 +base64 = { version = "0.21.2", default-features = false, features = ["alloc"] } blake2-rfc = { version = "0.2.18", default-features = false } derive_more = "0.99.17" either = { version = "1.8.1", default-features = false } diff --git a/light-base/src/database.rs b/light-base/src/database.rs index 8150392322..dcb83d96b5 100644 --- a/light-base/src/database.rs +++ b/light-base/src/database.rs @@ -30,6 +30,7 @@ use alloc::{ borrow::ToOwned as _, boxed::Box, + format, string::{String, ToString as _}, vec::Vec, }; @@ -40,7 +41,9 @@ use smoldot::{ libp2p::{multiaddr, PeerId}, }; -use crate::{network_service, platform, sync_service}; +use crate::{network_service, platform, runtime_service, sync_service}; + +pub use smoldot::trie::Nibble; /// A decoded database. pub struct DatabaseContent { @@ -51,6 +54,24 @@ pub struct DatabaseContent { /// List of nodes that were known to be part of the peer-to-peer network when the database /// was encoded. pub known_nodes: Vec<(PeerId, Vec)>, + /// Known valid Merkle value and storage value combination for the `:code` key. + /// + /// Does **not** necessarily match the finalized block found in + /// [`DatabaseContent::chain_information`]. + pub runtime_code_hint: Option, +} + +/// See [`DatabaseContent::runtime_code_hint`]. +#[derive(Debug)] +pub struct DatabaseContentRuntimeCodeHint { + /// Storage value of the `:code` trie node corresponding to + /// [`DatabaseContentRuntimeCodeHint::code_merkle_value`]. + pub code: Vec, + /// Merkle value of the `:code` trie node in the storage main trie. + pub code_merkle_value: Vec, + /// Closest ancestor of the `:code` key except for `:code` itself. + // TODO: this punches a bit through abstraction layers, but it's temporary + pub closest_ancestor_excluding: Vec, } /// Serializes the finalized state of the chain, using the given services. @@ -60,9 +81,15 @@ pub struct DatabaseContent { pub async fn encode_database( network_service: &network_service::NetworkService, sync_service: &sync_service::SyncService, + runtime_service: &runtime_service::RuntimeService, genesis_block_hash: &[u8; 32], max_size: usize, ) -> String { + let (code_storage_value, code_merkle_value, code_closest_ancestor_excluding) = runtime_service + .finalized_runtime_storage_merkle_values() + .await + .unwrap_or((None, None, None)); + // Craft the structure containing all the data that we would like to include. let mut database_draft = SerdeDatabase { genesis_hash: hex::encode(genesis_block_hash), @@ -93,6 +120,17 @@ pub async fn encode_database( ) }) .collect(), + code_merkle_value: code_merkle_value.map(hex::encode), + // While it might seem like a good idea to compress the runtime code, in practice it is + // normally already zstd-compressed, and additional compressing shouldn't improve the size. 
+ code_storage_value: code_storage_value.map(|data| { + base64::Engine::encode(&base64::engine::general_purpose::STANDARD_NO_PAD, data) + }), + code_closest_ancestor_excluding: code_closest_ancestor_excluding.map(|key| { + key.iter() + .map(|nibble| format!("{:x}", nibble)) + .collect::() + }), }; // Cap the database length to the maximum size. @@ -103,6 +141,14 @@ pub async fn encode_database( return serialized; } + // Scrap the code, as it is the biggest item. + if database_draft.code_merkle_value.is_some() || database_draft.code_storage_value.is_some() + { + database_draft.code_merkle_value = None; + database_draft.code_storage_value = None; + continue; + } + if database_draft.nodes.is_empty() { // Can't shrink the database anymore. Return the string `""` which will // fail to decode but will indicate what is wrong. @@ -167,10 +213,31 @@ pub fn decode_database(encoded: &str, block_number_bytes: usize) -> Result>(); + let runtime_code_hint = match ( + decoded.code_merkle_value, + decoded.code_storage_value, + decoded.code_closest_ancestor_excluding, + ) { + (Some(mv), Some(sv), Some(an)) => Some(DatabaseContentRuntimeCodeHint { + code: base64::Engine::decode(&base64::engine::general_purpose::STANDARD_NO_PAD, &sv) + .map_err(|_| ())?, + code_merkle_value: hex::decode(&mv).map_err(|_| ())?, + closest_ancestor_excluding: an + .as_bytes() + .iter() + .map(|char| Nibble::from_ascii_hex_digit(*char).ok_or(())) + .collect::, ()>>()?, + }), + // A combination of `Some` and `None` is technically invalid, but we simply ignore this + // situation. + _ => None, + }; + Ok(DatabaseContent { genesis_block_hash, chain_information, known_nodes, + runtime_code_hint, }) } @@ -181,4 +248,22 @@ struct SerdeDatabase { genesis_hash: String, chain: Box, nodes: hashbrown::HashMap, fnv::FnvBuildHasher>, + #[serde( + rename = "runtimeCode", + default = "Default::default", + skip_serializing_if = "Option::is_none" + )] + code_storage_value: Option, + #[serde( + rename = "codeMerkleValue", + default = "Default::default", + skip_serializing_if = "Option::is_none" + )] + code_merkle_value: Option, + #[serde( + rename = "codeClosestAncestor", + default = "Default::default", + skip_serializing_if = "Option::is_none" + )] + code_closest_ancestor_excluding: Option, } diff --git a/light-base/src/json_rpc_service/background.rs b/light-base/src/json_rpc_service/background.rs index b97e95847d..1371189fbc 100644 --- a/light-base/src/json_rpc_service/background.rs +++ b/light-base/src/json_rpc_service/background.rs @@ -832,7 +832,7 @@ impl Background { // Download the runtime of this block. This takes a long time as the runtime is rather // big (around 1MiB in general). 
- let (storage_code, storage_heap_pages) = { + let (storage_code, storage_heap_pages, code_merkle_value, code_closest_ancestor_excluding) = { let entries = self .sync_service .clone() @@ -841,6 +841,10 @@ impl Background { block_hash, &state_trie_root_hash, [ + sync_service::StorageRequestItem { + key: b":code".to_vec(), + ty: sync_service::StorageRequestItemTy::ClosestDescendantMerkleValue, + }, sync_service::StorageRequestItem { key: b":code".to_vec(), ty: sync_service::StorageRequestItemTy::Value, @@ -879,14 +883,45 @@ impl Background { _ => None, }) .unwrap(); - (code, heap_pages) + let (code_merkle_value, code_closest_ancestor_excluding) = if code.is_some() { + entries + .iter() + .find_map(|entry| match entry { + sync_service::StorageResultItem::ClosestDescendantMerkleValue { + requested_key, + closest_descendant_merkle_value, + found_closest_ancestor_excluding, + } if requested_key == b":code" => { + Some(( + closest_descendant_merkle_value.clone(), + found_closest_ancestor_excluding.clone(), + )) // TODO overhead + } + _ => None, + }) + .unwrap() + } else { + (None, None) + }; + + ( + code, + heap_pages, + code_merkle_value, + code_closest_ancestor_excluding, + ) }; // Give the code and heap pages to the runtime service. The runtime service will // try to find any similar runtime it might have, and if not will compile it. let pinned_runtime_id = self .runtime_service - .compile_and_pin_runtime(storage_code, storage_heap_pages) + .compile_and_pin_runtime( + storage_code, + storage_heap_pages, + code_merkle_value, + code_closest_ancestor_excluding, + ) .await; let precall = self diff --git a/light-base/src/json_rpc_service/background/chain_head.rs b/light-base/src/json_rpc_service/background/chain_head.rs index 3db5eb96ae..ca1810d11c 100644 --- a/light-base/src/json_rpc_service/background/chain_head.rs +++ b/light-base/src/json_rpc_service/background/chain_head.rs @@ -1038,7 +1038,7 @@ impl ChainHeadFollowTask { closest_descendant_merkle_value: None, }) } - sync_service::StorageResultItem::ClosestDescendantMerkleValue { requested_key, closest_descendant_merkle_value: merkle_value } => { + sync_service::StorageResultItem::ClosestDescendantMerkleValue { requested_key, closest_descendant_merkle_value: merkle_value, .. } => { Some(methods::ChainHeadStorageResponseItem { key: methods::HexString(requested_key), value: None, diff --git a/light-base/src/json_rpc_service/background/getters.rs b/light-base/src/json_rpc_service/background/getters.rs index e6f16c2dae..babc4767a3 100644 --- a/light-base/src/json_rpc_service/background/getters.rs +++ b/light-base/src/json_rpc_service/background/getters.rs @@ -202,6 +202,7 @@ impl Background { let response = crate::database::encode_database( &self.network_service.0, &self.sync_service, + &self.runtime_service, &self.genesis_block_hash, usize::try_from(max_size_bytes.unwrap_or(u64::max_value())) .unwrap_or(usize::max_value()), diff --git a/light-base/src/lib.rs b/light-base/src/lib.rs index b66df72fad..0032f12753 100644 --- a/light-base/src/lib.rs +++ b/light-base/src/lib.rs @@ -368,7 +368,7 @@ impl Client { // known as a checkpoint) is present in the chain spec, it is possible to start syncing at // the finalized block it describes. 
// TODO: clean up that block - let (chain_information, genesis_block_header, checkpoint_nodes) = { + let (chain_information, genesis_block_header, checkpoint_nodes, runtime_code_hint) = { match ( chain_spec.to_chain_information().map(|(ci, _)| ci), // TODO: don't just throw away the runtime chain_spec @@ -403,6 +403,7 @@ impl Client { database_content.chain_information, genesis_header.into(), database_content.known_nodes, + database_content.runtime_code_hint, ) } @@ -438,10 +439,16 @@ impl Client { database_content.chain_information, genesis_header, database_content.known_nodes, + database_content.runtime_code_hint, ) } else if let Some(Ok(checkpoint)) = checkpoint { // Database is incorrect. - (checkpoint, genesis_header, database_content.known_nodes) + ( + checkpoint, + genesis_header, + database_content.known_nodes, + None, + ) } else { // TODO: we can in theory support chain specs that have neither a checkpoint nor the genesis storage, but it's complicated // TODO: is this relevant for parachains? @@ -468,14 +475,14 @@ impl Client { digest: header::DigestRef::empty().into(), }; - (checkpoint, genesis_header, Default::default()) + (checkpoint, genesis_header, Default::default(), None) } (Err(err), _, _) => return Err(AddChainError::InvalidGenesisStorage(err)), (Ok(genesis_ci), Some(Ok(checkpoint)), _) => { let genesis_header = genesis_ci.as_ref().finalized_block_header.clone(); - (checkpoint, genesis_header.into(), Default::default()) + (checkpoint, genesis_header.into(), Default::default(), None) } ( @@ -488,7 +495,7 @@ impl Client { ) => { let genesis_header = header::Header::from(genesis_ci.as_ref().finalized_block_header.clone()); - (genesis_ci, genesis_header, Default::default()) + (genesis_ci, genesis_header, Default::default(), None) } (_, Some(Err(err)), _) => { @@ -705,6 +712,7 @@ impl Client { log_name.clone(), &platform, chain_information, + runtime_code_hint, genesis_block_header .scale_encoding_vec(chain_spec.block_number_bytes().into()), chain_spec, @@ -1060,6 +1068,7 @@ async fn start_services( log_name: String, platform: &TPlat, chain_information: chain::chain_information::ValidChainInformation, + runtime_code_hint: Option, genesis_block_scale_encoded_header: Vec, chain_spec: chain_spec::ChainSpec, relay_chain: Option<&ChainServices>, @@ -1115,11 +1124,15 @@ async fn start_services( block_number_bytes: usize::from(chain_spec.block_number_bytes()), network_service: (network_service.clone(), 0), network_events_receiver: network_event_receivers.pop().unwrap(), - parachain: Some(sync_service::ConfigParachain { - parachain_id: chain_spec.relay_chain().unwrap().1, - relay_chain_sync: relay_chain.runtime_service.clone(), - relay_chain_block_number_bytes: relay_chain.sync_service.block_number_bytes(), - }), + chain_type: sync_service::ConfigChainType::Parachain( + sync_service::ConfigParachain { + parachain_id: chain_spec.relay_chain().unwrap().1, + relay_chain_sync: relay_chain.runtime_service.clone(), + relay_chain_block_number_bytes: relay_chain + .sync_service + .block_number_bytes(), + }, + ), }) .await, ); @@ -1151,7 +1164,17 @@ async fn start_services( platform: platform.clone(), network_service: (network_service.clone(), 0), network_events_receiver: network_event_receivers.pop().unwrap(), - parachain: None, + chain_type: sync_service::ConfigChainType::RelayChain( + sync_service::ConfigRelayChain { + runtime_code_hint: runtime_code_hint.map(|hint| { + sync_service::ConfigRelayChainRuntimeCodeHint { + storage_value: hint.code, + merkle_value: hint.code_merkle_value, + 
closest_ancestor_excluding: hint.closest_ancestor_excluding, + } + }), + }, + ), }) .await, ); diff --git a/light-base/src/runtime_service.rs b/light-base/src/runtime_service.rs index d2745bebe5..2a92b97396 100644 --- a/light-base/src/runtime_service.rs +++ b/light-base/src/runtime_service.rs @@ -80,7 +80,7 @@ use smoldot::{ executor, header, informant::{BytesDisplay, HashDisplay}, network::protocol, - trie::{self, proof_decode, TrieEntryVersion}, + trie::{self, proof_decode, Nibble, TrieEntryVersion}, }; /// Configuration for a runtime service. @@ -420,6 +420,28 @@ impl RuntimeService { } } + /// Returns the storage value and Merkle value of the `:code` key of the finalized block. + /// + /// Returns `None` if the runtime of the current finalized block is not known yet. + // TODO: this function has a bad API but is hopefully temporary + pub async fn finalized_runtime_storage_merkle_values( + &self, + ) -> Option<(Option>, Option>, Option>)> { + let mut guarded = self.guarded.lock().await; + let guarded = &mut *guarded; + + if let GuardedInner::FinalizedBlockRuntimeKnown { tree, .. } = &guarded.tree { + let runtime = &tree.output_finalized_async_user_data(); + Some(( + runtime.runtime_code.clone(), + runtime.code_merkle_value.clone(), + runtime.closest_ancestor_excluding.clone(), + )) + } else { + None + } + } + /// Lock the runtime service and prepare a call to a runtime entry point. /// /// The hash of the block passed as parameter corresponds to the block whose runtime to use @@ -513,6 +535,8 @@ impl RuntimeService { &self, storage_code: Option>, storage_heap_pages: Option>, + code_merkle_value: Option>, + closest_ancestor_excluding: Option>, ) -> PinnedRuntimeId { let mut guarded = self.guarded.lock().await; @@ -532,6 +556,8 @@ impl RuntimeService { let runtime = Arc::new(Runtime { heap_pages: storage_heap_pages, runtime_code: storage_code, + code_merkle_value, + closest_ancestor_excluding, runtime, }); guarded.runtimes.insert(Arc::downgrade(&runtime)); @@ -1211,6 +1237,8 @@ async fn run_background( let runtime = Arc::new(Runtime { runtime_code: finalized_block_runtime.storage_code, heap_pages: finalized_block_runtime.storage_heap_pages, + code_merkle_value: finalized_block_runtime.code_merkle_value, + closest_ancestor_excluding: finalized_block_runtime.closest_ancestor_excluding, runtime: Ok(SuccessfulRuntime { runtime_spec: finalized_block_runtime .virtual_machine @@ -1485,7 +1513,7 @@ async fn run_background( }.format_with(", ", |block, fmt| fmt(&HashDisplay(&block.hash))).to_string(); match download_result { - Ok((storage_code, storage_heap_pages)) => { + Ok((storage_code, storage_heap_pages, code_merkle_value, closest_ancestor_excluding)) => { log::debug!( target: &log_target, "Worker <= SuccessfulDownload(blocks=[{}])", @@ -1496,7 +1524,7 @@ async fn run_background( guarded.best_near_head_of_chain = true; drop(guarded); - background.runtime_download_finished(async_op_id, storage_code, storage_heap_pages).await; + background.runtime_download_finished(async_op_id, storage_code, storage_heap_pages, code_merkle_value, closest_ancestor_excluding).await; } Err(error) => { log::debug!( @@ -1569,14 +1597,22 @@ struct Background { blocks_stream: Pin + Send>>, /// List of runtimes currently being downloaded from the network. - /// For each item, the download id, storage value of `:code`, and storage value of - /// `:heappages`. + /// For each item, the download id, storage value of `:code`, storage value of `:heappages`, + /// and Merkle value and closest ancestor of `:code`. 
runtime_downloads: stream::FuturesUnordered< future::BoxFuture< 'static, ( async_tree::AsyncOpId, - Result<(Option>, Option>), RuntimeDownloadError>, + Result< + ( + Option>, + Option>, + Option>, + Option>, + ), + RuntimeDownloadError, + >, ), >, >, @@ -1592,6 +1628,8 @@ impl Background { async_op_id: async_tree::AsyncOpId, storage_code: Option>, storage_heap_pages: Option>, + code_merkle_value: Option>, + closest_ancestor_excluding: Option>, ) { let mut guarded = self.guarded.lock().await; @@ -1634,6 +1672,8 @@ impl Background { heap_pages: storage_heap_pages, runtime_code: storage_code, runtime, + code_merkle_value, + closest_ancestor_excluding, }); guarded.runtimes.insert(Arc::downgrade(&runtime)); @@ -1966,6 +2006,10 @@ impl Background { &block_hash, &state_root, [ + sync_service::StorageRequestItem { + key: b":code".to_vec(), + ty: sync_service::StorageRequestItemTy::ClosestDescendantMerkleValue, + }, sync_service::StorageRequestItem { key: b":code".to_vec(), ty: sync_service::StorageRequestItemTy::Value, @@ -2008,7 +2052,24 @@ impl Background { _ => None, }) .unwrap(); - Ok((code, heap_pages)) + let (code_merkle_value, code_closest_ancestor) = if code.is_some() { + entries + .iter() + .find_map(|entry| match entry { + sync_service::StorageResultItem::ClosestDescendantMerkleValue { + requested_key, + found_closest_ancestor_excluding, + closest_descendant_merkle_value, + } if requested_key == b":code" => { + Some((closest_descendant_merkle_value.clone(), found_closest_ancestor_excluding.clone())) // TODO overhead + } + _ => None + }) + .unwrap() + } else { + (None, None) + }; + Ok((code, heap_pages, code_merkle_value, code_closest_ancestor)) } Err(error) => Err(RuntimeDownloadError::StorageQuery(error)), }; @@ -2083,6 +2144,15 @@ struct Runtime { /// happened, including a problem when obtaining the runtime specs. runtime: Result, + /// Merkle value of the `:code` trie node. + /// + /// Can be `None` if the storage is empty, in which case the runtime will have failed to + /// build. + code_merkle_value: Option>, + + /// Closest ancestor of the `:code` key except for `:code` itself. + closest_ancestor_excluding: Option>, + /// Undecoded storage value of `:code` corresponding to the [`Runtime::runtime`] /// field. /// diff --git a/light-base/src/sync_service.rs b/light-base/src/sync_service.rs index 79cdece63d..a58d46a954 100644 --- a/light-base/src/sync_service.rs +++ b/light-base/src/sync_service.rs @@ -39,7 +39,7 @@ use smoldot::{ executor::host, libp2p::PeerId, network::{protocol, service}, - trie::{self, prefix_proof, proof_decode}, + trie::{self, prefix_proof, proof_decode, Nibble}, }; mod parachain; @@ -70,12 +70,41 @@ pub struct Config { /// [`network_service::NetworkService::new`]. pub network_events_receiver: stream::BoxStream<'static, network_service::Event>, - /// Extra fields used when the chain is a parachain. - /// If `None`, this chain is a standalone chain or a relay chain. - pub parachain: Option>, + /// Extra fields depending on whether the chain is a relay chain or a parachain. + pub chain_type: ConfigChainType, } -/// See [`Config::parachain`]. +/// See [`Config::chain_type`]. +pub enum ConfigChainType { + /// Chain is a relay chain. + RelayChain(ConfigRelayChain), + /// Chain is a parachain. + Parachain(ConfigParachain), +} + +/// See [`ConfigChainType::RelayChain`]. +pub struct ConfigRelayChain { + /// Known valid Merkle value and storage value combination for the `:code` key. 
+ /// + /// If provided, the warp syncing algorithm will first fetch the Merkle value of `:code`, and + /// if it matches the Merkle value provided in the hint, use the storage value in the hint + /// instead of downloading it. If the hint doesn't match, an extra round-trip will be needed, + /// but if the hint matches it saves a big download. + pub runtime_code_hint: Option, +} + +/// See [`ConfigRelayChain::runtime_code_hint`]. +pub struct ConfigRelayChainRuntimeCodeHint { + /// Storage value of the `:code` trie node corresponding to + /// [`ConfigRelayChainRuntimeCodeHint::merkle_value`]. + pub storage_value: Vec, + /// Merkle value of the `:code` trie node in the storage main trie. + pub merkle_value: Vec, + /// Closest ancestor of the `:code` key except for `:code` itself. + pub closest_ancestor_excluding: Vec, +} + +/// See [`ConfigChainType::Parachain`]. pub struct ConfigParachain { /// Runtime service that synchronizes the relay chain of this parachain. pub relay_chain_sync: Arc>, @@ -115,36 +144,40 @@ impl SyncService { let log_target = format!("sync-service-{}", config.log_name); - if let Some(config_parachain) = config.parachain { - config.platform.spawn_task( - log_target.clone().into(), - Box::pin(parachain::start_parachain( - log_target, - config.platform.clone(), - config.chain_information, - config.block_number_bytes, - config_parachain.relay_chain_sync.clone(), - config_parachain.relay_chain_block_number_bytes, - config_parachain.parachain_id, - from_foreground, - config.network_service.1, - config.network_events_receiver, - )), - ); - } else { - config.platform.spawn_task( - log_target.clone().into(), - Box::pin(standalone::start_standalone_chain( - log_target, - config.platform.clone(), - config.chain_information, - config.block_number_bytes, - from_foreground, - config.network_service.0.clone(), - config.network_service.1, - config.network_events_receiver, - )), - ); + match config.chain_type { + ConfigChainType::Parachain(config_parachain) => { + config.platform.spawn_task( + log_target.clone().into(), + Box::pin(parachain::start_parachain( + log_target, + config.platform.clone(), + config.chain_information, + config.block_number_bytes, + config_parachain.relay_chain_sync.clone(), + config_parachain.relay_chain_block_number_bytes, + config_parachain.parachain_id, + from_foreground, + config.network_service.1, + config.network_events_receiver, + )), + ); + } + ConfigChainType::RelayChain(config_relay_chain) => { + config.platform.spawn_task( + log_target.clone().into(), + Box::pin(standalone::start_standalone_chain( + log_target, + config.platform.clone(), + config.chain_information, + config.block_number_bytes, + config_relay_chain.runtime_code_hint, + from_foreground, + config.network_service.0.clone(), + config.network_service.1, + config.network_events_receiver, + )), + ); + } } SyncService { @@ -657,28 +690,36 @@ impl SyncService { } } RequestImpl::ClosestDescendantMerkleValue { key } => { - match decoded_proof.closest_descendant_merkle_value( - main_trie_root_hash, - &trie::bytes_to_nibbles(key.iter().copied()).collect::>(), - ) { - Ok(Some(merkle_value)) => final_results.push( - StorageResultItem::ClosestDescendantMerkleValue { - requested_key: key, - closest_descendant_merkle_value: Some( - merkle_value.as_ref().to_vec(), - ), - }, - ), - Ok(None) => final_results.push( - StorageResultItem::ClosestDescendantMerkleValue { - requested_key: key, - closest_descendant_merkle_value: None, - }, - ), + let key_nibbles = + 
&trie::bytes_to_nibbles(key.iter().copied()).collect::>(); + + let closest_descendant_merkle_value = match decoded_proof + .closest_descendant_merkle_value(main_trie_root_hash, &key_nibbles) + { + Ok(Some(merkle_value)) => Some(merkle_value.as_ref().to_vec()), + Ok(None) => None, Err(proof_decode::IncompleteProofError { .. }) => { outcome_errors.push(StorageQueryErrorDetail::MissingProofEntry); + continue; } - } + }; + + let found_closest_ancestor_excluding = match decoded_proof + .closest_ancestor_in_proof(main_trie_root_hash, &key_nibbles) + { + Ok(Some(ancestor)) => Some(ancestor.to_vec()), + Ok(None) => None, + Err(proof_decode::IncompleteProofError { .. }) => { + outcome_errors.push(StorageQueryErrorDetail::MissingProofEntry); + continue; + } + }; + + final_results.push(StorageResultItem::ClosestDescendantMerkleValue { + requested_key: key, + closest_descendant_merkle_value, + found_closest_ancestor_excluding, + }) } } } @@ -826,6 +867,10 @@ pub enum StorageResultItem { ClosestDescendantMerkleValue { /// Key that was requested. Equal to the value of [`StorageRequestItem::key`]. requested_key: Vec, + /// Closest ancestor to the requested key that was found in the proof. If + /// [`StorageResultItem::ClosestDescendantMerkleValue::closest_descendant_merkle_value`] + /// is `Some`, then this is always the parent of the requested key. + found_closest_ancestor_excluding: Option>, /// Merkle value of the closest descendant of /// [`StorageResultItem::DescendantValue::requested_key`]. The key that corresponds /// to this Merkle value is not included. `None` if the key has no descendant. @@ -961,6 +1006,12 @@ pub struct FinalizedBlockRuntime { /// Storage value at the `:heappages` key. pub storage_heap_pages: Option>, + + /// Merkle value of the `:code` key. + pub code_merkle_value: Option>, + + /// Closest ancestor of the `:code` key except for `:code` itself. + pub closest_ancestor_excluding: Option>, } /// Notification about a new block or a new finalized block. diff --git a/light-base/src/sync_service/standalone.rs b/light-base/src/sync_service/standalone.rs index 5ed87992e8..1e6aaa1fd2 100644 --- a/light-base/src/sync_service/standalone.rs +++ b/light-base/src/sync_service/standalone.rs @@ -15,7 +15,10 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . 
-use super::{BlockNotification, FinalizedBlockRuntime, Notification, SubscribeAll, ToBackground}; +use super::{ + BlockNotification, ConfigRelayChainRuntimeCodeHint, FinalizedBlockRuntime, Notification, + SubscribeAll, ToBackground, +}; use crate::{network_service, platform::PlatformRef, util}; use alloc::{borrow::ToOwned as _, string::String, sync::Arc, vec::Vec}; @@ -41,6 +44,7 @@ pub(super) async fn start_standalone_chain( platform: TPlat, chain_information: chain::chain_information::ValidChainInformation, block_number_bytes: usize, + runtime_code_hint: Option, mut from_foreground: mpsc::Receiver, network_service: Arc>, network_chain_index: usize, @@ -82,6 +86,11 @@ pub(super) async fn start_standalone_chain( NonZeroU32::new(5000).unwrap() }, full_mode: false, + code_trie_node_hint: runtime_code_hint.map(|hint| all::ConfigCodeTrieNodeHint { + merkle_value: hint.merkle_value, + storage_value: hint.storage_value, + closest_ancestor_excluding: hint.closest_ancestor_excluding, + }), }), network_up_to_date_best: true, network_up_to_date_finalized: true, @@ -641,7 +650,9 @@ impl Task { sync, finalized_block_runtime, finalized_storage_code, + finalized_storage_code_closest_ancestor_excluding, finalized_storage_heap_pages, + finalized_storage_code_merkle_value, } => { self.sync = sync; @@ -661,6 +672,8 @@ impl Task { virtual_machine: finalized_block_runtime, storage_code: finalized_storage_code, storage_heap_pages: finalized_storage_heap_pages, + code_merkle_value: finalized_storage_code_merkle_value, + closest_ancestor_excluding: finalized_storage_code_closest_ancestor_excluding, }); self.network_up_to_date_finalized = false; diff --git a/wasm-node/CHANGELOG.md b/wasm-node/CHANGELOG.md index 41c543a9d1..442310aeba 100644 --- a/wasm-node/CHANGELOG.md +++ b/wasm-node/CHANGELOG.md @@ -4,11 +4,16 @@ ### Changed +- The runtime code of the finalized block is now stored in the database. At initialization, smoldot now only downloads the hash of the runtime and compares it with the one in cache. If the hashes match (which is the case if no runtime update has happened on the chain since the database has been created), smoldot doesn't download the runtime code but uses the value in the cache. This saves a relatively heavy download (typically around 1 MiB to 1.5 MiB depending on the chain) and speeds up the loading time. ([#863](https://github.com/smol-dot/smoldot/pull/863)) - The `chainHead_unstable_storage` JSON-RPC function now supports a `type` equal to `closest-descendant-merkle-value` and no longer supports `closest-ancestor-merkle-value`, in accordance with the latest changes in the JSON-RPC API specification. ([#824](https://github.com/smol-dot/smoldot/pull/824)) - Blocks are now reported to `chain_subscribeAllHeads` and `chain_subscribeNewHeads` subscribers only after they have been put in the cache, preventing race conditions where JSON-RPC clients suffer from a cache miss if they ask information about these blocks too quickly. ([#854](https://github.com/smol-dot/smoldot/pull/854)) - Runtime updates are now always reported to `state_subscribeRuntimeVersion` subscribers immediately after the `chain_subscribeNewHeads` notification corresponding to the block containing the runtime update. They were previously reported in a pseudo-random order. 
([#854](https://github.com/smol-dot/smoldot/pull/854)) - All the storage subscriptions made using `state_subscribeStorage` are now queried together into a single networking request per block, instead of sending one networking query per storage key and per subscription. ([#854](https://github.com/smol-dot/smoldot/pull/854)) +### Fixed + +- Fix downloading the runtime code twice during the warp syncing process. ([#863](https://github.com/smol-dot/smoldot/pull/863)) + ## 1.0.11 - 2023-06-25 ### Changed
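For reviewers, the caching scheme that the new `code_trie_node_hint` / `runtime_code_hint` fields implement boils down to a compare-then-reuse step before the `:code` download. The sketch below is a standalone illustration of that logic, not smoldot's actual API; `CodeHint`, `fetch_code_merkle_value`, and `download_code` are hypothetical stand-ins for the database hint and the networking requests performed by the warp sync state machine.

```rust
/// Known-good `:code` entry previously saved in the database (plays the same role as the
/// `ConfigCodeTrieNodeHint` / `DatabaseContentRuntimeCodeHint` structs added in this diff).
struct CodeHint {
    merkle_value: Vec<u8>,
    storage_value: Vec<u8>,
}

/// Returns the runtime code of the finalized block. The Merkle value (a few dozen bytes) is
/// always fetched first; the ~1 MiB code itself is only downloaded when the hint is absent or
/// stale, which is the extra round-trip mentioned in the `code_trie_node_hint` doc comments.
fn runtime_code(
    hint: Option<&CodeHint>,
    fetch_code_merkle_value: impl FnOnce() -> Vec<u8>,
    download_code: impl FnOnce() -> Vec<u8>,
) -> Vec<u8> {
    let on_chain_merkle_value = fetch_code_merkle_value();
    match hint {
        // Hint still valid: reuse the cached code and skip the big download.
        Some(hint) if hint.merkle_value == on_chain_merkle_value => hint.storage_value.clone(),
        // No hint, or the runtime changed since the database was written: full download.
        _ => download_code(),
    }
}
```

In the diff itself this comparison happens in `BuildRuntime`: when the hint's Merkle value doesn't match, `hint_doesnt_match` is set to `true` and the state machine goes back to requesting the actual `:code` value.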