Multithreaded snapshot creation #9239
Changes from 15 commits
@@ -20,6 +20,7 @@
 //! https://wiki.parity.io/Warp-Sync-Snapshot-Format

 use std::collections::{HashMap, HashSet};
+use std::cmp;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use hash::{keccak, KECCAK_NULL_RLP, KECCAK_EMPTY};
@@ -88,6 +89,26 @@ const MAX_CHUNK_SIZE: usize = PREFERRED_CHUNK_SIZE / 4 * 5;
 const MIN_SUPPORTED_STATE_CHUNK_VERSION: u64 = 1;
 // current state chunk version.
 const STATE_CHUNK_VERSION: u64 = 2;
+/// number of snapshot subparts, must be a power of 2 in [1; 256]
+pub const SNAPSHOT_SUBPARTS: usize = 16;
+
+/// Configuration for the Snapshot service
+#[derive(Debug, Clone, PartialEq)]
+pub struct SnapshotConfiguration {
+	/// If `true`, no periodic snapshots will be created
+	pub no_periodic: bool,
+	/// Number of threads for creating snapshots
+	pub processing_threads: usize,
+}
+
+impl Default for SnapshotConfiguration {
+	fn default() -> Self {
+		SnapshotConfiguration {
+			no_periodic: false,
+			processing_threads: 1,
+		}
+	}
+}
+
 /// A progress indicator for snapshots.
 #[derive(Debug, Default)]
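For orientation, here is a minimal standalone sketch of how a caller might use this new configuration type. The struct and its defaults are copied from the hunk above so the snippet compiles on its own; the thread-count value is an arbitrary illustration, not something this PR prescribes:

	#[derive(Debug, Clone, PartialEq)]
	pub struct SnapshotConfiguration {
		/// If `true`, no periodic snapshots will be created
		pub no_periodic: bool,
		/// Number of threads for creating snapshots
		pub processing_threads: usize,
	}

	impl Default for SnapshotConfiguration {
		fn default() -> Self {
			SnapshotConfiguration { no_periodic: false, processing_threads: 1 }
		}
	}

	fn main() {
		let mut conf = SnapshotConfiguration::default();
		assert_eq!(conf.processing_threads, 1); // default: single-threaded chunking
		conf.processing_threads = 4; // e.g. a value chosen from available cores
		assert_eq!(conf.processing_threads, 4);
		assert!(!conf.no_periodic); // periodic snapshots remain enabled
	}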
@@ -130,7 +151,8 @@ pub fn take_snapshot<W: SnapshotWriter + Send>(
 	block_at: H256,
 	state_db: &HashDB<KeccakHasher>,
 	writer: W,
-	p: &Progress
+	p: &Progress,
+	processing_threads: usize,
 ) -> Result<(), Error> {
 	let start_header = chain.block_header_data(&block_at)
 		.ok_or(Error::InvalidStartingBlock(BlockId::Hash(block_at)))?;
@@ -142,14 +164,46 @@
 	let writer = Mutex::new(writer);
 	let chunker = engine.snapshot_components().ok_or(Error::SnapshotsUnsupported)?;
 	let snapshot_version = chunker.current_version();
-	let (state_hashes, block_hashes) = scope(|scope| {
+	let (state_hashes, block_hashes) = scope(|scope| -> Result<(Vec<H256>, Vec<H256>), Error> {
 		let writer = &writer;
 		let block_guard = scope.spawn(move || chunk_secondary(chunker, chain, block_at, writer, p));
-		let state_res = chunk_state(state_db, &state_root, writer, p);
-
-		state_res.and_then(|state_hashes| {
-			block_guard.join().map(|block_hashes| (state_hashes, block_hashes))
-		})
+
+		// The number of threads must be between 1 and SNAPSHOT_SUBPARTS
+		let num_threads: usize = cmp::max(1, cmp::min(processing_threads, SNAPSHOT_SUBPARTS));
+		info!(target: "snapshot", "Using {} threads for Snapshot creation.", num_threads);
+
+		let mut state_guards = Vec::with_capacity(num_threads as usize);
+		let subparts: Vec<usize> = (0..SNAPSHOT_SUBPARTS).collect();
+
+		for thread_idx in 0..num_threads {
+			let subparts_c = subparts.clone();
+			let state_guard = scope.spawn(move || -> Result<Vec<H256>, Error> {
+				let mut chunk_hashes = Vec::new();
+
+				for subpart_chunk in subparts_c.chunks(num_threads) {
+					if subpart_chunk.len() > thread_idx {
+						let part = subpart_chunk[thread_idx];

[Review comment] Maybe something like this would be easier to read (and we don't need to allocate `subparts`):

	let state_guard = scope.spawn(move || -> Result<Vec<H256>, Error> {
		let mut chunk_hashes = Vec::new();
		for part in (thread_idx..SNAPSHOT_SUBPARTS).step_by(num_threads) {
			...
		}
	}

Note that …
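As a cross-check on that suggestion, the two indexing schemes really do assign the same parts to each thread. A self-contained sketch (reusing only `SNAPSHOT_SUBPARTS` from this diff) that verifies the equivalence for every thread count:

	const SNAPSHOT_SUBPARTS: usize = 16;

	fn main() {
		for num_threads in 1..=SNAPSHOT_SUBPARTS {
			let subparts: Vec<usize> = (0..SNAPSHOT_SUBPARTS).collect();
			for thread_idx in 0..num_threads {
				// Scheme from the diff: take index `thread_idx` of every
				// `num_threads`-sized window of the subpart list.
				let via_chunks: Vec<usize> = subparts
					.chunks(num_threads)
					.filter(|c| c.len() > thread_idx)
					.map(|c| c[thread_idx])
					.collect();
				// Scheme from the review comment: stride through the range.
				let via_step_by: Vec<usize> =
					(thread_idx..SNAPSHOT_SUBPARTS).step_by(num_threads).collect();
				assert_eq!(via_chunks, via_step_by);
			}
		}
		println!("both schemes agree for every thread count");
	}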
+						debug!(target: "snapshot", "Chunking part {} in thread {}", part, thread_idx);
+						let mut hashes = chunk_state(state_db, &state_root, writer, p, Some(part))?;
+						chunk_hashes.append(&mut hashes);
+					}
+				}
+
+				Ok(chunk_hashes)
+			});
+			state_guards.push(state_guard);
+		}
+
+		let block_hashes = block_guard.join()?;
+		let mut state_hashes = Vec::new();
+
+		for guard in state_guards {
+			let mut part_state_hashes = guard.join()?.clone();
+			state_hashes.append(&mut part_state_hashes);
+		}

[Review comment] We can avoid cloning:

	let mut state_hashes = Vec::new();
	for guard in state_guards {
		let part_state_hashes = guard.join()?;
		state_hashes.extend(part_state_hashes);
	}

[Reply] Oups, you're right, sorry.
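The suggestion works because `join()` already hands back an owned `Vec`, so its elements can be moved rather than cloned. A tiny standalone illustration of that move-vs-clone point (plain `u32`s standing in for the PR's `H256` hashes):

	fn main() {
		// Stand-ins for the per-thread results of `guard.join()?`.
		let per_thread: Vec<Vec<u32>> = vec![vec![1, 2], vec![3], vec![4, 5, 6]];

		let mut all = Vec::new();
		for part in per_thread {
			// `extend` takes any IntoIterator; passing the owned Vec moves
			// its elements in, avoiding the `.clone()` + `append` round trip.
			all.extend(part);
		}
		assert_eq!(all, vec![1, 2, 3, 4, 5, 6]);
	}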
+
+		debug!(target: "snapshot", "Took a snapshot of {} accounts", p.accounts.load(Ordering::SeqCst));
+		Ok((state_hashes, block_hashes))
 	})?;

 	info!("produced {} state chunks and {} block chunks.", state_hashes.len(), block_hashes.len());
@@ -200,6 +254,7 @@ pub fn chunk_secondary<'a>(mut chunker: Box<SnapshotComponents>, chain: &'a Bloc
 			chain,
 			start_hash,
 			&mut chunk_sink,
+			progress,
 			PREFERRED_CHUNK_SIZE,
 		)?;
 	}
@@ -263,10 +318,12 @@ impl<'a> StateChunker<'a> {

 /// Walk the given state database starting from the given root,
 /// creating chunks and writing them out.
+/// `part` is a number between 0 and 15, which describes which part of
+/// the tree should be chunked.
 ///
 /// Returns a list of hashes of chunks created, or any error it may
 /// have encountered.
-pub fn chunk_state<'a>(db: &HashDB<KeccakHasher>, root: &H256, writer: &Mutex<SnapshotWriter + 'a>, progress: &'a Progress) -> Result<Vec<H256>, Error> {
+pub fn chunk_state<'a>(db: &HashDB<KeccakHasher>, root: &H256, writer: &Mutex<SnapshotWriter + 'a>, progress: &'a Progress, part: Option<usize>) -> Result<Vec<H256>, Error> {
 	let account_trie = TrieDB::new(db, &root)?;

 	let mut chunker = StateChunker {
@@ -281,11 +338,33 @@ pub fn chunk_state<'a>(db: &HashDB<KeccakHasher>, root: &H256, writer: &Mutex<Sn
 	let mut used_code = HashSet::new();

 	// account_key here is the address' hash.
-	for item in account_trie.iter()? {
+	let mut account_iter = account_trie.iter()?;
+
+	let mut seek_to = None;
+
+	if let Some(part) = part {
+		let part_offset = 256 / SNAPSHOT_SUBPARTS;

[Review comment] Maybe name …

+
+		let mut seek_from = vec![0; 32];
+		seek_from[0] = (part * part_offset) as u8;
+		account_iter.seek(&seek_from)?;
+
+		// Set the upper-bond, except for the last part

[Review comment] typo: upper bound

+		if part < SNAPSHOT_SUBPARTS - 1 {

[Review comment] we could get rid of this:

	if account_key[0] > seek_to {
		break;
	}

[Reply] But …

+			seek_to = Some(((part + 1) * part_offset) as u8)
+		}
+	}
+
+	for item in account_iter {
 		let (account_key, account_data) = item?;
-		let account = ::rlp::decode(&*account_data)?;
 		let account_key_hash = H256::from_slice(&account_key);

+		if let Some(seek_to) = seek_to {
+			if account_key[0] >= seek_to {
+				break;
+			}
+		}
+
+		let account = ::rlp::decode(&*account_data)?;
 		let account_db = AccountDB::from_hash(db, account_key_hash);

 		let fat_rlps = account::to_fat_rlps(&account_key_hash, &account, &account_db, &mut used_code, PREFERRED_CHUNK_SIZE - chunker.chunk_size(), PREFERRED_CHUNK_SIZE)?;
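To make the seek bounds above concrete: with `SNAPSHOT_SUBPARTS = 16`, each part claims a 16-value slice of the account key's first byte, and only the last part runs unbounded to the end of the trie. A standalone sketch of the same arithmetic:

	const SNAPSHOT_SUBPARTS: usize = 16;

	fn main() {
		// Mirrors the bounds computed in chunk_state: part `p` starts at
		// first byte 16*p and, except for the last part, stops before 16*(p+1).
		let part_offset = 256 / SNAPSHOT_SUBPARTS;
		for part in 0..SNAPSHOT_SUBPARTS {
			let seek_from = (part * part_offset) as u8;
			let seek_to: Option<u8> = if part < SNAPSHOT_SUBPARTS - 1 {
				Some(((part + 1) * part_offset) as u8)
			} else {
				None // last part: iterate until the trie is exhausted
			};
			match seek_to {
				Some(to) => println!("part {:2}: first byte in {:#04x}..{:#04x}", part, seek_from, to),
				None => println!("part {:2}: first byte in {:#04x}..=0xff", part, seek_from),
			}
		}
	}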
@@ -22,7 +22,7 @@ use hash::{KECCAK_NULL_RLP, keccak};

 use basic_account::BasicAccount;
 use snapshot::account;
-use snapshot::{chunk_state, Error as SnapshotError, Progress, StateRebuilder};
+use snapshot::{chunk_state, Error as SnapshotError, Progress, StateRebuilder, SNAPSHOT_SUBPARTS};
 use snapshot::io::{PackedReader, PackedWriter, SnapshotReader, SnapshotWriter};
 use super::helpers::{compare_dbs, StateProducer};
@@ -53,7 +53,12 @@ fn snap_and_restore() {
 	let state_root = producer.state_root();
 	let writer = Mutex::new(PackedWriter::new(&snap_file).unwrap());

-	let state_hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default()).unwrap();
+	let mut state_hashes = Vec::new();
+	for part in 0..SNAPSHOT_SUBPARTS {
+		let mut hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default(), Some(part)).unwrap();
+		state_hashes.append(&mut hashes);
+

[Review comment] nit: extra newline

+	}

 	writer.into_inner().finish(::snapshot::ManifestData {
 		version: 2,
@@ -164,7 +169,7 @@ fn checks_flag() {
 	let state_root = producer.state_root();
 	let writer = Mutex::new(PackedWriter::new(&snap_file).unwrap());

-	let state_hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default()).unwrap();
+	let state_hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default(), None).unwrap();

 	writer.into_inner().finish(::snapshot::ManifestData {
 		version: 2,
[Review comment] Maybe use `assert!(processing_threads >= 1, "...")` instead of this `cmp::max`, since it would be a logical mistake to pass `0` as `processing_threads`?
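For comparison, a hedged sketch of the two guard styles; these free functions are invented here for illustration (the PR inlines the clamp inside `take_snapshot`):

	const SNAPSHOT_SUBPARTS: usize = 16;

	// PR behaviour: silently clamp the thread count into [1, SNAPSHOT_SUBPARTS].
	fn clamp_threads(processing_threads: usize) -> usize {
		std::cmp::max(1, std::cmp::min(processing_threads, SNAPSHOT_SUBPARTS))
	}

	// Reviewer's alternative: treat 0 as a caller bug and fail loudly.
	fn assert_threads(processing_threads: usize) -> usize {
		assert!(processing_threads >= 1, "snapshot creation needs at least one thread");
		std::cmp::min(processing_threads, SNAPSHOT_SUBPARTS)
	}

	fn main() {
		assert_eq!(clamp_threads(0), 1);   // 0 is quietly corrected
		assert_eq!(clamp_threads(64), 16); // oversubscription is capped
		assert_eq!(assert_threads(4), 4);
		// assert_threads(0) would panic instead of silently correcting.
	}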