From 1d45ed057692bac28fdc1d719641bae67de6cdca Mon Sep 17 00:00:00 2001 From: Guoteng Rao <3603304+grao1991@users.noreply.github.com> Date: Tue, 6 Dec 2022 17:42:33 -0800 Subject: [PATCH] [Forge] Add working_dir param to support running node on checkpoint dir, so that the existing data on disk is preserved. (#4591) --- aptos-node/src/lib.rs | 52 +++++++++++++++++-- config/src/config/mod.rs | 9 ++++ consensus/src/consensusdb/mod.rs | 16 ++++++ consensus/src/lib.rs | 2 + docker/compose/aptos-node/validator.yaml | 2 +- .../state-sync-driver/src/metadata_storage.rs | 14 +++++ storage/aptosdb/src/lib.rs | 20 ++++--- .../files/configs/validator-base.yaml | 2 +- testsuite/forge/src/backend/local/node.rs | 2 +- 9 files changed, 104 insertions(+), 15 deletions(-) diff --git a/aptos-node/src/lib.rs b/aptos-node/src/lib.rs index dffc275129e32..b8ff8f1997aa3 100644 --- a/aptos-node/src/lib.rs +++ b/aptos-node/src/lib.rs @@ -8,11 +8,11 @@ mod log_build_information; use anyhow::{anyhow, Context}; use aptos_api::bootstrap as bootstrap_api; use aptos_build_info::build_information; -use aptos_config::config::StateSyncConfig; use aptos_config::{ config::{ AptosDataClientConfig, BaseConfig, NetworkConfig, NodeConfig, PersistableConfig, - StorageServiceConfig, + RocksdbConfigs, StateSyncConfig, StorageServiceConfig, BUFFERED_STATE_TARGET_ITEMS, + DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD, NO_OP_STORAGE_PRUNER_CONFIG, }, network_id::NetworkId, utils::get_genesis_txn, @@ -26,6 +26,7 @@ use aptos_types::{ account_config::CORE_CODE_ADDRESS, account_view::AccountView, chain_id::ChainId, on_chain_config::ON_CHAIN_CONFIG_REGISTRY, waypoint::Waypoint, }; + use aptos_vm::AptosVM; use aptosdb::AptosDB; use backup_service::start_backup_service; @@ -53,8 +54,9 @@ use state_sync_driver::{ use std::{ boxed::Box, collections::{HashMap, HashSet}, + fs, io::Write, - path::PathBuf, + path::{Path, PathBuf}, sync::{ atomic::{AtomicBool, AtomicUsize, Ordering}, Arc, @@ -569,8 +571,43 @@ fn 
bootstrap_indexer( Ok(None) } +fn create_checkpoint_and_change_working_dir( + node_config: &mut NodeConfig, + working_dir: impl AsRef<Path>, +) { + let source_dir = node_config.storage.dir(); + node_config.set_data_dir(working_dir.as_ref().to_path_buf()); + let checkpoint_dir = node_config.storage.dir(); + + assert!(source_dir != checkpoint_dir); + + // Create rocksdb checkpoint. + fs::create_dir_all(&checkpoint_dir).unwrap(); + + AptosDB::open( + &source_dir, + false, /* readonly */ + NO_OP_STORAGE_PRUNER_CONFIG, /* pruner */ + RocksdbConfigs::default(), + false, + BUFFERED_STATE_TARGET_ITEMS, + DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD, + ) + .expect("AptosDB open failure.") + .create_checkpoint(&checkpoint_dir) + .expect("AptosDB checkpoint creation failed."); + + consensus::create_checkpoint(&source_dir, &checkpoint_dir) + .expect("ConsensusDB checkpoint creation failed."); + let state_sync_db = + state_sync_driver::metadata_storage::PersistentMetadataStorage::new(&source_dir); + state_sync_db + .create_checkpoint(&checkpoint_dir) + .expect("StateSyncDB checkpoint creation failed."); +} + pub fn setup_environment( - node_config: NodeConfig, + mut node_config: NodeConfig, remote_log_rx: Option>, logger_filter_update_job: Option, ) -> anyhow::Result { @@ -580,6 +617,13 @@ pub fn setup_environment( inspection_service::inspection_service::start_inspection_service(node_config_clone) }); + // If working_dir is provided, we will make RocksDb checkpoint for consensus_db, + // state_sync_db, ledger_db and state_merkle_db to the checkpoint_path, and running the node + // on the new path, so that the existing data won't change. For now this is a testonly feature. 
+ if let Some(working_dir) = node_config.base.working_dir.clone() { + create_checkpoint_and_change_working_dir(&mut node_config, working_dir); + } + // Open the database let mut instant = Instant::now(); let (aptos_db, db_rw) = DbReaderWriter::wrap( diff --git a/config/src/config/mod.rs b/config/src/config/mod.rs index 8df9ee5e05c41..64f8002cd3b24 100644 --- a/config/src/config/mod.rs +++ b/config/src/config/mod.rs @@ -101,6 +101,7 @@ pub struct NodeConfig { #[serde(default, deny_unknown_fields)] pub struct BaseConfig { pub data_dir: PathBuf, + pub working_dir: Option<PathBuf>, pub role: RoleType, pub waypoint: WaypointConfig, } @@ -109,6 +110,7 @@ impl Default for BaseConfig { fn default() -> BaseConfig { BaseConfig { data_dir: PathBuf::from("/opt/aptos/data"), + working_dir: None, role: RoleType::Validator, waypoint: WaypointConfig::None, } @@ -266,6 +268,13 @@ impl NodeConfig { &self.base.data_dir } + pub fn working_dir(&self) -> &Path { + match &self.base.working_dir { + Some(working_dir) => working_dir, + None => &self.base.data_dir, + } + } + pub fn set_data_dir(&mut self, data_dir: PathBuf) { self.base.data_dir = data_dir.clone(); self.consensus.set_data_dir(data_dir.clone()); diff --git a/consensus/src/consensusdb/mod.rs b/consensus/src/consensusdb/mod.rs index 6c80df4b4e86d..68376da86aabd 100644 --- a/consensus/src/consensusdb/mod.rs +++ b/consensus/src/consensusdb/mod.rs @@ -24,6 +24,22 @@ use std::{collections::HashMap, iter::Iterator, path::Path, time::Instant}; /// The name of the consensus db file pub const CONSENSUS_DB_NAME: &str = "consensus_db"; +/// Creates new physical DB checkpoint in directory specified by `checkpoint_path`. 
+pub fn create_checkpoint<P: AsRef<Path> + Clone>(db_path: P, checkpoint_path: P) -> Result<()> { + let start = Instant::now(); + let consensus_db_checkpoint_path = checkpoint_path.as_ref().join(CONSENSUS_DB_NAME); + std::fs::remove_dir_all(&consensus_db_checkpoint_path).unwrap_or(()); + ConsensusDB::new(db_path) + .db + .create_checkpoint(&consensus_db_checkpoint_path)?; + info!( + path = consensus_db_checkpoint_path, + time_ms = %start.elapsed().as_millis(), + "Made ConsensusDB checkpoint." + ); + Ok(()) +} + pub struct ConsensusDB { db: DB, } diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index ac3d087c300f7..05e7dacfec6aa 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -51,6 +51,8 @@ pub mod network_interface; /// Required by the smoke tests pub use consensusdb::CONSENSUS_DB_NAME; +pub use consensusdb::create_checkpoint; + #[cfg(feature = "fuzzing")] pub use round_manager::round_manager_fuzzing; diff --git a/docker/compose/aptos-node/validator.yaml b/docker/compose/aptos-node/validator.yaml index 04beb4ded3614..272d29d170037 100644 --- a/docker/compose/aptos-node/validator.yaml +++ b/docker/compose/aptos-node/validator.yaml @@ -16,7 +16,7 @@ consensus: type: "local" backend: type: "on_disk_storage" - path: /opt/aptos/data/secure-data.json + path: secure-data.json namespace: ~ initial_safety_rules_config: from_file: diff --git a/state-sync/state-sync-v2/state-sync-driver/src/metadata_storage.rs b/state-sync/state-sync-v2/state-sync-driver/src/metadata_storage.rs index d04883d8424c0..ed9896ee176ef 100644 --- a/state-sync/state-sync-v2/state-sync-driver/src/metadata_storage.rs +++ b/state-sync/state-sync-v2/state-sync-driver/src/metadata_storage.rs @@ -161,6 +161,20 @@ impl PersistentMetadataStorage { )) }) } + + /// Creates new physical DB checkpoint in directory specified by `path`. 
+ pub fn create_checkpoint<P: AsRef<Path>>(&self, path: P) -> Result<()> { + let start = Instant::now(); + let state_sync_db_path = path.as_ref().join(STATE_SYNC_DB_NAME); + std::fs::remove_dir_all(&state_sync_db_path).unwrap_or(()); + self.database.create_checkpoint(&state_sync_db_path)?; + info!( + path = state_sync_db_path, + time_ms = %start.elapsed().as_millis(), + "Made StateSyncDB checkpoint." + ); + Ok(()) + } } impl MetadataStorageInterface for PersistentMetadataStorage { diff --git a/storage/aptosdb/src/lib.rs b/storage/aptosdb/src/lib.rs index bfab11330c603..c71d22f799374 100644 --- a/storage/aptosdb/src/lib.rs +++ b/storage/aptosdb/src/lib.rs @@ -65,8 +65,6 @@ use aptos_crypto::hash::HashValue; use aptos_infallible::Mutex; use aptos_logger::prelude::*; use aptos_rocksdb_options::gen_rocksdb_options; -use aptos_types::proof::TransactionAccumulatorSummary; -use aptos_types::state_store::state_storage_usage::StateStorageUsage; use aptos_types::{ account_address::AccountAddress, account_config::{new_block_event_key, NewBlockEvent}, @@ -77,12 +75,13 @@ use aptos_types::{ ledger_info::LedgerInfoWithSignatures, proof::{ accumulator::InMemoryAccumulator, AccumulatorConsistencyProof, SparseMerkleProofExt, - TransactionInfoListWithProof, + TransactionAccumulatorSummary, TransactionInfoListWithProof, }, state_proof::StateProof, state_store::{ state_key::StateKey, state_key_prefix::StateKeyPrefix, + state_storage_usage::StateStorageUsage, state_value::{StateValue, StateValueChunkWithProof}, table::{TableHandle, TableInfo}, }, @@ -108,12 +107,15 @@ use std::{ time::{Duration, Instant}, }; -use crate::pruner::{ - ledger_pruner_manager::LedgerPrunerManager, ledger_store::ledger_store_pruner::LedgerPruner, - state_pruner_manager::StatePrunerManager, state_store::StateMerklePruner, +use crate::{ + pruner::{ + ledger_pruner_manager::LedgerPrunerManager, + ledger_store::ledger_store_pruner::LedgerPruner, state_pruner_manager::StatePrunerManager, + state_store::StateMerklePruner, + }, + 
stale_node_index::StaleNodeIndexSchema, + stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema, }; -use crate::stale_node_index::StaleNodeIndexSchema; -use crate::stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema; use storage_interface::{ state_delta::StateDelta, state_view::DbStateView, DbReader, DbWriter, ExecutedTrees, Order, StateSnapshotReceiver, MAX_REQUEST_LIMIT, @@ -655,6 +657,8 @@ impl AptosDB { let start = Instant::now(); let ledger_db_path = path.as_ref().join(LEDGER_DB_NAME); let state_merkle_db_path = path.as_ref().join(STATE_MERKLE_DB_NAME); + std::fs::remove_dir_all(&ledger_db_path).unwrap_or(()); + std::fs::remove_dir_all(&state_merkle_db_path).unwrap_or(()); self.ledger_db.create_checkpoint(&ledger_db_path)?; self.state_merkle_db .create_checkpoint(&state_merkle_db_path)?; diff --git a/terraform/helm/aptos-node/files/configs/validator-base.yaml b/terraform/helm/aptos-node/files/configs/validator-base.yaml index ed4ce5209e2d1..0abfa5eff605e 100644 --- a/terraform/helm/aptos-node/files/configs/validator-base.yaml +++ b/terraform/helm/aptos-node/files/configs/validator-base.yaml @@ -19,7 +19,7 @@ consensus: type: "local" backend: type: "on_disk_storage" - path: /opt/aptos/data/secure-data.json + path: secure-data.json namespace: ~ initial_safety_rules_config: from_file: diff --git a/testsuite/forge/src/backend/local/node.rs b/testsuite/forge/src/backend/local/node.rs index fc5e21ad2977e..189093cf3777b 100644 --- a/testsuite/forge/src/backend/local/node.rs +++ b/testsuite/forge/src/backend/local/node.rs @@ -282,7 +282,7 @@ impl Node for LocalNode { let node_config = self.config(); let ledger_db_path = node_config.storage.dir().join(LEDGER_DB_NAME); let state_db_path = node_config.storage.dir().join(STATE_MERKLE_DB_NAME); - let secure_storage_path = node_config.base.data_dir.join("secure_storage.json"); + let secure_storage_path = node_config.working_dir().join("secure_storage.json"); let state_sync_db_path = 
node_config.storage.dir().join(STATE_SYNC_DB_NAME); debug!(