Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Forge] Add working_dir param to support running node on checkpoint dir, so that the existing data on disk is preserved. #4591

Merged
merged 1 commit into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions aptos-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ mod log_build_information;
use anyhow::{anyhow, Context};
use aptos_api::bootstrap as bootstrap_api;
use aptos_build_info::build_information;
use aptos_config::config::StateSyncConfig;
use aptos_config::{
config::{
AptosDataClientConfig, BaseConfig, NetworkConfig, NodeConfig, PersistableConfig,
StorageServiceConfig,
RocksdbConfigs, StateSyncConfig, StorageServiceConfig, BUFFERED_STATE_TARGET_ITEMS,
DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD, NO_OP_STORAGE_PRUNER_CONFIG,
},
network_id::NetworkId,
utils::get_genesis_txn,
Expand All @@ -26,6 +26,7 @@ use aptos_types::{
account_config::CORE_CODE_ADDRESS, account_view::AccountView, chain_id::ChainId,
on_chain_config::ON_CHAIN_CONFIG_REGISTRY, waypoint::Waypoint,
};

use aptos_vm::AptosVM;
use aptosdb::AptosDB;
use backup_service::start_backup_service;
Expand Down Expand Up @@ -53,8 +54,9 @@ use state_sync_driver::{
use std::{
boxed::Box,
collections::{HashMap, HashSet},
fs,
io::Write,
path::PathBuf,
path::{Path, PathBuf},
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc,
Expand Down Expand Up @@ -569,8 +571,43 @@ fn bootstrap_indexer(
Ok(None)
}

/// Checkpoints the node's on-disk databases into `working_dir` and repoints
/// `node_config` at that directory.
///
/// The storage directory reported by `node_config` before the switch is the
/// checkpoint source; the storage directory reported after calling
/// `set_data_dir(working_dir)` is the checkpoint destination. Checkpoints are
/// created for AptosDB, ConsensusDB and the state sync metadata DB, in that
/// order.
///
/// # Panics
///
/// Panics if the source and destination storage directories are equal, if the
/// destination directory cannot be created, if AptosDB cannot be opened, or if
/// any of the checkpoints fails.
fn create_checkpoint_and_change_working_dir(
    node_config: &mut NodeConfig,
    working_dir: impl AsRef<Path>,
) {
    // Capture the source location before the config is repointed.
    let source_dir = node_config.storage.dir();
    node_config.set_data_dir(working_dir.as_ref().to_path_buf());
    let checkpoint_dir = node_config.storage.dir();

    assert!(
        source_dir != checkpoint_dir,
        "Checkpoint dir must differ from the source storage dir {:?}.",
        source_dir
    );

    // Create rocksdb checkpoint.
    fs::create_dir_all(&checkpoint_dir).unwrap_or_else(|error| {
        panic!(
            "Failed to create checkpoint dir {:?}: {}",
            checkpoint_dir, error
        )
    });

    AptosDB::open(
        &source_dir,
        false,                       /* readonly */
        NO_OP_STORAGE_PRUNER_CONFIG, /* pruner */
        RocksdbConfigs::default(),
        false,
        BUFFERED_STATE_TARGET_ITEMS,
        DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD,
    )
    .expect("AptosDB open failure.")
    .create_checkpoint(&checkpoint_dir)
    .expect("AptosDB checkpoint creation failed.");

    consensus::create_checkpoint(&source_dir, &checkpoint_dir)
        .expect("ConsensusDB checkpoint creation failed.");

    let state_sync_db =
        state_sync_driver::metadata_storage::PersistentMetadataStorage::new(&source_dir);
    state_sync_db
        .create_checkpoint(&checkpoint_dir)
        .expect("StateSyncDB checkpoint creation failed.");
}

pub fn setup_environment(
node_config: NodeConfig,
mut node_config: NodeConfig,
remote_log_rx: Option<mpsc::Receiver<TelemetryLog>>,
logger_filter_update_job: Option<LoggerFilterUpdater>,
) -> anyhow::Result<AptosHandle> {
Expand All @@ -580,6 +617,13 @@ pub fn setup_environment(
inspection_service::inspection_service::start_inspection_service(node_config_clone)
});

// If working_dir is provided, we create RocksDB checkpoints of consensus_db,
// state_sync_db, ledger_db and state_merkle_db in that directory and run the node
// on the new path, so that the existing data won't change. For now this is a test-only feature.
if let Some(working_dir) = node_config.base.working_dir.clone() {
create_checkpoint_and_change_working_dir(&mut node_config, working_dir);
}

// Open the database
let mut instant = Instant::now();
let (aptos_db, db_rw) = DbReaderWriter::wrap(
Expand Down
9 changes: 9 additions & 0 deletions config/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ pub struct NodeConfig {
#[serde(default, deny_unknown_fields)]
/// Base settings of a node: its directories, role and waypoint.
pub struct BaseConfig {
/// Data directory of the node; the default is `/opt/aptos/data`.
pub data_dir: PathBuf,
/// Optional working directory (default `None`). When set, `setup_environment`
/// checkpoints the existing databases into it and runs the node on that path so
/// the existing data is left unchanged. `NodeConfig::working_dir()` falls back to
/// `data_dir` when this is `None`.
pub working_dir: Option<PathBuf>,
/// Role of the node; the default is `RoleType::Validator`.
pub role: RoleType,
/// Waypoint configuration; the default is `WaypointConfig::None`.
pub waypoint: WaypointConfig,
}
Expand All @@ -109,6 +110,7 @@ impl Default for BaseConfig {
fn default() -> BaseConfig {
BaseConfig {
data_dir: PathBuf::from("/opt/aptos/data"),
working_dir: None,
role: RoleType::Validator,
waypoint: WaypointConfig::None,
}
Expand Down Expand Up @@ -266,6 +268,13 @@ impl NodeConfig {
&self.base.data_dir
}

/// Returns the configured working directory, or the data directory when no
/// working directory has been set.
pub fn working_dir(&self) -> &Path {
    self.base
        .working_dir
        .as_deref()
        .unwrap_or(self.base.data_dir.as_path())
}

pub fn set_data_dir(&mut self, data_dir: PathBuf) {
self.base.data_dir = data_dir.clone();
self.consensus.set_data_dir(data_dir.clone());
Expand Down
16 changes: 16 additions & 0 deletions consensus/src/consensusdb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@ use std::{collections::HashMap, iter::Iterator, path::Path, time::Instant};
/// The name of the consensus db file
pub const CONSENSUS_DB_NAME: &str = "consensus_db";

/// Makes a physical checkpoint of the ConsensusDB opened from `db_path`.
///
/// The checkpoint is written to the `CONSENSUS_DB_NAME` subdirectory of
/// `checkpoint_path`. Anything already present at that location is removed
/// first on a best-effort basis: a failed removal is ignored.
pub fn create_checkpoint<P: AsRef<Path> + Clone>(db_path: P, checkpoint_path: P) -> Result<()> {
    let timer = Instant::now();
    let target_dir = checkpoint_path.as_ref().join(CONSENSUS_DB_NAME);

    // Clear out any previous checkpoint; the result is deliberately discarded.
    let _ = std::fs::remove_dir_all(&target_dir);

    {
        // Scope the source DB handle so it is released before logging.
        let source = ConsensusDB::new(db_path);
        source.db.create_checkpoint(&target_dir)?;
    }

    info!(
        path = target_dir,
        time_ms = %timer.elapsed().as_millis(),
        "Made ConsensusDB checkpoint."
    );
    Ok(())
}

pub struct ConsensusDB {
db: DB,
}
Expand Down
2 changes: 2 additions & 0 deletions consensus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ pub mod network_interface;
/// Required by the smoke tests
pub use consensusdb::CONSENSUS_DB_NAME;

pub use consensusdb::create_checkpoint;

#[cfg(feature = "fuzzing")]
pub use round_manager::round_manager_fuzzing;

Expand Down
2 changes: 1 addition & 1 deletion docker/compose/aptos-node/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ consensus:
type: "local"
backend:
type: "on_disk_storage"
path: /opt/aptos/data/secure-data.json
path: secure-data.json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did we change this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be prepended with working_dir, which is /opt/aptos/data/ in this case.

namespace: ~
initial_safety_rules_config:
from_file:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,20 @@ impl PersistentMetadataStorage {
))
})
}

/// Makes a physical checkpoint of this metadata database.
///
/// The checkpoint is written to the `STATE_SYNC_DB_NAME` subdirectory of
/// `path`. Anything already present at that location is removed first on a
/// best-effort basis: a failed removal is ignored.
pub fn create_checkpoint<P: AsRef<Path>>(&self, path: P) -> Result<()> {
    let timer = Instant::now();
    let target_dir = path.as_ref().join(STATE_SYNC_DB_NAME);

    // Clear out any previous checkpoint; the result is deliberately discarded.
    let _ = std::fs::remove_dir_all(&target_dir);

    self.database.create_checkpoint(&target_dir)?;

    info!(
        path = target_dir,
        time_ms = %timer.elapsed().as_millis(),
        "Made StateSyncDB checkpoint."
    );
    Ok(())
}
}

impl MetadataStorageInterface for PersistentMetadataStorage {
Expand Down
20 changes: 12 additions & 8 deletions storage/aptosdb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ use aptos_crypto::hash::HashValue;
use aptos_infallible::Mutex;
use aptos_logger::prelude::*;
use aptos_rocksdb_options::gen_rocksdb_options;
use aptos_types::proof::TransactionAccumulatorSummary;
use aptos_types::state_store::state_storage_usage::StateStorageUsage;
use aptos_types::{
account_address::AccountAddress,
account_config::{new_block_event_key, NewBlockEvent},
Expand All @@ -77,12 +75,13 @@ use aptos_types::{
ledger_info::LedgerInfoWithSignatures,
proof::{
accumulator::InMemoryAccumulator, AccumulatorConsistencyProof, SparseMerkleProofExt,
TransactionInfoListWithProof,
TransactionAccumulatorSummary, TransactionInfoListWithProof,
},
state_proof::StateProof,
state_store::{
state_key::StateKey,
state_key_prefix::StateKeyPrefix,
state_storage_usage::StateStorageUsage,
state_value::{StateValue, StateValueChunkWithProof},
table::{TableHandle, TableInfo},
},
Expand All @@ -108,12 +107,15 @@ use std::{
time::{Duration, Instant},
};

use crate::pruner::{
ledger_pruner_manager::LedgerPrunerManager, ledger_store::ledger_store_pruner::LedgerPruner,
state_pruner_manager::StatePrunerManager, state_store::StateMerklePruner,
use crate::{
pruner::{
ledger_pruner_manager::LedgerPrunerManager,
ledger_store::ledger_store_pruner::LedgerPruner, state_pruner_manager::StatePrunerManager,
state_store::StateMerklePruner,
},
stale_node_index::StaleNodeIndexSchema,
stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema,
};
use crate::stale_node_index::StaleNodeIndexSchema;
use crate::stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema;
use storage_interface::{
state_delta::StateDelta, state_view::DbStateView, DbReader, DbWriter, ExecutedTrees, Order,
StateSnapshotReceiver, MAX_REQUEST_LIMIT,
Expand Down Expand Up @@ -655,6 +657,8 @@ impl AptosDB {
let start = Instant::now();
let ledger_db_path = path.as_ref().join(LEDGER_DB_NAME);
let state_merkle_db_path = path.as_ref().join(STATE_MERKLE_DB_NAME);
std::fs::remove_dir_all(&ledger_db_path).unwrap_or(());
std::fs::remove_dir_all(&state_merkle_db_path).unwrap_or(());
self.ledger_db.create_checkpoint(&ledger_db_path)?;
self.state_merkle_db
.create_checkpoint(&state_merkle_db_path)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ consensus:
type: "local"
backend:
type: "on_disk_storage"
path: /opt/aptos/data/secure-data.json
path: secure-data.json
namespace: ~
initial_safety_rules_config:
from_file:
Expand Down
2 changes: 1 addition & 1 deletion testsuite/forge/src/backend/local/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ impl Node for LocalNode {
let node_config = self.config();
let ledger_db_path = node_config.storage.dir().join(LEDGER_DB_NAME);
let state_db_path = node_config.storage.dir().join(STATE_MERKLE_DB_NAME);
let secure_storage_path = node_config.base.data_dir.join("secure_storage.json");
let secure_storage_path = node_config.working_dir().join("secure_storage.json");
let state_sync_db_path = node_config.storage.dir().join(STATE_SYNC_DB_NAME);

debug!(
Expand Down