-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TieredStorage] Enable hot-storage in TieredStorage::write_accounts() #35049
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ use { | |
}, | ||
error::TieredStorageError, | ||
footer::{AccountBlockFormat, AccountMetaFormat}, | ||
hot::{HotStorageWriter, HOT_FORMAT}, | ||
index::IndexBlockFormat, | ||
owners::OwnersBlockFormat, | ||
readable::TieredStorageReader, | ||
|
@@ -30,14 +31,13 @@ use { | |
path::{Path, PathBuf}, | ||
sync::OnceLock, | ||
}, | ||
writer::TieredStorageWriter, | ||
}; | ||
|
||
pub type TieredStorageResult<T> = Result<T, TieredStorageError>; | ||
|
||
/// The struct that defines the formats of all building blocks of a | ||
/// TieredStorage. | ||
#[derive(Clone, Debug)] | ||
#[derive(Clone, Debug, PartialEq)] | ||
pub struct TieredStorageFormat { | ||
pub meta_entry_size: usize, | ||
pub account_meta_format: AccountMetaFormat, | ||
|
@@ -115,19 +115,23 @@ impl TieredStorage { | |
)); | ||
} | ||
|
||
let result = { | ||
let writer = TieredStorageWriter::new(&self.path, format)?; | ||
writer.write_accounts(accounts, skip) | ||
}; | ||
if format == &HOT_FORMAT { | ||
let result = { | ||
let writer = HotStorageWriter::new(&self.path)?; | ||
writer.write_accounts(accounts, skip) | ||
}; | ||
|
||
// panic here if self.reader.get() is not None as self.reader can only be | ||
// None since we have passed `is_read_only()` check previously, indicating | ||
// self.reader is not yet set. | ||
self.reader | ||
.set(TieredStorageReader::new_from_path(&self.path)?) | ||
.unwrap(); | ||
Comment on lines
+124
to
+129
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why create a reader here at all? What about deferring creating the mmap until something reads from the tiered storage? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea. I was under the impression that it will be used right away, but I think having things in lazy style works better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Put it in a separate PR (#35063) as it needs to create another field as well as potentially change the return value format of the reader() API. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We'd have to check the flush and clean code. Maybe immediately after flush we clean that slot? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Flush typically only means finishing writing. So it's up to us to determine which way works the best. In Rocks, I remember it has an option to determine whether to keep entries in memory after flushing from memtable. (i.e., the entries were available in mem-table, but after flushing to disk as a sst file, it no longer be available in memory). This depends on whether our workload is more likely to read recent data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The Once a slot is rooted, the contents for that slot (and any older) are written to disk from the accounts write cache. This process is called
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the sake of this and subsequent PRs, I think we can use the current implementation and then change/improve it over time/usage. I.e. we don't need to wait for #35063 to land. |
||
|
||
// panic here if self.reader.get() is not None as self.reader can only be | ||
// None since we have passed `is_read_only()` check previously, indicating | ||
// self.reader is not yet set. | ||
self.reader | ||
.set(TieredStorageReader::new_from_path(&self.path)?) | ||
.unwrap(); | ||
return result; | ||
} | ||
|
||
result | ||
Err(TieredStorageError::UnknownFormat(self.path.to_path_buf())) | ||
} | ||
|
||
/// Returns the underlying reader of the TieredStorage. None will be | ||
|
@@ -156,9 +160,11 @@ impl TieredStorage { | |
mod tests { | ||
use { | ||
super::*, | ||
crate::account_storage::meta::{StoredMeta, StoredMetaWriteVersion}, | ||
crate::account_storage::meta::{StoredAccountMeta, StoredMeta, StoredMetaWriteVersion}, | ||
footer::{TieredStorageFooter, TieredStorageMagicNumber}, | ||
hot::HOT_FORMAT, | ||
index::IndexOffset, | ||
owners::OWNER_NO_OWNER, | ||
solana_accounts_db::rent_collector::RENT_EXEMPT_RENT_EPOCH, | ||
solana_sdk::{ | ||
account::{Account, AccountSharedData}, | ||
|
@@ -167,7 +173,10 @@ mod tests { | |
pubkey::Pubkey, | ||
system_instruction::MAX_PERMITTED_DATA_LENGTH, | ||
}, | ||
std::mem::ManuallyDrop, | ||
std::{ | ||
collections::{HashMap, HashSet}, | ||
mem::ManuallyDrop, | ||
}, | ||
tempfile::tempdir, | ||
}; | ||
|
||
|
@@ -201,6 +210,7 @@ mod tests { | |
Err(TieredStorageError::AttemptToUpdateReadOnly(_)), | ||
) => {} | ||
(Err(TieredStorageError::Unsupported()), Err(TieredStorageError::Unsupported())) => {} | ||
(Ok(_), Ok(_)) => {} | ||
// we don't expect error type mis-match or other error types here | ||
_ => { | ||
panic!("actual: {result:?}, expected: {expected_result:?}"); | ||
|
@@ -229,10 +239,7 @@ mod tests { | |
assert_eq!(tiered_storage.path(), tiered_storage_path); | ||
assert_eq!(tiered_storage.file_size().unwrap(), 0); | ||
|
||
// Expect the result to be TieredStorageError::Unsupported as the feature | ||
// is not yet fully supported, but we can still check its partial results | ||
// in the test. | ||
write_zero_accounts(&tiered_storage, Err(TieredStorageError::Unsupported())); | ||
write_zero_accounts(&tiered_storage, Ok(vec![])); | ||
} | ||
|
||
let tiered_storage_readonly = TieredStorage::new_readonly(&tiered_storage_path).unwrap(); | ||
|
@@ -257,10 +264,7 @@ mod tests { | |
let tiered_storage_path = temp_dir.path().join("test_write_accounts_twice"); | ||
|
||
let tiered_storage = TieredStorage::new_writable(&tiered_storage_path); | ||
// Expect the result to be TieredStorageError::Unsupported as the feature | ||
// is not yet fully supported, but we can still check its partial results | ||
// in the test. | ||
write_zero_accounts(&tiered_storage, Err(TieredStorageError::Unsupported())); | ||
write_zero_accounts(&tiered_storage, Ok(vec![])); | ||
// Expect AttemptToUpdateReadOnly error as write_accounts can only | ||
// be invoked once. | ||
write_zero_accounts( | ||
|
@@ -278,15 +282,15 @@ mod tests { | |
let tiered_storage_path = temp_dir.path().join("test_remove_on_drop"); | ||
{ | ||
let tiered_storage = TieredStorage::new_writable(&tiered_storage_path); | ||
write_zero_accounts(&tiered_storage, Err(TieredStorageError::Unsupported())); | ||
write_zero_accounts(&tiered_storage, Ok(vec![])); | ||
} | ||
// expect the file does not exists as it has been removed on drop | ||
assert!(!tiered_storage_path.try_exists().unwrap()); | ||
|
||
{ | ||
let tiered_storage = | ||
ManuallyDrop::new(TieredStorage::new_writable(&tiered_storage_path)); | ||
write_zero_accounts(&tiered_storage, Err(TieredStorageError::Unsupported())); | ||
write_zero_accounts(&tiered_storage, Ok(vec![])); | ||
} | ||
// expect the file exists as we have ManuallyDrop this time. | ||
assert!(tiered_storage_path.try_exists().unwrap()); | ||
|
@@ -329,6 +333,35 @@ mod tests { | |
(stored_meta, AccountSharedData::from(account)) | ||
} | ||
|
||
fn verify_account( | ||
stored_meta: &StoredAccountMeta<'_>, | ||
account: Option<&impl ReadableAccount>, | ||
account_hash: &AccountHash, | ||
) { | ||
let (lamports, owner, data, executable, account_hash) = account | ||
.map(|acc| { | ||
( | ||
acc.lamports(), | ||
acc.owner(), | ||
acc.data(), | ||
acc.executable(), | ||
// only persist rent_epoch for those rent-paying accounts | ||
Some(*account_hash), | ||
) | ||
}) | ||
.unwrap_or((0, &OWNER_NO_OWNER, &[], false, None)); | ||
|
||
assert_eq!(stored_meta.lamports(), lamports); | ||
assert_eq!(stored_meta.data().len(), data.len()); | ||
assert_eq!(stored_meta.data(), data); | ||
assert_eq!(stored_meta.executable(), executable); | ||
assert_eq!(stored_meta.owner(), owner); | ||
assert_eq!( | ||
*stored_meta.hash(), | ||
account_hash.unwrap_or(AccountHash(Hash::default())) | ||
); | ||
} | ||
Comment on lines
+336
to
+363
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will create a separate PR that put all the test utils into one rs file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Test utils are moved to test_utils.rs in #35065. |
||
|
||
/// The helper function for all write_accounts tests. | ||
/// Currently only supports hot accounts. | ||
fn do_test_write_accounts( | ||
|
@@ -368,34 +401,27 @@ mod tests { | |
let tiered_storage = TieredStorage::new_writable(tiered_storage_path); | ||
_ = tiered_storage.write_accounts(&storable_accounts, 0, &format); | ||
|
||
verify_hot_storage(&tiered_storage, &accounts, format); | ||
} | ||
|
||
/// Verify the generated tiered storage in the test. | ||
fn verify_hot_storage( | ||
tiered_storage: &TieredStorage, | ||
expected_accounts: &[(StoredMeta, AccountSharedData)], | ||
expected_format: TieredStorageFormat, | ||
) { | ||
let reader = tiered_storage.reader().unwrap(); | ||
assert_eq!(reader.num_accounts(), expected_accounts.len()); | ||
|
||
let footer = reader.footer(); | ||
let expected_footer = TieredStorageFooter { | ||
account_meta_format: expected_format.account_meta_format, | ||
owners_block_format: expected_format.owners_block_format, | ||
index_block_format: expected_format.index_block_format, | ||
account_block_format: expected_format.account_block_format, | ||
account_entry_count: expected_accounts.len() as u32, | ||
// Hash is not yet implemented, so we bypass the check | ||
hash: footer.hash, | ||
..TieredStorageFooter::default() | ||
}; | ||
let num_accounts = storable_accounts.len(); | ||
assert_eq!(reader.num_accounts(), num_accounts); | ||
|
||
// TODO(yhchiang): verify account meta and data once the reader side | ||
// is implemented in a separate PR. | ||
let mut expected_accounts_map = HashMap::new(); | ||
for i in 0..num_accounts { | ||
let (account, address, account_hash, _write_version) = storable_accounts.get(i); | ||
expected_accounts_map.insert(address, (account, account_hash)); | ||
} | ||
|
||
assert_eq!(*footer, expected_footer); | ||
let mut index_offset = IndexOffset(0); | ||
let mut verified_accounts = HashSet::new(); | ||
while let Some((stored_meta, next)) = reader.get_account(index_offset).unwrap() { | ||
if let Some((account, account_hash)) = expected_accounts_map.get(stored_meta.pubkey()) { | ||
verify_account(&stored_meta, *account, account_hash); | ||
verified_accounts.insert(stored_meta.pubkey()); | ||
} | ||
index_offset = next; | ||
} | ||
assert!(!verified_accounts.is_empty()); | ||
assert_eq!(verified_accounts.len(), expected_accounts_map.len()) | ||
} | ||
|
||
#[test] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So in the upper layer, AccountsFile::append_accounts returns an Option instead. This means when we have an error here, it's either panic or we hide the error.
I can probably improve the comment a bit by saying that it is guaranteed to have no reader here as we just checked its is_read_only() so we expect
reader.set()
to be successful.