From ea41b5ec8a33f347a5e508e63f0bfeaee3260e5c Mon Sep 17 00:00:00 2001 From: Christopher Berner Date: Sat, 24 Aug 2024 09:40:11 -0700 Subject: [PATCH] Refactor InternalTableDefinition This makes it harder to misuse and uncovered one bug in the compaction code already --- src/db.rs | 127 +------ src/transactions.rs | 89 +++-- src/tree_store/mod.rs | 6 +- src/tree_store/table_tree.rs | 604 +++++++----------------------- src/tree_store/table_tree_base.rs | 515 +++++++++++++++++++++++++ 5 files changed, 724 insertions(+), 617 deletions(-) create mode 100644 src/tree_store/table_tree_base.rs diff --git a/src/db.rs b/src/db.rs index c035fa62..c25143e2 100644 --- a/src/db.rs +++ b/src/db.rs @@ -20,7 +20,6 @@ use std::path::Path; use std::sync::{Arc, Mutex}; use crate::error::TransactionError; -use crate::multimap_table::{parse_subtree_roots, DynamicCollection}; use crate::sealed::Sealed; use crate::transactions::SAVEPOINT_TABLE; use crate::tree_store::file_backend::FileBackend; @@ -483,10 +482,16 @@ impl Database { e.into_storage_error_or_corrupted("Persistent savepoint table corrupted") })? { + let savepoint_table_root = + if let InternalTableDefinition::Normal { table_root, .. } = savepoint_table_def { + table_root + } else { + unreachable!() + }; let savepoint_table: ReadOnlyTable = ReadOnlyTable::new( "internal savepoint table".to_string(), - savepoint_table_def.get_root(), + savepoint_table_root, PageHint::None, Arc::new(TransactionGuard::fake()), mem.clone(), @@ -563,65 +568,10 @@ impl Database { // Chain all the other tables to the master table iter for entry in iter { let definition = entry?.value(); - if let Some(header) = definition.get_root() { - match definition.get_type() { - TableType::Normal => { - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - mem.clone(), - )?; - for result in table_pages_iter { - let page = result?; - assert!(mem.is_allocated(page)); - } - } - TableType::Multimap => { - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with( - definition.get_fixed_value_size(), - ), - mem.clone(), - )?; - for result in table_pages_iter { - let page = result?; - assert!(mem.is_allocated(page)); - } - - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with( - definition.get_fixed_value_size(), - ), - mem.clone(), - )?; - for table_page in table_pages_iter { - let page = mem.get_page(table_page?)?; - let subtree_roots = parse_subtree_roots( - &page, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - ); - for subtree_header in subtree_roots { - let sub_root_iter = AllPageNumbersBtreeIter::new( - subtree_header.root, - definition.get_fixed_value_size(), - <()>::fixed_width(), - mem.clone(), - )?; - for result in sub_root_iter { - let page = result?; - assert!(mem.is_allocated(page)); - } - } - } - } - } - } + definition.visit_all_pages(mem.clone(), |page_number| { + assert!(mem.is_allocated(page_number)); + Ok(()) + })?; } Ok(()) @@ -644,56 +594,11 @@ impl Database { // Chain all the other tables to the master table iter for entry in iter { let definition = entry?.value(); - if let Some(header) = definition.get_root() { - match definition.get_type() { - TableType::Normal => { - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - mem.clone(), - )?; - mem.mark_pages_allocated(table_pages_iter, allow_duplicates)?; - } - TableType::Multimap => { - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with( - definition.get_fixed_value_size(), - ), - mem.clone(), - )?; - mem.mark_pages_allocated(table_pages_iter, allow_duplicates)?; - - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with( - definition.get_fixed_value_size(), - ), - mem.clone(), - )?; - for table_page in table_pages_iter { - let page = mem.get_page(table_page?)?; - let subtree_roots = parse_subtree_roots( - &page, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - ); - for subtree_header in subtree_roots { - let sub_root_iter = AllPageNumbersBtreeIter::new( - subtree_header.root, - definition.get_fixed_value_size(), - <()>::fixed_width(), - mem.clone(), - )?; - mem.mark_pages_allocated(sub_root_iter, allow_duplicates)?; - } - } - } - } - } + definition.visit_all_pages(mem.clone(), |page_number| { + // TODO: simplify mark_pages_allocated() + mem.mark_pages_allocated([Ok(page_number)].into_iter(), allow_duplicates)?; + Ok(()) + })?; } Ok(()) diff --git a/src/transactions.rs b/src/transactions.rs index f761506d..bb0ec429 100644 --- a/src/transactions.rs +++ b/src/transactions.rs @@ -286,7 +286,7 @@ impl<'db> SystemNamespace<'db> { ) -> Result> { #[cfg(feature = "logging")] debug!("Opening system table: {}", definition); - let root = self + let (root, _) = self .table_tree .get_or_create_table::(definition.name(), TableType::Normal) .map_err(|e| { @@ -296,7 +296,7 @@ impl<'db> SystemNamespace<'db> { Ok(SystemTable::new( definition.name(), - root.get_root(), + root, transaction.freed_pages.clone(), self.transaction_guard.clone(), transaction.mem.clone(), @@ -331,13 +331,13 @@ impl<'db> TableNamespace<'db> { return Err(TableError::TableAlreadyOpen(name.to_string(), location)); } - let internal_table = self + let root = self .table_tree .get_or_create_table::(name, table_type)?; self.open_tables .insert(name.to_string(), panic::Location::caller()); - Ok((internal_table.get_root(), internal_table.get_length())) + Ok(root) } #[track_caller] @@ -1338,13 +1338,16 @@ impl ReadTransaction { .get_table::(definition.name(), TableType::Normal)? .ok_or_else(|| TableError::TableDoesNotExist(definition.name().to_string()))?; - Ok(ReadOnlyTable::new( - definition.name().to_string(), - header.get_root(), - PageHint::Clean, - self.tree.transaction_guard().clone(), - self.mem.clone(), - )?) + match header { + InternalTableDefinition::Normal { table_root, .. } => Ok(ReadOnlyTable::new( + definition.name().to_string(), + table_root, + PageHint::Clean, + self.tree.transaction_guard().clone(), + self.mem.clone(), + )?), + InternalTableDefinition::Multimap { .. } => unreachable!(), + } } /// Open the given table without a type @@ -1357,12 +1360,20 @@ impl ReadTransaction { .get_table_untyped(handle.name(), TableType::Normal)? .ok_or_else(|| TableError::TableDoesNotExist(handle.name().to_string()))?; - Ok(ReadOnlyUntypedTable::new( - header.get_root(), - header.get_fixed_key_size(), - header.get_fixed_value_size(), - self.mem.clone(), - )) + match header { + InternalTableDefinition::Normal { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => Ok(ReadOnlyUntypedTable::new( + table_root, + fixed_key_size, + fixed_value_size, + self.mem.clone(), + )), + InternalTableDefinition::Multimap { .. } => unreachable!(), + } } /// Open the given table @@ -1375,13 +1386,20 @@ impl ReadTransaction { .get_table::(definition.name(), TableType::Multimap)? .ok_or_else(|| TableError::TableDoesNotExist(definition.name().to_string()))?; - Ok(ReadOnlyMultimapTable::new( - header.get_root(), - header.get_length(), - PageHint::Clean, - self.tree.transaction_guard().clone(), - self.mem.clone(), - )?) + match header { + InternalTableDefinition::Normal { .. } => unreachable!(), + InternalTableDefinition::Multimap { + table_root, + table_length, + .. + } => Ok(ReadOnlyMultimapTable::new( + table_root, + table_length, + PageHint::Clean, + self.tree.transaction_guard().clone(), + self.mem.clone(), + )?), + } } /// Open the given table without a type @@ -1394,13 +1412,22 @@ impl ReadTransaction { .get_table_untyped(handle.name(), TableType::Multimap)? .ok_or_else(|| TableError::TableDoesNotExist(handle.name().to_string()))?; - Ok(ReadOnlyUntypedMultimapTable::new( - header.get_root(), - header.get_length(), - header.get_fixed_key_size(), - header.get_fixed_value_size(), - self.mem.clone(), - )) + match header { + InternalTableDefinition::Normal { .. } => unreachable!(), + InternalTableDefinition::Multimap { + table_root, + table_length, + fixed_key_size, + fixed_value_size, + .. + } => Ok(ReadOnlyUntypedMultimapTable::new( + table_root, + table_length, + fixed_key_size, + fixed_value_size, + self.mem.clone(), + )), + } } /// List all the tables diff --git a/src/tree_store/mod.rs b/src/tree_store/mod.rs index 746a1f95..84c58700 100644 --- a/src/tree_store/mod.rs +++ b/src/tree_store/mod.rs @@ -4,6 +4,7 @@ mod btree_iters; mod btree_mutator; mod page_store; mod table_tree; +mod table_tree_base; pub(crate) use btree::{btree_stats, Btree, BtreeMut, BtreeStats, RawBtree, UntypedBtreeMut}; pub use btree_base::{AccessGuard, AccessGuardMut}; @@ -16,6 +17,5 @@ pub(crate) use page_store::{ CachePriority, Page, PageHint, PageNumber, SerializedSavepoint, TransactionalMemory, FILE_FORMAT_VERSION2, MAX_PAIR_LENGTH, MAX_VALUE_LENGTH, PAGE_SIZE, }; -pub(crate) use table_tree::{ - FreedPageList, FreedTableKey, InternalTableDefinition, TableTree, TableTreeMut, TableType, -}; +pub(crate) use table_tree::{FreedPageList, FreedTableKey, TableTree, TableTreeMut}; +pub(crate) use table_tree_base::{InternalTableDefinition, TableType}; diff --git a/src/tree_store/table_tree.rs b/src/tree_store/table_tree.rs index 53de8090..d4b4e3b2 100644 --- a/src/tree_store/table_tree.rs +++ b/src/tree_store/table_tree.rs @@ -1,15 +1,14 @@ use crate::db::TransactionGuard; use crate::error::TableError; use crate::multimap_table::{ - finalize_tree_and_subtree_checksums, multimap_btree_stats, parse_subtree_roots, - verify_tree_and_subtree_checksums, DynamicCollection, + finalize_tree_and_subtree_checksums, multimap_btree_stats, verify_tree_and_subtree_checksums, }; use crate::tree_store::btree::{btree_stats, UntypedBtreeMut}; use crate::tree_store::btree_base::BtreeHeader; -use crate::tree_store::btree_iters::AllPageNumbersBtreeIter; use crate::tree_store::page_store::{new_allocators, BuddyAllocator}; use crate::tree_store::{ - Btree, BtreeMut, BtreeRangeIter, PageHint, PageNumber, RawBtree, TransactionalMemory, + Btree, BtreeMut, BtreeRangeIter, InternalTableDefinition, PageHint, PageNumber, RawBtree, + TableType, TransactionalMemory, }; use crate::types::{Key, MutInPlaceValue, TypeName, Value}; use crate::{DatabaseStats, Result}; @@ -20,10 +19,6 @@ use std::mem::size_of; use std::ops::RangeFull; use std::sync::{Arc, Mutex}; -// Forward compatibility feature in case alignment can be supported in the future -// See https://github.com/cberner/redb/issues/360 -const ALIGNMENT: usize = 1; - #[derive(Debug)] pub(crate) struct FreedTableKey { pub(crate) transaction_id: u64, @@ -177,229 +172,6 @@ impl MutInPlaceValue for FreedPageList<'_> { } } -#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] -pub(crate) enum TableType { - Normal, - Multimap, -} - -impl TableType { - fn is_legacy(value: u8) -> bool { - value == 1 || value == 2 - } -} - -#[allow(clippy::from_over_into)] -impl Into for TableType { - fn into(self) -> u8 { - match self { - // 1 & 2 were used in the v1 file format - // TableType::Normal => 1, - // TableType::Multimap => 2, - TableType::Normal => 3, - TableType::Multimap => 4, - } - } -} - -impl From for TableType { - fn from(value: u8) -> Self { - match value { - 3 => TableType::Normal, - 4 => TableType::Multimap, - _ => unreachable!(), - } - } -} - -#[derive(Clone, PartialEq, Debug)] -pub(crate) struct InternalTableDefinition { - table_root: Option, - table_type: TableType, - table_length: u64, - fixed_key_size: Option, - fixed_value_size: Option, - key_alignment: usize, - value_alignment: usize, - key_type: TypeName, - value_type: TypeName, -} - -impl InternalTableDefinition { - pub(crate) fn get_root(&self) -> Option { - self.table_root - } - - pub(crate) fn get_length(&self) -> u64 { - self.table_length - } - - pub(crate) fn get_fixed_key_size(&self) -> Option { - self.fixed_key_size - } - - pub(crate) fn get_fixed_value_size(&self) -> Option { - self.fixed_value_size - } - - pub(crate) fn get_key_alignment(&self) -> usize { - self.key_alignment - } - - pub(crate) fn get_value_alignment(&self) -> usize { - self.value_alignment - } - - pub(crate) fn get_type(&self) -> TableType { - self.table_type - } -} - -impl Value for InternalTableDefinition { - type SelfType<'a> = InternalTableDefinition; - type AsBytes<'a> = Vec; - - fn fixed_width() -> Option { - None - } - - fn from_bytes<'a>(data: &'a [u8]) -> Self - where - Self: 'a, - { - debug_assert!(data.len() > 22); - let mut offset = 0; - let legacy = TableType::is_legacy(data[offset]); - assert!(!legacy); - let table_type = TableType::from(data[offset]); - offset += 1; - - let table_length = u64::from_le_bytes( - data[offset..(offset + size_of::())] - .try_into() - .unwrap(), - ); - offset += size_of::(); - - let non_null = data[offset] != 0; - offset += 1; - let table_root = if non_null { - Some(BtreeHeader::from_le_bytes( - data[offset..(offset + BtreeHeader::serialized_size())] - .try_into() - .unwrap(), - )) - } else { - None - }; - offset += BtreeHeader::serialized_size(); - - let non_null = data[offset] != 0; - offset += 1; - let fixed_key_size = if non_null { - let fixed = u32::from_le_bytes( - data[offset..(offset + size_of::())] - .try_into() - .unwrap(), - ) as usize; - Some(fixed) - } else { - None - }; - offset += size_of::(); - - let non_null = data[offset] != 0; - offset += 1; - let fixed_value_size = if non_null { - let fixed = u32::from_le_bytes( - data[offset..(offset + size_of::())] - .try_into() - .unwrap(), - ) as usize; - Some(fixed) - } else { - None - }; - offset += size_of::(); - let key_alignment = u32::from_le_bytes( - data[offset..(offset + size_of::())] - .try_into() - .unwrap(), - ) as usize; - offset += size_of::(); - let value_alignment = u32::from_le_bytes( - data[offset..(offset + size_of::())] - .try_into() - .unwrap(), - ) as usize; - offset += size_of::(); - - let key_type_len = u32::from_le_bytes( - data[offset..(offset + size_of::())] - .try_into() - .unwrap(), - ) as usize; - offset += size_of::(); - let key_type = TypeName::from_bytes(&data[offset..(offset + key_type_len)]); - offset += key_type_len; - let value_type = TypeName::from_bytes(&data[offset..]); - - InternalTableDefinition { - table_root, - table_type, - table_length, - fixed_key_size, - fixed_value_size, - key_alignment, - value_alignment, - key_type, - value_type, - } - } - - fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Vec - where - Self: 'a, - Self: 'b, - { - let mut result = vec![value.table_type.into()]; - result.extend_from_slice(&value.table_length.to_le_bytes()); - if let Some(header) = value.table_root { - result.push(1); - result.extend_from_slice(&header.to_le_bytes()); - } else { - result.push(0); - result.extend_from_slice(&[0; BtreeHeader::serialized_size()]); - } - if let Some(fixed) = value.fixed_key_size { - result.push(1); - result.extend_from_slice(&u32::try_from(fixed).unwrap().to_le_bytes()); - } else { - result.push(0); - result.extend_from_slice(&[0; size_of::()]) - } - if let Some(fixed) = value.fixed_value_size { - result.push(1); - result.extend_from_slice(&u32::try_from(fixed).unwrap().to_le_bytes()); - } else { - result.push(0); - result.extend_from_slice(&[0; size_of::()]) - } - result.extend_from_slice(&u32::try_from(value.key_alignment).unwrap().to_le_bytes()); - result.extend_from_slice(&u32::try_from(value.value_alignment).unwrap().to_le_bytes()); - let key_type_bytes = value.key_type.to_bytes(); - result.extend_from_slice(&u32::try_from(key_type_bytes.len()).unwrap().to_le_bytes()); - result.extend_from_slice(&key_type_bytes); - result.extend_from_slice(&value.value_type.to_bytes()); - - result - } - - fn type_name() -> TypeName { - TypeName::internal("redb::InternalTableDefinition") - } -} - pub struct TableNameIter { inner: BtreeRangeIter<&'static str, InternalTableDefinition>, table_type: TableType, @@ -412,7 +184,7 @@ impl Iterator for TableNameIter { for entry in self.inner.by_ref() { match entry { Ok(entry) => { - if entry.value().table_type == self.table_type { + if entry.value().get_type() == self.table_type { return Some(Ok(entry.key().to_string())); } } @@ -466,28 +238,7 @@ impl TableTree { ) -> Result, TableError> { if let Some(guard) = self.tree.get(&name)? { let definition = guard.value(); - if definition.get_type() != table_type { - return if definition.get_type() == TableType::Multimap { - Err(TableError::TableIsMultimap(name.to_string())) - } else { - Err(TableError::TableIsNotMultimap(name.to_string())) - }; - } - if definition.get_key_alignment() != ALIGNMENT { - return Err(TableError::TypeDefinitionChanged { - name: definition.key_type.clone(), - alignment: definition.get_key_alignment(), - width: definition.get_fixed_key_size(), - }); - } - if definition.get_value_alignment() != ALIGNMENT { - return Err(TableError::TypeDefinitionChanged { - name: definition.value_type.clone(), - alignment: definition.get_value_alignment(), - width: definition.get_fixed_value_size(), - }); - } - + definition.check_match_untyped(table_type, name)?; Ok(Some(definition)) } else { Ok(None) @@ -503,28 +254,7 @@ impl TableTree { Ok( if let Some(definition) = self.get_table_untyped(name, table_type)? { // Do additional checks on the types to be sure they match - if definition.key_type != K::type_name() || definition.value_type != V::type_name() - { - return Err(TableError::TableTypeMismatch { - table: name.to_string(), - key: definition.key_type, - value: definition.value_type, - }); - } - if definition.get_fixed_key_size() != K::fixed_width() { - return Err(TableError::TypeDefinitionChanged { - name: K::type_name(), - alignment: definition.get_key_alignment(), - width: definition.get_fixed_key_size(), - }); - } - if definition.get_fixed_value_size() != V::fixed_width() { - return Err(TableError::TypeDefinitionChanged { - name: V::type_name(), - alignment: definition.get_value_alignment(), - width: definition.get_fixed_value_size(), - }); - } + definition.check_match::(table_type, name)?; Some(definition) } else { None @@ -575,19 +305,10 @@ impl<'txn> TableTreeMut<'txn> { .get_table_untyped(&entry, TableType::Normal) .map_err(|e| e.into_storage_error_or_corrupted("Internal corruption"))? .unwrap(); - if let Some(header) = definition.get_root() { - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - self.mem.clone(), - )?; - - for page in table_pages_iter { - let page = page?; - result[page.region as usize].record_alloc(page.page_index, page.page_order); - } - } + definition.visit_all_pages(self.mem.clone(), |page| { + result[page.region as usize].record_alloc(page.page_index, page.page_order); + Ok(()) + })?; } for entry in self.list_tables(TableType::Multimap)? { @@ -595,46 +316,10 @@ impl<'txn> TableTreeMut<'txn> { .get_table_untyped(&entry, TableType::Multimap) .map_err(|e| e.into_storage_error_or_corrupted("Internal corruption"))? .unwrap(); - if let Some(header) = definition.get_root() { - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with(definition.get_fixed_value_size()), - self.mem.clone(), - )?; - for page in table_pages_iter { - let page = page?; - result[page.region as usize].record_alloc(page.page_index, page.page_order); - } - - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with(definition.get_fixed_value_size()), - self.mem.clone(), - )?; - for table_page in table_pages_iter { - let page = self.mem.get_page(table_page?)?; - let subtree_roots = parse_subtree_roots( - &page, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - ); - for subtree_header in subtree_roots { - let sub_root_iter = AllPageNumbersBtreeIter::new( - subtree_header.root, - definition.get_fixed_value_size(), - <()>::fixed_width(), - self.mem.clone(), - )?; - for page in sub_root_iter { - let page = page?; - result[page.region as usize] - .record_alloc(page.page_index, page.page_order); - } - } - } - } + definition.visit_all_pages(self.mem.clone(), |page| { + result[page.region as usize].record_alloc(page.page_index, page.page_order); + Ok(()) + })?; } Ok(result) @@ -664,15 +349,18 @@ impl<'txn> TableTreeMut<'txn> { for entry in self.tree.range::(&(..))? { let entry = entry?; let definition = entry.value(); - // TODO: all these matches on table_type() are pretty errorprone, because you can call .get_fixed_value_size() on either. - // Maybe InternalTableDefinition should be an enum for normal and multimap, instead - match definition.get_type() { - TableType::Normal => { - if let Some(header) = definition.get_root() { + match definition { + InternalTableDefinition::Normal { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => { + if let Some(header) = table_root { if !RawBtree::new( Some(header), - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), + fixed_key_size, + fixed_value_size, self.mem.clone(), ) .verify_checksum()? @@ -681,11 +369,16 @@ impl<'txn> TableTreeMut<'txn> { } } } - TableType::Multimap => { + InternalTableDefinition::Multimap { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => { if !verify_tree_and_subtree_checksums( - definition.get_root(), - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), + table_root, + fixed_key_size, + fixed_value_size, self.mem.clone(), )? { return Ok(false); @@ -698,32 +391,53 @@ impl<'txn> TableTreeMut<'txn> { } pub(crate) fn flush_table_root_updates(&mut self) -> Result> { - for (name, (table_root, table_length)) in self.pending_table_updates.drain() { + for (name, (new_root, new_length)) in self.pending_table_updates.drain() { // Bypass .get_table() since the table types are dynamic let mut definition = self.tree.get(&name.as_str())?.unwrap().value(); // No-op if the root has not changed - if definition.table_root == table_root { - continue; + match definition { + InternalTableDefinition::Normal { table_root, .. } + | InternalTableDefinition::Multimap { table_root, .. } => { + if table_root == new_root { + continue; + } + } } - definition.table_length = table_length; // Finalize any dirty checksums - if definition.table_type == TableType::Normal { - let mut tree = UntypedBtreeMut::new( - table_root, - self.mem.clone(), - self.freed_pages.clone(), - definition.fixed_key_size, - definition.fixed_value_size, - ); - tree.finalize_dirty_checksums()?; - definition.table_root = tree.get_root(); - } else { - definition.table_root = finalize_tree_and_subtree_checksums( - table_root, - definition.fixed_key_size, - definition.fixed_value_size, - self.mem.clone(), - )?; + match definition { + InternalTableDefinition::Normal { + ref mut table_root, + ref mut table_length, + fixed_key_size, + fixed_value_size, + .. + } => { + let mut tree = UntypedBtreeMut::new( + new_root, + self.mem.clone(), + self.freed_pages.clone(), + fixed_key_size, + fixed_value_size, + ); + tree.finalize_dirty_checksums()?; + *table_root = tree.get_root(); + *table_length = new_length; + } + InternalTableDefinition::Multimap { + ref mut table_root, + ref mut table_length, + fixed_key_size, + fixed_value_size, + .. + } => { + *table_root = finalize_tree_and_subtree_checksums( + new_root, + fixed_key_size, + fixed_value_size, + self.mem.clone(), + )?; + *table_length = new_length; + } } self.tree.insert(&name.as_str(), &definition)?; } @@ -757,8 +471,7 @@ impl<'txn> TableTreeMut<'txn> { if let Ok(Some(definition)) = result.as_mut() { if let Some((updated_root, updated_length)) = self.pending_table_updates.get(name) { - definition.table_root = *updated_root; - definition.table_length = *updated_length; + definition.set_header(*updated_root, *updated_length); } } @@ -781,8 +494,7 @@ impl<'txn> TableTreeMut<'txn> { if let Ok(Some(definition)) = result.as_mut() { if let Some((updated_root, updated_length)) = self.pending_table_updates.get(name) { - definition.table_root = *updated_root; - definition.table_length = *updated_length; + definition.set_header(*updated_root, *updated_length); } } @@ -796,66 +508,12 @@ impl<'txn> TableTreeMut<'txn> { table_type: TableType, ) -> Result { if let Some(definition) = self.get_table_untyped(name, table_type)? { - if definition.table_type == TableType::Normal { - if let Some(header) = definition.get_root() { - let iter = AllPageNumbersBtreeIter::new( - header.root, - definition.fixed_key_size, - definition.fixed_value_size, - self.mem.clone(), - )?; - let mut freed_pages = self.freed_pages.lock().unwrap(); - for page_number in iter { - freed_pages.push(page_number?); - } - } - } else if definition.table_type == TableType::Multimap { - if let Some(header) = definition.get_root() { - let mut freed_pages = self.freed_pages.lock().unwrap(); - // Delete all the pages in the subtrees - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with( - definition.get_fixed_value_size(), - ), - self.mem.clone(), - )?; - for table_page in table_pages_iter { - let page = self.mem.get_page(table_page?)?; - let subtree_roots = parse_subtree_roots( - &page, - definition.get_fixed_key_size(), - definition.get_fixed_value_size(), - ); - for subtree_header in subtree_roots { - let sub_root_iter = AllPageNumbersBtreeIter::new( - subtree_header.root, - definition.get_fixed_value_size(), - <()>::fixed_width(), - self.mem.clone(), - )?; - for page in sub_root_iter { - freed_pages.push(page?); - } - } - } - // Now free the multimap table itself - let table_pages_iter = AllPageNumbersBtreeIter::new( - header.root, - definition.get_fixed_key_size(), - DynamicCollection::<()>::fixed_width_with( - definition.get_fixed_value_size(), - ), - self.mem.clone(), - )?; - for table_page in table_pages_iter { - freed_pages.push(table_page?); - } - } - } else { - unreachable!() - } + let mut freed_pages = self.freed_pages.lock().unwrap(); + definition.visit_all_pages(self.mem.clone(), |page_number| { + freed_pages.push(page_number); + Ok(()) + })?; + drop(freed_pages); self.pending_table_updates.remove(name); @@ -866,30 +524,31 @@ impl<'txn> TableTreeMut<'txn> { Ok(false) } - // Returns a tuple of the table id and the new root page - // root_page: the root of the master table pub(crate) fn get_or_create_table( &mut self, name: &str, table_type: TableType, - ) -> Result { - if let Some(found) = self.get_table::(name, table_type)? { - return Ok(found); - } - - let table = InternalTableDefinition { - table_root: None, - table_type, - table_length: 0, - fixed_key_size: K::fixed_width(), - fixed_value_size: V::fixed_width(), - key_alignment: ALIGNMENT, - value_alignment: ALIGNMENT, - key_type: K::type_name(), - value_type: V::type_name(), + ) -> Result<(Option, u64), TableError> { + let table = if let Some(found) = self.get_table::(name, table_type)? { + found + } else { + let table = InternalTableDefinition::new::(table_type, None, 0); + self.tree.insert(&name, &table)?; + table }; - self.tree.insert(&name, &table)?; - Ok(table) + + match table { + InternalTableDefinition::Normal { + table_root, + table_length, + .. + } + | InternalTableDefinition::Multimap { + table_root, + table_length, + .. + } => Ok((table_root, table_length)), + } } pub(crate) fn compact_tables(&mut self) -> Result { @@ -900,23 +559,15 @@ impl<'txn> TableTreeMut<'txn> { if let Some((updated_root, updated_length)) = self.pending_table_updates.get(entry.key()) { - definition.table_root = *updated_root; - definition.table_length = *updated_length; + definition.set_header(*updated_root, *updated_length); } - let mut tree = UntypedBtreeMut::new( - definition.table_root, - self.mem.clone(), - self.freed_pages.clone(), - definition.fixed_key_size, - definition.fixed_value_size, - ); - if tree.relocate()? { + if let Some(new_root) = + definition.relocate_tree(self.mem.clone(), self.freed_pages.clone())? + { progress = true; - self.pending_table_updates.insert( - entry.key().to_string(), - (tree.get_root(), definition.table_length), - ); + self.pending_table_updates + .insert(entry.key().to_string(), (new_root, definition.get_length())); } } @@ -942,16 +593,21 @@ impl<'txn> TableTreeMut<'txn> { for entry in self.tree.range::(&(..))? { let entry = entry?; let mut definition = entry.value(); - if let Some((updated_root, _)) = self.pending_table_updates.get(entry.key()) { - definition.table_root = *updated_root; + if let Some((updated_root, length)) = self.pending_table_updates.get(entry.key()) { + definition.set_header(*updated_root, *length); } - match definition.get_type() { - TableType::Normal => { + match definition { + InternalTableDefinition::Normal { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => { let subtree_stats = btree_stats( - definition.table_root.map(|x| x.root), + table_root.map(|x| x.root), &self.mem, - definition.fixed_key_size, - definition.fixed_value_size, + fixed_key_size, + fixed_value_size, )?; max_subtree_height = max(max_subtree_height, subtree_stats.tree_height); total_stored_bytes += subtree_stats.stored_leaf_bytes; @@ -960,12 +616,17 @@ impl<'txn> TableTreeMut<'txn> { branch_pages += subtree_stats.branch_pages; leaf_pages += subtree_stats.leaf_pages; } - TableType::Multimap => { + InternalTableDefinition::Multimap { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => { let subtree_stats = multimap_btree_stats( - definition.table_root.map(|x| x.root), + table_root.map(|x| x.root), &self.mem, - definition.fixed_key_size, - definition.fixed_value_size, + fixed_key_size, + fixed_value_size, )?; max_subtree_height = max(max_subtree_height, subtree_stats.tree_height); total_stored_bytes += subtree_stats.stored_leaf_bytes; @@ -991,15 +652,14 @@ impl<'txn> TableTreeMut<'txn> { #[cfg(test)] mod test { - use crate::tree_store::{InternalTableDefinition, TableType}; + use crate::tree_store::table_tree_base::InternalTableDefinition; use crate::types::TypeName; use crate::Value; #[test] fn round_trip() { - let x = InternalTableDefinition { + let x = InternalTableDefinition::Multimap { table_root: None, - table_type: TableType::Multimap, table_length: 0, fixed_key_size: None, fixed_value_size: Some(5), diff --git a/src/tree_store/table_tree_base.rs b/src/tree_store/table_tree_base.rs new file mode 100644 index 00000000..4e6f1de8 --- /dev/null +++ b/src/tree_store/table_tree_base.rs @@ -0,0 +1,515 @@ +use crate::multimap_table::{parse_subtree_roots, DynamicCollection}; +use crate::tree_store::{ + AllPageNumbersBtreeIter, BtreeHeader, PageNumber, TransactionalMemory, UntypedBtreeMut, +}; +use crate::{Key, Result, TableError, TypeName, Value}; +use std::mem::size_of; +use std::sync::{Arc, Mutex}; + +// Forward compatibility feature in case alignment can be supported in the future +// See https://github.com/cberner/redb/issues/360 +const ALIGNMENT: usize = 1; + +#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] +pub(crate) enum TableType { + Normal, + Multimap, +} + +impl TableType { + fn is_legacy(value: u8) -> bool { + value == 1 || value == 2 + } +} + +#[allow(clippy::from_over_into)] +impl Into for TableType { + fn into(self) -> u8 { + match self { + // 1 & 2 were used in the v1 file format + // TableType::Normal => 1, + // TableType::Multimap => 2, + TableType::Normal => 3, + TableType::Multimap => 4, + } + } +} + +impl From for TableType { + fn from(value: u8) -> Self { + match value { + 3 => TableType::Normal, + 4 => TableType::Multimap, + _ => unreachable!(), + } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub(crate) enum InternalTableDefinition { + Normal { + table_root: Option, + table_length: u64, + fixed_key_size: Option, + fixed_value_size: Option, + key_alignment: usize, + value_alignment: usize, + key_type: TypeName, + value_type: TypeName, + }, + Multimap { + table_root: Option, + table_length: u64, + fixed_key_size: Option, + fixed_value_size: Option, + key_alignment: usize, + value_alignment: usize, + key_type: TypeName, + value_type: TypeName, + }, +} + +impl InternalTableDefinition { + pub(super) fn new( + table_type: TableType, + table_root: Option, + table_length: u64, + ) -> Self { + match table_type { + TableType::Normal => InternalTableDefinition::Normal { + table_root, + table_length, + fixed_key_size: K::fixed_width(), + fixed_value_size: V::fixed_width(), + key_alignment: ALIGNMENT, + value_alignment: ALIGNMENT, + key_type: K::type_name(), + value_type: V::type_name(), + }, + TableType::Multimap => InternalTableDefinition::Multimap { + table_root, + table_length, + fixed_key_size: K::fixed_width(), + fixed_value_size: V::fixed_width(), + key_alignment: ALIGNMENT, + value_alignment: ALIGNMENT, + key_type: K::type_name(), + value_type: V::type_name(), + }, + } + } + + pub(super) fn set_header(&mut self, root: Option, length: u64) { + match self { + InternalTableDefinition::Normal { + table_root, + table_length, + .. + } => { + *table_root = root; + *table_length = length; + } + InternalTableDefinition::Multimap { + table_root, + table_length, + .. + } => { + *table_root = root; + *table_length = length; + } + } + } + + pub(super) fn check_match_untyped( + &self, + table_type: TableType, + name: &str, + ) -> Result<(), TableError> { + if self.get_type() != table_type { + return if self.get_type() == TableType::Multimap { + Err(TableError::TableIsMultimap(name.to_string())) + } else { + Err(TableError::TableIsNotMultimap(name.to_string())) + }; + } + if self.private_get_key_alignment() != ALIGNMENT { + return Err(TableError::TypeDefinitionChanged { + name: self.private_key_type(), + alignment: self.private_get_key_alignment(), + width: self.private_get_fixed_key_size(), + }); + } + if self.private_get_value_alignment() != ALIGNMENT { + return Err(TableError::TypeDefinitionChanged { + name: self.private_value_type(), + alignment: self.private_get_value_alignment(), + width: self.private_get_fixed_value_size(), + }); + } + + Ok(()) + } + + pub(super) fn check_match( + &self, + table_type: TableType, + name: &str, + ) -> Result<(), TableError> { + self.check_match_untyped(table_type, name)?; + + if self.private_key_type() != K::type_name() || self.private_value_type() != V::type_name() + { + return Err(TableError::TableTypeMismatch { + table: name.to_string(), + key: self.private_key_type(), + value: self.private_value_type(), + }); + } + if self.private_get_fixed_key_size() != K::fixed_width() { + return Err(TableError::TypeDefinitionChanged { + name: K::type_name(), + alignment: self.private_get_key_alignment(), + width: self.private_get_fixed_key_size(), + }); + } + if self.private_get_fixed_value_size() != V::fixed_width() { + return Err(TableError::TypeDefinitionChanged { + name: V::type_name(), + alignment: self.private_get_value_alignment(), + width: self.private_get_fixed_value_size(), + }); + } + + Ok(()) + } + + pub(crate) fn visit_all_pages<'a, F>( + &self, + mem: Arc, + mut visitor: F, + ) -> Result + where + F: FnMut(PageNumber) -> Result + 'a, + { + match self { + InternalTableDefinition::Normal { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => { + if let Some(header) = table_root { + let table_pages_iter = AllPageNumbersBtreeIter::new( + header.root, + *fixed_key_size, + *fixed_value_size, + mem, + )?; + + for page in table_pages_iter { + visitor(page?)?; + } + } + } + InternalTableDefinition::Multimap { + table_root, + fixed_key_size, + fixed_value_size, + .. + } => { + if let Some(header) = table_root { + let table_pages_iter = AllPageNumbersBtreeIter::new( + header.root, + *fixed_key_size, + DynamicCollection::<()>::fixed_width_with(*fixed_value_size), + mem.clone(), + )?; + for page in table_pages_iter { + visitor(page?)?; + } + + let table_pages_iter = AllPageNumbersBtreeIter::new( + header.root, + *fixed_key_size, + DynamicCollection::<()>::fixed_width_with(*fixed_value_size), + mem.clone(), + )?; + for table_page in table_pages_iter { + let page = mem.get_page(table_page?)?; + let subtree_roots = + parse_subtree_roots(&page, *fixed_key_size, *fixed_value_size); + for subtree_header in subtree_roots { + let sub_root_iter = AllPageNumbersBtreeIter::new( + subtree_header.root, + *fixed_value_size, + <()>::fixed_width(), + mem.clone(), + )?; + for page in sub_root_iter { + visitor(page?)?; + } + } + } + } + } + } + + Ok(()) + } + + pub(crate) fn relocate_tree( + &mut self, + mem: Arc, + freed_pages: Arc>>, + ) -> Result>> { + // TODO: this does not correctly handle multimap subtrees + let mut tree = UntypedBtreeMut::new( + self.private_get_root(), + mem, + freed_pages, + self.private_get_fixed_key_size(), + self.private_get_fixed_value_size(), + ); + if tree.relocate()? { + self.set_header(tree.get_root(), self.get_length()); + Ok(Some(tree.get_root())) + } else { + Ok(None) + } + } + + fn private_get_root(&self) -> Option { + match self { + InternalTableDefinition::Normal { table_root, .. } => *table_root, + InternalTableDefinition::Multimap { table_root, .. } => *table_root, + } + } + + pub(crate) fn get_length(&self) -> u64 { + match self { + InternalTableDefinition::Normal { table_length, .. } => *table_length, + InternalTableDefinition::Multimap { table_length, .. } => *table_length, + } + } + + fn private_get_fixed_key_size(&self) -> Option { + match self { + InternalTableDefinition::Normal { fixed_key_size, .. } => *fixed_key_size, + InternalTableDefinition::Multimap { fixed_key_size, .. } => *fixed_key_size, + } + } + + fn private_get_fixed_value_size(&self) -> Option { + match self { + InternalTableDefinition::Normal { + fixed_value_size, .. + } => *fixed_value_size, + InternalTableDefinition::Multimap { + fixed_value_size, .. + } => *fixed_value_size, + } + } + + fn private_get_key_alignment(&self) -> usize { + match self { + InternalTableDefinition::Normal { key_alignment, .. } => *key_alignment, + InternalTableDefinition::Multimap { key_alignment, .. } => *key_alignment, + } + } + + fn private_get_value_alignment(&self) -> usize { + match self { + InternalTableDefinition::Normal { + value_alignment, .. + } => *value_alignment, + InternalTableDefinition::Multimap { + value_alignment, .. + } => *value_alignment, + } + } + + pub(crate) fn get_type(&self) -> TableType { + match self { + InternalTableDefinition::Normal { .. } => TableType::Normal, + InternalTableDefinition::Multimap { .. } => TableType::Multimap, + } + } + + fn private_key_type(&self) -> TypeName { + match self { + InternalTableDefinition::Normal { key_type, .. } => key_type.clone(), + InternalTableDefinition::Multimap { key_type, .. } => key_type.clone(), + } + } + + fn private_value_type(&self) -> TypeName { + match self { + InternalTableDefinition::Normal { value_type, .. } => value_type.clone(), + InternalTableDefinition::Multimap { value_type, .. } => value_type.clone(), + } + } +} + +impl Value for InternalTableDefinition { + type SelfType<'a> = InternalTableDefinition; + type AsBytes<'a> = Vec; + + fn fixed_width() -> Option { + None + } + + fn from_bytes<'a>(data: &'a [u8]) -> Self + where + Self: 'a, + { + debug_assert!(data.len() > 22); + let mut offset = 0; + let legacy = TableType::is_legacy(data[offset]); + assert!(!legacy); + let table_type = TableType::from(data[offset]); + offset += 1; + + let table_length = u64::from_le_bytes( + data[offset..(offset + size_of::())] + .try_into() + .unwrap(), + ); + offset += size_of::(); + + let non_null = data[offset] != 0; + offset += 1; + let table_root = if non_null { + Some(BtreeHeader::from_le_bytes( + data[offset..(offset + BtreeHeader::serialized_size())] + .try_into() + .unwrap(), + )) + } else { + None + }; + offset += BtreeHeader::serialized_size(); + + let non_null = data[offset] != 0; + offset += 1; + let fixed_key_size = if non_null { + let fixed = u32::from_le_bytes( + data[offset..(offset + size_of::())] + .try_into() + .unwrap(), + ) as usize; + Some(fixed) + } else { + None + }; + offset += size_of::(); + + let non_null = data[offset] != 0; + offset += 1; + let fixed_value_size = if non_null { + let fixed = u32::from_le_bytes( + data[offset..(offset + size_of::())] + .try_into() + .unwrap(), + ) as usize; + Some(fixed) + } else { + None + }; + offset += size_of::(); + let key_alignment = u32::from_le_bytes( + data[offset..(offset + size_of::())] + .try_into() + .unwrap(), + ) as usize; + offset += size_of::(); + let value_alignment = u32::from_le_bytes( + data[offset..(offset + size_of::())] + .try_into() + .unwrap(), + ) as usize; + offset += size_of::(); + + let key_type_len = u32::from_le_bytes( + data[offset..(offset + size_of::())] + .try_into() + .unwrap(), + ) as usize; + offset += size_of::(); + let key_type = TypeName::from_bytes(&data[offset..(offset + key_type_len)]); + offset += key_type_len; + let value_type = TypeName::from_bytes(&data[offset..]); + + match table_type { + TableType::Normal => InternalTableDefinition::Normal { + table_root, + table_length, + fixed_key_size, + fixed_value_size, + key_alignment, + value_alignment, + key_type, + value_type, + }, + TableType::Multimap => InternalTableDefinition::Multimap { + table_root, + table_length, + fixed_key_size, + fixed_value_size, + key_alignment, + value_alignment, + key_type, + value_type, + }, + } + } + + fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Vec + where + Self: 'a, + Self: 'b, + { + let mut result = vec![value.get_type().into()]; + result.extend_from_slice(&value.get_length().to_le_bytes()); + if let Some(header) = value.private_get_root() { + result.push(1); + result.extend_from_slice(&header.to_le_bytes()); + } else { + result.push(0); + result.extend_from_slice(&[0; BtreeHeader::serialized_size()]); + } + if let Some(fixed) = value.private_get_fixed_key_size() { + result.push(1); + result.extend_from_slice(&u32::try_from(fixed).unwrap().to_le_bytes()); + } else { + result.push(0); + result.extend_from_slice(&[0; size_of::()]) + } + if let Some(fixed) = value.private_get_fixed_value_size() { + result.push(1); + result.extend_from_slice(&u32::try_from(fixed).unwrap().to_le_bytes()); + } else { + result.push(0); + result.extend_from_slice(&[0; size_of::()]) + } + result.extend_from_slice( + &u32::try_from(value.private_get_key_alignment()) + .unwrap() + .to_le_bytes(), + ); + result.extend_from_slice( + &u32::try_from(value.private_get_value_alignment()) + .unwrap() + .to_le_bytes(), + ); + let key_type_bytes = value.private_key_type().to_bytes(); + result.extend_from_slice(&u32::try_from(key_type_bytes.len()).unwrap().to_le_bytes()); + result.extend_from_slice(&key_type_bytes); + result.extend_from_slice(&value.private_value_type().to_bytes()); + + result + } + + fn type_name() -> TypeName { + TypeName::internal("redb::InternalTableDefinition") + } +}