From 624dae65dec52ab8bdacad4b91c4ba2fd6ebaf3a Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Thu, 12 Dec 2024 15:59:14 -0600 Subject: [PATCH] test(beatree): beatree iterator tests + related bugfixed --- nomt/src/beatree/iterator.rs | 323 ++++++++++++++++++++++++++++++----- nomt/src/beatree/mod.rs | 8 +- 2 files changed, 288 insertions(+), 43 deletions(-) diff --git a/nomt/src/beatree/iterator.rs b/nomt/src/beatree/iterator.rs index 2c226e3d..cfd9c8d8 100644 --- a/nomt/src/beatree/iterator.rs +++ b/nomt/src/beatree/iterator.rs @@ -1,7 +1,7 @@ //! Database iterators over the Beatree. -// TODO: remove this once used. -#![allow(unused)] +// TODO: remove this once used +#![allow(dead_code)] use std::{ cmp::Ordering, @@ -17,7 +17,7 @@ use super::{ branch::node::{get_key, BranchNode}, index::Index, leaf::node::LeafNode, - Key, ReadTransaction, ValueChange, + Key, ValueChange, }; /// An iterator over the state of the beatree at some particular point. @@ -37,10 +37,16 @@ pub struct BeatreeIterator { } impl BeatreeIterator { - pub(super) fn new(tx: &ReadTransaction, start: Key, end: Option) -> Self { + pub(super) fn new( + primary_staging: OrdMap, + secondary_staging: Option>, + bbn_index: Index, + start: Key, + end: Option, + ) -> Self { BeatreeIterator { - memory_values: StagingIterator::new(tx, start, end), - leaf_values: LeafIterator::new(tx.bbn_index.clone(), start, end), + memory_values: StagingIterator::new(primary_staging, secondary_staging, start, end), + leaf_values: LeafIterator::new(bbn_index, start, end), } } @@ -151,13 +157,13 @@ pub enum IterOutput<'a> { // The iterator has produced a new item. Item(Key, &'a [u8]), // The iterator has produced a new overflow item. The slice here is the entire overflow cell. + #[allow(dead_code)] OverflowItem(Key, ValueHash, &'a [u8]), } struct CurrentLeaf { index: usize, leaf: Arc, - separator: Key, } impl CurrentLeaf { @@ -173,8 +179,8 @@ impl CurrentLeaf { let index = self.index - 1; let key = self.leaf.key(index); let (cell, overflow) = self.leaf.value(index); - let (_, value_hash, _) = super::leaf::overflow::decode_cell(cell); if overflow { + let (_, value_hash, _) = super::leaf::overflow::decode_cell(cell); IterOutput::OverflowItem(key, value_hash, cell) } else { IterOutput::Item(key, cell) @@ -198,19 +204,17 @@ impl LeafIterator { start: Some(start), end, }; - let Some((_, branch)) = iter.index.lookup(start) else { return iter; }; - let Some((index_in_branch, pn)) = super::ops::search_branch(&branch, start) else { + let Some((index_in_branch, _)) = super::ops::search_branch(&branch, start) else { return iter; }; let separator = get_key(&branch, index_in_branch); iter.state = LeafIteratorState::Blocked { branch, - pn, index_in_branch, separator, last: None, @@ -298,7 +302,7 @@ impl LeafIterator { .next_key(get_key(&*branch, next_index_in_branch - 1)); match next_key { None => LeafIteratorState::Done { last: Some(last) }, - Some(k) if self.end.as_ref().map_or(false, |end| end < &k) => { + Some(k) if self.end.as_ref().map_or(false, |end| end <= &k) => { LeafIteratorState::Done { last: Some(last) } } Some(k) => { @@ -306,7 +310,6 @@ impl LeafIterator { let (separator, branch) = self.index.lookup(k).unwrap(); LeafIteratorState::Blocked { index_in_branch: 0, - pn: branch.node_pointer(0).into(), separator, branch, last: Some(last), @@ -314,12 +317,16 @@ impl LeafIterator { } } } else { - LeafIteratorState::Blocked { - index_in_branch: next_index_in_branch, - pn: branch.node_pointer(next_index_in_branch).into(), - separator: get_key(&branch, next_index_in_branch), - branch, - last: Some(last), + let separator = get_key(&branch, next_index_in_branch); + if self.end.map_or(true, |end| separator < end) { + LeafIteratorState::Blocked { + index_in_branch: next_index_in_branch, + separator: get_key(&branch, next_index_in_branch), + branch, + last: Some(last), + } + } else { + LeafIteratorState::Done { last: Some(last) } } } } @@ -342,9 +349,7 @@ impl LeafIterator { let prev_state = std::mem::replace(&mut self.state, LeafIteratorState::Done { last: None }); let LeafIteratorState::Blocked { branch, - pn, index_in_branch, - separator, .. } = prev_state else { @@ -352,11 +357,7 @@ impl LeafIterator { panic!("No leaf expected in iterator") }; - let leaf = CurrentLeaf { - index, - leaf, - separator, - }; + let leaf = CurrentLeaf { index, leaf }; self.state = if leaf.is_consumed() { self.new_state_leaf_consumed(branch, index_in_branch + 1, leaf) @@ -439,7 +440,6 @@ impl Iterator for NeededLeavesIter { enum LeafIteratorState { Blocked { branch: Arc, - pn: PageNumber, index_in_branch: usize, separator: Key, // this ensures that borrows can be kept valid. @@ -461,13 +461,15 @@ struct StagingIterator { } impl StagingIterator { - fn new(tx: &ReadTransaction, start: Key, end: Option) -> Self { + fn new( + primary_staging: OrdMap, + secondary_staging: Option>, + start: Key, + end: Option, + ) -> Self { StagingIterator { - primary: OrdMapOwnedIter::new(tx.primary_staging.clone(), start, end), - secondary: tx - .secondary_staging - .clone() - .map(|s| OrdMapOwnedIter::new(s, start, end)), + primary: OrdMapOwnedIter::new(primary_staging, start, end), + secondary: secondary_staging.map(|s| OrdMapOwnedIter::new(s, start, end)), } } @@ -490,21 +492,35 @@ impl StagingIterator { } fn next<'a>(&'a mut self) -> Option<(&'a Key, &'a ValueChange)> { - let primary_peek = self.primary.next(); - let secondary_peek = self.secondary.as_mut().and_then(|s| s.next()); - + let primary_peek = self.primary.peek().map(|(k, _)| k); + let secondary_peek = self + .secondary + .as_mut() + .and_then(|s| s.peek()) + .map(|(k, _)| k); match (primary_peek, secondary_peek) { (None, None) => None, - (Some(x), None) | (None, Some(x)) => Some(x), + (Some(_), None) => self.primary.next(), + (None, Some(_)) => self.next_secondary(), (Some(primary), Some(secondary)) => { - if primary.0 <= secondary.0 { - Some(primary) - } else { - Some(secondary) + match primary.cmp(&secondary) { + Ordering::Less => self.primary.next(), + Ordering::Equal => { + // if equal, favor the primary (more recent) staging map. + // consume the secondary item. + // UNWRAP: known to be `Some` + let _ = self.next_secondary(); + self.primary.next() + } + Ordering::Greater => self.next_secondary(), } } } } + + fn next_secondary<'a>(&'a mut self) -> Option<(&'a Key, &'a ValueChange)> { + self.secondary.as_mut().and_then(|s| s.next()) + } } // This lets us do a range iteration over an `OrdMap` in an owned manner, as a streaming iterator. @@ -519,7 +535,7 @@ impl OrdMapOwnedIter { } fn next<'a>(&'a mut self) -> Option<(&'a Key, &'a ValueChange)> { - self.iter.peek().map(|x| (x.0, x.1)) + self.iter.next().map(|x| (x.0, x.1)) } } @@ -541,3 +557,226 @@ impl OrdMapOwnedIter { } } } + +#[cfg(test)] +mod tests { + use super::IterOutput; + use crate::beatree::{self as beatree, BeatreeIterator}; + use crate::io::PagePool; + use beatree::{ + branch::{node::get_key, BranchNode, BranchNodeBuilder}, + index::Index, + leaf::node::{LeafBuilder, LeafNode}, + ops::bit_ops, + Key, PageNumber, ValueChange, + }; + + use imbl::OrdMap; + use std::sync::Arc; + + lazy_static::lazy_static! { + static ref PAGE_POOL: PagePool = PagePool::new(); + } + + fn encode_value(x: u64) -> Vec { + x.to_be_bytes().to_vec() + } + + fn decode_value(v: &[u8]) -> u64 { + u64::from_be_bytes(v.try_into().unwrap()) + } + + fn build_leaf(values: Vec<(Key, u64)>) -> (Key, Arc) { + let n = values.len(); + let total_value_size = n * 8; + let mut builder = LeafBuilder::new(&PAGE_POOL, n, total_value_size); + let separator = values[0].0; + for (key, value) in values { + builder.push_cell(key, &encode_value(value), false); + } + + (separator, Arc::new(builder.finish())) + } + + fn build_branch(leaves: Vec<(Key, PageNumber)>) -> Arc { + let n = leaves.len(); + let prefix_len = { + let mut prefix_len = 0; + let mut first_key = None; + for (key, _) in &leaves { + if let Some(first_key) = first_key { + prefix_len = bit_ops::prefix_len(key, first_key) + } else { + prefix_len = bit_ops::separator_len(key); + first_key = Some(key); + } + } + + prefix_len + }; + + let branch = BranchNode::new_in(&PAGE_POOL); + let mut builder = BranchNodeBuilder::new(branch, n, n, prefix_len); + for (key, pn) in leaves { + builder.push(key, bit_ops::separator_len(&key), pn.0); + } + + Arc::new(builder.finish()) + } + + fn build_index(branches: Vec>) -> Index { + let mut index = Index::default(); + for branch in branches { + let separator = get_key(&branch, 0); + index.insert(separator, branch); + } + + index + } + + fn key(x: u16) -> Key { + let mut k = Key::default(); + k[0..2].copy_from_slice(&x.to_be_bytes()); + k + } + + #[test] + fn overlay_takes_priority() { + let (_, leaf) = build_leaf(vec![ + (key(1), 1), + (key(2), 2), + (key(3), 3), + (key(4), 4), + (key(5), 5), + ]); + + let branch = build_branch(vec![(key(0), 69.into())]); + let index = build_index(vec![branch.clone()]); + + let secondary_staging = vec![ + (key(1), ValueChange::Delete), + (key(2), ValueChange::Insert(encode_value(200))), + (key(3), ValueChange::Insert(encode_value(300))), + ] + .into_iter() + .collect::>(); + + let primary_staging = vec![ + (key(3), ValueChange::Delete), + (key(4), ValueChange::Insert(encode_value(400))), + ] + .into_iter() + .collect::>(); + + let mut iterator = BeatreeIterator::new( + primary_staging, + Some(secondary_staging), + index, + Key::default(), + None, + ); + let mut collected = Vec::new(); + while let Some(iter_output) = iterator.next() { + match iter_output { + IterOutput::Blocked => iterator.provide_leaf(leaf.clone()), + IterOutput::Item(key, value) => collected.push((key, decode_value(value))), + IterOutput::OverflowItem(_, _, _) => panic!(), + } + } + + assert_eq!(collected, vec![(key(2), 200), (key(4), 400), (key(5), 5)]); + } + + #[test] + fn needed_leaves_is_accurate() { + // split across 2 branches + let branch_1 = build_branch(vec![(key(0), 69.into()), (key(4), 70.into())]); + let branch_2 = build_branch(vec![(key(6), 420.into()), (key(8), 421.into())]); + + let index = build_index(vec![branch_1.clone(), branch_2.clone()]); + + let get_needed = |start, end| { + let iter = BeatreeIterator::new(OrdMap::new(), None, index.clone(), start, end); + iter.needed_leaves().map(|pn| pn.0).collect::>() + }; + + assert_eq!(get_needed(Key::default(), None), vec![69, 70, 420, 421]); + assert_eq!(get_needed(key(2), Some(key(7))), vec![69, 70, 420]); + assert_eq!(get_needed(key(4), Some(key(8))), vec![70, 420]); + } + + #[test] + fn start_bound_respected_in_leaves() { + let (_, leaf_1) = build_leaf(vec![(key(1), 1), (key(2), 2), (key(3), 3)]); + + let (_, leaf_2) = build_leaf(vec![(key(4), 4), (key(5), 5)]); + + let branch = build_branch(vec![(key(0), 69.into()), (key(4), 70.into())]); + + let index = build_index(vec![branch.clone()]); + { + let start = key(2); + let mut leaves = vec![leaf_1.clone(), leaf_2.clone()].into_iter(); + let mut iter = BeatreeIterator::new(OrdMap::new(), None, index.clone(), start, None); + while let Some(output) = iter.next() { + match output { + IterOutput::Blocked => iter.provide_leaf(leaves.next().unwrap()), + IterOutput::Item(k, _) => assert!(k >= start), + IterOutput::OverflowItem(_, _, _) => panic!(), + } + } + } + } + + #[test] + fn end_bound_respected_in_leaves() { + let (_, leaf_1) = build_leaf(vec![(key(1), 1), (key(2), 2), (key(3), 3)]); + + let (_, leaf_2) = build_leaf(vec![(key(6), 6), (key(7), 7)]); + + let branch = build_branch(vec![(key(0), 69.into()), (key(6), 70.into())]); + + let index = build_index(vec![branch.clone()]); + { + let end = key(7); + let mut leaves = vec![leaf_1.clone(), leaf_2.clone()].into_iter(); + let mut iter = BeatreeIterator::new( + OrdMap::new(), + None, + index.clone(), + Key::default(), + Some(end), + ); + while let Some(output) = iter.next() { + match output { + IterOutput::Blocked => iter.provide_leaf(leaves.next().unwrap()), + IterOutput::Item(k, _) => assert!(k < end), + IterOutput::OverflowItem(_, _, _) => panic!(), + } + } + + assert!(leaves.next().is_none()); + } + + { + let end = key(4); + let mut leaves = vec![leaf_1.clone()].into_iter(); + let mut iter = BeatreeIterator::new( + OrdMap::new(), + None, + index.clone(), + Key::default(), + Some(end), + ); + while let Some(output) = iter.next() { + match output { + IterOutput::Blocked => iter.provide_leaf(leaves.next().unwrap()), + IterOutput::Item(k, _) => assert!(k < end), + IterOutput::OverflowItem(_, _, _) => panic!(), + } + } + + assert!(leaves.next().is_none()); + } + } +} diff --git a/nomt/src/beatree/mod.rs b/nomt/src/beatree/mod.rs index 3f8c39d5..0e1ca7a6 100644 --- a/nomt/src/beatree/mod.rs +++ b/nomt/src/beatree/mod.rs @@ -496,7 +496,13 @@ impl ReadTransaction { /// Create a new iterator with the given half-open start and end range. #[allow(unused)] pub fn iterator(&self, start: Key, end: Option) -> BeatreeIterator { - BeatreeIterator::new(self, start, end) + BeatreeIterator::new( + self.primary_staging.clone(), + self.secondary_staging.clone(), + self.bbn_index.clone(), + start, + end, + ) } }