diff --git a/columnar/src/tests.rs b/columnar/src/tests.rs index 41251af8e9..f703133611 100644 --- a/columnar/src/tests.rs +++ b/columnar/src/tests.rs @@ -17,7 +17,7 @@ fn test_dataframe_writer_str() { assert_eq!(columnar.num_columns(), 1); let cols: Vec = columnar.read_columns("my_string").unwrap(); assert_eq!(cols.len(), 1); - assert_eq!(cols[0].num_bytes(), 88); + assert_eq!(cols[0].num_bytes(), 89); } #[test] @@ -31,7 +31,7 @@ fn test_dataframe_writer_bytes() { assert_eq!(columnar.num_columns(), 1); let cols: Vec = columnar.read_columns("my_string").unwrap(); assert_eq!(cols.len(), 1); - assert_eq!(cols[0].num_bytes(), 88); + assert_eq!(cols[0].num_bytes(), 89); } #[test] diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 5aa0cf5f61..bbcae7fe6c 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -130,7 +130,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 94); + assert_eq!(file.len(), 95); let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let column = fast_field_readers .u64("field") @@ -180,7 +180,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 122); + assert_eq!(file.len(), 123); let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let col = fast_field_readers .u64("field") @@ -213,7 +213,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 95); + assert_eq!(file.len(), 96); let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let fast_field_reader = fast_field_readers .u64("field") @@ -245,7 +245,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 4490); + assert_eq!(file.len(), 4491); { let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let col = fast_field_readers @@ -278,7 +278,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 266); + assert_eq!(file.len(), 267); { let fast_field_readers = FastFieldReaders::open(file, schema).unwrap(); @@ -772,7 +772,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 103); + assert_eq!(file.len(), 104); let fast_field_readers = FastFieldReaders::open(file, schema).unwrap(); let bool_col = fast_field_readers.bool("field_bool").unwrap(); assert_eq!(bool_col.first(0), Some(true)); @@ -804,7 +804,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 115); + assert_eq!(file.len(), 116); let readers = FastFieldReaders::open(file, schema).unwrap(); let bool_col = readers.bool("field_bool").unwrap(); for i in 0..25 { @@ -829,7 +829,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 105); + assert_eq!(file.len(), 106); let fastfield_readers = FastFieldReaders::open(file, schema).unwrap(); let col = fastfield_readers.bool("field_bool").unwrap(); assert_eq!(col.first(0), None); diff --git a/sstable/README.md b/sstable/README.md index ac2287b6f7..9fdc5b609a 100644 --- a/sstable/README.md +++ b/sstable/README.md @@ -100,13 +100,14 @@ Note: there is no ambiguity between both representation as Add is always guarant ### IndexValue ``` -+------------+-------+-------+-----+ -| EntryCount | Entry | Entry | ... | -+------------+-------+-------+-----+ - |---( # of entries)---| ++------------+----------+-------+-------+-----+ +| EntryCount | StartPos | Entry | Entry | ... | ++------------+----------+-------+-------+-----+ + |---( # of entries)---| ``` - EntryCount(VInt): number of entries +- StartPos(VInt): the start pos of the first (data) block referenced by this (index) block - Entry (IndexEntry) ### Entry diff --git a/sstable/src/lib.rs b/sstable/src/lib.rs index 18cfee66c3..e62c425398 100644 --- a/sstable/src/lib.rs +++ b/sstable/src/lib.rs @@ -397,8 +397,9 @@ mod test { // end of block 0u8, 0u8, 0u8, 0u8, // no more blocks // index - 6u8, 0u8, 0u8, 0u8, // block len + 7u8, 0u8, 0u8, 0u8, // block len 1, // num blocks + 0, // offset 11, // len of 1st block 0, // first ord of 1st block 32, 17, 20, // keep 0 push 2 | 17 20 diff --git a/sstable/src/sstable_index.rs b/sstable/src/sstable_index.rs index eddd42c694..b27f6e1845 100644 --- a/sstable/src/sstable_index.rs +++ b/sstable/src/sstable_index.rs @@ -151,6 +151,7 @@ impl SSTableIndexBuilder { sstable_writer.write_suffix(keep_len, &block.last_key_or_greater[keep_len..]); sstable_writer.write_value(&block.block_addr); + sstable_writer.flush_block_if_required()?; previous_key.clear(); previous_key.extend_from_slice(&block.last_key_or_greater); @@ -184,7 +185,7 @@ mod tests { #[test] fn test_sstable_index() { let mut sstable_builder = SSTableIndexBuilder::default(); - sstable_builder.add_block(b"aaa", 0..20, 0u64); + sstable_builder.add_block(b"aaa", 10..20, 0u64); sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64); sstable_builder.add_block(b"ccc", 30..40, 10u64); sstable_builder.add_block(b"dddd", 40..50, 15u64); diff --git a/sstable/src/value/index.rs b/sstable/src/value/index.rs index 52bd6c9967..50faf36c42 100644 --- a/sstable/src/value/index.rs +++ b/sstable/src/value/index.rs @@ -21,7 +21,7 @@ impl ValueReader for IndexValueReader { let num_vals = deserialize_vint_u64(&mut data) as usize; self.vals.clear(); let mut first_ordinal = 0u64; - let mut prev_start = 0usize; + let mut prev_start = deserialize_vint_u64(&mut data) as usize; for _ in 0..num_vals { let len = deserialize_vint_u64(&mut data); let delta_ordinal = deserialize_vint_u64(&mut data); @@ -53,6 +53,14 @@ impl ValueWriter for IndexValueWriter { fn serialize_block(&self, output: &mut Vec) { let mut prev_ord = 0u64; vint::serialize_into_vec(self.vals.len() as u64, output); + + let start_pos = if let Some(block_addr) = self.vals.first() { + block_addr.byte_range.start as u64 + } else { + 0 + }; + vint::serialize_into_vec(start_pos, output); + // TODO use array_windows when it gets stabilized for elem in self.vals.windows(2) { let [current, next] = elem else { @@ -114,5 +122,11 @@ mod tests { first_ordinal: 10, }, ]); + crate::value::tests::test_value_reader_writer::<_, IndexValueReader, IndexValueWriter>(&[ + BlockAddr { + byte_range: 5..10, + first_ordinal: 2, + }, + ]); } }