Skip to content

Commit

Permalink
fix bug with new sstable index format (#1953)
Browse files Browse the repository at this point in the history
  • Loading branch information
trinity-1686a authored Mar 22, 2023
1 parent 1a35f65 commit 482b415
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 17 deletions.
4 changes: 2 additions & 2 deletions columnar/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ fn test_dataframe_writer_str() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 88);
assert_eq!(cols[0].num_bytes(), 89);
}

#[test]
Expand All @@ -31,7 +31,7 @@ fn test_dataframe_writer_bytes() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 88);
assert_eq!(cols[0].num_bytes(), 89);
}

#[test]
Expand Down
16 changes: 8 additions & 8 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ mod tests {
}
let file = directory.open_read(path).unwrap();

assert_eq!(file.len(), 94);
assert_eq!(file.len(), 95);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let column = fast_field_readers
.u64("field")
Expand Down Expand Up @@ -180,7 +180,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 122);
assert_eq!(file.len(), 123);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let col = fast_field_readers
.u64("field")
Expand Down Expand Up @@ -213,7 +213,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 95);
assert_eq!(file.len(), 96);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let fast_field_reader = fast_field_readers
.u64("field")
Expand Down Expand Up @@ -245,7 +245,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 4490);
assert_eq!(file.len(), 4491);
{
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let col = fast_field_readers
Expand Down Expand Up @@ -278,7 +278,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 266);
assert_eq!(file.len(), 267);

{
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
Expand Down Expand Up @@ -772,7 +772,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 103);
assert_eq!(file.len(), 104);
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
let bool_col = fast_field_readers.bool("field_bool").unwrap();
assert_eq!(bool_col.first(0), Some(true));
Expand Down Expand Up @@ -804,7 +804,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 115);
assert_eq!(file.len(), 116);
let readers = FastFieldReaders::open(file, schema).unwrap();
let bool_col = readers.bool("field_bool").unwrap();
for i in 0..25 {
Expand All @@ -829,7 +829,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 105);
assert_eq!(file.len(), 106);
let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
let col = fastfield_readers.bool("field_bool").unwrap();
assert_eq!(col.first(0), None);
Expand Down
9 changes: 5 additions & 4 deletions sstable/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,14 @@ Note: there is no ambiguity between both representation as Add is always guarant

### IndexValue
```
+------------+-------+-------+-----+
| EntryCount | Entry | Entry | ... |
+------------+-------+-------+-----+
|---( # of entries)---|
+------------+----------+-------+-------+-----+
| EntryCount | StartPos | Entry | Entry | ... |
+------------+----------+-------+-------+-----+
|---( # of entries)---|
```

- EntryCount(VInt): number of entries
- StartPos(VInt): the start pos of the first (data) block referenced by this (index) block
- Entry (IndexEntry)

### Entry
Expand Down
3 changes: 2 additions & 1 deletion sstable/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,9 @@ mod test {
// end of block
0u8, 0u8, 0u8, 0u8, // no more blocks
// index
6u8, 0u8, 0u8, 0u8, // block len
7u8, 0u8, 0u8, 0u8, // block len
1, // num blocks
0, // offset
11, // len of 1st block
0, // first ord of 1st block
32, 17, 20, // keep 0 push 2 | 17 20
Expand Down
3 changes: 2 additions & 1 deletion sstable/src/sstable_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ impl SSTableIndexBuilder {

sstable_writer.write_suffix(keep_len, &block.last_key_or_greater[keep_len..]);
sstable_writer.write_value(&block.block_addr);
sstable_writer.flush_block_if_required()?;

previous_key.clear();
previous_key.extend_from_slice(&block.last_key_or_greater);
Expand Down Expand Up @@ -184,7 +185,7 @@ mod tests {
#[test]
fn test_sstable_index() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 0..20, 0u64);
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
Expand Down
16 changes: 15 additions & 1 deletion sstable/src/value/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl ValueReader for IndexValueReader {
let num_vals = deserialize_vint_u64(&mut data) as usize;
self.vals.clear();
let mut first_ordinal = 0u64;
let mut prev_start = 0usize;
let mut prev_start = deserialize_vint_u64(&mut data) as usize;
for _ in 0..num_vals {
let len = deserialize_vint_u64(&mut data);
let delta_ordinal = deserialize_vint_u64(&mut data);
Expand Down Expand Up @@ -53,6 +53,14 @@ impl ValueWriter for IndexValueWriter {
fn serialize_block(&self, output: &mut Vec<u8>) {
let mut prev_ord = 0u64;
vint::serialize_into_vec(self.vals.len() as u64, output);

let start_pos = if let Some(block_addr) = self.vals.first() {
block_addr.byte_range.start as u64
} else {
0
};
vint::serialize_into_vec(start_pos, output);

// TODO use array_windows when it gets stabilized
for elem in self.vals.windows(2) {
let [current, next] = elem else {
Expand Down Expand Up @@ -114,5 +122,11 @@ mod tests {
first_ordinal: 10,
},
]);
crate::value::tests::test_value_reader_writer::<_, IndexValueReader, IndexValueWriter>(&[
BlockAddr {
byte_range: 5..10,
first_ordinal: 2,
},
]);
}
}

0 comments on commit 482b415

Please sign in to comment.