Skip to content

Commit

Permalink
feat: support fastlanes bitpacking (#2886)
Browse files Browse the repository at this point in the history
This PR uses the [FastLanes
algorithm](https://www.vldb.org/pvldb/vol16/p2132-afroozeh.pdf) for
bit-pack encoding.

The bit-packing routine is migrated from [SpiralDB's fastlanes
implementation](https://github.com/spiraldb/fastlanes); the migrated
code has been modified so that it builds on stable Rust.

#2865

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
  • Loading branch information
broccoliSpicy and westonpace authored Sep 27, 2024
1 parent 75aa2c2 commit 681db8c
Show file tree
Hide file tree
Showing 12 changed files with 3,840 additions and 7 deletions.
13 changes: 13 additions & 0 deletions protos/encodings.proto
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,18 @@ message Bitpacked {
bool signed = 4;
}

// Items are bitpacked in a buffer
//
// NOTE(review): the message name suggests this encoding is intended only for
// non-negative values -- confirm against the encoder before relying on it.
message BitpackedForNonNeg {
// the number of bits used for a value in the buffer
uint64 compressed_bits_per_value = 1;

// the number of bits of the uncompressed value. e.g. for a u32, this will be 32
uint64 uncompressed_bits_per_value = 2;

// The buffer that holds the bitpacked items
Buffer buffer = 3;
}

// An array encoding for shredded structs that will never be null
//
// There is no actual data in this column.
Expand Down Expand Up @@ -240,6 +252,7 @@ message ArrayEncoding {
PackedStruct packed_struct = 9;
Bitpacked bitpacked = 10;
FixedSizeBinary fixed_size_binary = 11;
BitpackedForNonNeg bitpacked_for_non_neg = 12;
}
}

Expand Down
4 changes: 4 additions & 0 deletions rust/lance-encoding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ snafu.workspace = true
tokio.workspace = true
tracing.workspace = true
zstd.workspace = true
bytemuck = "=1.18.0"
arrayref = "0.3.7"
paste = "1.0.15"
seq-macro = "0.3.5"

[dev-dependencies]
lance-testing.workspace = true
Expand Down
5 changes: 3 additions & 2 deletions rust/lance-encoding/benches/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ fn bench_decode(c: &mut Criterion) {
let mut group = c.benchmark_group("decode_primitive");
for data_type in PRIMITIVE_TYPES {
let data = lance_datagen::gen()
.anon_col(lance_datagen::array::rand_type(&DataType::Int32))
.into_batch_rows(lance_datagen::RowCount::from(1024 * 1024))
.anon_col(lance_datagen::array::rand_type(data_type))
.into_batch_rows(lance_datagen::RowCount::from(1024 * 1024 * 1024))
.unwrap();
let lance_schema =
Arc::new(lance_core::datatypes::Schema::try_from(data.schema().as_ref()).unwrap());
Expand Down Expand Up @@ -96,6 +96,7 @@ fn bench_decode(c: &mut Criterion) {
});
}
}

fn bench_decode_fsl(c: &mut Criterion) {
let rt = tokio::runtime::Runtime::new().unwrap();
let mut group = c.benchmark_group("decode_primitive_fsl");
Expand Down
26 changes: 26 additions & 0 deletions rust/lance-encoding/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,32 @@ impl LanceBuffer {
pub fn copy_array<const N: usize>(array: [u8; N]) -> Self {
    // Place the array's bytes in a vector so the resulting buffer owns its data.
    let bytes: Vec<u8> = array.to_vec();
    Self::Owned(bytes)
}

/// Returns the number of bytes held by this buffer, whichever variant it is.
// NOTE(review): the lint is silenced presumably because no `is_empty` accompanies
// this method -- confirm, or add one and drop the allowance.
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
    match self {
        Self::Owned(bytes) => bytes.len(),
        Self::Borrowed(shared) => shared.len(),
    }
}

/// Creates a new [LanceBuffer] covering `length` bytes of this buffer, beginning
/// at byte `offset`.
///
/// For a borrowed buffer the slice is produced by the underlying buffer's own
/// `slice_with_length`, which allows the same memory region to be shared between
/// lance buffers. For an owned buffer the requested bytes are copied into a new
/// vector.
///
/// # Panics
/// Panics if `(offset + length)` is larger than the existing length.
pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
    // Saturate so that an overflowing `offset + length` fails the bounds check
    // below instead of wrapping around.
    let end = offset.saturating_add(length);
    assert!(
        end <= self.len(),
        "the offset + length of the sliced Buffer cannot exceed the existing length"
    );
    match self {
        // `end` equals `offset + length` here: the assertion rules out saturation.
        Self::Owned(bytes) => Self::Owned(bytes[offset..end].to_vec()),
        Self::Borrowed(shared) => Self::Borrowed(shared.slice_with_length(offset, length)),
    }
}
}

impl AsRef<[u8]> for LanceBuffer {
Expand Down
Loading

0 comments on commit 681db8c

Please sign in to comment.