Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: fix miniblock selection logic error #3107

Merged
merged 3 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions rust/lance-encoding/src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{collections::HashMap, env, sync::Arc};

use arrow::array::AsArray;
use arrow::datatypes::UInt64Type;
use arrow_array::PrimitiveArray;
use arrow_array::{Array, ArrayRef, RecordBatch, UInt8Array};
use arrow_schema::DataType;
use bytes::{Bytes, BytesMut};
Expand Down Expand Up @@ -780,8 +779,6 @@ impl ArrayEncodingStrategy for CoreArrayEncodingStrategy {
}
}

const MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE: u64 = 256;

impl CompressionStrategy for CoreArrayEncodingStrategy {
fn create_miniblock_compressor(
&self,
Expand Down Expand Up @@ -810,16 +807,7 @@ impl CompressionStrategy for CoreArrayEncodingStrategy {
}
if let DataBlock::VariableWidth(ref variable_width_data) = data {
if variable_width_data.bits_per_offset == 32 {
let max_len = data
.get_stat(Stat::MaxLength)
.expect("VariableWidthDataBlock should have valid max length statistics");
let max_len = max_len
.as_any()
.downcast_ref::<PrimitiveArray<UInt64Type>>()
.unwrap();
if max_len.value(0) < MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE {
return Ok(Box::new(BinaryMiniBlockEncoder::default()));
}
return Ok(Box::new(BinaryMiniBlockEncoder::default()));
}
}
Ok(Box::new(ValueEncoder::default()))
Expand Down
4 changes: 3 additions & 1 deletion rust/lance-encoding/src/encodings/logical/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1588,12 +1588,14 @@ impl PrimitiveStructuralEncoder {
// data doesn't even fit in a mini-block and the block overhead gets
// too large and we prefer zipped.
fn is_narrow(data_block: &DataBlock) -> bool {
const MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE: u64 = 256;

if let Some(max_len_array) = data_block.get_stat(Stat::MaxLength) {
let max_len_array = max_len_array
.as_any()
.downcast_ref::<PrimitiveArray<UInt64Type>>()
.unwrap();
if max_len_array.value(0) < 128 {
if max_len_array.value(0) < MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE {
return true;
}
}
Expand Down
Loading