diff --git a/parquet/benches/encoding.rs b/parquet/benches/encoding.rs index bc18a49da2a4..8e61666e6345 100644 --- a/parquet/benches/encoding.rs +++ b/parquet/benches/encoding.rs @@ -17,7 +17,7 @@ use criterion::*; use half::f16; -use parquet::basic::Encoding; +use parquet::basic::{Encoding, Type as ParquetType}; use parquet::data_type::{ DataType, DoubleType, FixedLenByteArray, FixedLenByteArrayType, FloatType, }; @@ -35,7 +35,10 @@ fn bench_typed( ) { let name = format!( "dtype={}, encoding={:?}", - std::any::type_name::(), + match T::get_physical_type() { + ParquetType::FIXED_LEN_BYTE_ARRAY => format!("FixedLenByteArray({type_length})"), + _ => std::any::type_name::().to_string(), + }, encoding ); let column_desc_ptr = ColumnDescPtr::new(ColumnDescriptor::new( diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 01e92115c45b..324e1c379bcd 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -674,6 +674,13 @@ pub(crate) mod private { /// Return the value as an mutable Any to allow for downcasts without transmutation fn as_mut_any(&mut self) -> &mut dyn std::any::Any; + + /// Sets the value of this object from the provided [`Bytes`] + /// + /// Only implemented for `ByteArray` and `FixedLenByteArray`. Will panic for other types. + fn set_from_bytes(&mut self, _data: Bytes) { + unimplemented!(); + } } impl ParquetValueType for bool { @@ -953,9 +960,7 @@ pub(crate) mod private { return Err(eof_err!("Not enough bytes to decode")); } - let val: &mut Self = val_array.as_mut_any().downcast_mut().unwrap(); - - val.set_data(data.slice(decoder.start..decoder.start + len)); + val_array.set_data(data.slice(decoder.start..decoder.start + len)); decoder.start += len; } decoder.num_values -= num_values; @@ -998,6 +1003,11 @@ pub(crate) mod private { fn as_mut_any(&mut self) -> &mut dyn std::any::Any { self } + + #[inline] + fn set_from_bytes(&mut self, data: Bytes) { + self.set_data(data); + } } impl HeapSize for super::ByteArray { @@ -1093,6 +1103,11 @@ pub(crate) mod private { fn as_mut_any(&mut self) -> &mut dyn std::any::Any { self } + + #[inline] + fn set_from_bytes(&mut self, data: Bytes) { + self.set_data(data); + } } impl HeapSize for super::FixedLenByteArray { diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 16467b32dcaf..b5217d02ff09 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -901,11 +901,7 @@ impl Decoder for DeltaLengthByteArrayDecoder { for item in buffer.iter_mut().take(num_values) { let len = self.lengths[self.current_idx] as usize; - - item.as_mut_any() - .downcast_mut::() - .unwrap() - .set_data(data.slice(self.offset..self.offset + len)); + item.set_from_bytes(data.slice(self.offset..self.offset + len)); self.offset += len; self.current_idx += 1; @@ -1029,7 +1025,7 @@ impl Decoder for DeltaByteArrayDecoder { fn get(&mut self, buffer: &mut [T::T]) -> Result { match T::get_physical_type() { - ty @ Type::BYTE_ARRAY | ty @ Type::FIXED_LEN_BYTE_ARRAY => { + Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => { let num_values = cmp::min(buffer.len(), self.num_values); let mut v: [ByteArray; 1] = [ByteArray::new(); 1]; for item in buffer.iter_mut().take(num_values) { @@ -1051,20 +1047,7 @@ impl Decoder for DeltaByteArrayDecoder { result.extend_from_slice(suffix); let data = Bytes::from(result.clone()); - - match ty { - Type::BYTE_ARRAY => item - .as_mut_any() - .downcast_mut::() - .unwrap() - .set_data(data), - Type::FIXED_LEN_BYTE_ARRAY => item - .as_mut_any() - .downcast_mut::() - .unwrap() - .set_data(data), - _ => unreachable!(), - }; + item.set_from_bytes(data); self.previous_value = result; self.current_idx += 1; diff --git a/parquet/src/encodings/decoding/byte_stream_split_decoder.rs b/parquet/src/encodings/decoding/byte_stream_split_decoder.rs index 9b2f43ace8a8..b72ae8f62c34 100644 --- a/parquet/src/encodings/decoding/byte_stream_split_decoder.rs +++ b/parquet/src/encodings/decoding/byte_stream_split_decoder.rs @@ -21,7 +21,7 @@ use bytes::Bytes; use crate::basic::{Encoding, Type}; use crate::data_type::private::ParquetValueType; -use crate::data_type::{DataType, FixedLenByteArray, SliceAsBytes}; +use crate::data_type::{DataType, SliceAsBytes}; use crate::errors::{ParquetError, Result}; use super::Decoder; @@ -234,12 +234,7 @@ impl Decoder for VariableWidthByteStreamSplitDecoder { for (i, bi) in buffer.iter_mut().enumerate().take(num_values) { // Get a view into the data, without also copying the bytes let data = bytes_with_data.slice(i * type_size..(i + 1) * type_size); - // TODO: perhaps add a `set_from_bytes` method to `DataType` to avoid downcasting - let bi = bi - .as_mut_any() - .downcast_mut::() - .expect("Decoding fixed length byte array"); - bi.set_data(data); + bi.set_from_bytes(data); } Ok(num_values)