From 0a585931f25f18612cb35edf652f367434411120 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Wed, 20 Jan 2021 06:59:04 -0500 Subject: [PATCH] ARROW-11265: [Rust] Made bool not ArrowNativeType This PR removes the risk of boolean values to be converted to bytes via `ToByteSlice` by explicitly making `ArrowNativeType` be only used in types whose in-memory representation in Rust equates to the in-memory representation in Arrow. `bool` in Rust is a byte and in Arrow it is a bit. Overall, the direction of this PR is to have the traits represent one aspect of the type. In this case, `ArrowNativeType` is currently * a type that has the same in memory representation (ToByteSlice is implemented for it) * a json serializable type * something that can be casted to/from `usize`. This poses a problem because: 1. bools are serializable, not castable to usize, have different memory representation 2. fixed size (iX, uX) are serializable, castable to usize, have the same memory representation 3. fixed floating (f32, f64) are serializable, not castable to usize, have the same memory representation however, they all implement `ArrowNativeType`. This PR focus on splitting the json-serializable part of it. Closes #9212 from jorgecarleitao/fix_trait Authored-by: Jorge C. Leitao Signed-off-by: Andrew Lamb --- rust/arrow/src/compute/kernels/sort.rs | 2 +- rust/arrow/src/datatypes.rs | 71 +++++++++++++++++-------- rust/arrow/src/util/integration_util.rs | 6 +-- 3 files changed, 52 insertions(+), 27 deletions(-) diff --git a/rust/arrow/src/compute/kernels/sort.rs b/rust/arrow/src/compute/kernels/sort.rs index 56e7c79ca591d..e86372dc0cde9 100644 --- a/rust/arrow/src/compute/kernels/sort.rs +++ b/rust/arrow/src/compute/kernels/sort.rs @@ -385,7 +385,7 @@ where } // insert valid and nan values in the correct order depending on the descending flag -fn insert_valid_values( +fn insert_valid_values( result_slice: &mut [u32], offset: usize, valids: Vec<(u32, T)>, diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index a49e8cb2baf36..6533217d9a089 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -199,11 +199,18 @@ pub struct Field { metadata: Option>, } -pub trait ArrowNativeType: - fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + 'static -{ +/// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.). +pub trait JsonSerializable: 'static { fn into_json_value(self) -> Option; +} +/// Trait expressing a Rust type that has the same in-memory representation +/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits). +/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers +/// as is. +pub trait ArrowNativeType: + fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + JsonSerializable +{ /// Convert native type from usize. fn from_usize(_: usize) -> Option { None @@ -225,7 +232,8 @@ pub trait ArrowNativeType: } } -/// Trait indicating a primitive fixed-width type (bool, ints and floats). +/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the +/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`]. pub trait ArrowPrimitiveType: 'static { /// Corresponding Rust native type for the primitive type. type Native: ArrowNativeType; @@ -246,17 +254,19 @@ pub trait ArrowPrimitiveType: 'static { } } -impl ArrowNativeType for bool { +impl JsonSerializable for bool { fn into_json_value(self) -> Option { Some(self.into()) } } -impl ArrowNativeType for i8 { +impl JsonSerializable for i8 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for i8 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -266,11 +276,13 @@ impl ArrowNativeType for i8 { } } -impl ArrowNativeType for i16 { +impl JsonSerializable for i16 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for i16 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -280,11 +292,13 @@ impl ArrowNativeType for i16 { } } -impl ArrowNativeType for i32 { +impl JsonSerializable for i32 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for i32 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -299,11 +313,13 @@ impl ArrowNativeType for i32 { } } -impl ArrowNativeType for i64 { +impl JsonSerializable for i64 { fn into_json_value(self) -> Option { Some(VNumber(Number::from(self))) } +} +impl ArrowNativeType for i64 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -318,11 +334,13 @@ impl ArrowNativeType for i64 { } } -impl ArrowNativeType for u8 { +impl JsonSerializable for u8 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for u8 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -332,11 +350,13 @@ impl ArrowNativeType for u8 { } } -impl ArrowNativeType for u16 { +impl JsonSerializable for u16 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for u16 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -346,11 +366,13 @@ impl ArrowNativeType for u16 { } } -impl ArrowNativeType for u32 { +impl JsonSerializable for u32 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for u32 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -360,11 +382,13 @@ impl ArrowNativeType for u32 { } } -impl ArrowNativeType for u64 { +impl JsonSerializable for u64 { fn into_json_value(self) -> Option { - Some(VNumber(Number::from(self))) + Some(self.into()) } +} +impl ArrowNativeType for u64 { fn from_usize(v: usize) -> Option { num::FromPrimitive::from_usize(v) } @@ -374,18 +398,21 @@ impl ArrowNativeType for u64 { } } -impl ArrowNativeType for f32 { +impl JsonSerializable for f32 { fn into_json_value(self) -> Option { Number::from_f64(f64::round(self as f64 * 1000.0) / 1000.0).map(VNumber) } } -impl ArrowNativeType for f64 { +impl JsonSerializable for f64 { fn into_json_value(self) -> Option { Number::from_f64(self).map(VNumber) } } +impl ArrowNativeType for f32 {} +impl ArrowNativeType for f64 {} + // BooleanType is special: its bit-width is not the size of the primitive type, and its `index` // operation assumes bit-packing. #[derive(Debug)] diff --git a/rust/arrow/src/util/integration_util.rs b/rust/arrow/src/util/integration_util.rs index 1cc4ad7882c7f..c80cfd81d422f 100644 --- a/rust/arrow/src/util/integration_util.rs +++ b/rust/arrow/src/util/integration_util.rs @@ -449,12 +449,10 @@ impl ArrowJsonBatch { for i in 0..col.len() { if col.is_null(i) { validity.push(1); - data.push( - Int8Type::default_value().into_json_value().unwrap(), - ); + data.push(0i8.into()); } else { validity.push(0); - data.push(col.value(i).into_json_value().unwrap()); + data.push(col.value(i).into()); } }