Skip to content

Commit

Permalink
ARROW-11265: [Rust] Made bool not ArrowNativeType
Browse files Browse the repository at this point in the history
This PR removes the risk of boolean values to be converted to bytes via `ToByteSlice` by explicitly making `ArrowNativeType` be only used in types whose in-memory representation in Rust equates to the in-memory representation in Arrow. `bool` in Rust is a byte and in Arrow it is a bit.

Overall, the direction of this PR is to have the traits represent one aspect of the type. In this case, `ArrowNativeType` is currently
* a type that has the same in memory representation (ToByteSlice is implemented for it)
* a json serializable type
* something that can be casted to/from `usize`.

This poses a problem because:

1. bools are serializable, not castable to usize, have different memory representation
2. fixed size (iX, uX) are serializable, castable to usize, have the same memory representation
3. fixed floating (f32, f64) are serializable, not castable to usize, have the same memory representation

however, they all implement `ArrowNativeType`.

This PR focus on splitting the json-serializable part of it.

Closes #9212 from jorgecarleitao/fix_trait

Authored-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
jorgecarleitao authored and kszucs committed Jan 25, 2021
1 parent 29cfed4 commit 0a58593
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 27 deletions.
2 changes: 1 addition & 1 deletion rust/arrow/src/compute/kernels/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ where
}

// insert valid and nan values in the correct order depending on the descending flag
fn insert_valid_values<T: ArrowNativeType>(
fn insert_valid_values<T>(
result_slice: &mut [u32],
offset: usize,
valids: Vec<(u32, T)>,
Expand Down
71 changes: 49 additions & 22 deletions rust/arrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,18 @@ pub struct Field {
metadata: Option<BTreeMap<String, String>>,
}

pub trait ArrowNativeType:
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + 'static
{
/// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.).
pub trait JsonSerializable: 'static {
fn into_json_value(self) -> Option<Value>;
}

/// Trait expressing a Rust type that has the same in-memory representation
/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits).
/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers
/// as is.
pub trait ArrowNativeType:
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + JsonSerializable
{
/// Convert native type from usize.
fn from_usize(_: usize) -> Option<Self> {
None
Expand All @@ -225,7 +232,8 @@ pub trait ArrowNativeType:
}
}

/// Trait indicating a primitive fixed-width type (bool, ints and floats).
/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
pub trait ArrowPrimitiveType: 'static {
/// Corresponding Rust native type for the primitive type.
type Native: ArrowNativeType;
Expand All @@ -246,17 +254,19 @@ pub trait ArrowPrimitiveType: 'static {
}
}

impl ArrowNativeType for bool {
impl JsonSerializable for bool {
fn into_json_value(self) -> Option<Value> {
Some(self.into())
}
}

impl ArrowNativeType for i8 {
impl JsonSerializable for i8 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for i8 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -266,11 +276,13 @@ impl ArrowNativeType for i8 {
}
}

impl ArrowNativeType for i16 {
impl JsonSerializable for i16 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for i16 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -280,11 +292,13 @@ impl ArrowNativeType for i16 {
}
}

impl ArrowNativeType for i32 {
impl JsonSerializable for i32 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for i32 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -299,11 +313,13 @@ impl ArrowNativeType for i32 {
}
}

impl ArrowNativeType for i64 {
impl JsonSerializable for i64 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
}
}

impl ArrowNativeType for i64 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -318,11 +334,13 @@ impl ArrowNativeType for i64 {
}
}

impl ArrowNativeType for u8 {
impl JsonSerializable for u8 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for u8 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -332,11 +350,13 @@ impl ArrowNativeType for u8 {
}
}

impl ArrowNativeType for u16 {
impl JsonSerializable for u16 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for u16 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -346,11 +366,13 @@ impl ArrowNativeType for u16 {
}
}

impl ArrowNativeType for u32 {
impl JsonSerializable for u32 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for u32 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -360,11 +382,13 @@ impl ArrowNativeType for u32 {
}
}

impl ArrowNativeType for u64 {
impl JsonSerializable for u64 {
fn into_json_value(self) -> Option<Value> {
Some(VNumber(Number::from(self)))
Some(self.into())
}
}

impl ArrowNativeType for u64 {
fn from_usize(v: usize) -> Option<Self> {
num::FromPrimitive::from_usize(v)
}
Expand All @@ -374,18 +398,21 @@ impl ArrowNativeType for u64 {
}
}

impl ArrowNativeType for f32 {
impl JsonSerializable for f32 {
fn into_json_value(self) -> Option<Value> {
Number::from_f64(f64::round(self as f64 * 1000.0) / 1000.0).map(VNumber)
}
}

impl ArrowNativeType for f64 {
impl JsonSerializable for f64 {
fn into_json_value(self) -> Option<Value> {
Number::from_f64(self).map(VNumber)
}
}

impl ArrowNativeType for f32 {}
impl ArrowNativeType for f64 {}

// BooleanType is special: its bit-width is not the size of the primitive type, and its `index`
// operation assumes bit-packing.
#[derive(Debug)]
Expand Down
6 changes: 2 additions & 4 deletions rust/arrow/src/util/integration_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,12 +449,10 @@ impl ArrowJsonBatch {
for i in 0..col.len() {
if col.is_null(i) {
validity.push(1);
data.push(
Int8Type::default_value().into_json_value().unwrap(),
);
data.push(0i8.into());
} else {
validity.push(0);
data.push(col.value(i).into_json_value().unwrap());
data.push(col.value(i).into());
}
}

Expand Down

0 comments on commit 0a58593

Please sign in to comment.