Skip to content

Commit

Permalink
Structured interval type (apache#3125) (apache#5654)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed May 15, 2024
1 parent 30767a6 commit 2dfa137
Show file tree
Hide file tree
Showing 24 changed files with 874 additions and 308 deletions.
40 changes: 20 additions & 20 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use arrow_array::cast::AsArray;
use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};

use crate::arity::{binary, try_binary};
Expand Down Expand Up @@ -343,12 +343,12 @@ trait TimestampOp: ArrowTimestampType {
type Duration: ArrowPrimitiveType<Native = i64>;

fn add_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
fn add_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
fn add_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
fn add_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
fn add_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;

fn sub_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
fn sub_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
fn sub_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
fn sub_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
fn sub_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;
}

macro_rules! timestamp {
Expand All @@ -360,23 +360,23 @@ macro_rules! timestamp {
Self::add_year_months(left, right, tz)
}

fn add_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
fn add_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
Self::add_day_time(left, right, tz)
}

fn add_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
fn add_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
Self::add_month_day_nano(left, right, tz)
}

fn sub_year_month(left: i64, right: i32, tz: Tz) -> Option<i64> {
Self::subtract_year_months(left, right, tz)
}

fn sub_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
fn sub_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
Self::subtract_day_time(left, right, tz)
}

fn sub_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
fn sub_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
Self::subtract_month_day_nano(left, right, tz)
}
}
Expand Down Expand Up @@ -506,12 +506,12 @@ fn timestamp_op<T: TimestampOp>(
/// Note: these should be fallible (#4456)
trait DateOp: ArrowTemporalType {
fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
fn add_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
fn add_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
fn add_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;

fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
fn sub_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
fn sub_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
fn sub_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
}

macro_rules! date {
Expand All @@ -521,23 +521,23 @@ macro_rules! date {
Self::add_year_months(left, right)
}

fn add_day_time(left: Self::Native, right: i64) -> Self::Native {
fn add_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
Self::add_day_time(left, right)
}

fn add_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
fn add_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
Self::add_month_day_nano(left, right)
}

fn sub_year_month(left: Self::Native, right: i32) -> Self::Native {
Self::subtract_year_months(left, right)
}

fn sub_day_time(left: Self::Native, right: i64) -> Self::Native {
fn sub_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
Self::subtract_day_time(left, right)
}

fn sub_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
fn sub_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
Self::subtract_month_day_nano(left, right)
}
}
Expand Down Expand Up @@ -1346,12 +1346,12 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
])
);
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
let b = IntervalMonthDayNanoArray::from(vec![1]);
let a = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::MAX]);
let b = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::ONE]);
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(
err,
"Compute error: Overflow happened on: 9223372036854775807 + 1"
"Compute error: Overflow happened on: 2147483647 + 1"
);
}

Expand Down
10 changes: 8 additions & 2 deletions arrow-array/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow_buffer::{i256, ArrowNativeType};
use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::ArrowError;
use half::f16;
use num::complex::ComplexFloat;
Expand Down Expand Up @@ -139,7 +139,10 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {

macro_rules! native_type_op {
($t:tt) => {
native_type_op!($t, 0, 1, $t::MIN, $t::MAX);
native_type_op!($t, 0, 1);
};
($t:tt, $zero:expr, $one: expr) => {
native_type_op!($t, $zero, $one, $t::MIN, $t::MAX);
};
($t:tt, $zero:expr, $one: expr, $min: expr, $max: expr) => {
impl ArrowNativeTypeOp for $t {
Expand Down Expand Up @@ -284,6 +287,9 @@ native_type_op!(u32);
native_type_op!(u64);
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);

native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
native_type_op!(IntervalMonthDayNano, IntervalMonthDayNano::ZERO, IntervalMonthDayNano::ONE);

macro_rules! native_type_float_op {
($t:tt, $zero:expr, $one:expr, $min:expr, $max:expr) => {
impl ArrowNativeTypeOp for $t {
Expand Down
52 changes: 33 additions & 19 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ mod tests {
use crate::builder::{Decimal128Builder, Decimal256Builder};
use crate::cast::downcast_array;
use crate::BooleanArray;
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::TimeUnit;

#[test]
Expand Down Expand Up @@ -1624,33 +1625,46 @@ mod tests {
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);

// a day_time interval contains days and milliseconds, but we do not yet have accessors for the values
let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
let v0 = IntervalDayTime {
days: 34,
milliseconds: 1,
};
let v2 = IntervalDayTime {
days: -2,
milliseconds: -5,
};

let arr = IntervalDayTimeArray::from(vec![Some(v0), None, Some(v2)]);

assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
assert_eq!(1, arr.value(0));
assert_eq!(1, arr.values()[0]);
assert_eq!(v0, arr.value(0));
assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);
assert_eq!(v2, arr.value(2));
assert_eq!(v2, arr.values()[2]);

// a month_day_nano interval contains months, days and nanoseconds,
// but we do not yet have accessors for the values.
// TODO: implement month, day, and nanos access method for month_day_nano.
let arr = IntervalMonthDayNanoArray::from(vec![
Some(100000000000000000000),
None,
Some(-500000000000000000000),
]);
let v0 = IntervalMonthDayNano {
months: 2,
days: 34,
nanoseconds: -1,
};
let v2 = IntervalMonthDayNano {
months: -3,
days: -2,
nanoseconds: 4,
};

let arr = IntervalMonthDayNanoArray::from(vec![Some(v0), None, Some(v2)]);
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
assert_eq!(100000000000000000000, arr.value(0));
assert_eq!(100000000000000000000, arr.values()[0]);
assert_eq!(v0, arr.value(0));
assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
assert_eq!(-500000000000000000000, arr.value(2));
assert_eq!(-500000000000000000000, arr.values()[2]);
assert_eq!(v2, arr.value(2));
assert_eq!(v2, arr.values()[2]);
}

#[test]
Expand Down Expand Up @@ -2460,7 +2474,7 @@ mod tests {
expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)"
)]
fn test_invalid_interval_type() {
let array = IntervalDayTimeArray::from(vec![1, 2, 3]);
let array = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]);
let _ = IntervalMonthDayNanoArray::from(array.into_data());
}

Expand Down
77 changes: 19 additions & 58 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::delta::{
use crate::temporal_conversions::as_datetime_with_timezone;
use crate::timezone::Tz;
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
use arrow_buffer::{i256, Buffer, OffsetBuffer};
use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano, OffsetBuffer};
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
use arrow_data::{validate_binary_view, validate_string_view};
use arrow_schema::{
Expand Down Expand Up @@ -220,7 +220,7 @@ make_type!(
);
make_type!(
IntervalDayTimeType,
i64,
IntervalDayTime,
DataType::Interval(IntervalUnit::DayTime),
r#"A “calendar” interval type in days and milliseconds.
Expand All @@ -247,7 +247,7 @@ which can lead to surprising results. Please see the description of ordering on
);
make_type!(
IntervalMonthDayNanoType,
i128,
IntervalMonthDayNano,
DataType::Interval(IntervalUnit::MonthDayNano),
r#"A “calendar” interval type in months, days, and nanoseconds.
Expand All @@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of
nanoseconds represents less than a day's worth of time).
```text
┌──────────────────────────────┬───────────────────────────┐
Nanos Days Months
(64 bits) │ (32 bits) │ (32 bits)
└──────────────────────────────┴───────────────────────────┘
0 63 95 127 bit offset
┌────────────────────────────┬─────────────────────────────┐
Months Days Nanos
(32 bits) │ (32 bits) (64 bits)
└────────────────────────────┴─────────────────────────────┘
0 32 64 128 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details
Expand Down Expand Up @@ -917,25 +917,8 @@ impl IntervalDayTimeType {
/// * `days` - The number of days (+/-) represented in this interval
/// * `millis` - The number of milliseconds (+/-) represented in this interval
#[inline]
pub fn make_value(
days: i32,
millis: i32,
) -> <IntervalDayTimeType as ArrowPrimitiveType>::Native {
/*
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1433
struct DayMilliseconds {
int32_t days = 0;
int32_t milliseconds = 0;
...
}
64 56 48 40 32 24 16 8 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| days | milliseconds |
+-------+-------+-------+-------+-------+-------+-------+-------+
*/
let m = millis as u64 & u32::MAX as u64;
let d = (days as u64 & u32::MAX as u64) << 32;
(m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
pub fn make_value(days: i32, milliseconds: i32) -> IntervalDayTime {
IntervalDayTime { days, milliseconds }
}

/// Turns a IntervalDayTimeType into a tuple of (days, milliseconds)
Expand All @@ -944,10 +927,8 @@ impl IntervalDayTimeType {
///
/// * `i` - The IntervalDayTimeType to convert
#[inline]
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
let days = (i >> 32) as i32;
let ms = i as i32;
(days, ms)
pub fn to_parts(i: IntervalDayTime) -> (i32, i32) {
(i.days, i.milliseconds)
}
}

Expand All @@ -960,27 +941,12 @@ impl IntervalMonthDayNanoType {
/// * `days` - The number of days (+/-) represented in this interval
/// * `nanos` - The number of nanoseconds (+/-) represented in this interval
#[inline]
pub fn make_value(
months: i32,
days: i32,
nanos: i64,
) -> <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native {
/*
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475
struct MonthDayNanos {
int32_t months;
int32_t days;
int64_t nanoseconds;
pub fn make_value(months: i32, days: i32, nanoseconds: i64) -> IntervalMonthDayNano {
IntervalMonthDayNano {
months,
days,
nanoseconds,
}
128 112 96 80 64 48 32 16 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| months | days | nanos |
+-------+-------+-------+-------+-------+-------+-------+-------+
*/
let m = (months as u128 & u32::MAX as u128) << 96;
let d = (days as u128 & u32::MAX as u128) << 64;
let n = nanos as u128 & u64::MAX as u128;
(m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
}

/// Turns a IntervalMonthDayNanoType into a tuple of (months, days, nanos)
Expand All @@ -989,13 +955,8 @@ impl IntervalMonthDayNanoType {
///
/// * `i` - The IntervalMonthDayNanoType to convert
#[inline]
pub fn to_parts(
i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> (i32, i32, i64) {
let months = (i >> 96) as i32;
let days = (i >> 64) as i32;
let nanos = i as i64;
(months, days, nanos)
pub fn to_parts(i: IntervalMonthDayNano) -> (i32, i32, i64) {
(i.months, i.days, i.nanoseconds)
}
}

Expand Down
Loading

0 comments on commit 2dfa137

Please sign in to comment.