Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support complex interval via IntervalMonthDayNano #1615

Merged
merged 1 commit into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion datafusion/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ pub enum ScalarValue {
IntervalYearMonth(Option<i32>),
/// Interval with DayTime unit
IntervalDayTime(Option<i64>),
/// Interval with MonthDayNano unit
IntervalMonthDayNano(Option<i128>),
/// struct of nested ScalarValue (boxed to reduce size_of(ScalarValue))
#[allow(clippy::box_collection)]
Struct(Option<Box<Vec<ScalarValue>>>, Box<Vec<Field>>),
Expand Down Expand Up @@ -168,6 +170,8 @@ impl PartialEq for ScalarValue {
(IntervalYearMonth(_), _) => false,
(IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
(IntervalDayTime(_), _) => false,
(IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
(IntervalMonthDayNano(_), _) => false,
(Struct(v1, t1), Struct(v2, t2)) => v1.eq(v2) && t1.eq(t2),
(Struct(_, _), _) => false,
}
Expand Down Expand Up @@ -260,6 +264,8 @@ impl PartialOrd for ScalarValue {
(IntervalYearMonth(_), _) => None,
(IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
(IntervalDayTime(_), _) => None,
(IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
(IntervalMonthDayNano(_), _) => None,
(Struct(v1, t1), Struct(v2, t2)) => {
if t1.eq(t2) {
v1.partial_cmp(v2)
Expand Down Expand Up @@ -318,6 +324,7 @@ impl std::hash::Hash for ScalarValue {
TimestampNanosecond(v, _) => v.hash(state),
IntervalYearMonth(v) => v.hash(state),
IntervalDayTime(v) => v.hash(state),
IntervalMonthDayNano(v) => v.hash(state),
Struct(v, t) => {
v.hash(state);
t.hash(state);
Expand Down Expand Up @@ -585,6 +592,9 @@ impl ScalarValue {
DataType::Interval(IntervalUnit::YearMonth)
}
ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
ScalarValue::IntervalMonthDayNano(_) => {
DataType::Interval(IntervalUnit::MonthDayNano)
}
ScalarValue::Struct(_, fields) => DataType::Struct(fields.as_ref().clone()),
}
}
Expand Down Expand Up @@ -1216,14 +1226,20 @@ impl ScalarValue {
e,
size
),

ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
Interval,
IntervalUnit::YearMonth,
IntervalYearMonthArray,
e,
size
),
ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
Interval,
IntervalUnit::MonthDayNano,
IntervalMonthDayNanoArray,
e,
size
),
ScalarValue::Struct(values, fields) => match values {
Some(values) => {
let field_values: Vec<_> = fields
Expand Down Expand Up @@ -1510,6 +1526,9 @@ impl ScalarValue {
ScalarValue::IntervalDayTime(val) => {
eq_array_primitive!(array, index, IntervalDayTimeArray, val)
}
ScalarValue::IntervalMonthDayNano(val) => {
eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)
}
ScalarValue::Struct(_, _) => unimplemented!(),
}
}
Expand Down Expand Up @@ -1811,6 +1830,7 @@ impl fmt::Display for ScalarValue {
ScalarValue::Date64(e) => format_option!(f, e)?,
ScalarValue::IntervalDayTime(e) => format_option!(f, e)?,
ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
ScalarValue::IntervalMonthDayNano(e) => format_option!(f, e)?,
ScalarValue::Struct(e, fields) => match e {
Some(l) => write!(
f,
Expand Down Expand Up @@ -1872,6 +1892,9 @@ impl fmt::Debug for ScalarValue {
ScalarValue::IntervalYearMonth(_) => {
write!(f, "IntervalYearMonth(\"{}\")", self)
}
ScalarValue::IntervalMonthDayNano(_) => {
write!(f, "IntervalMonthDayNano(\"{}\")", self)
}
ScalarValue::Struct(e, fields) => {
// Use Debug representation of field values
match e {
Expand Down
27 changes: 10 additions & 17 deletions datafusion/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1818,16 +1818,19 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// Interval is a tricky thing
// 1 day is not 24 hours because timezones, 1 year != 365/364! 30 days != 1 month
// The true way to store and calculate intervals is to store it as it is defined
// Due to the fact that Arrow supports only two types YearMonth (month) and DayTime (day, time)
// It's not possible to store complex intervals
// It's possible to do select (NOW() + INTERVAL '1 year') + INTERVAL '1 day'; as workaround
// This is why there are 3 different interval types in Arrow
if result_month != 0 && (result_days != 0 || result_millis != 0) {
return Err(DataFusionError::NotImplemented(format!(
"DF does not support intervals that have both a Year/Month part as well as Days/Hours/Mins/Seconds: {:?}. Hint: try breaking the interval into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds - e.g. (NOW() + INTERVAL '1 year') + INTERVAL '1 day'",
value
)));
let result: i128 = ((result_month as i128) << 96)
| ((result_days as i128) << 64)
// IntervalMonthDayNano uses nanos, but IntervalDayTime uses millis
| ((result_millis * 1_000_000_i64) as i128);

return Ok(Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(
result,
))));
}

// Month interval
if result_month != 0 {
return Ok(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
result_month as i32,
Expand Down Expand Up @@ -2764,16 +2767,6 @@ mod tests {
);
}

#[test]
fn select_unsupported_complex_interval() {
let sql = "SELECT INTERVAL '1 year 1 day'";
let err = logical_plan(sql).expect_err("query should have failed");
assert!(matches!(
err,
DataFusionError::NotImplemented(msg) if msg == "DF does not support intervals that have both a Year/Month part as well as Days/Hours/Mins/Seconds: \"1 year 1 day\". Hint: try breaking the interval into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds - e.g. (NOW() + INTERVAL '1 year') + INTERVAL '1 day'",
));
}

#[test]
fn select_simple_aggregate_with_groupby_and_column_is_in_aggregate_and_groupby() {
quick_test(
Expand Down
20 changes: 20 additions & 0 deletions datafusion/tests/sql/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ async fn test_crypto_expressions() -> Result<()> {

#[tokio::test]
async fn test_interval_expressions() -> Result<()> {
// day nano intervals
test_expression!(
"interval '1'",
"0 years 0 mons 0 days 0 hours 0 mins 1.00 secs"
Expand Down Expand Up @@ -456,6 +457,7 @@ async fn test_interval_expressions() -> Result<()> {
"interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds'",
"0 years 0 mons 5 days 4 hours 3 mins 2.100 secs"
);
// month intervals
test_expression!(
"interval '0.5 month'",
"0 years 0 mons 15 days 0 hours 0 mins 0.00 secs"
Expand Down Expand Up @@ -496,6 +498,24 @@ async fn test_interval_expressions() -> Result<()> {
"interval '2' year",
"2 years 0 mons 0 days 0 hours 0 mins 0.00 secs"
);
// complex
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 nice

test_expression!(
"interval '1 year 1 day'",
"0 years 12 mons 1 days 0 hours 0 mins 0.00 secs"
);
test_expression!(
"interval '1 year 1 day 1 hour'",
"0 years 12 mons 1 days 1 hours 0 mins 0.00 secs"
);
test_expression!(
"interval '1 year 1 day 1 hour 1 minute'",
"0 years 12 mons 1 days 1 hours 1 mins 0.00 secs"
);
test_expression!(
"interval '1 year 1 day 1 hour 1 minute 1 second'",
"0 years 12 mons 1 days 1 hours 1 mins 1.00 secs"
);

Ok(())
}

Expand Down