Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sql): improve interval expression, support shortened version #4182

Merged
4 changes: 4 additions & 0 deletions src/sql/src/statements/transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ use sqlparser::ast::{visit_expressions_mut, Expr};

use crate::error::Result;
use crate::statements::statement::Statement;
mod expand_interval;
mod type_alias;

use expand_interval::ExpandIntervalTransformRule;
pub use type_alias::get_data_type_by_alias_name;
use type_alias::TypeAliasTransformRule;

lazy_static! {
/// [TransformRule] registry.
///
/// Holds one shared instance of every rule known to this module:
/// the interval short-name expansion rule followed by the type-alias rule.
/// NOTE(review): presumably the rules are applied in the order listed here —
/// confirm against the code that iterates `RULES`.
static ref RULES: Vec<Arc<dyn TransformRule>> = vec![
Arc::new(ExpandIntervalTransformRule{}),
Arc::new(TypeAliasTransformRule{}),
];
}
Expand Down
249 changes: 249 additions & 0 deletions src/sql/src/statements/transform/expand_interval.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::ops::ControlFlow;

use itertools::Itertools;
use lazy_static::lazy_static;
use regex::Regex;
use sqlparser::ast::{Expr, Interval, Value};

use crate::statements::transform::TransformRule;

lazy_static! {
/// Tokenizer for shortened interval strings.
///
/// Each match is either an optionally negative run of digits (`-?\d+`) or a run of
/// non-digit characters (`\D+`). The numeric alternative is tried first, so a `-`
/// directly in front of digits becomes part of the number.
/// NOTE(review): `\D+` is greedy and `-` is itself a non-digit, so a sign that directly
/// follows a unit (e.g. `1h-2m`) ends up inside the unit token (`h-`) — confirm whether
/// such input needs to be supported.
static ref INTERVAL_SHORT_NAME_PATTERN: Regex = Regex::new(r"(-?\d+|\D+)").unwrap();

/// Lookup table from a shortened unit name to the full unit name it expands to.
/// Keys are matched exactly (case-sensitive, whole token).
/// NOTE(review): `ms` expands to `microseconds` here, although `ms` conventionally
/// abbreviates milliseconds (`us` already covers microseconds) — confirm this is intended.
static ref INTERVAL_SHORT_NAME_MAPPING: HashMap<&'static str, &'static str> = HashMap::from([
("y","years"),
("mon","months"),
("w","weeks"),
("d","days"),
("h","hours"),
("m","minutes"),
("s","seconds"),
("millis","milliseconds"),
("mils","milliseconds"),
("ms","microseconds"),
("us","microseconds"),
("ns","nanoseconds"),
]);
}

/// `Interval` expression transformer.
///
/// Rewrites the shortened unit names inside an interval literal to their full names:
/// - `y` for `years`
/// - `mon` for `months`
/// - `w` for `weeks`
/// - `d` for `days`
/// - `h` for `hours`
/// - `m` for `minutes`
/// - `s` for `seconds`
/// - `millis` for `milliseconds`
/// - `mils` for `milliseconds`
/// - `ms` for `microseconds`
/// - `us` for `microseconds`
/// - `ns` for `nanoseconds`
///
/// Required for use cases that use the shortened version of the `Interval` declaration,
/// e.g. `select interval '1h'` or `select interval '3w'`.
pub(crate) struct ExpandIntervalTransformRule;

impl TransformRule for ExpandIntervalTransformRule {
/// Applies transform rule for `Interval` type by extending the shortened version (e.g. '1h', '2d')
/// In case when `Interval` has `BinaryOp` value (e.g. query like `SELECT INTERVAL '2h' - INTERVAL '1h'`)
/// it's AST has `left` part of type `Value::SingleQuotedString` which needs to be handled specifically.
/// To handle the `right` part which is `Interval` no extra steps are needed.
fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> {
if let Expr::Interval(interval) = expr {
match *interval.value.clone() {
Expr::Value(Value::SingleQuotedString(value))
| Expr::Value(Value::DoubleQuotedString(value)) => {
if let Some(data) = expand_interval_name(&value) {
*expr = create_interval_with_expanded_name(
interval,
single_quoted_string_expr(data),
);
}
}
Expr::BinaryOp { left, op, right } => match *left {
killme2008 marked this conversation as resolved.
Show resolved Hide resolved
Expr::Value(Value::SingleQuotedString(value))
| Expr::Value(Value::DoubleQuotedString(value)) => {
if let Some(data) = expand_interval_name(&value) {
let new_value = Box::new(Expr::BinaryOp {
left: single_quoted_string_expr(data),
op,
right,
});
*expr = create_interval_with_expanded_name(interval, new_value);
}
}
_ => {}
},
_ => {}
}
}
ControlFlow::<()>::Continue(())
}
}

/// Wraps `data` as a boxed single-quoted string literal expression.
fn single_quoted_string_expr(data: String) -> Box<Expr> {
    let literal = Value::SingleQuotedString(data);
    Box::new(Expr::Value(literal))
}

/// Builds a new `Expr::Interval` that uses `new_value` as its value and copies every
/// other field (leading/last field and both precisions) from `interval`.
fn create_interval_with_expanded_name(interval: &Interval, new_value: Box<Expr>) -> Expr {
    // The old `value` is deliberately ignored; only the qualifiers are carried over.
    let Interval {
        leading_field,
        leading_precision,
        last_field,
        fractional_seconds_precision,
        ..
    } = interval;

    let expanded = Interval {
        value: new_value,
        leading_field: leading_field.clone(),
        leading_precision: *leading_precision,
        last_field: last_field.clone(),
        fractional_seconds_precision: *fractional_seconds_precision,
    };
    Expr::Interval(expanded)
}

/// Expands the shortened unit names in an interval string to their full names.
///
/// The input is split into numeric and non-numeric tokens by `INTERVAL_SHORT_NAME_PATTERN`.
/// Every token found in `INTERVAL_SHORT_NAME_MAPPING` is replaced by its full name
/// (e.g. "years", "hours", "minutes"); any other token (numbers, unknown text) is kept
/// verbatim. The tokens are then joined with a single space, so `"2y4w1h"` becomes
/// `"2 years 4 weeks 1 hours"`.
///
/// Returns `None` when `interval_str` contains any whitespace: such a string is considered
/// to already be in the full form. As a consequence the hybrid format "1y 2 days 3h" is
/// not supported.
fn expand_interval_name(interval_str: &str) -> Option<String> {
    // Whitespace marks the full form; the caller must leave the string untouched.
    if interval_str.contains(char::is_whitespace) {
        return None;
    }

    let expanded = INTERVAL_SHORT_NAME_PATTERN
        .find_iter(interval_str)
        .map(|token| {
            let token = token.as_str();
            // Unknown tokens (including the numbers themselves) pass through unchanged.
            INTERVAL_SHORT_NAME_MAPPING
                .get(token)
                .copied()
                .unwrap_or(token)
        })
        .join(" ");
    Some(expanded)
}

#[cfg(test)]
mod tests {
    use std::ops::ControlFlow;

    use sqlparser::ast::{BinaryOperator, Expr, Interval, Value};

    use crate::statements::transform::expand_interval::{
        expand_interval_name, single_quoted_string_expr, ExpandIntervalTransformRule,
    };
    use crate::statements::transform::TransformRule;

    /// Wraps `value` in an `Expr::Interval` whose optional qualifiers are all unset.
    fn bare_interval(value: Box<Expr>) -> Expr {
        Expr::Interval(Interval {
            value,
            leading_field: None,
            leading_precision: None,
            last_field: None,
            fractional_seconds_precision: None,
        })
    }

    /// Shortened names are expanded; strings containing whitespace are rejected.
    #[test]
    fn test_transform_interval_conversions() {
        let expandable = [
            ("1y", "1 years"),
            ("4mon", "4 months"),
            ("-3w", "-3 weeks"),
            ("55h", "55 hours"),
            ("3d", "3 days"),
            ("5s", "5 seconds"),
            ("2m", "2 minutes"),
            ("100millis", "100 milliseconds"),
            ("150mils", "150 milliseconds"),
            ("200ms", "200 microseconds"),
            ("350us", "350 microseconds"),
            ("400ns", "400 nanoseconds"),
            ("2y4w1h", "2 years 4 weeks 1 hours"),
        ];
        for (short, full) in expandable.iter() {
            let expanded = expand_interval_name(short).unwrap();
            assert_eq!(expanded, *full);
        }

        let already_full = ["1 year 2 months 3 days 4 hours", "-2 months"];
        for input in already_full.iter() {
            assert_eq!(expand_interval_name(input), None);
        }
    }

    /// A plain quoted-string interval value is replaced by its expanded form.
    #[test]
    fn test_visit_expr_when_interval_is_single_quoted_string_expr() {
        let rule = ExpandIntervalTransformRule {};
        let mut actual = bare_interval(single_quoted_string_expr("5y".to_string()));

        let flow = rule.visit_expr(&mut actual);

        assert_eq!(flow, ControlFlow::Continue(()));
        let expected = bare_interval(Box::new(Expr::Value(Value::SingleQuotedString(
            "5 years".to_string(),
        ))));
        assert_eq!(actual, expected);
    }

    /// For a binary operation only the left operand is expanded by a direct call;
    /// the nested right-hand interval is expected to stay as it was.
    #[test]
    fn test_visit_expr_when_interval_is_binary_op() {
        let rule = ExpandIntervalTransformRule {};
        let difference_of = |left: &str| {
            bare_interval(Box::new(Expr::BinaryOp {
                left: single_quoted_string_expr(left.to_string()),
                op: BinaryOperator::Minus,
                right: Box::new(bare_interval(single_quoted_string_expr("1d".to_string()))),
            }))
        };
        let mut actual = difference_of("2d");

        let flow = rule.visit_expr(&mut actual);

        assert_eq!(flow, ControlFlow::Continue(()));
        assert_eq!(actual, difference_of("2 days"));
    }
}
2 changes: 1 addition & 1 deletion src/sql/src/statements/transform/type_alias.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ impl TransformRule for TypeAliasTransformRule {

fn replace_type_alias(data_type: &mut DataType) {
match data_type {
// TODO(dennis): The sqlparser latest version contains the Int8 alias for postres Bigint.
// TODO(dennis): The sqlparser latest version contains the Int8 alias for Postgres Bigint.
etolbakov marked this conversation as resolved.
Show resolved Hide resolved
// Which means 8 bytes in postgres (not 8 bits). If we upgrade the sqlparser, need to process it.
// See https://docs.rs/sqlparser/latest/sqlparser/ast/enum.DataType.html#variant.Int8
DataType::Custom(name, tokens) if name.0.len() == 1 && tokens.is_empty() => {
Expand Down
41 changes: 41 additions & 0 deletions tests/cases/standalone/common/types/interval/interval.result
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,47 @@ SELECT TIMESTAMP '1992-09-20 11:30:00.123456' - interval_value as new_value from
| 1980-09-07T23:17:48.111443988 |
+-------------------------------+

-- Interval shortened names
SELECT INTERVAL '55h';

+--------------------------------------------------------+
| IntervalMonthDayNano("198000000000000") |
+--------------------------------------------------------+
| 0 years 0 mons 0 days 55 hours 0 mins 0.000000000 secs |
+--------------------------------------------------------+

SELECT INTERVAL '-2mon';

+---------------------------------------------------------+
| IntervalMonthDayNano("-158456325028528675187087900672") |
+---------------------------------------------------------+
| 0 years -2 mons 0 days 0 hours 0 mins 0.000000000 secs |
+---------------------------------------------------------+

SELECT INTERVAL '1y2w3d4h';

+---------------------------------------------------------+
| IntervalMonthDayNano("950737950484766714775589781504") |
+---------------------------------------------------------+
| 0 years 12 mons 17 days 4 hours 0 mins 0.000000000 secs |
+---------------------------------------------------------+
killme2008 marked this conversation as resolved.
Show resolved Hide resolved

SELECT INTERVAL '7 days' - INTERVAL '1d';

+----------------------------------------------------------------------------------------------+
| IntervalMonthDayNano("129127208515966861312") - IntervalMonthDayNano("18446744073709551616") |
+----------------------------------------------------------------------------------------------+
| 0 years 0 mons 6 days 0 hours 0 mins 0.000000000 secs |
+----------------------------------------------------------------------------------------------+

SELECT INTERVAL '2h' + INTERVAL '1h';

+-------------------------------------------------------------------------------+
| IntervalMonthDayNano("7200000000000") + IntervalMonthDayNano("3600000000000") |
+-------------------------------------------------------------------------------+
| 0 years 0 mons 0 days 3 hours 0 mins 0.000000000 secs |
+-------------------------------------------------------------------------------+

-- Interval type does not support aggregation functions.
SELECT MIN(interval_value) from intervals;

Expand Down
11 changes: 11 additions & 0 deletions tests/cases/standalone/common/types/interval/interval.sql
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ SELECT TIMESTAMP '1992-09-20 11:30:00.123456' + interval_value as new_value from
-- TIMESTAMP CONSTANT - INTERVAL
SELECT TIMESTAMP '1992-09-20 11:30:00.123456' - interval_value as new_value from intervals;

-- Interval shortened names
SELECT INTERVAL '55h';

SELECT INTERVAL '-2mon';

SELECT INTERVAL '1y2w3d4h';

SELECT INTERVAL '7 days' - INTERVAL '1d';

SELECT INTERVAL '2h' + INTERVAL '1h';


-- Interval type does not support aggregation functions.
SELECT MIN(interval_value) from intervals;
Expand Down