From bbe15beebce0d37683f70f6c9fdd737066f87d81 Mon Sep 17 00:00:00 2001 From: Runji Wang Date: Tue, 4 Apr 2023 15:36:43 +0800 Subject: [PATCH] feat: support `date_part` function and more fields for `extract` (#8830) Signed-off-by: Runji Wang Co-authored-by: xiangjinwu <17769960+xiangjinwu@users.noreply.github.com> --- .../join/iejoin/test_iejoin.test.slt.part | 80 +++--- proto/expr.proto | 1 + src/common/src/array/arrow.rs | 6 +- src/common/src/array/interval_array.rs | 12 +- src/common/src/types/chrono_wrapper.rs | 8 +- src/common/src/types/interval.rs | 228 ++++++++++++---- src/common/src/util/value_encoding/mod.rs | 6 +- src/expr/src/vector_op/arithmetic_op.rs | 8 +- src/expr/src/vector_op/cast.rs | 2 +- src/expr/src/vector_op/extract.rs | 257 +++++++++++++++--- src/expr/src/vector_op/tumble.rs | 4 +- src/frontend/src/binder/expr/function.rs | 1 + src/tests/regress/data/expected/interval.out | 4 +- src/tests/regress/data/sql/date.sql | 88 +++--- src/tests/regress/data/sql/interval.sql | 70 ++--- src/tests/regress/data/sql/time.sql | 16 +- 16 files changed, 548 insertions(+), 243 deletions(-) diff --git a/e2e_test/batch/duckdb/join/iejoin/test_iejoin.test.slt.part b/e2e_test/batch/duckdb/join/iejoin/test_iejoin.test.slt.part index 77c2a4fc7f2f..42d98d97d8c1 100644 --- a/e2e_test/batch/duckdb/join/iejoin/test_iejoin.test.slt.part +++ b/e2e_test/batch/duckdb/join/iejoin/test_iejoin.test.slt.part @@ -140,46 +140,46 @@ WITH data_table AS ( GROUP BY bucket, low, high ORDER BY bucket; ---- -0 1577836800 1578627360 10 -1 1578627360 1579417920 9 -2 1579417920 1580208480 9 -3 1580208480 1580999040 9 -4 1580999040 1581789600 9 -5 1581789600 1582580160 9 -6 1582580160 1583370720 10 -7 1583370720 1584161280 9 -8 1584161280 1584951840 9 -9 1584951840 1585742400 9 -10 1585742400 1586532960 9 -11 1586532960 1587323520 9 -12 1587323520 1588114080 9 -13 1588114080 1588904640 10 -14 1588904640 1589695200 9 -15 1589695200 1590485760 9 -16 1590485760 1591276320 9 -17 1591276320 
1592066880 9 -18 1592066880 1592857440 9 -19 1592857440 1593648000 9 -20 1593648000 1594438560 10 -21 1594438560 1595229120 9 -22 1595229120 1596019680 9 -23 1596019680 1596810240 9 -24 1596810240 1597600800 9 -25 1597600800 1598391360 9 -26 1598391360 1599181920 10 -27 1599181920 1599972480 9 -28 1599972480 1600763040 9 -29 1600763040 1601553600 9 -30 1601553600 1602344160 9 -31 1602344160 1603134720 9 -32 1603134720 1603925280 9 -33 1603925280 1604715840 10 -34 1604715840 1605506400 9 -35 1605506400 1606296960 9 -36 1606296960 1607087520 9 -37 1607087520 1607878080 9 -38 1607878080 1608668640 9 -39 1608668640 1609459200 9 +0 1577836800.000000 1578627360.000000 10 +1 1578627360.000000 1579417920.000000 9 +2 1579417920.000000 1580208480.000000 9 +3 1580208480.000000 1580999040.000000 9 +4 1580999040.000000 1581789600.000000 9 +5 1581789600.000000 1582580160.000000 9 +6 1582580160.000000 1583370720.000000 10 +7 1583370720.000000 1584161280.000000 9 +8 1584161280.000000 1584951840.000000 9 +9 1584951840.000000 1585742400.000000 9 +10 1585742400.000000 1586532960.000000 9 +11 1586532960.000000 1587323520.000000 9 +12 1587323520.000000 1588114080.000000 9 +13 1588114080.000000 1588904640.000000 10 +14 1588904640.000000 1589695200.000000 9 +15 1589695200.000000 1590485760.000000 9 +16 1590485760.000000 1591276320.000000 9 +17 1591276320.000000 1592066880.000000 9 +18 1592066880.000000 1592857440.000000 9 +19 1592857440.000000 1593648000.000000 9 +20 1593648000.000000 1594438560.000000 10 +21 1594438560.000000 1595229120.000000 9 +22 1595229120.000000 1596019680.000000 9 +23 1596019680.000000 1596810240.000000 9 +24 1596810240.000000 1597600800.000000 9 +25 1597600800.000000 1598391360.000000 9 +26 1598391360.000000 1599181920.000000 10 +27 1599181920.000000 1599972480.000000 9 +28 1599972480.000000 1600763040.000000 9 +29 1600763040.000000 1601553600.000000 9 +30 1601553600.000000 1602344160.000000 9 +31 1602344160.000000 1603134720.000000 9 +32 1603134720.000000 
1603925280.000000 9 +33 1603925280.000000 1604715840.000000 10 +34 1604715840.000000 1605506400.000000 9 +35 1605506400.000000 1606296960.000000 9 +36 1606296960.000000 1607087520.000000 9 +37 1607087520.000000 1607878080.000000 9 +38 1607878080.000000 1608668640.000000 9 +39 1608668640.000000 1609459200.000000 9 statement ok diff --git a/proto/expr.proto b/proto/expr.proto index f6937eba0409..4d27757b7380 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -45,6 +45,7 @@ message ExprNode { BITWISE_SHIFT_RIGHT = 36; // date functions EXTRACT = 101; + DATE_PART = 102; TUMBLE_START = 103; // From f64 to timestamp. // e.g. `select to_timestamp(1672044740.0)` diff --git a/src/common/src/array/arrow.rs b/src/common/src/array/arrow.rs index 3941a7f94375..ddeaa5045577 100644 --- a/src/common/src/array/arrow.rs +++ b/src/common/src/array/arrow.rs @@ -353,10 +353,10 @@ impl FromIntoArrow for Interval { fn into_arrow(self) -> Self::ArrowType { arrow_array::types::IntervalMonthDayNanoType::make_value( - self.get_months(), - self.get_days(), + self.months(), + self.days(), // TODO: this may overflow and we need `try_into` - self.get_usecs() * 1000, + self.usecs() * 1000, ) } } diff --git a/src/common/src/array/interval_array.rs b/src/common/src/array/interval_array.rs index 5e23ebf4eed1..b70ea5602694 100644 --- a/src/common/src/array/interval_array.rs +++ b/src/common/src/array/interval_array.rs @@ -35,17 +35,17 @@ mod tests { } let ret_arr = array_builder.finish(); for v in ret_arr.iter().flatten() { - assert_eq!(v.get_months(), 12); - assert_eq!(v.get_days(), 0); + assert_eq!(v.months(), 12); + assert_eq!(v.days(), 0); } let ret_arr = IntervalArray::from_iter([Some(Interval::from_ymd(1, 0, 0)), None]); let v = ret_arr.value_at(0).unwrap(); - assert_eq!(v.get_months(), 12); - assert_eq!(v.get_days(), 0); + assert_eq!(v.months(), 12); + assert_eq!(v.days(), 0); let v = ret_arr.value_at(1); assert_eq!(v, None); let v = unsafe { ret_arr.value_at_unchecked(0).unwrap() }; - 
assert_eq!(v.get_months(), 12); - assert_eq!(v.get_days(), 0); + assert_eq!(v.months(), 12); + assert_eq!(v.days(), 0); } } diff --git a/src/common/src/types/chrono_wrapper.rs b/src/common/src/types/chrono_wrapper.rs index 6859c6e4d9de..b4d7e611eb61 100644 --- a/src/common/src/types/chrono_wrapper.rs +++ b/src/common/src/types/chrono_wrapper.rs @@ -525,13 +525,13 @@ impl CheckedAdd for Timestamp { fn checked_add(self, rhs: Interval) -> Option { let mut date = self.0.date(); - if rhs.get_months() != 0 { + if rhs.months() != 0 { // NaiveDate don't support add months. We need calculate manually let mut day = date.day() as i32; let mut month = date.month() as i32; let mut year = date.year(); // Calculate the number of year in this interval - let interval_months = rhs.get_months(); + let interval_months = rhs.months(); let year_diff = interval_months / 12; year += year_diff; @@ -556,8 +556,8 @@ impl CheckedAdd for Timestamp { date = NaiveDate::from_ymd_opt(year, month as u32, day as u32)?; } let mut datetime = NaiveDateTime::new(date, self.0.time()); - datetime = datetime.checked_add_signed(Duration::days(rhs.get_days().into()))?; - datetime = datetime.checked_add_signed(Duration::microseconds(rhs.get_usecs()))?; + datetime = datetime.checked_add_signed(Duration::days(rhs.days().into()))?; + datetime = datetime.checked_add_signed(Duration::microseconds(rhs.usecs()))?; Some(Timestamp::new(datetime)) } diff --git a/src/common/src/types/interval.rs b/src/common/src/types/interval.rs index 20e447491566..dff599787675 100644 --- a/src/common/src/types/interval.rs +++ b/src/common/src/types/interval.rs @@ -57,6 +57,7 @@ impl Interval { usecs: i64::MIN, }; + /// Creates a new `Interval` from the given number of months, days, and microseconds. pub fn from_month_day_usec(months: i32, days: i32, usecs: i64) -> Self { Interval { months, @@ -65,22 +66,163 @@ impl Interval { } } - pub fn get_days(&self) -> i32 { - self.days + /// Returns the total number of whole months. 
+ /// + /// Note the difference between `months` and `months_field`. + /// + /// We have: `months = years_field * 12 + months_field` + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "5 yrs 1 month".parse().unwrap(); + /// assert_eq!(interval.months(), 61); + /// assert_eq!(interval.months_field(), 1); + /// ``` + pub fn months(&self) -> i32 { + self.months } - pub fn get_months(&self) -> i32 { - self.months + /// Returns the number of days. + pub fn days(&self) -> i32 { + self.days } - pub fn get_usecs(&self) -> i64 { + /// Returns the number of microseconds. + /// + /// Note the difference between `usecs` and `seconds_in_micros`. + /// + /// We have: `usecs = (hours_field * 3600 + minutes_field * 60) * 1_000_000 + + /// seconds_in_micros`. + pub fn usecs(&self) -> i64 { self.usecs } - pub fn get_usecs_of_day(&self) -> u64 { + /// Calculates the remaining number of microseconds. + /// range: `0..86_400_000_000` + /// + /// Note the difference between `usecs` and `usecs_of_day`. + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "-1:00:00".parse().unwrap(); + /// assert_eq!(interval.usecs(), -1 * 60 * 60 * 1_000_000); + /// assert_eq!(interval.usecs_of_day(), 23 * 60 * 60 * 1_000_000); + /// ``` + pub fn usecs_of_day(&self) -> u64 { self.usecs.rem_euclid(USECS_PER_DAY) as u64 } + /// Returns the years field. range: unlimited + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "2332 yrs 12 months".parse().unwrap(); + /// assert_eq!(interval.years_field(), 2333); + /// ``` + pub fn years_field(&self) -> i32 { + self.months / 12 + } + + /// Returns the months field. 
range: `-11..=11` + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "15 months".parse().unwrap(); + /// assert_eq!(interval.months_field(), 3); + /// + /// let interval: Interval = "-15 months".parse().unwrap(); + /// assert_eq!(interval.months_field(), -3); + /// ``` + pub fn months_field(&self) -> i32 { + self.months % 12 + } + + /// Returns the days field. range: unlimited + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "1 months 100 days 25:00:00".parse().unwrap(); + /// assert_eq!(interval.days_field(), 100); + /// ``` + pub fn days_field(&self) -> i32 { + self.days + } + + /// Returns the hours field. range: unlimited + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "25:00:00".parse().unwrap(); + /// assert_eq!(interval.hours_field(), 25); + /// + /// let interval: Interval = "-25:00:00".parse().unwrap(); + /// assert_eq!(interval.hours_field(), -25); + /// ``` + pub fn hours_field(&self) -> i64 { + self.usecs / USECS_PER_SEC / 3600 + } + + /// Returns the minutes field. range: `-59..=59` + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "00:20:00".parse().unwrap(); + /// assert_eq!(interval.minutes_field(), 20); + /// + /// let interval: Interval = "-00:20:00".parse().unwrap(); + /// assert_eq!(interval.minutes_field(), -20); + /// ``` + pub fn minutes_field(&self) -> i32 { + (self.usecs / USECS_PER_SEC / 60 % 60) as i32 + } + + /// Returns the seconds field, including fractional parts, in microseconds. 
+ /// range: `-59_999_999..=59_999_999` + /// + /// # Example + /// ``` + /// # use risingwave_common::types::Interval; + /// let interval: Interval = "01:02:03.45678".parse().unwrap(); + /// assert_eq!(interval.seconds_in_micros(), 3_456_780); + /// + /// let interval: Interval = "-01:02:03.45678".parse().unwrap(); + /// assert_eq!(interval.seconds_in_micros(), -3_456_780); + /// ``` + pub fn seconds_in_micros(&self) -> i32 { + (self.usecs % (USECS_PER_SEC * 60)) as i32 + } + + /// Returns the total number of microseconds, as defined by PostgreSQL `extract`. + /// + /// Note this value is not used by interval ordering (`IntervalCmpValue`) and is not consistent + /// with it. + pub fn epoch_in_micros(&self) -> i128 { + // https://github.com/postgres/postgres/blob/REL_15_2/src/backend/utils/adt/timestamp.c#L5304 + + const DAYS_PER_YEAR_X4: i32 = 365 * 4 + 1; + const DAYS_PER_MONTH: i32 = 30; + const SECS_PER_DAY: i32 = 86400; + const MONTHS_PER_YEAR: i32 = 12; + + // To do this calculation in integer arithmetic even though + // DAYS_PER_YEAR is fractional, multiply everything by 4 and then + // divide by 4 again at the end. This relies on DAYS_PER_YEAR + // being a multiple of 0.25 and on SECS_PER_DAY being a multiple + // of 4. 
+ let secs_from_day_month = ((DAYS_PER_YEAR_X4 as i64) + * (self.months / MONTHS_PER_YEAR) as i64 + + (4 * DAYS_PER_MONTH as i64) * (self.months % MONTHS_PER_YEAR) as i64 + + 4 * self.days as i64) + * (SECS_PER_DAY / 4) as i64; + + secs_from_day_month as i128 * USECS_PER_SEC as i128 + self.usecs as i128 + } + pub fn to_protobuf(self, output: &mut T) -> ArrayResult { output.write_i32::(self.months)?; output.write_i32::(self.days)?; @@ -1104,43 +1246,35 @@ fn convert_hms(c: &mut Vec, t: &mut Vec) -> Option<()> { if c.len() > 3 { return None; } - const HOUR: usize = 0; - const MINUTE: usize = 1; - const SECOND: usize = 2; let mut is_neg = false; - for (i, s) in c.iter().enumerate() { - match i { - HOUR => { - let v = s.parse().ok()?; - is_neg = v < 0; - t.push(TimeStrToken::Num(v)); - t.push(TimeStrToken::TimeUnit(DateTimeField::Hour)) - } - MINUTE => { - let mut v: i64 = s.parse().ok()?; - if !(0..60).contains(&v) { - return None; - } - if is_neg { - v = v.checked_neg()?; - } - t.push(TimeStrToken::Num(v)); - t.push(TimeStrToken::TimeUnit(DateTimeField::Minute)) - } - SECOND => { - let mut v: F64 = s.parse().ok()?; - // PostgreSQL allows '60.x' for seconds. - if !(0f64 <= *v && *v < 61f64) { - return None; - } - if is_neg { - v = v.checked_neg()?; - } - t.push(TimeStrToken::Second(v)); - t.push(TimeStrToken::TimeUnit(DateTimeField::Second)) - } - _ => unreachable!(), + if let Some(s) = c.get(0) { + let v = s.parse().ok()?; + is_neg = s.starts_with('-'); + t.push(TimeStrToken::Num(v)); + t.push(TimeStrToken::TimeUnit(DateTimeField::Hour)) + } + if let Some(s) = c.get(1) { + let mut v: i64 = s.parse().ok()?; + if !(0..60).contains(&v) { + return None; + } + if is_neg { + v = v.checked_neg()?; + } + t.push(TimeStrToken::Num(v)); + t.push(TimeStrToken::TimeUnit(DateTimeField::Minute)) + } + if let Some(s) = c.get(2) { + let mut v: F64 = s.parse().ok()?; + // PostgreSQL allows '60.x' for seconds. 
+ if !(0f64 <= *v && *v < 61f64) { + return None; + } + if is_neg { + v = v.checked_neg()?; } + t.push(TimeStrToken::Second(v)); + t.push(TimeStrToken::TimeUnit(DateTimeField::Second)) } Some(()) } @@ -1505,9 +1639,9 @@ mod tests { } Some((rhs_months, rhs_days, rhs_usecs, rhs_str)) => { // We should test individual fields rather than using custom `Eq` - assert_eq!(actual_deserialize.unwrap().get_months(), rhs_months); - assert_eq!(actual_deserialize.unwrap().get_days(), rhs_days); - assert_eq!(actual_deserialize.unwrap().get_usecs(), rhs_usecs); + assert_eq!(actual_deserialize.unwrap().months(), rhs_months); + assert_eq!(actual_deserialize.unwrap().days(), rhs_days); + assert_eq!(actual_deserialize.unwrap().usecs(), rhs_usecs); assert_eq!(actual_deserialize.unwrap().to_string(), rhs_str); } } @@ -1517,9 +1651,9 @@ mod tests { let input = Interval::from_month_day_usec(i32::MIN, -30, 1); let actual_deserialize = IntervalCmpValue::from(input).as_justified(); // It has a justified interval within range, and can be obtained by our deserialization. 
- assert_eq!(actual_deserialize.unwrap().get_months(), i32::MIN); - assert_eq!(actual_deserialize.unwrap().get_days(), -29); - assert_eq!(actual_deserialize.unwrap().get_usecs(), -USECS_PER_DAY + 1); + assert_eq!(actual_deserialize.unwrap().months(), i32::MIN); + assert_eq!(actual_deserialize.unwrap().days(), -29); + assert_eq!(actual_deserialize.unwrap().usecs(), -USECS_PER_DAY + 1); } #[test] diff --git a/src/common/src/util/value_encoding/mod.rs b/src/common/src/util/value_encoding/mod.rs index fd4e5fd7cd4e..eb03fcd7ca67 100644 --- a/src/common/src/util/value_encoding/mod.rs +++ b/src/common/src/util/value_encoding/mod.rs @@ -293,9 +293,9 @@ fn estimate_serialize_str_size(bytes: &[u8]) -> usize { } fn serialize_interval(interval: &Interval, buf: &mut impl BufMut) { - buf.put_i32_le(interval.get_months()); - buf.put_i32_le(interval.get_days()); - buf.put_i64_le(interval.get_usecs()); + buf.put_i32_le(interval.months()); + buf.put_i32_le(interval.days()); + buf.put_i64_le(interval.usecs()); } fn estimate_serialize_interval_size() -> usize { diff --git a/src/expr/src/vector_op/arithmetic_op.rs b/src/expr/src/vector_op/arithmetic_op.rs index fe89cab3a043..f875763817e8 100644 --- a/src/expr/src/vector_op/arithmetic_op.rs +++ b/src/expr/src/vector_op/arithmetic_op.rs @@ -234,14 +234,14 @@ pub fn interval_timestamptz_add(l: Interval, r: i64) -> Result { #[inline(always)] fn timestamptz_interval_inner(l: i64, r: Interval, f: fn(i64, i64) -> Option) -> Result { // Without session TimeZone, we cannot add month/day in local time. See #5826. - if r.get_months() != 0 || r.get_days() != 0 { + if r.months() != 0 || r.days() != 0 { return Err(ExprError::UnsupportedFunction( "timestamp with time zone +/- interval of days".into(), )); } let result: Option = try { - let delta_usecs = r.get_usecs(); + let delta_usecs = r.usecs(); f(l, delta_usecs)? 
}; @@ -280,7 +280,7 @@ pub fn time_time_sub(l: Time, r: Time) -> Result { #[function("subtract(time, interval) -> time")] pub fn time_interval_sub(l: Time, r: Interval) -> Result