Skip to content

Commit

Permalink
[minor] overload from_unixtime func to have optional timezone paramet…
Browse files Browse the repository at this point in the history
…er (#13130)

* overloaded from ts

* Update docs/source/user-guide/sql/scalar_functions_new.md

Co-authored-by: Bruce Ritchie <bruce.ritchie@veeva.com>

* fixed return type

* added sql example

* optional in ∂oc

* review

---------

Co-authored-by: Bruce Ritchie <bruce.ritchie@veeva.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
3 people authored Nov 4, 2024
1 parent 85f92ef commit d19865e
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 23 deletions.
132 changes: 111 additions & 21 deletions datafusion/functions/src/datetime/from_unixtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@
// specific language governing permissions and limitations
// under the License.

use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Int64, Timestamp};
use arrow::datatypes::TimeUnit::Second;
use std::any::Any;
use std::sync::OnceLock;
use std::sync::{Arc, OnceLock};

use datafusion_common::{exec_err, Result};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Int64, Timestamp, Utf8};
use arrow::datatypes::TimeUnit::Second;
use datafusion_common::{exec_err, internal_err, ExprSchema, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};

#[derive(Debug)]
Expand All @@ -41,7 +42,10 @@ impl Default for FromUnixtimeFunc {
impl FromUnixtimeFunc {
pub fn new() -> Self {
Self {
signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
signature: Signature::one_of(
vec![Exact(vec![Int64, Utf8]), Exact(vec![Int64])],
Volatility::Immutable,
),
}
}
}
Expand All @@ -59,28 +63,63 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
&self.signature
}

fn return_type_from_exprs(
&self,
args: &[Expr],
_schema: &dyn ExprSchema,
arg_types: &[DataType],
) -> Result<DataType> {
match arg_types.len() {
1 => Ok(Timestamp(Second, None)),
2 => match &args[1] {
Expr::Literal(ScalarValue::Utf8(Some(tz))) => Ok(Timestamp(Second, Some(Arc::from(tz.to_string())))),
_ => exec_err!(
"Second argument for `from_unixtime` must be non-null utf8, received {:?}",
arg_types[1]),
},
_ => exec_err!(
"from_unixtime function requires 1 or 2 arguments, got {}",
arg_types.len()
),
}
}

fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(Timestamp(Second, None))
internal_err!("call return_type_from_exprs instead")
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
if args.len() != 1 {
let len = args.len();
if len != 1 && len != 2 {
return exec_err!(
"from_unixtime function requires 1 argument, got {}",
"from_unixtime function requires 1 or 2 argument, got {}",
args.len()
);
}

match args[0].data_type() {
Int64 => args[0].cast_to(&Timestamp(Second, None), None),
other => {
exec_err!(
"Unsupported data type {:?} for function from_unixtime",
other
)
}
if args[0].data_type() != Int64 {
return exec_err!(
"Unsupported data type {:?} for function from_unixtime",
args[0].data_type()
);
}

match len {
1 => args[0].cast_to(&Timestamp(Second, None), None),
2 => match &args[1] {
ColumnarValue::Scalar(ScalarValue::Utf8(Some(tz))) => args[0]
.cast_to(&Timestamp(Second, Some(Arc::from(tz.to_string()))), None),
_ => {
exec_err!(
"Unsupported data type {:?} for function from_unixtime",
args[1].data_type()
)
}
},
_ => unreachable!(),
}
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_from_unixtime_doc())
}
Expand All @@ -93,12 +132,63 @@ fn get_from_unixtime_doc() -> &'static Documentation {
Documentation::builder()
.with_doc_section(DOC_SECTION_DATETIME)
.with_description("Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.")
.with_syntax_example("from_unixtime(expression)")
.with_syntax_example("from_unixtime(expression[, timezone])")
.with_standard_argument("expression", None)
.with_argument(
"expression",
"Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
"timezone",
"Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC.",
)
.with_sql_example(r#"```sql
> select from_unixtime(1599572549, 'America/New_York');
+-----------------------------------------------------------+
| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
+-----------------------------------------------------------+
| 2020-09-08T09:42:29-04:00 |
+-----------------------------------------------------------+
```"#)
.build()
.unwrap()
})
}

#[cfg(test)]
mod test {
use crate::datetime::from_unixtime::FromUnixtimeFunc;
use datafusion_common::ScalarValue;
use datafusion_common::ScalarValue::Int64;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

#[test]
fn test_without_timezone() {
let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))];

let result = FromUnixtimeFunc::new().invoke(&args).unwrap();

match result {
ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), None)) => {
assert_eq!(sec, 1729900800);
}
_ => panic!("Expected scalar value"),
}
}

#[test]
fn test_with_timezone() {
let args = [
ColumnarValue::Scalar(Int64(Some(1729900800))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some(
"America/New_York".to_string(),
))),
];

let result = FromUnixtimeFunc::new().invoke(&args).unwrap();

match result {
ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), Some(tz))) => {
assert_eq!(sec, 1729900800);
assert_eq!(tz.to_string(), "America/New_York");
}
_ => panic!("Expected scalar value"),
}
}
}
23 changes: 23 additions & 0 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,29 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3;
2020-09-08T12:42:29
2020-09-08T11:42:29

# from_unixtime single

query P
SELECT from_unixtime(1599572549190855123 / 1000000000, 'America/New_York');
----
2020-09-08T09:42:29-04:00

# from_unixtime with timezone
query P
SELECT from_unixtime(ts / 1000000000, 'Asia/Istanbul') FROM ts_data LIMIT 3;
----
2020-09-08T16:42:29+03:00
2020-09-08T15:42:29+03:00
2020-09-08T14:42:29+03:00

# from_unixtime with utc timezone
query P
SELECT from_unixtime(ts / 1000000000, 'UTC') FROM ts_data LIMIT 3;
----
2020-09-08T13:42:29Z
2020-09-08T12:42:29Z
2020-09-08T11:42:29Z

# to_timestamp

query I
Expand Down
16 changes: 14 additions & 2 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2037,12 +2037,24 @@ _Alias of [date_trunc](#date_trunc)._
Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.

```
from_unixtime(expression)
from_unixtime(expression[, timezone])
```

#### Arguments

- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators.
- **timezone**: Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC.

#### Example

```sql
> select from_unixtime(1599572549, 'America/New_York');
+-----------------------------------------------------------+
| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
+-----------------------------------------------------------+
| 2020-09-08T09:42:29-04:00 |
+-----------------------------------------------------------+
```

### `make_date`

Expand Down

0 comments on commit d19865e

Please sign in to comment.