Skip to content

Commit

Permalink
Migrate datetime documentation to code (#12966)
Browse files Browse the repository at this point in the history
* added code docs for the datetime functions

* removed old docs for time and date functions

* fixed description for to_unixtime()

* removed todo comments

* fix merge

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
jatin510 and alamb authored Oct 17, 2024
1 parent 0ed369e commit f718fe2
Show file tree
Hide file tree
Showing 14 changed files with 1,177 additions and 585 deletions.
27 changes: 26 additions & 1 deletion datafusion/functions/src/datetime/current_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ use arrow::datatypes::DataType::Date32;
use chrono::{Datelike, NaiveDate};

use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use std::sync::OnceLock;

#[derive(Debug)]
pub struct CurrentDateFunc {
Expand Down Expand Up @@ -95,4 +99,25 @@ impl ScalarUDFImpl for CurrentDateFunc {
ScalarValue::Date32(days),
)))
}

/// Returns the documentation for `current_date`, as provided by
/// `get_current_date_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_current_date_doc();
    Some(doc)
}
}

/// Backing storage for the `current_date` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `current_date`.
///
/// The value is built the first time this function is called and stored in
/// `DOCUMENTATION`; every later call hands back the same reference.
fn get_current_date_doc() -> &'static Documentation {
    // The raw string intentionally starts and ends with a newline.
    const DESCRIPTION: &str = r#"
Returns the current UTC date.
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
"#;
    const SYNTAX_EXAMPLE: &str = "current_date()";

    DOCUMENTATION.get_or_init(|| {
        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX_EXAMPLE);
        builder.build().unwrap()
    })
}
30 changes: 27 additions & 3 deletions datafusion/functions/src/datetime/current_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;

use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Time64;
use arrow::datatypes::TimeUnit::Nanosecond;
use std::any::Any;
use std::sync::OnceLock;

use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};

#[derive(Debug)]
pub struct CurrentTimeFunc {
Expand Down Expand Up @@ -84,4 +87,25 @@ impl ScalarUDFImpl for CurrentTimeFunc {
ScalarValue::Time64Nanosecond(nano),
)))
}

/// Returns the documentation for `current_time`, as provided by
/// `get_current_time_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_current_time_doc();
    Some(doc)
}
}

/// Backing storage for the `current_time` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `current_time`.
///
/// The value is built the first time this function is called and stored in
/// `DOCUMENTATION`; every later call hands back the same reference.
fn get_current_time_doc() -> &'static Documentation {
    // The raw string intentionally starts and ends with a newline.
    const DESCRIPTION: &str = r#"
Returns the current UTC time.
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
"#;
    const SYNTAX_EXAMPLE: &str = "current_time()";

    DOCUMENTATION.get_or_init(|| {
        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX_EXAMPLE);
        builder.build().unwrap()
    })
}
43 changes: 41 additions & 2 deletions datafusion/functions/src/datetime/date_bin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use arrow::array::temporal_conversions::NANOSECONDS;
use arrow::array::types::{
Expand All @@ -35,10 +35,11 @@ use datafusion_common::{exec_err, not_impl_err, plan_err, Result, ScalarValue};
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
};

use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;

#[derive(Debug)]
pub struct DateBinFunc {
Expand Down Expand Up @@ -163,6 +164,44 @@ impl ScalarUDFImpl for DateBinFunc {
Ok(SortProperties::Unordered)
}
}
/// Returns the documentation for `date_bin`, as provided by
/// `get_date_bin_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_date_bin_doc();
    Some(doc)
}
}

/// Backing storage for the `date_bin` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `date_bin`.
///
/// The value is built the first time this function is called and stored in
/// `DOCUMENTATION`; every later call hands back the same reference.
fn get_date_bin_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            .with_description(r#"
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
"#)
            .with_syntax_example("date_bin(interval, expression, origin-timestamp)")
            // The list of supported units describes `interval`, so it is
            // attached to that argument (same layout as `date_part` and
            // `date_trunc`) rather than to `origin-timestamp`.
            .with_argument(
                "interval",
                r#"Bin interval. The following intervals are supported:
- nanoseconds
- microseconds
- milliseconds
- seconds
- minutes
- hours
- days
- weeks
- months
- years
- century
"#,
            )
            .with_argument(
                "expression",
                "Time expression to operate on. Can be a constant, column, or function.",
            )
            .with_argument(
                "origin-timestamp",
                "Optional. Starting point used to determine bin boundaries. If not specified, defaults to `1970-01-01T00:00:00Z` (the UNIX epoch in UTC).",
            )
            .build()
            .unwrap()
    })
}

enum Interval {
Expand Down
45 changes: 43 additions & 2 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::str::FromStr;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
Expand All @@ -37,9 +37,10 @@ use datafusion_common::cast::{
as_timestamp_nanosecond_array, as_timestamp_second_array,
};
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
};

#[derive(Debug)]
Expand Down Expand Up @@ -217,6 +218,46 @@ impl ScalarUDFImpl for DatePartFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
/// Returns the documentation for `date_part`, as provided by
/// `get_date_part_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_date_part_doc();
    Some(doc)
}
}

/// Backing storage for the `date_part` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `date_part`.
///
/// The value is built the first time this function is called and stored in
/// `DOCUMENTATION`; every later call hands back the same reference.
fn get_date_part_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        // Text for the `part` argument, including the list of accepted parts.
        let part_help = r#"Part of the date to return. The following date parts are supported:
- year
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
- month
- week (week of the year)
- day (day of the month)
- hour
- minute
- second
- millisecond
- microsecond
- nanosecond
- dow (day of the week)
- doy (day of the year)
- epoch (seconds since Unix epoch)
"#;
        let expression_help =
            "Time expression to operate on. Can be a constant, column, or function.";

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            .with_description("Returns the specified part of the date as an integer.")
            .with_syntax_example("date_part(part, expression)");
        let builder = builder.with_argument("part", part_help);
        let builder = builder.with_argument("expression", expression_help);

        builder.build().unwrap()
    })
}

/// Invoke [`date_part`] and cast the result to Float64
Expand Down
39 changes: 37 additions & 2 deletions datafusion/functions/src/datetime/date_trunc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use std::any::Any;
use std::ops::{Add, Sub};
use std::str::FromStr;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use arrow::array::temporal_conversions::{
as_datetime_with_timezone, timestamp_ns_to_datetime,
Expand All @@ -36,12 +36,13 @@ use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
};

use chrono::{
DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;

#[derive(Debug)]
pub struct DateTruncFunc {
Expand Down Expand Up @@ -241,6 +242,40 @@ impl ScalarUDFImpl for DateTruncFunc {
Ok(SortProperties::Unordered)
}
}
/// Returns the documentation for `date_trunc`, as provided by
/// `get_date_trunc_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_date_trunc_doc();
    Some(doc)
}
}

/// Backing storage for the `date_trunc` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `date_trunc`.
///
/// The value is built the first time this function is called and stored in
/// `DOCUMENTATION`; every later call hands back the same reference.
fn get_date_trunc_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        // Text for the `precision` argument, including the accepted values.
        let precision_help = r#"Time precision to truncate to. The following precisions are supported:
- year / YEAR
- quarter / QUARTER
- month / MONTH
- week / WEEK
- day / DAY
- hour / HOUR
- minute / MINUTE
- second / SECOND
"#;
        let expression_help =
            "Time expression to operate on. Can be a constant, column, or function.";

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            .with_description("Truncates a timestamp value to a specified precision.")
            .with_syntax_example("date_trunc(precision, expression)");
        let builder = builder.with_argument("precision", precision_help);
        let builder = builder.with_argument("expression", expression_help);

        builder.build().unwrap()
    })
}

fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> Result<Option<T>>
Expand Down
29 changes: 26 additions & 3 deletions datafusion/functions/src/datetime/from_unixtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;

use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Int64, Timestamp};
use arrow::datatypes::TimeUnit::Second;
use std::any::Any;
use std::sync::OnceLock;

use datafusion_common::{exec_err, Result};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};

#[derive(Debug)]
pub struct FromUnixtimeFunc {
Expand Down Expand Up @@ -78,4 +81,24 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
}
}
}
/// Returns the documentation for `from_unixtime`, as provided by
/// `get_from_unixtime_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_from_unixtime_doc();
    Some(doc)
}
}

/// Backing storage for the `from_unixtime` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `from_unixtime`.
///
/// The value is built the first time this function is called and stored in
/// `DOCUMENTATION`; every later call hands back the same reference.
fn get_from_unixtime_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            // This file works with `Int64` input and `TimeUnit::Second`
            // timestamps, so the input unit is described as seconds.
            .with_description("Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) and return the corresponding timestamp.")
            .with_syntax_example("from_unixtime(expression)")
            .with_argument(
                "expression",
                "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
            )
            .build()
            .unwrap()
    })
}
Loading

0 comments on commit f718fe2

Please sign in to comment.