Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

doc-gen: migrate scalar functions (datetime) documentation 1/2 #13920

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions datafusion/functions/src/datetime/current_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,21 @@ use arrow::datatypes::DataType::Date32;
use chrono::{Datelike, NaiveDate};

use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use std::sync::OnceLock;
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Returns the current UTC date.

The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
"#,
syntax_example = "current_date()"
)]
#[derive(Debug)]
pub struct CurrentDateFunc {
signature: Signature,
Expand Down Expand Up @@ -105,22 +113,6 @@ impl ScalarUDFImpl for CurrentDateFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_current_date_doc())
self.doc()
}
}

// Lazily-initialised storage for the `current_date()` documentation; it is
// filled by `get_current_date_doc` the first time that function is called.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing `current_date()`.
///
/// The value is constructed inside `OnceLock::get_or_init`, so the builder
/// closure runs at most once and later calls hand back the same `'static`
/// reference. The entry is filed under `DOC_SECTION_DATETIME` and carries a
/// description plus the syntax example `current_date()`; no arguments or SQL
/// examples are attached.
fn get_current_date_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Returns the current UTC date.

The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
"#,
"current_date()")
.build()
})
}
30 changes: 11 additions & 19 deletions datafusion/functions/src/datetime/current_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Time64;
use arrow::datatypes::TimeUnit::Nanosecond;
use std::any::Any;
use std::sync::OnceLock;

use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Returns the current UTC time.

The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
"#,
syntax_example = "current_time()"
)]
#[derive(Debug)]
pub struct CurrentTimeFunc {
signature: Signature,
Expand Down Expand Up @@ -93,22 +101,6 @@ impl ScalarUDFImpl for CurrentTimeFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_current_time_doc())
self.doc()
}
}

// Lazily-initialised storage for the `current_time()` documentation; it is
// filled by `get_current_time_doc` the first time that function is called.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing `current_time()`.
///
/// The value is constructed inside `OnceLock::get_or_init`, so the builder
/// closure runs at most once and later calls hand back the same `'static`
/// reference. The entry is filed under `DOC_SECTION_DATETIME` and carries a
/// description plus the syntax example `current_time()`; no arguments or SQL
/// examples are attached.
fn get_current_time_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Returns the current UTC time.

The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
"#,
"current_time()")
.build()
})
}
118 changes: 57 additions & 61 deletions datafusion/functions/src/datetime/date_bin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::temporal_conversions::NANOSECONDS;
use arrow::array::types::{
Expand All @@ -37,10 +37,64 @@ use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
};
use datafusion_macros::user_doc;

use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.

For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
"#,
syntax_example = "date_bin(interval, expression, origin-timestamp)",
sql_example = r#"```sql
-- Bin the timestamp into 1 day intervals
> SELECT date_bin(interval '1 day', time) as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T00:00:00 |
| 2023-01-03T00:00:00 |
+---------------------+
2 row(s) fetched.

-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T03:00:00 |
| 2023-01-03T03:00:00 |
+---------------------+
2 row(s) fetched.
```"#,
argument(name = "interval", description = "Bin interval."),
argument(
name = "expression",
description = "Time expression to operate on. Can be a constant, column, or function."
),
argument(
name = "origin-timestamp",
description = r#"Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). The following intervals are supported:

- nanoseconds
- microseconds
- milliseconds
- seconds
- minutes
- hours
- days
- weeks
- months
- years
- century
"#
)
)]
#[derive(Debug)]
pub struct DateBinFunc {
signature: Signature,
Expand Down Expand Up @@ -169,68 +223,10 @@ impl ScalarUDFImpl for DateBinFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_date_bin_doc())
self.doc()
}
}

// Lazily-initialised storage for the `date_bin` documentation; it is filled
// by `get_date_bin_doc` the first time that function is called.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing `date_bin`.
///
/// The value is constructed inside `OnceLock::get_or_init`, so the builder
/// closure runs at most once and later calls hand back the same `'static`
/// reference. The entry is filed under `DOC_SECTION_DATETIME` and contains:
///
/// - a description and the syntax example
///   `date_bin(interval, expression, origin-timestamp)`;
/// - one SQL example block showing binning with and without an explicit origin;
/// - three documented arguments: `interval`, `expression` and
///   `origin-timestamp`.
///
/// NOTE(review): the list of supported intervals is stored inside the
/// `origin-timestamp` argument text although it describes `interval` values —
/// confirm this placement is intended.
fn get_date_bin_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.

For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
"#,
"date_bin(interval, expression, origin-timestamp)")
.with_sql_example(r#"```sql
-- Bin the timestamp into 1 day intervals
> SELECT date_bin(interval '1 day', time) as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T00:00:00 |
| 2023-01-03T00:00:00 |
+---------------------+
2 row(s) fetched.

-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T03:00:00 |
| 2023-01-03T03:00:00 |
+---------------------+
2 row(s) fetched.
```
"#)
// Arguments are registered in the same order as in the syntax example.
.with_argument("interval", "Bin interval.")
.with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.")
.with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC).

The following intervals are supported:

- nanoseconds
- microseconds
- milliseconds
- seconds
- minutes
- hours
- days
- weeks
- months
- years
- century
")
.build()
})
}

enum Interval {
Nanoseconds(i64),
Months(i64),
Expand Down
78 changes: 36 additions & 42 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::str::FromStr;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
Expand All @@ -41,11 +41,42 @@ use datafusion_common::{
ExprSchema, Result, ScalarValue,
};
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_DATETIME, ColumnarValue, Documentation, Expr,
ScalarUDFImpl, Signature, TypeSignature, Volatility,
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, TypeSignature,
Volatility,
};
use datafusion_expr_common::signature::TypeSignatureClass;

use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Returns the specified part of the date as an integer.",
syntax_example = "date_part(part, expression)",
alternative_syntax = "extract(field FROM source)",
argument(
name = "part",
description = r#"Part of the date to return. The following date parts are supported:

- year
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
- month
- week (week of the year)
- day (day of the month)
- hour
- minute
- second
- millisecond
- microsecond
- nanosecond
- dow (day of the week)
- doy (day of the year)
- epoch (seconds since Unix epoch)
"#
),
argument(
name = "expression",
description = "Time expression to operate on. Can be a constant, column, or function."
)
)]
#[derive(Debug)]
pub struct DatePartFunc {
signature: Signature,
Expand Down Expand Up @@ -190,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
&self.aliases
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_date_part_doc())
self.doc()
}
}

Expand All @@ -206,43 +237,6 @@ fn part_normalization(part: &str) -> &str {
.unwrap_or(part)
}

// Lazily-initialised storage for the `date_part` documentation; it is filled
// by `get_date_part_doc` the first time that function is called.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing `date_part`.
///
/// The value is constructed inside `OnceLock::get_or_init`, so the builder
/// closure runs at most once and later calls hand back the same `'static`
/// reference. The entry is filed under `DOC_SECTION_DATETIME` and contains:
///
/// - a one-line description and the syntax example
///   `date_part(part, expression)`;
/// - two documented arguments, `part` (with the list of supported date parts)
///   and `expression`;
/// - the alternative syntax `extract(field FROM source)`.
fn get_date_part_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
"Returns the specified part of the date as an integer.",
"date_part(part, expression)")
.with_argument(
"part",
r#"Part of the date to return. The following date parts are supported:

- year
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
- month
- week (week of the year)
- day (day of the month)
- hour
- minute
- second
- millisecond
- microsecond
- nanosecond
- dow (day of the week)
- doy (day of the year)
- epoch (seconds since Unix epoch)
"#,
)
.with_argument(
"expression",
"Time expression to operate on. Can be a constant, column, or function.",
)
// Registers the SQL-standard `extract(...)` spelling as an alternative form.
.with_alternative_syntax("extract(field FROM source)")
.build()
})
}

/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
/// result to a total number of seconds, milliseconds, microseconds or
/// nanoseconds
Expand Down
Loading
Loading