From 9e636d8fe920340409e527da36cff3d2c25aef5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20=C5=9Een?= Date: Thu, 24 Oct 2024 23:28:13 +0300 Subject: [PATCH] [docs]: migrate lead/lag window function docs to new docs (#13095) * added lead-lag docs * deleted old --- datafusion/functions-window/src/lead_lag.rs | 58 ++++++++++++++++++- .../source/user-guide/sql/window_functions.md | 30 ---------- .../user-guide/sql/window_functions_new.md | 33 +++++++++++ 3 files changed, 88 insertions(+), 33 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index f81521099751..bbe50cbbdc8a 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -22,9 +22,10 @@ use datafusion_common::arrow::array::ArrayRef; use datafusion_common::arrow::datatypes::DataType; use datafusion_common::arrow::datatypes::Field; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::window_doc_sections::DOC_SECTION_ANALYTICAL; use datafusion_expr::{ - Literal, PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, Volatility, - WindowUDFImpl, + Documentation, Literal, PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, + Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::expr::ExpressionArgs; use datafusion_functions_window_common::field::WindowUDFFieldArgs; @@ -34,7 +35,7 @@ use std::any::Any; use std::cmp::min; use std::collections::VecDeque; use std::ops::{Neg, Range}; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; get_or_init_udwf!( Lag, @@ -147,6 +148,50 @@ impl WindowShift { } } +static LAG_DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_lag_doc() -> &'static Documentation { + LAG_DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_ANALYTICAL) + .with_description( + "Returns value evaluated at the row that is offset rows before the \ + current row 
within the partition; if there is no such row, instead return default \ + (which must be of the same type as value).", + ) + .with_syntax_example("lag(expression, offset, default)") + .with_argument("expression", "Expression to operate on") + .with_argument("offset", "Integer. Specifies how many rows back \ + the value of expression should be retrieved. Defaults to 1.") + .with_argument("default", "The default value if the offset is \ + not within the partition. Must be of the same type as expression.") + .build() + .unwrap() + }) +} + +static LEAD_DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_lead_doc() -> &'static Documentation { + LEAD_DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_ANALYTICAL) + .with_description( + "Returns value evaluated at the row that is offset rows after the \ + current row within the partition; if there is no such row, instead return default \ + (which must be of the same type as value).", + ) + .with_syntax_example("lead(expression, offset, default)") + .with_argument("expression", "Expression to operate on") + .with_argument("offset", "Integer. Specifies how many rows \ + forward the value of expression should be retrieved. Defaults to 1.") + .with_argument("default", "The default value if the offset is \ + not within the partition. 
Must be of the same type as expression.") + .build() + .unwrap() + }) +} + impl WindowUDFImpl for WindowShift { fn as_any(&self) -> &dyn Any { self @@ -212,6 +257,13 @@ impl WindowUDFImpl for WindowShift { WindowShiftKind::Lead => ReversedUDWF::Reversed(lead_udwf()), } } + + fn documentation(&self) -> Option<&Documentation> { + match self.kind { + WindowShiftKind::Lag => Some(get_lag_doc()), + WindowShiftKind::Lead => Some(get_lead_doc()), + } + } } /// When `lead`/`lag` is evaluated on a `NULL` expression we attempt to diff --git a/docs/source/user-guide/sql/window_functions.md b/docs/source/user-guide/sql/window_functions.md index 6c0de711bc0c..0799859e4371 100644 --- a/docs/source/user-guide/sql/window_functions.md +++ b/docs/source/user-guide/sql/window_functions.md @@ -184,8 +184,6 @@ ntile(expression) - [cume_dist](#cume_dist) - [percent_rank](#percent_rank) -- [lag](#lag) -- [lead](#lead) - [first_value](#first_value) - [last_value](#last_value) - [nth_value](#nth_value) @@ -206,34 +204,6 @@ Relative rank of the current row: (rank - 1) / (total rows - 1). percent_rank() ``` -### `lag` - -Returns value evaluated at the row that is offset rows before the current row within the partition; if there is no such row, instead return default (which must be of the same type as value). Both offset and default are evaluated with respect to the current row. If omitted, offset defaults to 1 and default to null. - -```sql -lag(expression, offset, default) -``` - -#### Arguments - -- **expression**: Expression to operate on -- **offset**: Integer. Specifies how many rows back the value of _expression_ should be retrieved. Defaults to 1. -- **default**: The default value if the offset is not within the partition. Must be of the same type as _expression_. - -### `lead` - -Returns value evaluated at the row that is offset rows after the current row within the partition; if there is no such row, instead return default (which must be of the same type as value). 
Both offset and default are evaluated with respect to the current row. If omitted, offset defaults to 1 and default to null. - -```sql -lead(expression, offset, default) -``` - -#### Arguments - -- **expression**: Expression to operate on -- **offset**: Integer. Specifies how many rows forward the value of _expression_ should be retrieved. Defaults to 1. -- **default**: The default value if the offset is not within the partition. Must be of the same type as _expression_. - ### `first_value` Returns value evaluated at the row that is the first row of the window frame. diff --git a/docs/source/user-guide/sql/window_functions_new.md b/docs/source/user-guide/sql/window_functions_new.md index 89ce2284a70c..267060abfdcc 100644 --- a/docs/source/user-guide/sql/window_functions_new.md +++ b/docs/source/user-guide/sql/window_functions_new.md @@ -202,3 +202,36 @@ Number of the current row within its partition, counting from 1. ``` row_number() ``` + +## Analytical Functions + +- [lag](#lag) +- [lead](#lead) + +### `lag` + +Returns value evaluated at the row that is offset rows before the current row within the partition; if there is no such row, instead return default (which must be of the same type as value). + +``` +lag(expression, offset, default) +``` + +#### Arguments + +- **expression**: Expression to operate on +- **offset**: Integer. Specifies how many rows back the value of expression should be retrieved. Defaults to 1. +- **default**: The default value if the offset is not within the partition. Must be of the same type as expression. + +### `lead` + +Returns value evaluated at the row that is offset rows after the current row within the partition; if there is no such row, instead return default (which must be of the same type as value). + +``` +lead(expression, offset, default) +``` + +#### Arguments + +- **expression**: Expression to operate on +- **offset**: Integer. Specifies how many rows forward the value of expression should be retrieved. Defaults to 1. 
+- **default**: The default value if the offset is not within the partition. Must be of the same type as expression.