From 618767e8befd3c5bf39d07efb48d66365a5f4537 Mon Sep 17 00:00:00 2001 From: Ian Lai <108986288+Chen-Yuan-Lai@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:16:03 +0800 Subject: [PATCH] doc-gen: migrate scalar functions (string) documentation 3/4 (#13926) Co-authored-by: Cheng-Yuan-Lai --- datafusion/functions/src/string/repeat.rs | 49 ++++++++----------- datafusion/functions/src/string/replace.rs | 49 +++++++++---------- datafusion/functions/src/string/split_part.rs | 45 +++++++---------- .../functions/src/string/starts_with.rs | 46 +++++++---------- datafusion/functions/src/string/to_hex.rs | 45 +++++++---------- datafusion/functions/src/string/uuid.rs | 39 ++++++--------- .../functions/src/unicode/find_in_set.rs | 46 ++++++++--------- datafusion/functions/src/unicode/reverse.rs | 44 +++++++---------- 8 files changed, 152 insertions(+), 211 deletions(-) diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 044b3549243b..d5ebf902c110 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use crate::strings::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -29,11 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View}; use datafusion_common::cast::as_int64_array; use datafusion_common::types::{logical_int64, logical_string}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; use datafusion_expr_common::signature::TypeSignatureClass; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a string with an input string repeated a specified number.", + syntax_example = "repeat(str, n)", + sql_example = r#"```sql +> select repeat('data', 3); ++-------------------------------+ +| repeat(Utf8("data"),Int64(3)) | ++-------------------------------+ +| datadatadata | ++-------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "n", + description = "Number of times to repeat the input string." + ) +)] #[derive(Debug)] pub struct RepeatFunc { signature: Signature, @@ -85,35 +103,10 @@ impl ScalarUDFImpl for RepeatFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_repeat_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_repeat_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a string with an input string repeated a specified number.", - "repeat(str, n)", - ) - .with_sql_example( - r#"```sql -> select repeat('data', 3); -+-------------------------------+ -| repeat(Utf8("data"),Int64(3)) | -+-------------------------------+ -| datadatadata | -+-------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("n", "Number of times to repeat the input string.") - .build() - }) -} - /// Repeats string the specified number of times. /// repeat('Pg', 4) = 'PgPgPgPg' fn repeat(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 9b71d3871ea8..9b6afc546994 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; use arrow::datatypes::DataType; @@ -24,10 +24,28 @@ use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; - +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Replaces all occurrences of a specified substring in a string with a new substring.", + syntax_example = "replace(str, substr, replacement)", + sql_example = r#"```sql +> select replace('ABabbaBA', 'ab', 'cd'); ++-------------------------------------------------+ +| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) | ++-------------------------------------------------+ +| ABcdbaBA | ++-------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + standard_argument( + name = "substr", + prefix = "Substring expression to replace in the input string. Substring" + ), + standard_argument(name = "replacement", prefix = "Replacement substring") +)] #[derive(Debug)] pub struct ReplaceFunc { signature: Signature, @@ -80,33 +98,10 @@ impl ScalarUDFImpl for ReplaceFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_replace_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_replace_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Replaces all occurrences of a specified substring in a string with a new substring.", - "replace(str, substr, replacement)") - .with_sql_example(r#"```sql -> select replace('ABabbaBA', 'ab', 'cd'); -+-------------------------------------------------+ -| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) | -+-------------------------------------------------+ -| ABcdbaBA | -+-------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_standard_argument("substr", Some("Substring expression to replace in the input string. Substring")) - .with_standard_argument("replacement", Some("Replacement substring")) - .build() - }) -} - fn replace_view(args: &[ArrayRef]) -> Result { let string_array = as_string_view_array(&args[0])?; let from_array = as_string_view_array(&args[1])?; diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 40bdd3ad01b2..9a6ee726698b 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -25,12 +25,28 @@ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; use datafusion_common::ScalarValue; use datafusion_common::{exec_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Splits a string based on a specified delimiter and returns the substring in the specified position.", + syntax_example = "split_part(str, delimiter, pos)", + sql_example = r#"```sql +> select split_part('1.2.3.4.5', '.', 3); ++--------------------------------------------------+ +| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) | ++--------------------------------------------------+ +| 3 | ++--------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "delimiter", description = "String or character to split on."), + argument(name = "pos", description = "Position of the part to return.") +)] #[derive(Debug)] pub struct SplitPartFunc { signature: Signature, @@ -182,33 +198,10 @@ impl ScalarUDFImpl for SplitPartFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_split_part_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_split_part_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Splits a string based on a specified delimiter and returns the substring in the specified position.", - "split_part(str, delimiter, pos)") - .with_sql_example(r#"```sql -> select split_part('1.2.3.4.5', '.', 3); -+--------------------------------------------------+ -| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) | -+--------------------------------------------------+ -| 3 | -+--------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("delimiter", "String or character to split on.") - .with_argument("pos", "Position of the part to return.") - .build() - }) -} - /// impl pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( string_array: StringArrType, diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index 7354fda09584..229982a9616a 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -16,16 +16,16 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::ArrayRef; use arrow::datatypes::DataType; use crate::utils::make_scalar_function; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; /// Returns true if string starts with prefix. /// starts_with('alphabet', 'alph') = 't' @@ -34,6 +34,21 @@ pub fn starts_with(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Tests if a string starts with a substring.", + syntax_example = "starts_with(str, substr)", + sql_example = r#"```sql +> select starts_with('datafusion','data'); ++----------------------------------------------+ +| starts_with(Utf8("datafusion"),Utf8("data")) | ++----------------------------------------------+ +| true | ++----------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "substr", description = "Substring to test for.") +)] #[derive(Debug)] pub struct StartsWithFunc { signature: Signature, @@ -84,35 +99,10 @@ impl ScalarUDFImpl for StartsWithFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_starts_with_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_starts_with_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Tests if a string starts with a substring.", - "starts_with(str, substr)", - ) - .with_sql_example( - r#"```sql -> select starts_with('datafusion','data'); -+----------------------------------------------+ -| starts_with(Utf8("datafusion"),Utf8("data")) | -+----------------------------------------------+ -| true | -+----------------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("substr", "Substring to test for.") - .build() - }) -} - #[cfg(test)] mod tests { use crate::utils::test::test_function; diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs index 04907af14ade..64654ef6ef10 100644 --- a/datafusion/functions/src/string/to_hex.rs +++ b/datafusion/functions/src/string/to_hex.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; use arrow::datatypes::{ @@ -27,9 +27,10 @@ use crate::utils::make_scalar_function; use datafusion_common::cast::as_primitive_array; use datafusion_common::Result; use datafusion_common::{exec_err, plan_err}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; + use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; /// Converts the number to its equivalent hexadecimal representation. /// to_hex(2147483647) = '7fffffff' @@ -59,6 +60,20 @@ where Ok(Arc::new(result) as ArrayRef) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Converts an integer to a hexadecimal string.", + syntax_example = "to_hex(int)", + sql_example = r#"```sql +> select to_hex(12345689); ++-------------------------+ +| to_hex(Int64(12345689)) | ++-------------------------+ +| bc6159 | ++-------------------------+ +```"#, + standard_argument(name = "int", prefix = "Integer") +)] #[derive(Debug)] pub struct ToHexFunc { signature: Signature, @@ -116,34 +131,10 @@ impl ScalarUDFImpl for ToHexFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_hex_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_hex_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Converts an integer to a hexadecimal string.", - "to_hex(int)", - ) - .with_sql_example( - r#"```sql -> select to_hex(12345689); -+-------------------------+ -| to_hex(Int64(12345689)) | -+-------------------------+ -| bc6159 | -+-------------------------+ -```"#, - ) - .with_standard_argument("int", Some("Integer")) - .build() - }) -} - #[cfg(test)] mod tests { use arrow::array::{Int32Array, StringArray}; diff --git a/datafusion/functions/src/string/uuid.rs b/datafusion/functions/src/string/uuid.rs index 6048a70bd8c5..f6d6a941068d 100644 --- a/datafusion/functions/src/string/uuid.rs +++ b/datafusion/functions/src/string/uuid.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::GenericStringArray; use arrow::datatypes::DataType; @@ -24,10 +24,23 @@ use arrow::datatypes::DataType::Utf8; use uuid::Uuid; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns [`UUID v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) string value which is unique per row.", + syntax_example = "uuid()", + sql_example = r#"```sql +> select uuid(); ++--------------------------------------+ +| uuid() | ++--------------------------------------+ +| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 | ++--------------------------------------+ +```"# +)] #[derive(Debug)] pub struct UuidFunc { signature: Signature, @@ -80,26 +93,6 @@ impl ScalarUDFImpl for UuidFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_uuid_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_uuid_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns [`UUID v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) string value which is unique per row.", - "uuid()") - .with_sql_example(r#"```sql -> select uuid(); -+--------------------------------------+ -| uuid() | -+--------------------------------------+ -| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 | -+--------------------------------------+ -```"#) - .build() - }) -} diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index 38efb408c1d3..c4d9b51f6032 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, @@ -26,12 +26,30 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use crate::utils::{make_scalar_function, utf8_to_int_type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings.", + syntax_example = "find_in_set(str, strlist)", + sql_example = r#"```sql +> select find_in_set('b', 'a,b,c,d'); ++----------------------------------------+ +| find_in_set(Utf8("b"),Utf8("a,b,c,d")) | ++----------------------------------------+ +| 2 | ++----------------------------------------+ +```"#, + argument(name = "str", description = "String expression to find in strlist."), + argument( + name = "strlist", + description = "A string list is a string composed of substrings separated by , characters." + ) +)] #[derive(Debug)] pub struct FindInSetFunc { signature: Signature, @@ -85,32 +103,10 @@ impl ScalarUDFImpl for FindInSetFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_find_in_set_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_find_in_set_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings.", - "find_in_set(str, strlist)") - .with_sql_example(r#"```sql -> select find_in_set('b', 'a,b,c,d'); -+----------------------------------------+ -| find_in_set(Utf8("b"),Utf8("a,b,c,d")) | -+----------------------------------------+ -| 2 | -+----------------------------------------+ -```"#) - .with_argument("str", "String expression to find in strlist.") - .with_argument("strlist", "A string list is a string composed of substrings separated by , characters.") - .build() - }) -} - ///Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings ///A string list is a string composed of substrings separated by , characters. fn find_in_set(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index 8e3cf8845f98..5ad347ed96c0 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use crate::utils::{make_scalar_function, utf8_to_str_type}; use arrow::array::{ @@ -25,12 +25,26 @@ use arrow::array::{ }; use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use DataType::{LargeUtf8, Utf8, Utf8View}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Reverses the character order of a string.", + syntax_example = "reverse(str)", + sql_example = r#"```sql +> select reverse('datafusion'); ++-----------------------------+ +| reverse(Utf8("datafusion")) | ++-----------------------------+ +| noisufatad | ++-----------------------------+ +```"#, + standard_argument(name = "str", prefix = "String") +)] #[derive(Debug)] pub struct ReverseFunc { signature: Signature, @@ -87,34 +101,10 @@ impl ScalarUDFImpl for ReverseFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_reverse_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_reverse_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Reverses the character order of a string.", - "reverse(str)", - ) - .with_sql_example( - r#"```sql -> select reverse('datafusion'); -+-----------------------------+ -| reverse(Utf8("datafusion")) | -+-----------------------------+ -| noisufatad | -+-----------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .build() - }) -} - /// Reverses the order of the characters in the string. /// reverse('abcde') = 'edcba' /// The implementation uses UTF-8 code points as characters