Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (string) documentation 3/4 (#13926)
Browse files Browse the repository at this point in the history
Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com>
  • Loading branch information
Chen-Yuan-Lai and Cheng-Yuan-Lai authored Dec 28, 2024
1 parent a08dc0a commit 618767e
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 211 deletions.
49 changes: 21 additions & 28 deletions datafusion/functions/src/string/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
Expand All @@ -29,11 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
use datafusion_common::cast::as_int64_array;
use datafusion_common::types::{logical_int64, logical_string};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_expr_common::signature::TypeSignatureClass;
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a string with an input string repeated a specified number.",
syntax_example = "repeat(str, n)",
sql_example = r#"```sql
> select repeat('data', 3);
+-------------------------------+
| repeat(Utf8("data"),Int64(3)) |
+-------------------------------+
| datadatadata |
+-------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(
name = "n",
description = "Number of times to repeat the input string."
)
)]
#[derive(Debug)]
pub struct RepeatFunc {
signature: Signature,
Expand Down Expand Up @@ -85,35 +103,10 @@ impl ScalarUDFImpl for RepeatFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_repeat_doc())
self.doc()
}
}

/// Process-wide cache for the `repeat` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `repeat`.
///
/// The value is assembled the first time this function is called and the
/// same cached instance is handed back on every later call.
fn get_repeat_doc() -> &'static Documentation {
    // Example query and its rendered output, shown verbatim in the docs.
    const SQL_EXAMPLE: &str = r#"```sql
> select repeat('data', 3);
+-------------------------------+
| repeat(Utf8("data"),Int64(3)) |
+-------------------------------+
| datadatadata |
+-------------------------------+
```"#;

    /// Assembles the documentation entry from scratch.
    fn build_repeat_doc() -> Documentation {
        let builder = Documentation::builder(
            DOC_SECTION_STRING,
            "Returns a string with an input string repeated a specified number.",
            "repeat(str, n)",
        );
        builder
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_argument("n", "Number of times to repeat the input string.")
            .build()
    }

    DOCUMENTATION.get_or_init(build_repeat_doc)
}

/// Repeats the string the specified number of times.
/// repeat('Pg', 4) = 'PgPgPgPg'
fn repeat(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down
49 changes: 22 additions & 27 deletions datafusion/functions/src/string/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,36 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
use arrow::datatypes::DataType;

use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use datafusion_macros::user_doc;
#[user_doc(
doc_section(label = "String Functions"),
description = "Replaces all occurrences of a specified substring in a string with a new substring.",
syntax_example = "replace(str, substr, replacement)",
sql_example = r#"```sql
> select replace('ABabbaBA', 'ab', 'cd');
+-------------------------------------------------+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
+-------------------------------------------------+
| ABcdbaBA |
+-------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
standard_argument(
name = "substr",
prefix = "Substring expression to replace in the input string. Substring"
),
standard_argument(name = "replacement", prefix = "Replacement substring")
)]
#[derive(Debug)]
pub struct ReplaceFunc {
signature: Signature,
Expand Down Expand Up @@ -80,33 +98,10 @@ impl ScalarUDFImpl for ReplaceFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_replace_doc())
self.doc()
}
}

/// Process-wide cache for the `replace` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `replace`.
///
/// The value is assembled the first time this function is called and the
/// same cached instance is handed back on every later call.
fn get_replace_doc() -> &'static Documentation {
    // Example query and its rendered output, shown verbatim in the docs.
    const SQL_EXAMPLE: &str = r#"```sql
> select replace('ABabbaBA', 'ab', 'cd');
+-------------------------------------------------+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
+-------------------------------------------------+
| ABcdbaBA |
+-------------------------------------------------+
```"#;

    /// Assembles the documentation entry from scratch.
    fn build_replace_doc() -> Documentation {
        let builder = Documentation::builder(
            DOC_SECTION_STRING,
            "Replaces all occurrences of a specified substring in a string with a new substring.",
            "replace(str, substr, replacement)",
        );
        builder
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_standard_argument(
                "substr",
                Some("Substring expression to replace in the input string. Substring"),
            )
            .with_standard_argument("replacement", Some("Replacement substring"))
            .build()
    }

    DOCUMENTATION.get_or_init(build_replace_doc)
}

fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = as_string_view_array(&args[0])?;
let from_array = as_string_view_array(&args[1])?;
Expand Down
45 changes: 19 additions & 26 deletions datafusion/functions/src/string/split_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,28 @@ use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::ScalarValue;
use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Splits a string based on a specified delimiter and returns the substring in the specified position.",
syntax_example = "split_part(str, delimiter, pos)",
sql_example = r#"```sql
> select split_part('1.2.3.4.5', '.', 3);
+--------------------------------------------------+
| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
+--------------------------------------------------+
| 3 |
+--------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "delimiter", description = "String or character to split on."),
argument(name = "pos", description = "Position of the part to return.")
)]
#[derive(Debug)]
pub struct SplitPartFunc {
signature: Signature,
Expand Down Expand Up @@ -182,33 +198,10 @@ impl ScalarUDFImpl for SplitPartFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_split_part_doc())
self.doc()
}
}

/// Process-wide cache for the `split_part` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `split_part`.
///
/// The value is assembled the first time this function is called and the
/// same cached instance is handed back on every later call.
fn get_split_part_doc() -> &'static Documentation {
    // Example query and its rendered output, shown verbatim in the docs.
    const SQL_EXAMPLE: &str = r#"```sql
> select split_part('1.2.3.4.5', '.', 3);
+--------------------------------------------------+
| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
+--------------------------------------------------+
| 3 |
+--------------------------------------------------+
```"#;

    /// Assembles the documentation entry from scratch.
    fn build_split_part_doc() -> Documentation {
        let builder = Documentation::builder(
            DOC_SECTION_STRING,
            "Splits a string based on a specified delimiter and returns the substring in the specified position.",
            "split_part(str, delimiter, pos)",
        );
        builder
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_argument("delimiter", "String or character to split on.")
            .with_argument("pos", "Position of the part to return.")
            .build()
    }

    DOCUMENTATION.get_or_init(build_split_part_doc)
}

/// Generic implementation of `split_part`, parameterized over the string and delimiter array types.
pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>(
string_array: StringArrType,
Expand Down
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/starts_with.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::datatypes::DataType;

use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;

/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = 't'
Expand All @@ -34,6 +34,21 @@ pub fn starts_with(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Tests if a string starts with a substring.",
syntax_example = "starts_with(str, substr)",
sql_example = r#"```sql
> select starts_with('datafusion','data');
+----------------------------------------------+
| starts_with(Utf8("datafusion"),Utf8("data")) |
+----------------------------------------------+
| true |
+----------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "substr", description = "Substring to test for.")
)]
#[derive(Debug)]
pub struct StartsWithFunc {
signature: Signature,
Expand Down Expand Up @@ -84,35 +99,10 @@ impl ScalarUDFImpl for StartsWithFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_starts_with_doc())
self.doc()
}
}

/// Process-wide cache for the `starts_with` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `starts_with`.
///
/// The value is assembled the first time this function is called and the
/// same cached instance is handed back on every later call.
fn get_starts_with_doc() -> &'static Documentation {
    // Example query and its rendered output, shown verbatim in the docs.
    const SQL_EXAMPLE: &str = r#"```sql
> select starts_with('datafusion','data');
+----------------------------------------------+
| starts_with(Utf8("datafusion"),Utf8("data")) |
+----------------------------------------------+
| true |
+----------------------------------------------+
```"#;

    /// Assembles the documentation entry from scratch.
    fn build_starts_with_doc() -> Documentation {
        let builder = Documentation::builder(
            DOC_SECTION_STRING,
            "Tests if a string starts with a substring.",
            "starts_with(str, substr)",
        );
        builder
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_argument("substr", "Substring to test for.")
            .build()
    }

    DOCUMENTATION.get_or_init(build_starts_with_doc)
}

#[cfg(test)]
mod tests {
use crate::utils::test::test_function;
Expand Down
45 changes: 18 additions & 27 deletions datafusion/functions/src/string/to_hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::datatypes::{
Expand All @@ -27,9 +27,10 @@ use crate::utils::make_scalar_function;
use datafusion_common::cast::as_primitive_array;
use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;

use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;

/// Converts the number to its equivalent hexadecimal representation.
/// to_hex(2147483647) = '7fffffff'
Expand Down Expand Up @@ -59,6 +60,20 @@ where
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Converts an integer to a hexadecimal string.",
syntax_example = "to_hex(int)",
sql_example = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159 |
+-------------------------+
```"#,
standard_argument(name = "int", prefix = "Integer")
)]
#[derive(Debug)]
pub struct ToHexFunc {
signature: Signature,
Expand Down Expand Up @@ -116,34 +131,10 @@ impl ScalarUDFImpl for ToHexFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_to_hex_doc())
self.doc()
}
}

/// Process-wide cache for the `to_hex` documentation; filled on first use.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `to_hex`.
///
/// The value is assembled the first time this function is called and the
/// same cached instance is handed back on every later call.
fn get_to_hex_doc() -> &'static Documentation {
    // Example query and its rendered output, shown verbatim in the docs.
    const SQL_EXAMPLE: &str = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159 |
+-------------------------+
```"#;

    /// Assembles the documentation entry from scratch.
    fn build_to_hex_doc() -> Documentation {
        let builder = Documentation::builder(
            DOC_SECTION_STRING,
            "Converts an integer to a hexadecimal string.",
            "to_hex(int)",
        );
        builder
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("int", Some("Integer"))
            .build()
    }

    DOCUMENTATION.get_or_init(build_to_hex_doc)
}

#[cfg(test)]
mod tests {
use arrow::array::{Int32Array, StringArray};
Expand Down
Loading

0 comments on commit 618767e

Please sign in to comment.