Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create datafusion-functions-array crate and move ArrayToString function into it #9113

Merged
merged 4 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ jobs:
- name: Check function packages (encoding_expressions)
run: cargo check --no-default-features --features=encoding_expressions -p datafusion

- name: Check function packages (array_expressions)
run: cargo check --no-default-features --features=array_expressions -p datafusion

- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

[workspace]
exclude = ["datafusion-cli"]
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/functions", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/functions", "datafusion/functions-array", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
]
resolver = "2"

Expand Down Expand Up @@ -51,6 +51,7 @@ datafusion-common = { path = "datafusion/common", version = "35.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "35.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "35.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "35.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "35.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "35.0.0" }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "35.0.0" }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "35.0.0" }
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`.

Default features:

- `array_expressions`: functions for working with arrays such as `array_to_string`
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `encoding_expressions`: `encode` and `decode` functions
Expand Down
68 changes: 40 additions & 28 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ path = "src/lib.rs"

[features]
# Used to enable the avro format
array_expressions = ["datafusion-functions-array"]
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
default = ["array_expressions", "crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
encoding_expressions = ["datafusion-functions/encoding_expressions"]
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
force_hash_collisions = []
Expand All @@ -68,7 +69,8 @@ dashmap = { workspace = true }
datafusion-common = { path = "../common", version = "35.0.0", features = ["object_store"], default-features = false }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-functions = { path = "../functions", version = "35.0.0" }
datafusion-functions = { workspace = true }
datafusion-functions-array = { workspace = true, optional = true }
datafusion-optimizer = { path = "../optimizer", version = "35.0.0", default-features = false }
datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false }
datafusion-physical-plan = { workspace = true }
Expand Down
5 changes: 5 additions & 0 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,11 @@ impl SessionState {
datafusion_functions::register_all(&mut new_self)
.expect("can not register built in functions");

// register crate of array expressions (if enabled)
#[cfg(feature = "array_expressions")]
datafusion_functions_array::register_all(&mut new_self)
.expect("can not register array expressions");

new_self
}
/// Returns new [`SessionState`] using the provided
Expand Down
6 changes: 6 additions & 0 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,12 @@ pub mod functions {
pub use datafusion_functions::*;
}

/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled
pub mod functions_array {
#[cfg(feature = "array_expressions")]
pub use datafusion_functions::*;
}

#[cfg(test)]
pub mod test;
pub mod test_util;
Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub use datafusion_expr::{
Expr,
};
pub use datafusion_functions::expr_fn::*;
#[cfg(feature = "array_expressions")]
pub use datafusion_functions_array::expr_fn::*;

pub use std::ops::Not;
pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
Expand Down
30 changes: 29 additions & 1 deletion datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use arrow::{
array::{Int32Array, StringArray},
record_batch::RecordBatch,
};
use arrow_array::types::Int32Type;
use arrow_array::ListArray;
use arrow_schema::SchemaRef;
use std::sync::Arc;

Expand All @@ -40,6 +42,7 @@ fn test_schema() -> SchemaRef {
Arc::new(Schema::new(vec![
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::Int32, false),
Field::new("l", DataType::new_list(DataType::Int32, true), true),
]))
}

Expand All @@ -57,6 +60,12 @@ async fn create_test_table() -> Result<DataFrame> {
"123AbcDef",
])),
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
Some(vec![Some(0), Some(1), Some(2)]),
None,
Some(vec![Some(3), None, Some(5)]),
Some(vec![Some(6), Some(7)]),
])),
],
)?;

Expand All @@ -67,7 +76,7 @@ async fn create_test_table() -> Result<DataFrame> {
ctx.table("test").await
}

/// Excutes an expression on the test dataframe as a select.
/// Executes an expression on the test dataframe as a select.
/// Compares formatted output of a record batch with an expected
/// vector of strings, using the assert_batch_eq! macro
macro_rules! assert_fn_batches {
Expand Down Expand Up @@ -862,3 +871,22 @@ async fn test_fn_decode() -> Result<()> {

Ok(())
}

#[tokio::test]
async fn test_fn_array_to_string() -> Result<()> {
let expr = array_to_string(col("l"), lit("***"));

let expected = [
"+-------------------------------------+",
"| array_to_string(test.l,Utf8(\"***\")) |",
"+-------------------------------------+",
"| 0***1***2 |",
"| |",
"| 3***5 |",
"| 6***7 |",
"+-------------------------------------+",
];
assert_fn_batches!(expr, expected);

Ok(())
}
13 changes: 0 additions & 13 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,6 @@ pub enum BuiltinScalarFunction {
ArrayReverse,
/// array_slice
ArraySlice,
/// array_to_string
ArrayToString,
/// array_intersect
ArrayIntersect,
/// array_union
Expand Down Expand Up @@ -434,7 +432,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
BuiltinScalarFunction::Flatten => Volatility::Immutable,
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
Expand Down Expand Up @@ -631,7 +628,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
BuiltinScalarFunction::ArrayIntersect => {
match (input_expr_types[0].clone(), input_expr_types[1].clone()) {
(DataType::Null, DataType::Null) | (DataType::Null, _) => {
Expand Down Expand Up @@ -991,9 +987,6 @@ impl BuiltinScalarFunction {
Signature::variadic_any(self.volatility())
}

BuiltinScalarFunction::ArrayToString => {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()),
BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()),
Expand Down Expand Up @@ -1605,12 +1598,6 @@ impl BuiltinScalarFunction {
}
BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
BuiltinScalarFunction::ArrayToString => &[
"array_to_string",
"list_to_string",
"array_join",
"list_join",
],
BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"],
BuiltinScalarFunction::Cardinality => &["cardinality"],
BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"],
Expand Down
Loading
Loading