Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace array_contains with SQL array functions: array_has, array_has_any, array_has_all #6990

Merged
merged 17 commits into from
Jul 22, 2023
Merged
208 changes: 183 additions & 25 deletions datafusion/core/tests/sqllogictests/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,60 @@ AS VALUES
(NULL, NULL, NULL, NULL)
;

statement ok
CREATE TABLE array_has_table_1D
AS VALUES
(make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)),
(make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5))
;

statement ok
CREATE TABLE array_has_table_1D_Float
AS VALUES
(make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)),
(make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33))
;

statement ok
CREATE TABLE array_has_table_1D_Boolean
AS VALUES
(make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)),
(make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true))
;

statement ok
CREATE TABLE array_has_table_1D_UTF8
AS VALUES
(make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')),
(make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow'))
;

statement ok
CREATE TABLE array_has_table_2D
AS VALUES
(make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])),
(make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10]))
;

statement ok
CREATE TABLE array_has_table_2D_float
AS VALUES
(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])),
(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3]))
;

statement ok
CREATE TABLE array_has_table_3D
AS VALUES
(make_array([[1,2]]), make_array([1])),
(make_array([[1,2]]), make_array([1,2])),
(make_array([[1,2]]), make_array([1,2,3])),
(make_array([[1], [2]]), make_array([2])),
(make_array([[1], [2]]), make_array([1], [2])),
(make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])),
(make_array([[1], [2]], [[2], [3]]), make_array([1], [2]))
;

statement ok
CREATE TABLE arrays_values_without_nulls
AS VALUES
Expand Down Expand Up @@ -1164,48 +1218,129 @@ NULL 1 1
2 NULL 1
2 1 NULL

## array_contains
jayzhan211 marked this conversation as resolved.
Show resolved Hide resolved
## array_has/array_has_all/array_has_any

query BBBBBBBBBBBB
select array_has(make_array(1,2), 1),
array_has(make_array(1,2,NULL), 1),
array_has(make_array([2,3], [3,4]), make_array(2,3)),
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])),
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])),
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])),
array_has(make_array([[[1]]]), make_array([[1]])),
array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])),
array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])),
list_has(make_array(1,2,3), 4),
array_contains(make_array(1,2,3), 3),
list_contains(make_array(1,2,3), 0)
;
----
true true true true true false true false true false true false

# array_contains scalar function #1
query BBB
select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D;
----
true true true
false false false

# array_contains scalar function #2
query BB
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]);
query BBB
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D_Float;
----
true true
true true false
false false true

# array_contains scalar function #3
query BBB
select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]);
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D_Boolean;
----
false false false
false true true
true true true

# array_contains scalar function #4
query BB
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]);
query BBB
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D_UTF8;
----
false false
true true false
false false true

# array_contains scalar function #5
query BB
select array_contains([true, true, false, true, false], [true, false, false]), array_contains([true, false, true], [true, true]);
select array_has(column1, column2),
array_has_all(column3, column4)
from array_has_table_2D;
----
false true
true false

query B
select array_has_all(column1, column2)
from array_has_table_2D_float;
----
true
false

query B
select array_has(column1, column2) from array_has_table_3D;
----
false
true
false
false
true
false
true

query BBBB
select array_has(column1, make_array(5, 6)),
array_has(column1, make_array(7, NULL)),
array_has(column2, 5.5),
array_has(column3, 'o')
from arrays;
----
false false false true
true false true false
true false false true
false true false false
false false false false
false false false false

query BBBBBBBBBBBBB
select array_has_all(make_array(1,2,3), make_array(1,3)),
array_has_all(make_array(1,2,3), make_array(1,4)),
array_has_all(make_array([1,2], [3,4]), make_array([1,2])),
array_has_all(make_array([1,2], [3,4]), make_array([1,3])),
array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])),
array_has_all(make_array([[1,2,3]]), make_array([[1]])),
array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])),
array_has_any(make_array(1,2,3), make_array(1,10,100)),
array_has_any(make_array(1,2,3), make_array(10,100)),
array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])),
array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])),
array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])),
array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]]))
;
----
true true
true false true false false false true true false false true false true

# array_contains scalar function #6
query BB
select array_contains(make_array(true, true, true), make_array(false, false)), array_contains([false, false, false], [true, true]);
query BBBB
select list_has_all(make_array(1,2,3), make_array(4,5,6)),
list_has_all(make_array(1,2,3), make_array(1,2)),
list_has_any(make_array(1,2,3), make_array(4,5,6)),
list_has_any(make_array(1,2,3), make_array(1,2,4))
;
----
false false

false true false true

### Array operators tests


## array concatenate operator

# array concatenate operator with scalars #1 (like array_concat scalar function)
Expand Down Expand Up @@ -1296,7 +1431,6 @@ select make_array(f0) from fixed_size_list_array
[[1, 2], [3, 4]]



### Delete tables


Expand All @@ -1312,5 +1446,29 @@ drop table arrays;
statement ok
drop table arrays_values;

statement ok
drop table arrays_values_v2;

statement ok
drop table array_has_table_1D;

statement ok
drop table array_has_table_1D_Float;

statement ok
drop table array_has_table_1D_Boolean;

statement ok
drop table array_has_table_1D_UTF8;

statement ok
drop table array_has_table_2D;

statement ok
drop table array_has_table_2D_float;

statement ok
drop table array_has_table_3D;

statement ok
drop table arrays_values_without_nulls;
26 changes: 20 additions & 6 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,12 @@ pub enum BuiltinScalarFunction {
ArrayAppend,
/// array_concat
ArrayConcat,
/// array_contains
ArrayContains,
/// array_has
ArrayHas,
/// array_has_all
ArrayHasAll,
/// array_has_any
ArrayHasAny,
/// array_dims
ArrayDims,
/// array_fill
Expand Down Expand Up @@ -330,7 +334,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Trunc => Volatility::Immutable,
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
BuiltinScalarFunction::ArrayContains => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable,
BuiltinScalarFunction::ArrayHas => Volatility::Immutable,
BuiltinScalarFunction::ArrayDims => Volatility::Immutable,
BuiltinScalarFunction::ArrayFill => Volatility::Immutable,
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
Expand Down Expand Up @@ -501,7 +507,9 @@ impl BuiltinScalarFunction {

Ok(expr_type)
}
BuiltinScalarFunction::ArrayContains => Ok(Boolean),
BuiltinScalarFunction::ArrayHasAll
| BuiltinScalarFunction::ArrayHasAny
| BuiltinScalarFunction::ArrayHas => Ok(Boolean),
BuiltinScalarFunction::ArrayDims => {
Ok(List(Arc::new(Field::new("item", UInt64, true))))
}
Expand Down Expand Up @@ -808,7 +816,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayContains => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayHasAll
| BuiltinScalarFunction::ArrayHasAny
| BuiltinScalarFunction::ArrayHas => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()),
BuiltinScalarFunction::ArrayFill => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayLength => {
Expand Down Expand Up @@ -1278,8 +1288,12 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
BuiltinScalarFunction::ArrayConcat => {
&["array_concat", "array_cat", "list_concat", "list_cat"]
}
BuiltinScalarFunction::ArrayContains => &["array_contains"],
BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"],
BuiltinScalarFunction::ArrayHasAll => &["array_has_all", "list_has_all"],
BuiltinScalarFunction::ArrayHasAny => &["array_has_any", "list_has_any"],
BuiltinScalarFunction::ArrayHas => {
&["array_has", "list_has", "array_contains", "list_contains"]
}
BuiltinScalarFunction::ArrayFill => &["array_fill"],
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],
Expand Down
18 changes: 15 additions & 3 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,22 @@ scalar_expr!(
);
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
scalar_expr!(
ArrayContains,
array_contains,
ArrayHas,
array_has,
first_array second_array,
"returns true, if each element of the second array appearing in the first array, otherwise false."
"Returns true, if the element appears in the first array, otherwise false."
);
scalar_expr!(
ArrayHasAll,
array_has_all,
first_array second_array,
"Returns true if each element of the second array appears in the first array; otherwise, it returns false."
);
scalar_expr!(
ArrayHasAny,
array_has_any,
first_array second_array,
"Returns true if at least one element of the second array appears in the first array; otherwise, it returns false."
);
scalar_expr!(
ArrayDims,
Expand Down
Loading