Skip to content

Commit

Permalink
Replace array_contains with SQL array functions: array_has, `arra…
Browse files Browse the repository at this point in the history
…y_has_any`, `array_has_all` (#6990)

* rename array_contains to array_has_all

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* array_has_all with columns done

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add more tests

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* prepare array has any and array has

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* clippy

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add proto only

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add generated files

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* basic array_has done

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add more tests

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* basic array_has done

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* array_has_all done

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* any and all are done

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* remove proto files

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* remove test

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* address comment

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* run doc command

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* rerun command

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

---------

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
  • Loading branch information
jayzhan211 authored Jul 22, 2023
1 parent 466b6c3 commit 49c91b5
Show file tree
Hide file tree
Showing 12 changed files with 578 additions and 181 deletions.
208 changes: 183 additions & 25 deletions datafusion/core/tests/sqllogictests/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,60 @@ AS VALUES
(NULL, NULL, NULL, NULL)
;

statement ok
CREATE TABLE array_has_table_1D
AS VALUES
(make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)),
(make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5))
;

statement ok
CREATE TABLE array_has_table_1D_Float
AS VALUES
(make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)),
(make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33))
;

statement ok
CREATE TABLE array_has_table_1D_Boolean
AS VALUES
(make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)),
(make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true))
;

statement ok
CREATE TABLE array_has_table_1D_UTF8
AS VALUES
(make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')),
(make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow'))
;

statement ok
CREATE TABLE array_has_table_2D
AS VALUES
(make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])),
(make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10]))
;

statement ok
CREATE TABLE array_has_table_2D_float
AS VALUES
(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])),
(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3]))
;

statement ok
CREATE TABLE array_has_table_3D
AS VALUES
(make_array([[1,2]]), make_array([1])),
(make_array([[1,2]]), make_array([1,2])),
(make_array([[1,2]]), make_array([1,2,3])),
(make_array([[1], [2]]), make_array([2])),
(make_array([[1], [2]]), make_array([1], [2])),
(make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])),
(make_array([[1], [2]], [[2], [3]]), make_array([1], [2]))
;

statement ok
CREATE TABLE arrays_values_without_nulls
AS VALUES
Expand Down Expand Up @@ -1164,48 +1218,129 @@ NULL 1 1
2 NULL 1
2 1 NULL

## array_contains
## array_has/array_has_all/array_has_any

query BBBBBBBBBBBB
select array_has(make_array(1,2), 1),
array_has(make_array(1,2,NULL), 1),
array_has(make_array([2,3], [3,4]), make_array(2,3)),
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])),
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])),
array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])),
array_has(make_array([[[1]]]), make_array([[1]])),
array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])),
array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])),
list_has(make_array(1,2,3), 4),
array_contains(make_array(1,2,3), 3),
list_contains(make_array(1,2,3), 0)
;
----
true true true true true false true false true false true false

# array_contains scalar function #1
query BBB
select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D;
----
true true true
false false false

# array_contains scalar function #2
query BB
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]);
query BBB
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D_Float;
----
true true
true true false
false false true

# array_contains scalar function #3
query BBB
select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]);
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D_Boolean;
----
false false false
false true true
true true true

# array_contains scalar function #4
query BB
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]);
query BBB
select array_has(column1, column2),
array_has_all(column3, column4),
array_has_any(column5, column6)
from array_has_table_1D_UTF8;
----
false false
true true false
false false true

# array_contains scalar function #5
query BB
select array_contains([true, true, false, true, false], [true, false, false]), array_contains([true, false, true], [true, true]);
select array_has(column1, column2),
array_has_all(column3, column4)
from array_has_table_2D;
----
false true
true false

query B
select array_has_all(column1, column2)
from array_has_table_2D_float;
----
true
false

query B
select array_has(column1, column2) from array_has_table_3D;
----
false
true
false
false
true
false
true

query BBBB
select array_has(column1, make_array(5, 6)),
array_has(column1, make_array(7, NULL)),
array_has(column2, 5.5),
array_has(column3, 'o')
from arrays;
----
false false false true
true false true false
true false false true
false true false false
false false false false
false false false false

query BBBBBBBBBBBBB
select array_has_all(make_array(1,2,3), make_array(1,3)),
array_has_all(make_array(1,2,3), make_array(1,4)),
array_has_all(make_array([1,2], [3,4]), make_array([1,2])),
array_has_all(make_array([1,2], [3,4]), make_array([1,3])),
array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])),
array_has_all(make_array([[1,2,3]]), make_array([[1]])),
array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])),
array_has_any(make_array(1,2,3), make_array(1,10,100)),
array_has_any(make_array(1,2,3), make_array(10,100)),
array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])),
array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])),
array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])),
array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]]))
;
----
true true
true false true false false false true true false false true false true

# array_contains scalar function #6
query BB
select array_contains(make_array(true, true, true), make_array(false, false)), array_contains([false, false, false], [true, true]);
query BBBB
select list_has_all(make_array(1,2,3), make_array(4,5,6)),
list_has_all(make_array(1,2,3), make_array(1,2)),
list_has_any(make_array(1,2,3), make_array(4,5,6)),
list_has_any(make_array(1,2,3), make_array(1,2,4))
;
----
false false

false true false true

### Array operators tests


## array concatenate operator

# array concatenate operator with scalars #1 (like array_concat scalar function)
Expand Down Expand Up @@ -1296,7 +1431,6 @@ select make_array(f0) from fixed_size_list_array
[[1, 2], [3, 4]]



### Delete tables


Expand All @@ -1312,5 +1446,29 @@ drop table arrays;
statement ok
drop table arrays_values;

statement ok
drop table arrays_values_v2;

statement ok
drop table array_has_table_1D;

statement ok
drop table array_has_table_1D_Float;

statement ok
drop table array_has_table_1D_Boolean;

statement ok
drop table array_has_table_1D_UTF8;

statement ok
drop table array_has_table_2D;

statement ok
drop table array_has_table_2D_float;

statement ok
drop table array_has_table_3D;

statement ok
drop table arrays_values_without_nulls;
26 changes: 20 additions & 6 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,12 @@ pub enum BuiltinScalarFunction {
ArrayAppend,
/// array_concat
ArrayConcat,
/// array_contains
ArrayContains,
/// array_has
ArrayHas,
/// array_has_all
ArrayHasAll,
/// array_has_any
ArrayHasAny,
/// array_dims
ArrayDims,
/// array_fill
Expand Down Expand Up @@ -330,7 +334,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Trunc => Volatility::Immutable,
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
BuiltinScalarFunction::ArrayContains => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable,
BuiltinScalarFunction::ArrayHas => Volatility::Immutable,
BuiltinScalarFunction::ArrayDims => Volatility::Immutable,
BuiltinScalarFunction::ArrayFill => Volatility::Immutable,
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
Expand Down Expand Up @@ -501,7 +507,9 @@ impl BuiltinScalarFunction {

Ok(expr_type)
}
BuiltinScalarFunction::ArrayContains => Ok(Boolean),
BuiltinScalarFunction::ArrayHasAll
| BuiltinScalarFunction::ArrayHasAny
| BuiltinScalarFunction::ArrayHas => Ok(Boolean),
BuiltinScalarFunction::ArrayDims => {
Ok(List(Arc::new(Field::new("item", UInt64, true))))
}
Expand Down Expand Up @@ -808,7 +816,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayContains => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayHasAll
| BuiltinScalarFunction::ArrayHasAny
| BuiltinScalarFunction::ArrayHas => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()),
BuiltinScalarFunction::ArrayFill => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayLength => {
Expand Down Expand Up @@ -1278,8 +1288,12 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
BuiltinScalarFunction::ArrayConcat => {
&["array_concat", "array_cat", "list_concat", "list_cat"]
}
BuiltinScalarFunction::ArrayContains => &["array_contains"],
BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"],
BuiltinScalarFunction::ArrayHasAll => &["array_has_all", "list_has_all"],
BuiltinScalarFunction::ArrayHasAny => &["array_has_any", "list_has_any"],
BuiltinScalarFunction::ArrayHas => {
&["array_has", "list_has", "array_contains", "list_contains"]
}
BuiltinScalarFunction::ArrayFill => &["array_fill"],
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],
Expand Down
18 changes: 15 additions & 3 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,22 @@ scalar_expr!(
);
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
scalar_expr!(
ArrayContains,
array_contains,
ArrayHas,
array_has,
first_array second_array,
"returns true, if each element of the second array appearing in the first array, otherwise false."
"Returns true, if the element appears in the first array, otherwise false."
);
scalar_expr!(
ArrayHasAll,
array_has_all,
first_array second_array,
"Returns true if each element of the second array appears in the first array; otherwise, it returns false."
);
scalar_expr!(
ArrayHasAny,
array_has_any,
first_array second_array,
"Returns true if at least one element of the second array appears in the first array; otherwise, it returns false."
);
scalar_expr!(
ArrayDims,
Expand Down
Loading

0 comments on commit 49c91b5

Please sign in to comment.