From 6a0ad0b83049b689233f2256a70a4f4bc2bcebef Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Fri, 21 May 2021 15:41:17 +0800 Subject: [PATCH] skip test_window_aggs_for_rows_lead_lag_on_arrays If some rows have equal values in the order-by columns (`a`, `b`, `c` in the test), the test may fail because the CPU and GPU cannot guarantee the same relative order for those tied rows, while lead/lag results typically depend on row order. The solution is to add the aggregation column `d` and the default column `d_default` to the order-by to guarantee a deterministic order. However, sorting on arrays is not supported yet, see https://github.com/NVIDIA/spark-rapids/issues/2470, so this PR just skips the test. Signed-off-by: Bobby Wang --- integration_tests/src/main/python/window_function_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py index fce0ba50562..c4b63d2102d 100644 --- a/integration_tests/src/main/python/window_function_test.py +++ b/integration_tests/src/main/python/window_function_test.py @@ -281,6 +281,12 @@ def do_it(spark): # lead and lag are supported for arrays, but the other window operations like min and max are not right now # once they are all supported the tests should be combined. +@pytest.mark.skip(reason="If some rows of order-by columns (here is a,b,c) are equal, then it may fail because" + "CPU and GPU can't guarantee the order for the same rows, while lead/lag is typically" + "depending on row's order. The solution is we should add the d and d_default columns" + "into the order-by to guarantee the order. But for now, sorting on array has not been" + "supported yet, see https://github.com/NVIDIA/spark-rapids/issues/2470." 
+ "Once the issue is resolved, we should remove skip mark") @ignore_order(local=True) @pytest.mark.parametrize('d_gen', lead_lag_array_data_gens, ids=meta_idfn('agg:')) @pytest.mark.parametrize('c_gen', [long_gen], ids=meta_idfn('orderBy:'))