From 6a0ad0b83049b689233f2256a70a4f4bc2bcebef Mon Sep 17 00:00:00 2001
From: Bobby Wang <wbo4958@gmail.com>
Date: Fri, 21 May 2021 15:41:17 +0800
Subject: [PATCH] skip test_window_aggs_for_rows_lead_lag_on_arrays

If some rows have equal values in the order-by columns (`a,b,c` in this
test), the test may fail because the CPU and GPU do not guarantee the same
relative order for tied rows, while lead/lag results depend on row order.

The solution is to add the aggregation column `d` and the default column
`d_default` to the order-by to guarantee a total order. But for now,
sorting on arrays is not supported yet, see
https://github.com/NVIDIA/spark-rapids/issues/2470.

So this PR just skips the test.

Signed-off-by: Bobby Wang <wbo4958@gmail.com>
---
 integration_tests/src/main/python/window_function_test.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py
index fce0ba50562..c4b63d2102d 100644
--- a/integration_tests/src/main/python/window_function_test.py
+++ b/integration_tests/src/main/python/window_function_test.py
@@ -281,6 +281,12 @@ def do_it(spark):
 
 # lead and lag are supported for arrays, but the other window operations like min and max are not right now
 # once they are all supported the tests should be combined.
+@pytest.mark.skip(reason="If some rows of order-by columns (here is a,b,c) are equal, then it may fail because "
+                          "CPU and GPU can't guarantee the order for the same rows, while lead/lag is typically "
+                          "depending on row's order. The solution is we should add the d and d_default columns "
+                          "into the order-by to guarantee the order. But for now, sorting on array has not been "
+                          "supported yet, see https://github.com/NVIDIA/spark-rapids/issues/2470. "
+                          "Once the issue is resolved, we should remove skip mark")
 @ignore_order(local=True)
 @pytest.mark.parametrize('d_gen', lead_lag_array_data_gens, ids=meta_idfn('agg:'))
 @pytest.mark.parametrize('c_gen', [long_gen], ids=meta_idfn('orderBy:'))