Skip to content

Commit

Permalink
put expensive test cases at the head of the xdist worker queue
Browse files Browse the repository at this point in the history
  • Loading branch information
pxLi committed Sep 13, 2021
1 parent 11433ed commit 5fba2d3
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 1 deletion.
1 change: 1 addition & 0 deletions integration_tests/src/main/python/conditionals_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_if_else_map(data_gen):
'IF(a, b, c)'),
conf = allow_negative_scale_of_decimal_conf)

@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens, ids=idfn)
def test_case_when(data_gen):
num_cmps = 20
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/generate_expr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_explode_litarray(data_gen):
'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}

@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_explode_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -81,6 +82,7 @@ def test_explode_nested_array_data(spark_tmp_path, data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_explode_outer_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -130,6 +132,7 @@ def test_posexplode_litarray(data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_posexplode_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -161,6 +164,7 @@ def test_posexplode_nested_array_data(spark_tmp_path, data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_posexplode_outer_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens, ids=idfn)
@pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
def test_cartesian_join(data_gen, batch_size):
Expand All @@ -257,6 +258,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/334')
@pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens, ids=idfn)
Expand All @@ -271,6 +273,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/334')
@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
Expand All @@ -285,6 +288,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
@pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
def test_cartesian_join_with_condition(data_gen, batch_size):
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/parquet_write_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def limited_timestamp(nullable=True):
parquet_basic_gen + parquet_struct_gen + parquet_array_gen + parquet_decimal_gens + parquet_map_gens]
parquet_ts_write_options = ['INT96', 'TIMESTAMP_MICROS', 'TIMESTAMP_MILLIS']

@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('parquet_gens', parquet_write_gens_list, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down Expand Up @@ -117,6 +118,7 @@ def test_write_ts_millis(spark_tmp_path, ts_type, ts_rebase):

# There are race conditions around when individual files are read in for partitioned data
@ignore_order
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('parquet_gen', parquet_part_write_gens, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down
2 changes: 1 addition & 1 deletion jenkins/Dockerfile-blossom.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN python3.8 -m easy_install pip
RUN update-java-alternatives --set /usr/lib/jvm/java-1.8.0-openjdk-amd64

RUN ln -s /usr/bin/python3.8 /usr/bin/python
RUN python -m pip install pytest sre_yield requests pandas pyarrow findspark pytest-xdist pre-commit
RUN python -m pip install pytest sre_yield requests pandas pyarrow findspark pytest-xdist pre-commit pytest-order

# libnuma1 and libgomp1 are required by ucx packaging
RUN apt install -y inetutils-ping expect wget libnuma1 libgomp1
Expand Down

0 comments on commit 5fba2d3

Please sign in to comment.