Skip to content

Commit

Permalink
put expensive test cases at the head of the xdist worker queue
Browse files Browse the repository at this point in the history
  • Loading branch information
pxLi committed Sep 13, 2021
1 parent 11433ed commit 5fba2d3
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 1 deletion.
1 change: 1 addition & 0 deletions integration_tests/src/main/python/conditionals_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_if_else_map(data_gen):
'IF(a, b, c)'),
conf = allow_negative_scale_of_decimal_conf)

@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens, ids=idfn)
def test_case_when(data_gen):
num_cmps = 20
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/generate_expr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_explode_litarray(data_gen):
'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}

@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_explode_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -81,6 +82,7 @@ def test_explode_nested_array_data(spark_tmp_path, data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_explode_outer_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -130,6 +132,7 @@ def test_posexplode_litarray(data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_posexplode_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -161,6 +164,7 @@ def test_posexplode_nested_array_data(spark_tmp_path, data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_posexplode_outer_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens, ids=idfn)
@pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
def test_cartesian_join(data_gen, batch_size):
Expand All @@ -257,6 +258,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/334')
@pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens, ids=idfn)
Expand All @@ -271,6 +273,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/334')
@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
Expand All @@ -285,6 +288,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
@pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
def test_cartesian_join_with_condition(data_gen, batch_size):
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/parquet_write_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def limited_timestamp(nullable=True):
parquet_basic_gen + parquet_struct_gen + parquet_array_gen + parquet_decimal_gens + parquet_map_gens]
parquet_ts_write_options = ['INT96', 'TIMESTAMP_MICROS', 'TIMESTAMP_MILLIS']

@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('parquet_gens', parquet_write_gens_list, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down Expand Up @@ -117,6 +118,7 @@ def test_write_ts_millis(spark_tmp_path, ts_type, ts_rebase):

# There are race conditions around when individual files are read in for partitioned data
@ignore_order
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('parquet_gen', parquet_part_write_gens, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down
2 changes: 1 addition & 1 deletion jenkins/Dockerfile-blossom.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN python3.8 -m easy_install pip
RUN update-java-alternatives --set /usr/lib/jvm/java-1.8.0-openjdk-amd64

RUN ln -s /usr/bin/python3.8 /usr/bin/python
RUN python -m pip install pytest sre_yield requests pandas pyarrow findspark pytest-xdist pre-commit
RUN python -m pip install pytest sre_yield requests pandas pyarrow findspark pytest-xdist pre-commit pytest-order

# libnuma1 and libgomp1 are required by ucx packaging
RUN apt install -y inetutils-ping expect wget libnuma1 libgomp1
Expand Down

0 comments on commit 5fba2d3

Please sign in to comment.