Skip to content

Commit

Permalink
put expensive test cases to the head of xdist worker queue
Browse files Browse the repository at this point in the history
  • Loading branch information
pxLi committed Sep 13, 2021
1 parent 11433ed commit 7d74179
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 1 deletion.
5 changes: 5 additions & 0 deletions integration_tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ only needed when testing integration with pandas.
integration and interoperability with pandas, NumPy, and other software in the Python ecosystem. This is used
to test improved transfer performance to pandas based user defined functions.

### pytest-order
`pip install pytest-order`

`pytest-order` allows you to customize the order in which your tests are run. This is optional.

## pytest-xdist and findspark

`pytest-xdist` and `findspark` can be used to speed up running the tests by running them in parallel.
Expand Down
1 change: 1 addition & 0 deletions integration_tests/src/main/python/conditionals_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_if_else_map(data_gen):
'IF(a, b, c)'),
conf = allow_negative_scale_of_decimal_conf)

@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens, ids=idfn)
def test_case_when(data_gen):
num_cmps = 20
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/generate_expr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_explode_litarray(data_gen):
'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}

@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_explode_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -81,6 +82,7 @@ def test_explode_nested_array_data(spark_tmp_path, data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_explode_outer_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -130,6 +132,7 @@ def test_posexplode_litarray(data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_posexplode_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down Expand Up @@ -161,6 +164,7 @@ def test_posexplode_nested_array_data(spark_tmp_path, data_gen):
#sort locally because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + struct_gens_sample + array_gens_sample + map_gens_sample, ids=idfn)
def test_posexplode_outer_array_data(spark_tmp_path, data_gen):
data_gen = [int_gen, ArrayGen(data_gen)]
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens, ids=idfn)
@pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
def test_cartesian_join(data_gen, batch_size):
Expand All @@ -257,6 +258,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/334')
@pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens, ids=idfn)
Expand All @@ -271,6 +273,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/334')
@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
Expand All @@ -285,6 +288,7 @@ def do_join(spark):
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
# After 3.1.0 is the min spark version we can drop this
@ignore_order(local=True)
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
@pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
def test_cartesian_join_with_condition(data_gen, batch_size):
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/parquet_write_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def limited_timestamp(nullable=True):
parquet_basic_gen + parquet_struct_gen + parquet_array_gen + parquet_decimal_gens + parquet_map_gens]
parquet_ts_write_options = ['INT96', 'TIMESTAMP_MICROS', 'TIMESTAMP_MILLIS']

@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('parquet_gens', parquet_write_gens_list, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down Expand Up @@ -117,6 +118,7 @@ def test_write_ts_millis(spark_tmp_path, ts_type, ts_rebase):

# There are race conditions around when individual files are read in for partitioned data
@ignore_order
@pytest.mark.order(1) # put expensive test case at the head of the xdist worker queue
@pytest.mark.parametrize('parquet_gen', parquet_part_write_gens, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down
2 changes: 1 addition & 1 deletion jenkins/Dockerfile-blossom.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN python3.8 -m easy_install pip
RUN update-java-alternatives --set /usr/lib/jvm/java-1.8.0-openjdk-amd64

RUN ln -s /usr/bin/python3.8 /usr/bin/python
RUN python -m pip install pytest sre_yield requests pandas pyarrow findspark pytest-xdist pre-commit
RUN python -m pip install pytest sre_yield requests pandas pyarrow findspark pytest-xdist pre-commit pytest-order

# libnuma1 and libgomp1 are required by ucx packaging
RUN apt install -y inetutils-ping expect wget libnuma1 libgomp1
Expand Down

0 comments on commit 7d74179

Please sign in to comment.