From 5dbdbf25bb184428d55dbb169c34f12b35229fd7 Mon Sep 17 00:00:00 2001
From: zhanga5
Date: Wed, 18 Aug 2021 17:07:31 +0800
Subject: [PATCH] Split pytest by 'slow_test' tag and run from different k8s pods to reduce premerge job duration (#3241)

* Add 'slow_test' tag for pytest files that require a long duration or large memory

Signed-off-by: Alex Zhang

* Split pytest by the 'slow_test' tag and run from different k8s pods to reduce premerge job duration, and keep unit tests for only one Spark version in the premerge job

Signed-off-by: Alex Zhang

* Simplify the description of the 'slow_test' mark

Signed-off-by: Alex Zhang
Signed-off-by: Raza Jafri
---
 integration_tests/pytest.ini                  |  1 +
 .../src/main/python/join_test.py              |  4 ++
 integration_tests/src/main/python/udf_test.py |  3 +
 jenkins/spark-premerge-build.sh               | 62 ++++++++++---------
 4 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/integration_tests/pytest.ini b/integration_tests/pytest.ini
index 8e2750ddd438..cf1f2f30d2d7 100644
--- a/integration_tests/pytest.ini
+++ b/integration_tests/pytest.ini
@@ -25,3 +25,4 @@ markers =
     rapids_udf_example_native: test UDFs that require custom cuda compilation
     validate_execs_in_gpu_plan([execs]): Exec class names to validate they exist in the GPU plan.
     shuffle_test: Mark to include test in the RAPIDS Shuffle Manager
+    slow_test: Mark tests that run for a long time or require large memory, to help split tests
diff --git a/integration_tests/src/main/python/join_test.py b/integration_tests/src/main/python/join_test.py
index 12fd1e7e75c8..9e3d2e7128a3 100644
--- a/integration_tests/src/main/python/join_test.py
+++ b/integration_tests/src/main/python/join_test.py
@@ -20,6 +20,10 @@
 from marks import ignore_order, allow_non_gpu, incompat, validate_execs_in_gpu_plan
 from spark_session import with_cpu_session, with_spark_session
 
+
+# Mark all tests in the current file as slow tests since they take ~30 minutes in total
+pytestmark = pytest.mark.slow_test
+
 all_gen = [StringGen(), ByteGen(), ShortGen(), IntegerGen(), LongGen(),
            BooleanGen(), DateGen(), TimestampGen(), null_gen,
            pytest.param(FloatGen(), marks=[incompat]),
diff --git a/integration_tests/src/main/python/udf_test.py b/integration_tests/src/main/python/udf_test.py
index 02c257a91670..c9b8b45d3c15 100644
--- a/integration_tests/src/main/python/udf_test.py
+++ b/integration_tests/src/main/python/udf_test.py
@@ -40,6 +40,9 @@
 import pandas as pd
 from typing import Iterator, Tuple
 
+# Mark all tests in the current file as slow tests since they require more memory than the others
+pytestmark = pytest.mark.slow_test
+
 arrow_udf_conf = {
     'spark.sql.execution.arrow.pyspark.enabled': 'true',
     'spark.rapids.sql.exec.WindowInPandasExec': 'true',
diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index b2c917530207..e792b8fd956f 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -35,24 +35,21 @@ mvn_verify() {
     # file size check for pull request. The size of a committed file should be less than 1.5MiB
     pre-commit run check-added-large-files --from-ref $BASE_REF --to-ref HEAD
 
-    ARTF_ROOT="$WORKSPACE/.download"
-    MVN_GET_CMD="mvn org.apache.maven.plugins:maven-dependency-plugin:2.8:get -B \
-        $MVN_URM_MIRROR -DremoteRepositories=$URM_URL \
-        -Ddest=$ARTF_ROOT"
+    # Here we run only the Python integration tests tagged with 'slow_test', which need a long duration or large memory.
+    # This split helps balance test duration and memory consumption between the two k8s pods that run 'mvn_verify()' and 'unit_test()' in parallel.
+    mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS="slow_test" \
+        -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=5 -Dcuda.version=$CUDA_CLASSIFIER
 
-    rm -rf $ARTF_ROOT && mkdir -p $ARTF_ROOT
-
-    # Download a full version of spark
-    $MVN_GET_CMD \
-        -DgroupId=org.apache -DartifactId=spark -Dversion=$SPARK_VER -Dclassifier=bin-hadoop3.2 -Dpackaging=tgz
-
-    export SPARK_HOME="$ARTF_ROOT/spark-$SPARK_VER-bin-hadoop3.2"
-    export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"
-    tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \
-        rm -f $SPARK_HOME.tgz
-
-    mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' \
-        -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=3 -Dcuda.version=$CUDA_CLASSIFIER
+    # Run the unit tests for other Spark versions, but don't run the full Python integration tests
+    # NOT ALL TESTS NEEDED FOR PREMERGE
+    # Just test one 3.0.X version (base version covers this) and one 3.1.X version.
+    # All other shims' tests should be covered in the nightly pipelines
+    # Disabled until the Spark 3.2 source incompatibility is fixed, see https://github.com/NVIDIA/spark-rapids/issues/2052
+    # env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
+    # env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
+    # env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark304tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
+    # env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark312tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
+    env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
 
     # The jacoco coverage should have been collected, but because of how the shade plugin
     # works and jacoco we need to clean some things up so jacoco will only report for the
@@ -96,17 +93,10 @@ rapids_shuffle_smoke_test() {
 }
 
 unit_test() {
-    echo "Run unit testings..."
-    # Run the unit tests for other Spark versions but dont run full python integration tests
-    # NOT ALL TESTS NEEDED FOR PREMERGE
-    # Just test one 3.0.X version (base version covers this) and one 3.1.X version.
-    # All others shims test should be covered in nightly pipelines
-    # Disabled until Spark 3.2 source incompatibility fixed, see https://github.com/NVIDIA/spark-rapids/issues/2052
-    #env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
-    env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
-    env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark304tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
-    env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark312tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
-    env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
+    # TODO: this function should be named 'integration_test()', but renaming it would break backward compatibility. Need to find a way to fix this.
+    echo "Run integration tests..."
+ mvn -U -B $MVN_URM_MIRROR clean package -DskipTests=true -Dcuda.version=$CUDA_CLASSIFIER + TEST_TAGS="not slow_test" TEST_TYPE="pre-commit" TEST_PARALLEL=5 ./integration_tests/run_pyspark_from_build.sh } @@ -114,6 +104,22 @@ nvidia-smi . jenkins/version-def.sh +ARTF_ROOT="$WORKSPACE/.download" +MVN_GET_CMD="mvn org.apache.maven.plugins:maven-dependency-plugin:2.8:get -B \ + $MVN_URM_MIRROR -DremoteRepositories=$URM_URL \ + -Ddest=$ARTF_ROOT" + +rm -rf $ARTF_ROOT && mkdir -p $ARTF_ROOT + +# Download a full version of spark +$MVN_GET_CMD \ + -DgroupId=org.apache -DartifactId=spark -Dversion=$SPARK_VER -Dclassifier=bin-hadoop3.2 -Dpackaging=tgz + +export SPARK_HOME="$ARTF_ROOT/spark-$SPARK_VER-bin-hadoop3.2" +export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH" +tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \ + rm -f $SPARK_HOME.tgz + case $BUILD_TYPE in all)
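Note on the resulting split: the two premerge pods run complementary pytest marker expressions. mvn_verify() passes -Dpytest.TEST_TAGS="slow_test" through Maven, while unit_test() exports TEST_TAGS="not slow_test" before invoking run_pyspark_from_build.sh. A minimal sketch for reproducing the same split locally, assuming run_pyspark_from_build.sh forwards TEST_TAGS to pytest as a -m marker expression and SPARK_HOME points at a local Spark install:

    # Only the tests marked 'slow_test' (what the mvn_verify() pod covers)
    TEST_TAGS="slow_test" TEST_TYPE="pre-commit" TEST_PARALLEL=5 ./integration_tests/run_pyspark_from_build.sh

    # Everything except the 'slow_test'-marked tests (what the unit_test() pod covers)
    TEST_TAGS="not slow_test" TEST_TYPE="pre-commit" TEST_PARALLEL=5 ./integration_tests/run_pyspark_from_build.sh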
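To see which tests each side of the split picks up without executing them, pytest's marker filtering can be used directly against the integration test sources. A rough sketch, assuming an environment where the integration_tests modules import cleanly (for example with SPARK_HOME exported as above):

    # List the tests selected by 'slow_test' (join_test.py and udf_test.py after this patch)
    pytest --collect-only -q -m "slow_test" integration_tests/src/main/python

    # List the remaining tests, i.e. what the 'not slow_test' pod will run
    pytest --collect-only -q -m "not slow_test" integration_tests/src/main/python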