Remove unnecessary copies of ParquetCachedBatchSerializer (#2948)
* removed unnecessary copies of ParquetCachedBatchSerializer

Signed-off-by: Raza Jafri <rjafri@nvidia.com>

* optimized import

Signed-off-by: Raza Jafri <rjafri@nvidia.com>

* removed serializer for 320

Signed-off-by: Raza Jafri <rjafri@nvidia.com>

* updated docs

Signed-off-by: Raza Jafri <rjafri@nvidia.com>

* changed package

Signed-off-by: Raza Jafri <rjafri@nvidia.com>

* Update jenkins/spark-tests.sh

Co-authored-by: Jason Lowe <jlowe@nvidia.com>

Co-authored-by: Raza Jafri <rjafri@nvidia.com>
Co-authored-by: Jason Lowe <jlowe@nvidia.com>
3 people authored Jul 20, 2021
1 parent f429d1e commit 173a822
Showing 7 changed files with 4 additions and 1,457 deletions.
13 changes: 2 additions & 11 deletions docs/additional-functionality/cache-serializer.md
````diff
@@ -35,21 +35,12 @@ nav_order: 2
 should not be used. Using the serializer with negative decimal scales will generate
 an error at runtime.
 
-Make sure to use the right package corresponding to the spark version you are using. To use
-this serializer with Spark 3.1.1 please run Spark with the following conf.
+To use this serializer please run Spark with the following conf.
 ```
 spark-shell --conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer
 ```
-See the below table for all the names of the serializers corresponding to the Spark
-versions
-
-
-| Spark version | Serializer name |
-| ------ | -----|
-| 3.1.1 | com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer |
-| 3.1.2 | com.nvidia.spark.rapids.shims.spark312.ParquetCachedBatchSerializer |
-| 3.1.3 | com.nvidia.spark.rapids.shims.spark313.ParquetCachedBatchSerializer |
-| 3.2.0 | com.nvidia.spark.rapids.shims.spark320.ParquetCachedBatchSerializer |
 
 ## Supported Types
 
 All types are supported on the CPU, on the GPU, ArrayType, MapType and BinaryType are not
````
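The simplified doc above points every supported Spark version at the single `spark311` serializer class. A minimal shell sketch of assembling that invocation (the command is only echoed here — actually launching `spark-shell` requires a Spark install and the RAPIDS Accelerator jar on the classpath):

```shell
# Build the spark-shell invocation from the updated doc.
# The class name is the one the doc now uses for all supported versions.
SERIALIZER="com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer"
CMD="spark-shell --conf spark.sql.cache.serializer=${SERIALIZER}"
echo "${CMD}"
```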
4 changes: 1 addition & 3 deletions jenkins/spark-tests.sh
```diff
@@ -103,11 +103,9 @@ spark-submit $BASE_SPARK_SUBMIT_ARGS --jars $RAPIDS_TEST_JAR ./runtests.py -v -r
 spark-submit $BASE_SPARK_SUBMIT_ARGS $CUDF_UDF_TEST_ARGS --jars $RAPIDS_TEST_JAR ./runtests.py -m "cudf_udf" -v -rfExXs --cudf_udf --test_type=$TEST_TYPE
 #only run cache tests with our serializer in nightly test for Spark version >= 3.1.1
 if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
-  SHIM_PACKAGE=$(echo ${SPARK_VER} | sed 's/\.//g' | sed 's/-SNAPSHOT//')
-  spark-submit ${BASE_SPARK_SUBMIT_ARGS} --conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark${SHIM_PACKAGE}.ParquetCachedBatchSerializer --jars $RAPIDS_TEST_JAR \
+  spark-submit ${BASE_SPARK_SUBMIT_ARGS} --conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer --jars $RAPIDS_TEST_JAR \
     ./runtests.py -v -rfExXs --std_input_path="$WORKSPACE/integration_tests/src/test/resources/" -k cache_test.py -x
 fi
 popd
 stop-slave.sh
 stop-master.sh
```
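The removed `SHIM_PACKAGE` line mapped a Spark version string onto a per-version shim package suffix. A standalone sketch of that sed pipeline, with sample version strings chosen for illustration:

```shell
# Reproduce the removed SHIM_PACKAGE computation: strip the dots from the
# version string, then drop any -SNAPSHOT suffix, e.g. "3.2.0-SNAPSHOT" -> "320".
shim_for() {
  echo "$1" | sed 's/\.//g' | sed 's/-SNAPSHOT//'
}

shim_for "3.1.1"            # prints 311
shim_for "3.2.0-SNAPSHOT"   # prints 320
```

With the copies of the serializer removed, this version-to-package mapping is no longer needed and the nightly script pins the single `spark311` class directly.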

