Commit

Unshim many SparkShim interfaces [databricks] (#5031)
* Unshim Arrow

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim Alluxio

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim Kryo

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim getGpuColumnarToRowTransition

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim createTable

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim hasSeparateINT96RebaseConf

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim getScalaUDFAsExpression

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim getMapSizesByExecutorId

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim getFileSourceMaxMetadataValueLength

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim getGpuShuffleExchangeExec

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim SortOrder

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Remove HadoopFSUtils shim

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim getLegacyComplexTypeToString

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim shouldFailDivByZero

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Unshim shouldFailOnElementNotExists

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Remove registerKryo from Spark31XdbShims

* Unshim shouldFallbackOnAnsiTimestamp
jlowe authored Mar 24, 2022
1 parent 384f0d5 commit 0d7750d
Showing 39 changed files with 244 additions and 915 deletions.
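
Every change in this commit follows the same mechanical pattern: a call that previously went through the per-Spark-version SparkShimImpl facade is replaced by a direct call to the Spark API, which is now identical across all Spark versions the plugin still builds against, so the corresponding shim method can be deleted. A deliberately simplified before/after sketch of that shape (the ExampleShim trait, its method, and the particular config it reads are hypothetical stand-ins, not the plugin's real SparkShims interface):

    // Hypothetical sketch of the "unshim" pattern applied throughout this commit.
    import org.apache.spark.sql.internal.SQLConf

    // Before: behavior that once differed between Spark releases sat behind a
    // per-version facade, and every caller paid the indirection.
    trait ExampleShim {
      def shouldFailOnError(conf: SQLConf): Boolean
    }
    object ExampleShimImpl extends ExampleShim {
      override def shouldFailOnError(conf: SQLConf): Boolean = conf.ansiEnabled
    }

    // After: the underlying API is the same on every supported Spark version, so
    // callers read it directly and the facade method is removed.
    object ExampleCaller {
      def shouldFailOnError(conf: SQLConf): Boolean = conf.ansiEnabled
    }

The diffs below show two concrete instances of this pattern: direct SortOrder construction and direct shuffle block-size lookup.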


@@ -18,7 +18,7 @@ package org.apache.spark.sql.rapids.execution.python.shims

 import com.nvidia.spark.rapids._
 import com.nvidia.spark.rapids.python.PythonWorkerSemaphore
-import com.nvidia.spark.rapids.shims.{ShimUnaryExecNode, SparkShimImpl}
+import com.nvidia.spark.rapids.shims.ShimUnaryExecNode
 
 import org.apache.spark.TaskContext
 import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType}
@@ -96,7 +96,7 @@ case class GpuFlatMapGroupsInPandasExec(
 }
 
   override def requiredChildOrdering: Seq[Seq[SortOrder]] =
-    Seq(groupingAttributes.map(SparkShimImpl.sortOrder(_, Ascending)))
+    Seq(groupingAttributes.map(SortOrder(_, Ascending)))
 
   private val pandasFunction = func.asInstanceOf[GpuPythonUDF].func
 
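
The change above needs no shim because, on every Spark version the plugin now supports, the SortOrder companion exposes a single stable apply taking a child expression, a direction, and an optional sameOrderExpressions sequence that defaults to empty. A minimal sketch of the direct construction, mirroring the requiredChildOrdering line in the diff (the wrapper object and method names here are illustrative):

    import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, SortOrder}

    object RequiredOrderingSketch {
      // One ascending SortOrder per grouping attribute, built straight from the
      // catalyst API instead of going through SparkShimImpl.sortOrder.
      def requiredOrdering(groupingAttributes: Seq[Attribute]): Seq[Seq[SortOrder]] =
        Seq(groupingAttributes.map(SortOrder(_, Ascending)))
    }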
@@ -16,8 +16,6 @@

 package org.apache.spark.rapids.shims
 
-import com.nvidia.spark.rapids.shims.SparkShimImpl
-
 import org.apache.spark.{MapOutputTrackerMaster, Partition, ShuffleDependency, SparkEnv, TaskContext}
 import org.apache.spark.shuffle.ShuffleReader
 import org.apache.spark.sql.execution.{CoalescedPartitionSpec, PartialMapperPartitionSpec, PartialReducerPartitionSpec}
@@ -57,7 +55,6 @@ object ShuffledBatchRDDUtil {
       dependency: ShuffleDependency[Int, ColumnarBatch, ColumnarBatch],
       sqlMetricsReporter: SQLShuffleReadMetricsReporter):
     (ShuffleReader[Nothing, Nothing], Long) = {
-    val shim = SparkShimImpl
     split.asInstanceOf[ShuffledBatchRDDPartition].spec match {
       case CoalescedPartitionSpec(startReducerIndex, endReducerIndex) =>
         val reader = SparkEnv.get.shuffleManager.getReader(
@@ -66,7 +63,7 @@
           endReducerIndex,
           context,
           sqlMetricsReporter)
-        val blocksByAddress = shim.getMapSizesByExecutorId(
+        val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
           dependency.shuffleHandle.shuffleId, 0, Int.MaxValue, startReducerIndex, endReducerIndex)
         val partitionSize = blocksByAddress.flatMap(_._2).map(_._2).sum
         (reader, partitionSize)
@@ -80,7 +77,7 @@
           reducerIndex + 1,
           context,
           sqlMetricsReporter)
-        val blocksByAddress = shim.getMapSizesByExecutorId(
+        val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
           dependency.shuffleHandle.shuffleId, 0, Int.MaxValue, reducerIndex,
           reducerIndex + 1)
         val partitionSize = blocksByAddress.flatMap(_._2)
@@ -96,7 +93,7 @@
           endReducerIndex,
           context,
           sqlMetricsReporter)
-        val blocksByAddress = shim.getMapSizesByExecutorId(
+        val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
           dependency.shuffleHandle.shuffleId, 0, Int.MaxValue, startReducerIndex, endReducerIndex)
         val partitionSize = blocksByAddress.flatMap(_._2)
           .filter(_._3 == mapIndex)
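
Likewise, ShuffledBatchRDDUtil no longer needs a shim for shuffle block sizes: the five-argument getMapSizesByExecutorId on the MapOutputTracker obtained from SparkEnv has the same shape on every supported Spark version. A rough sketch of the direct lookup the diff switches to, assuming the Spark 3.1+ return type of (BlockManagerId, Seq[(BlockId, blockSize, mapIndex)]) pairs (the wrapper object and method name are illustrative):

    import org.apache.spark.SparkEnv

    object ShufflePartitionSizeSketch {
      // Total bytes this reducer range will fetch: sum the reported size of every
      // shuffle block returned for the given reducer (partition) range.
      def partitionBytes(shuffleId: Int, startReducer: Int, endReducer: Int): Long = {
        val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
          shuffleId, 0, Int.MaxValue, startReducer, endReducer)
        blocksByAddress.flatMap(_._2).map(_._2).sum
      }
    }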
