From e05ed1b893b43b6e6a69e892af23a817370829d7 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Fri, 30 Jul 2021 11:13:09 -0500
Subject: [PATCH] Update spark 301db shim to new ParquetFilter api (#3100)

Signed-off-by: Thomas Graves
---
 .../rapids/shims/spark301db/Spark301dbShims.scala | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/Spark301dbShims.scala b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/Spark301dbShims.scala
index 984b22e3f11..856e4fd8b50 100644
--- a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/Spark301dbShims.scala
+++ b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/Spark301dbShims.scala
@@ -20,6 +20,7 @@ import com.databricks.sql.execution.window.RunningWindowFunctionExec
 import com.nvidia.spark.rapids._
 import com.nvidia.spark.rapids.shims.spark301.Spark301Shims
 import org.apache.hadoop.fs.Path
+import org.apache.parquet.schema.MessageType
 
 import org.apache.spark.sql.rapids.shims.spark301db._
 import org.apache.spark.rdd.RDD
@@ -33,11 +34,13 @@ import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec
 import org.apache.spark.sql.execution.datasources.{FilePartition, HadoopFsRelation, PartitionDirectory, PartitionedFile}
 import org.apache.spark.sql.execution.datasources.json.JsonFileFormat
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
 import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, HashJoin, SortMergeJoinExec}
 import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec
 import org.apache.spark.sql.execution.python.{AggregateInPandasExec, ArrowEvalPythonExec, FlatMapGroupsInPandasExec, MapInPandasExec, WindowInPandasExec}
 import org.apache.spark.sql.execution.window.WindowExecBase
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.rapids.GpuFileSourceScanExec
 import org.apache.spark.sql.rapids.execution.{GpuBroadcastExchangeExecBase, GpuBroadcastNestedLoopJoinExecBase, GpuShuffleExchangeExecBase}
 import org.apache.spark.sql.rapids.execution.python.{GpuAggregateInPandasExecMeta, GpuArrowEvalPythonExec, GpuFlatMapGroupsInPandasExecMeta, GpuMapInPandasExecMeta, GpuPythonUDF, GpuWindowInPandasExecMetaBase}
@@ -47,6 +50,18 @@ class Spark301dbShims extends Spark301Shims {
 
   override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION
 
+  override def getParquetFilters(
+      schema: MessageType,
+      pushDownDate: Boolean,
+      pushDownTimestamp: Boolean,
+      pushDownDecimal: Boolean,
+      pushDownStartWith: Boolean,
+      pushDownInFilterThreshold: Int,
+      caseSensitive: Boolean,
+      datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters =
+    new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
+      pushDownInFilterThreshold, caseSensitive, datetimeRebaseMode)
+
   override def getGpuBroadcastNestedLoopJoinShim(
     left: SparkPlan,
     right: SparkPlan,
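
---

Note (reviewer sketch, not part of the patch): the new override routes ParquetFilters construction through the shim layer so that the constructor signature, which differs across Spark builds, stays out of common code. Below is a minimal, hypothetical sketch of how a caller might use this entry point. The toy schema, the example predicate, and the createFilter call are assumptions for illustration, not code from this PR.

  import com.nvidia.spark.rapids.shims.spark301db.Spark301dbShims
  import org.apache.parquet.schema.MessageTypeParser
  import org.apache.spark.sql.internal.SQLConf
  import org.apache.spark.sql.sources.EqualTo

  object ParquetFiltersSketch {
    def main(args: Array[String]): Unit = {
      val shims = new Spark301dbShims()

      // Toy Parquet schema, parsed for the example (hypothetical).
      val schema = MessageTypeParser.parseMessageType(
        "message spark_schema { optional int32 id; }")

      // Build the version-specific ParquetFilters through the shim, so the
      // caller never references the constructor that changed in this runtime.
      val filters = shims.getParquetFilters(
        schema,
        pushDownDate = true,
        pushDownTimestamp = true,
        pushDownDecimal = true,
        pushDownStartWith = true,
        pushDownInFilterThreshold = 10,
        caseSensitive = false,
        datetimeRebaseMode = SQLConf.LegacyBehaviorPolicy.CORRECTED)

      // Try to convert a Catalyst source filter into a Parquet FilterPredicate;
      // the single-argument createFilter signature is assumed for this version.
      println(filters.createFilter(EqualTo("id", 1)))
    }
  }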