From e0b4a44b062ff619cf194e69c988c420a73b599f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 8 Mar 2022 09:42:37 -0700 Subject: [PATCH] Add shim code for getting dateFormat from CSV and JSON read options --- .../spark/sql/catalyst/csv/GpuCsvUtils.scala | 21 ++++++++++++++++ .../sql/catalyst/json/GpuJsonUtils.scala | 21 ++++++++++++++++ .../spark/sql/catalyst/csv/GpuCsvUtils.scala | 24 +++++++++++++++++++ .../sql/catalyst/json/GpuJsonUtils.scala | 24 +++++++++++++++++++ .../spark/rapids/GpuBatchScanExec.scala | 4 ++-- .../catalyst/json/rapids/GpuJsonScan.scala | 4 ++-- 6 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala create mode 100644 sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala create mode 100644 sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala create mode 100644 sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala diff --git a/sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala b/sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala new file mode 100644 index 00000000000..b8736640a9f --- /dev/null +++ b/sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.csv + /** Shim located in the 301until330-all source tree (NOTE(review): directory name suggests Spark 3.0.1 up to, but excluding, 3.3.0 - confirm). dateFormatInRead returns options.dateFormat unchanged. */ +object GpuCsvUtils { + def dateFormatInRead(options: CSVOptions): String = options.dateFormat +} diff --git a/sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala new file mode 100644 index 00000000000..b22da8a4f71 --- /dev/null +++ b/sql-plugin/src/main/301until330-all/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.json + /** Shim located in the 301until330-all source tree (NOTE(review): directory name suggests Spark 3.0.1 up to, but excluding, 3.3.0 - confirm). dateFormatInRead returns options.dateFormat unchanged. */ +object GpuJsonUtils { + def dateFormatInRead(options: JSONOptions): String = options.dateFormat +} diff --git a/sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala b/sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala new file mode 100644 index 00000000000..2b7e5b2193a --- /dev/null +++ b/sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.csv + +import org.apache.spark.sql.catalyst.util.DateFormatter + /** Shim located in the 330-and-later source tree (NOTE(review): presumably Spark 3.3.0 onward - confirm). dateFormatInRead reads options.dateFormatInRead and, via getOrElse, falls back to DateFormatter.defaultPattern when no value is present. */ +object GpuCsvUtils { + def dateFormatInRead(options: CSVOptions): String = + options.dateFormatInRead.getOrElse(DateFormatter.defaultPattern) +} diff --git a/sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala new file mode 100644 index 00000000000..cd112da4e7a --- /dev/null +++ b/sql-plugin/src/main/330+/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.json + +import org.apache.spark.sql.catalyst.util.DateFormatter + /** Shim located in the 330-and-later source tree (NOTE(review): presumably Spark 3.3.0 onward - confirm). dateFormatInRead reads options.dateFormatInRead and, via getOrElse, falls back to DateFormatter.defaultPattern when no value is present. */ +object GpuJsonUtils { + def dateFormatInRead(options: JSONOptions): String = + options.dateFormatInRead.getOrElse(DateFormatter.defaultPattern) +} diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuBatchScanExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuBatchScanExec.scala index 00d5f1cba06..9471c4b8dee 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuBatchScanExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuBatchScanExec.scala @@ -29,7 +29,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.csv.CSVOptions +import org.apache.spark.sql.catalyst.csv.{CSVOptions, GpuCsvUtils} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.util.PermissiveMode @@ -413,5 +413,5 @@ class CSVPartitionReader( } } - override def dateFormat: String = parsedOptions.dateFormat + override def dateFormat: String = GpuCsvUtils.dateFormatInRead(parsedOptions) } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala index 802445aef95..64810bf9551 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala @@ -30,7 +30,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.json.{JSONOptions, 
JSONOptionsInRead} +import org.apache.spark.sql.catalyst.json.{GpuJsonUtils, JSONOptions, JSONOptionsInRead} import org.apache.spark.sql.catalyst.util.PermissiveMode import org.apache.spark.sql.connector.read.{PartitionReader, PartitionReaderFactory} import org.apache.spark.sql.execution.QueryExecutionException @@ -411,6 +411,6 @@ class JsonPartitionReader( } } - override def dateFormat: String = parsedOptions.dateFormat + override def dateFormat: String = GpuJsonUtils.dateFormatInRead(parsedOptions) }