From fc77b142073980e6778388f8c3becd91bfaa127c Mon Sep 17 00:00:00 2001
From: Vladimir Golubev
Date: Fri, 1 Nov 2024 19:06:00 +0100
Subject: [PATCH] [SPARK-50204][SQL] Factor out `HiveTableRelation` read path resolution

### What changes were proposed in this pull request?

Factor out `HiveTableRelation` resolution into a separate `RelationConversions` method to reuse it in the single-pass Analyzer.

### Why are the changes needed?

Context: https://issues.apache.org/jira/browse/SPARK-49834

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests.

### Was this patch authored or co-authored using generative AI tooling?

copilot.vim.

Closes #48738 from vladimirg-db/vladimirg-db/refactor-relation-converters.

Authored-by: Vladimir Golubev
Signed-off-by: Max Gekk
---
 .../apache/spark/sql/hive/HiveStrategies.scala | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index 12366f351331f..87ce809914e10 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils, InsertIntoDataSourceDirCommand}
-import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy, HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelationWithTable}
+import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy, HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation, LogicalRelationWithTable}
 import org.apache.spark.sql.hive.execution._
 import org.apache.spark.sql.hive.execution.HiveScriptTransformationExec
 import org.apache.spark.sql.hive.execution.InsertIntoHiveTable.BY_CTAS
@@ -240,9 +240,8 @@ case class RelationConversions(
           query, overwrite, ifPartitionNotExists, byName)

       // Read path
-      case relation: HiveTableRelation
-          if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) =>
-        metastoreCatalog.convert(relation, isWrite = false)
+      case relation: HiveTableRelation if doConvertHiveTableRelationForRead(relation) =>
+        convertHiveTableRelationForRead(relation)

       // CTAS path
       // This `InsertIntoHiveTable` is derived from `CreateHiveTableAsSelectCommand`,
@@ -287,6 +286,15 @@ case class RelationConversions(
           convertProvider(storage), query, overwrite)
     }
   }
+
+  private[hive] def doConvertHiveTableRelationForRead(relation: HiveTableRelation): Boolean = {
+    DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation)
+  }
+
+  private[hive] def convertHiveTableRelationForRead(
+      relation: HiveTableRelation): LogicalRelation = {
+    metastoreCatalog.convert(relation, isWrite = false)
+  }
 }

 private[hive] trait HiveStrategies {