From 48104e1367e2de40ddf8c87de3e5f3c01a13be2d Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Fri, 20 Oct 2023 17:22:14 +0800 Subject: [PATCH 1/2] [Bug] Cached clickhouse table is not used by SQL --- .../scala/xenon/clickhouse/spec/TableSpec.scala | 6 +++--- .../clickhouse/single/ClickHouseSingleSuite.scala | 15 +++++++++++++++ .../scala/xenon/clickhouse/ClickHouseHelper.scala | 6 +++--- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/clickhouse-core/src/main/scala/xenon/clickhouse/spec/TableSpec.scala b/clickhouse-core/src/main/scala/xenon/clickhouse/spec/TableSpec.scala index f5eca277..9b3168be 100644 --- a/clickhouse-core/src/main/scala/xenon/clickhouse/spec/TableSpec.scala +++ b/clickhouse-core/src/main/scala/xenon/clickhouse/spec/TableSpec.scala @@ -27,11 +27,11 @@ case class TableSpec( uuid: String, engine: String, is_temporary: Boolean, - data_paths: Array[String], + data_paths: List[String], metadata_path: String, metadata_modification_time: LocalDateTime, - dependencies_database: Array[String], - dependencies_table: Array[String], + dependencies_database: List[String], + dependencies_table: List[String], create_table_query: String, engine_full: String, partition_key: String, diff --git a/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala b/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala index a452ff98..44b615c3 100644 --- a/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala +++ b/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala @@ -436,4 +436,19 @@ class ClickHouseSingleSuite extends SparkClickHouseSingleTest { createOrReplaceTable() } } + + test("cache table") { + val db = "cache_db" + val tbl = "cache_tbl" + + withSimpleTable(db, tbl, true) { + try { + spark.sql(s"CACHE TABLE $db.$tbl") + val cachedPlan = spark.sql(s"SELECT * FROM $db.$tbl").queryExecution.normalized + .find(node => spark.sharedState.cacheManager.lookupCachedData(node).isDefined) + assert(cachedPlan.isDefined) + } finally + spark.sql(s"UNCACHE TABLE $db.$tbl") + } + } } diff --git a/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala b/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala index 9e2f9da9..850c4b2f 100644 --- a/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala +++ b/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala @@ -185,14 +185,14 @@ trait ClickHouseHelper extends Logging { uuid = tableRow.get("uuid").asText, engine = tableRow.get("engine").asText, is_temporary = tableRow.get("is_temporary").asBoolean, - data_paths = tableRow.get("data_paths").elements().asScala.map(_.asText).toArray, + data_paths = tableRow.get("data_paths").elements().asScala.map(_.asText).toList, metadata_path = tableRow.get("metadata_path").asText, metadata_modification_time = LocalDateTime.parse( tableRow.get("metadata_modification_time").asText, dateTimeFmt.withZone(tz) ), - dependencies_database = tableRow.get("dependencies_database").elements().asScala.map(_.asText).toArray, - dependencies_table = tableRow.get("dependencies_table").elements().asScala.map(_.asText).toArray, + dependencies_database = tableRow.get("dependencies_database").elements().asScala.map(_.asText).toList, + dependencies_table = tableRow.get("dependencies_table").elements().asScala.map(_.asText).toList, create_table_query = tableRow.get("create_table_query").asText, engine_full = tableRow.get("engine_full").asText, partition_key = tableRow.get("partition_key").asText, From 86b8b078cf4b856071247d4396d0686b5a86574c Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Fri, 20 Oct 2023 18:10:13 +0800 Subject: [PATCH 2/2] [Bug] Cached clickhouse table is not used by SQL --- .../src/main/scala/xenon/clickhouse/ClickHouseHelper.scala | 6 +++--- .../spark/sql/clickhouse/single/ClickHouseSingleSuite.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala b/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala index c83c4591..d6ab35ef 100644 --- a/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala +++ b/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseHelper.scala @@ -184,14 +184,14 @@ trait ClickHouseHelper extends Logging { uuid = tableRow.get("uuid").asText, engine = tableRow.get("engine").asText, is_temporary = tableRow.get("is_temporary").asBoolean, - data_paths = tableRow.get("data_paths").elements().asScala.map(_.asText).toArray, + data_paths = tableRow.get("data_paths").elements().asScala.map(_.asText).toList, metadata_path = tableRow.get("metadata_path").asText, metadata_modification_time = LocalDateTime.parse( tableRow.get("metadata_modification_time").asText, dateTimeFmt.withZone(tz) ), - dependencies_database = tableRow.get("dependencies_database").elements().asScala.map(_.asText).toArray, - dependencies_table = tableRow.get("dependencies_table").elements().asScala.map(_.asText).toArray, + dependencies_database = tableRow.get("dependencies_database").elements().asScala.map(_.asText).toList, + dependencies_table = tableRow.get("dependencies_table").elements().asScala.map(_.asText).toList, create_table_query = tableRow.get("create_table_query").asText, engine_full = tableRow.get("engine_full").asText, partition_key = tableRow.get("partition_key").asText, diff --git a/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala b/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala index 44b615c3..d3a259e5 100644 --- a/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala +++ b/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseSingleSuite.scala @@ -444,7 +444,7 @@ class ClickHouseSingleSuite extends SparkClickHouseSingleTest { withSimpleTable(db, tbl, true) { try { spark.sql(s"CACHE TABLE $db.$tbl") - val cachedPlan = spark.sql(s"SELECT * FROM $db.$tbl").queryExecution.normalized + val cachedPlan = spark.sql(s"SELECT * FROM $db.$tbl").queryExecution.commandExecuted .find(node => spark.sharedState.cacheManager.lookupCachedData(node).isDefined) assert(cachedPlan.isDefined) } finally