From 1417b3fa040734c5dfc5cc8f7ca48b3ebe3de046 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Thu, 8 Jun 2023 01:13:34 +0000 Subject: [PATCH] add names in struct type --- .../substrait/type/StructNode.java | 11 +++- .../substrait/type/TypeBuilder.java | 5 ++ .../substrait/proto/substrait/type.proto | 1 + .../expression/ConverterUtils.scala | 8 +-- .../sql/GlutenStatisticsCollectionSuite.scala | 54 ------------------- 5 files changed, 21 insertions(+), 58 deletions(-) diff --git a/gluten-core/src/main/java/io/glutenproject/substrait/type/StructNode.java b/gluten-core/src/main/java/io/glutenproject/substrait/type/StructNode.java index 8b7fbb717ca3..8dc02f116a7b 100644 --- a/gluten-core/src/main/java/io/glutenproject/substrait/type/StructNode.java +++ b/gluten-core/src/main/java/io/glutenproject/substrait/type/StructNode.java @@ -25,6 +25,13 @@ public class StructNode implements TypeNode, Serializable { private final Boolean nullable; private final ArrayList types = new ArrayList<>(); + private final ArrayList names = new ArrayList<>(); + + public StructNode(Boolean nullable, ArrayList types, ArrayList names) { + this.nullable = nullable; + this.types.addAll(types); + this.names.addAll(names); + } public StructNode(Boolean nullable, ArrayList types) { this.nullable = nullable; @@ -43,7 +50,9 @@ public Type toProtobuf() { for (TypeNode typeNode : types) { structBuilder.addTypes(typeNode.toProtobuf()); } - + for (String name : names) { + structBuilder.addNames(name); + } Type.Builder builder = Type.newBuilder(); builder.setStruct(structBuilder.build()); return builder.build(); diff --git a/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java b/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java index c6cd3694c89b..abbe88cf4427 100644 --- a/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java +++ b/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java @@ -79,6 +79,11 @@ public static TypeNode makeTimestamp(Boolean nullable) { return new TimestampTypeNode(nullable); } + public static TypeNode makeStruct(Boolean nullable, ArrayList types, + ArrayList names) { + return new StructNode(nullable, types, names); + } + public static TypeNode makeStruct(Boolean nullable, ArrayList types) { return new StructNode(nullable, types); } diff --git a/gluten-core/src/main/resources/substrait/proto/substrait/type.proto b/gluten-core/src/main/resources/substrait/proto/substrait/type.proto index 5d4a8f918b83..05bde7ff54ab 100644 --- a/gluten-core/src/main/resources/substrait/proto/substrait/type.proto +++ b/gluten-core/src/main/resources/substrait/proto/substrait/type.proto @@ -170,6 +170,7 @@ message Type { repeated Type types = 1; uint32 type_variation_reference = 2; Nullability nullability = 3; + repeated string names = 4; } message List { diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala index 19f96d1b0d9f..941ba6a5c144 100644 --- a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala +++ b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala @@ -184,17 +184,19 @@ object ConverterUtils extends Logging { case TimestampType => TypeBuilder.makeTimestamp(nullable) case m: MapType => - TypeBuilder.makeMap(nullable, getTypeNode(m.keyType, false), + TypeBuilder.makeMap(nullable, getTypeNode(m.keyType, nullable = false), getTypeNode(m.valueType, m.valueContainsNull)) case a: ArrayType => TypeBuilder.makeList(nullable, getTypeNode(a.elementType, a.containsNull)) case s: StructType => val fieldNodes = new java.util.ArrayList[TypeNode] + val fieldNames = new java.util.ArrayList[String] for (structField <- s.fields) { fieldNodes.add(getTypeNode(structField.dataType, structField.nullable)) + fieldNames.add(structField.name) } - TypeBuilder.makeStruct(nullable, fieldNodes) - case n: NullType => + TypeBuilder.makeStruct(nullable, fieldNodes, fieldNames) + case _: NullType => TypeBuilder.makeNothing() case unknown => throw new UnsupportedOperationException(s"Type $unknown not supported.") diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStatisticsCollectionSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStatisticsCollectionSuite.scala index 0cf1816f222d..86324efe636f 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStatisticsCollectionSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStatisticsCollectionSuite.scala @@ -17,59 +17,5 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.plans.logical.ColumnStat -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils -import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC -import org.apache.spark.sql.functions.timestamp_seconds -import org.apache.spark.sql.types.{DataType, DateType, TimestampType} - -import java.util.TimeZone -import java.util.concurrent.TimeUnit - class GlutenStatisticsCollectionSuite extends StatisticsCollectionSuite with GlutenSQLTestsTrait { - - import testImplicits._ - - test(GlutenTestConstants.GLUTEN_TEST + - "store and retrieve column stats in different time zones") { - // TODO: bug fix on TableScan. - // val (start, end) = (0, TimeUnit.DAYS.toSeconds(2)) - val (start, end) = (0, 200) - - def checkTimestampStats(t: DataType, - srcTimeZone: TimeZone, - dstTimeZone: TimeZone)(checker: ColumnStat => Unit): Unit = { - val table = "time_table" - val column = "T" - val original = TimeZone.getDefault - try { - withTable(table) { - TimeZone.setDefault(srcTimeZone) - spark.range(start, end) - .select(timestamp_seconds($"id").cast(t).as(column)) - .write.saveAsTable(table) - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS $column") - - TimeZone.setDefault(dstTimeZone) - val stats = getCatalogTable(table) - .stats.get.colStats(column).toPlanStat(column, t) - checker(stats) - } - } finally { - TimeZone.setDefault(original) - } - } - - DateTimeTestUtils.outstandingZoneIds.foreach { zid => - val timeZone = TimeZone.getTimeZone(zid) - checkTimestampStats(DateType, TimeZoneUTC, timeZone) { stats => - assert(stats.min.get.asInstanceOf[Int] == TimeUnit.SECONDS.toDays(start)) - assert(stats.max.get.asInstanceOf[Int] == TimeUnit.SECONDS.toDays(end - 1)) - } - checkTimestampStats(TimestampType, TimeZoneUTC, timeZone) { stats => - assert(stats.min.get.asInstanceOf[Long] == TimeUnit.SECONDS.toMicros(start)) - assert(stats.max.get.asInstanceOf[Long] == TimeUnit.SECONDS.toMicros(end - 1)) - } - } - } }