Skip to content

Commit

Permalink
Allow uppercase characters in Iceberg quoted identifiers partition
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Jun 25, 2024
1 parent f8d373f commit 3f5e3ac
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,6 @@ public static void parsePartitionField(PartitionSpec.Builder builder, String fie
public static String fromIdentifierToColumn(String identifier)
{
if (QUOTED_IDENTIFIER_PATTERN.matcher(identifier).matches()) {
// We only support lowercase quoted identifiers for now.
// See https://github.com/trinodb/trino/issues/12226#issuecomment-1128839259
// TODO: Enhance quoted identifiers support in Iceberg partitioning to support mixed case identifiers
// See https://github.com/trinodb/trino/issues/12668
if (!identifier.toLowerCase(ENGLISH).equals(identifier)) {
throw new IllegalArgumentException(format("Uppercase characters in identifier '%s' are not supported.", identifier));
}
return identifier.substring(1, identifier.length() - 1).replace("\"\"", "\"");
}
// Currently, all Iceberg columns are stored in lowercase in the Iceberg metadata files.
Expand Down Expand Up @@ -156,7 +149,7 @@ private static String fromColumnToIdentifier(String column)

public static String quotedName(String name)
{
if (UNQUOTED_IDENTIFIER_PATTERN.matcher(name).matches()) {
if (UNQUOTED_IDENTIFIER_PATTERN.matcher(name).matches() && name.toLowerCase(ENGLISH).equals(name)) {
return name;
}
return '"' + name.replace("\"", "\"\"") + '"';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ public void testParse()
assertParse("truncate(\"nested.nested.value\", 13)", partitionSpec(builder -> builder.truncate("nested.nested.value", 13)));
assertParse("bucket(\"nested.nested.value\", 42)", partitionSpec(builder -> builder.bucket("nested.nested.value", 42)));
assertParse("void(\"nested.nested.value\")", partitionSpec(builder -> builder.alwaysNull("nested.nested.value")));
assertParse("\"MixedTs\"", partitionSpec(builder -> builder.identity("MixedTs")));
assertParse("\"MixedNested.MixedValue\"", partitionSpec(builder -> builder.identity("MixedNested.MixedValue")));
assertParse("year(\"MixedTs\")", partitionSpec(builder -> builder.year("MixedTs")));
assertParse("month(\"MixedTs\")", partitionSpec(builder -> builder.month("MixedTs")));
assertParse("day(\"MixedTs\")", partitionSpec(builder -> builder.day("MixedTs")));
assertParse("hour(\"MixedTs\")", partitionSpec(builder -> builder.hour("MixedTs")));
assertParse("bucket(\"MixedTs\", 42)", partitionSpec(builder -> builder.bucket("MixedTs", 42)));
assertParse("truncate(\"MixedString\", 13)", partitionSpec(builder -> builder.truncate("MixedString", 13)));
assertParse("void(\"MixedString\")", partitionSpec(builder -> builder.alwaysNull("MixedString")));

assertInvalid("bucket()", "Invalid partition field declaration: bucket()");
assertInvalid(".nested", "Invalid partition field declaration: .nested");
Expand All @@ -90,9 +99,9 @@ public void testParse()
assertInvalid("\"test \"with space\"", "Invalid partition field declaration: \"test \"with space\"");
assertInvalid("\"test \"\"\"with space\"", "Invalid partition field declaration: \"test \"\"\"with space\"");
assertInvalid("ABC", "Cannot find source column: abc");
assertInvalid("\"ABC\"", "Uppercase characters in identifier '\"ABC\"' are not supported.");
assertInvalid("\"ABC\"", "Cannot find source column: ABC");
assertInvalid("year(ABC)", "Cannot find source column: abc");
assertInvalid("bucket(\"ABC\", 12)", "Uppercase characters in identifier '\"ABC\"' are not supported.");
assertInvalid("bucket(\"ABC\", 12)", "Cannot find source column: ABC");
assertInvalid("\"nested.list\"", "Cannot partition by non-primitive source field: list<string>");
}

Expand Down Expand Up @@ -140,7 +149,11 @@ private static PartitionSpec partitionSpec(Consumer<PartitionSpec.Builder> consu
NestedField.required(14, "list", ListType.ofRequired(15, StringType.get())),
NestedField.required(16, "nested", Types.StructType.of(
NestedField.required(17, "value", StringType.get()),
NestedField.required(18, "ts", TimestampType.withZone()))))));
NestedField.required(18, "ts", TimestampType.withZone()))))),
NestedField.required(19, "MixedTs", TimestampType.withoutZone()),
NestedField.optional(20, "MixedString", StringType.get()),
NestedField.required(21, "MixedNested", Types.StructType.of(
NestedField.required(22, "MixedValue", StringType.get()))));

PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
consumer.accept(builder);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ public void testParse()
// uppercase
assertParse("ORDER_KEY ASC NULLS LAST", sortOrder(builder -> builder.asc("order_key", NullOrder.NULLS_LAST)));
assertParse("ORDER_KEY DESC NULLS FIRST", sortOrder(builder -> builder.desc("order_key", NullOrder.NULLS_FIRST)));
assertDoesNotParse("\"ORDER_KEY\" ASC NULLS LAST", "Uppercase characters in identifier '\"ORDER_KEY\"' are not supported.");
assertDoesNotParse("\"ORDER_KEY\" DESC NULLS FIRST", "Uppercase characters in identifier '\"ORDER_KEY\"' are not supported.");
assertDoesNotParse("\"ORDER_KEY\" ASC NULLS LAST", "Cannot find field 'ORDER_KEY' .*");
assertDoesNotParse("\"ORDER_KEY\" DESC NULLS FIRST", "Cannot find field 'ORDER_KEY' .*");

// mixed case
assertParse("OrDER_keY Asc NullS LAst", sortOrder(builder -> builder.asc("order_key", NullOrder.NULLS_LAST)));
assertParse("OrDER_keY Desc NullS FIrsT", sortOrder(builder -> builder.desc("order_key", NullOrder.NULLS_FIRST)));
assertDoesNotParse("\"OrDER_keY\" Asc NullS LAst", "Uppercase characters in identifier '\"OrDER_keY\"' are not supported.");
assertDoesNotParse("\"OrDER_keY\" Desc NullS FIrsT", "Uppercase characters in identifier '\"OrDER_keY\"' are not supported.");
assertDoesNotParse("\"OrDER_keY\" Asc NullS LAst", "Cannot find field 'OrDER_keY' .*");
assertDoesNotParse("\"OrDER_keY\" Desc NullS FIrsT", "Cannot find field 'OrDER_keY' .*");

assertParse("comment", sortOrder(builder -> builder.asc("comment")));
assertParse("\"comment\"", sortOrder(builder -> builder.asc("comment")));
Expand Down Expand Up @@ -106,13 +106,13 @@ private static void assertParse(@Language("SQL") String value, SortOrder expecte

private static void assertDoesNotParse(@Language("SQL") String value)
{
assertDoesNotParse(value, "Unable to parse sort field: [%s]".formatted(value));
assertDoesNotParse(value, "\\QUnable to parse sort field: [%s]".formatted(value));
}

private static void assertDoesNotParse(@Language("SQL") String value, String expectedMessage)
private static void assertDoesNotParse(@Language("SQL") String value, @Language("RegExp") String expectedMessage)
{
assertThatThrownBy(() -> parseField(value))
.hasMessage(expectedMessage);
.hasMessageMatching(expectedMessage);
}

private static SortOrder parseField(String value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1300,8 +1300,20 @@ public void testPartitioningWithMixedCaseColumnUnsupportedInTrino()
sparkTableName));
assertQueryFailure(() -> onTrino().executeQuery("ALTER TABLE " + trinoTableName + " SET PROPERTIES partitioning = ARRAY['mIxEd_COL']"))
.hasMessageContaining("Unable to parse partitioning value");
assertQueryFailure(() -> onTrino().executeQuery("ALTER TABLE " + trinoTableName + " SET PROPERTIES partitioning = ARRAY['\"mIxEd_COL\"']"))
.hasMessageContaining("Unable to parse partitioning value");
onTrino().executeQuery("ALTER TABLE " + trinoTableName + " SET PROPERTIES partitioning = ARRAY['\"mIxEd_COL\"']");

onTrino().executeQuery("INSERT INTO " + trinoTableName + " VALUES (1, 'trino')");
onSpark().executeQuery("INSERT INTO " + sparkTableName + " VALUES (2, 'spark')");

List<Row> expected = ImmutableList.of(row(1, "trino"), row(2, "spark"));
assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).contains(expected);
assertThat(onSpark().executeQuery("SELECT * FROM " + sparkTableName)).contains(expected);

assertThat((String) onTrino().executeQuery("SHOW CREATE TABLE " + trinoTableName).getOnlyValue())
.contains("partitioning = ARRAY['\"mIxEd_COL\"']");
assertThat((String) onSpark().executeQuery("SHOW CREATE TABLE " + sparkTableName).getOnlyValue())
.contains("PARTITIONED BY (mIxEd_COL)");

onTrino().executeQuery("DROP TABLE " + trinoTableName);
}

Expand Down

0 comments on commit 3f5e3ac

Please sign in to comment.