Skip to content

Commit

Permalink
[Coral-Spark] Modify the Coral Hive parser and the Coral Spark writer dialect to generate Spark-compliant escaped string literals (#473)
Browse files Browse the repository at this point in the history
  • Loading branch information
rzhang10 authored Nov 7, 2023
1 parent 3e6fe43 commit d04c55e
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.annotation.Nullable;
Expand Down Expand Up @@ -690,12 +692,33 @@ protected SqlNode visitIdentifier(ASTNode node, ParseContext ctx) {
return new SqlIdentifier(node.getText(), ZERO);
}

/**
 * Converts a quoted string literal token into a {@link SqlLiteral}.
 *
 * <p>Backslash escapes before quotes are stripped via {@link #removeBackslashBeforeQuotes},
 * so patterns like {@code I'm} or {@code abc"xyz} are held unescaped in the in-memory
 * SqlNode. The escaped string representation is produced only when the SqlNode is written
 * out by the SqlWriter, which lets each SqlDialect choose its own escaping mechanism.
 *
 * @param node AST node whose text is the literal, including its surrounding quote characters
 * @param ctx parse context (unused here)
 * @return a character-string SqlLiteral with quotes stripped and quote escapes removed
 */
@Override
protected SqlNode visitStringLiteral(ASTNode node, ParseContext ctx) {
  // TODO: Add charset here. UTF-8 is not supported by calcite
  String text = node.getText();
  // The token must at least contain its opening and closing quote characters.
  checkState(text.length() >= 2);
  return SqlLiteral.createCharString(removeBackslashBeforeQuotes(text.substring(1, text.length() - 1)), ZERO);
}

/**
 * Removes the single backslash preceding each quote character in the input.
 *
 * <p>For example {@code abc\'xyz} becomes {@code abc'xyz} and {@code abc\"xyz} becomes
 * {@code abc"xyz}, so literals are stored unescaped in memory; dialect-specific escaping
 * is re-applied by the SqlWriter when the SqlNode is rendered to a string.
 *
 * @param input literal text with the surrounding quotes already stripped
 * @return the input with each {@code \'} collapsed to {@code '} and each {@code \"} to {@code "}
 */
private String removeBackslashBeforeQuotes(String input) {
  // Matches a \' or \" pair; the capture group keeps the quote and drops the backslash.
  return input.replaceAll("\\\\(['\"])", "$1");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
package com.linkedin.coral.spark.dialect;

import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.config.NullCollation;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDialect;
Expand All @@ -30,8 +31,12 @@
*/
public class SparkSqlDialect extends SqlDialect {

/**
 * Default dialect context for Spark SQL: single-quoted string literals whose embedded
 * quotes are backslash-escaped (e.g. {@code 'abc\'xyz'}), NULL collated low, and
 * case-preserving, case-insensitive identifier handling.
 */
public static final SqlDialect.Context DEFAULT_CONTEXT =
    SqlDialect.EMPTY_CONTEXT.withDatabaseProduct(DatabaseProduct.SPARK).withLiteralQuoteString("'")
        .withLiteralEscapedQuoteString("\\'").withNullCollation(NullCollation.LOW)
        .withUnquotedCasing(Casing.UNCHANGED).withQuotedCasing(Casing.UNCHANGED).withCaseSensitive(false);

/** Shared singleton instance of the Spark SQL dialect. */
public static final SparkSqlDialect INSTANCE = new SparkSqlDialect(DEFAULT_CONTEXT);

private SparkSqlDialect(Context context) {
super(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,60 @@ public void testStructColProjectionWithTableAliasPrefix() {
assertEquals(expectedSql, targetSql);
}

@Test
public void testUnescapedSingleQuote() {
  // A plain single-quoted literal with no embedded quotes must pass through untouched.
  RelNode rel = TestUtils.toRelNode("SELECT 'abc' col1 FROM default.complex");
  String actual = createCoralSpark(rel).getSparkSql();
  assertEquals(actual, "SELECT 'abc' col1\nFROM default.complex complex");
}

@Test
public void testUnescapedDoubleQuote() {
  // Hive's double-quoted literal should be rewritten to Spark's single-quoted form.
  RelNode rel = TestUtils.toRelNode("SELECT \"abc\" col1 FROM default.complex");
  String actual = createCoralSpark(rel).getSparkSql();
  assertEquals(actual, "SELECT 'abc' col1\nFROM default.complex complex");
}

@Test
public void testSingleQuoteInsideSingleQuote() {
  // Escaped single quotes inside a single-quoted literal stay backslash-escaped in Spark SQL.
  RelNode rel = TestUtils.toRelNode("SELECT 'abc[\\'xyz\\']' col1 FROM default.complex");
  String actual = createCoralSpark(rel).getSparkSql();
  assertEquals(actual, "SELECT 'abc[\\'xyz\\']' col1\nFROM default.complex complex");
}

@Test
public void testSingleQuoteInsideDoubleQuote() {
  // Bare single quotes inside a double-quoted literal gain a backslash once the
  // literal is re-quoted with single quotes for Spark.
  RelNode rel = TestUtils.toRelNode("SELECT \"abc['xyz']\" col1 FROM default.complex");
  String actual = createCoralSpark(rel).getSparkSql();
  assertEquals(actual, "SELECT 'abc[\\'xyz\\']' col1\nFROM default.complex complex");
}

@Test
public void testDoubleQuoteInsideDoubleQuote() {
  // Escaped double quotes need no escaping once the literal is single-quoted for Spark.
  RelNode rel = TestUtils.toRelNode("SELECT \"abc[\\\"xyz\\\"]\" col1 FROM default.complex");
  String actual = createCoralSpark(rel).getSparkSql();
  assertEquals(actual, "SELECT 'abc[\"xyz\"]' col1\nFROM default.complex complex");
}

@Test
public void testDoubleQuoteInsideSingleQuote() {
  // Double quotes inside a single-quoted literal require no escaping and are preserved as-is.
  RelNode rel = TestUtils.toRelNode("SELECT 'abc[\"xyz\"]' col1 FROM default.complex");
  String actual = createCoralSpark(rel).getSparkSql();
  assertEquals(actual, "SELECT 'abc[\"xyz\"]' col1\nFROM default.complex complex");
}

private String getCoralSparkTranslatedSqlWithAliasFromCoralSchema(String db, String view) {
RelNode relNode = TestUtils.toRelNode(db, view);
Schema schema = TestUtils.getAvroSchemaForView(db, view, false);
Expand Down

0 comments on commit d04c55e

Please sign in to comment.