Skip to content

Commit

Permalink
feat: more String functions incl. Lambda based transpilation
Browse files Browse the repository at this point in the history
Signed-off-by: Andreas Reichel <andreas@manticore-projects.com>
  • Loading branch information
manticore-projects committed Mar 30, 2024
1 parent 12aed9a commit 38f30a0
Show file tree
Hide file tree
Showing 6 changed files with 328 additions and 84 deletions.
103 changes: 65 additions & 38 deletions src/main/java/com/manticore/transpiler/ExpressionTranspiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import net.sf.jsqlparser.expression.ExtractExpression;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.IntervalExpression;
import net.sf.jsqlparser.expression.LambdaExpression;
import net.sf.jsqlparser.expression.LongValue;
import net.sf.jsqlparser.expression.OracleNamedFunctionParameter;
import net.sf.jsqlparser.expression.Parenthesis;
Expand All @@ -47,6 +48,7 @@
import net.sf.jsqlparser.statement.select.SelectVisitor;
import net.sf.jsqlparser.util.deparser.ExpressionDeParser;

import java.util.Arrays;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -78,15 +80,10 @@ enum TranspiledFunction {

, PARSE_DATE, PARSE_DATETIME, PARSE_TIME, PARSE_TIMESTAMP, DATE_FROM_UNIX_DATE, UNIX_DATE, TIMESTAMP_MICROS, TIMESTAMP_MILLIS, TIMESTAMP_SECONDS, UNIX_MICROS, UNIX_MILLIS, UNIX_SECONDS

, STRING, BYTE_LENGTH, CHAR_LENGTH, CHARACTER_LENGTH, CODE_POINTS_TO_BYTES, CODE_POINTS_TO_STRING, COLLATE
, CONTAINS_SUBSTR, EDIT_DISTANCE, FORMAT, INSTR, LENGTH, LPAD, NORMALIZE, NORMALIZE_AND_CASEFOLD, OCTET_LENGTH
, REGEXP_CONTAINS, REGEXP_EXTRACT, REGEXP_EXTRACT_ALL, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR, REPEAT, REPLACE
, REVERSE
, STRING, BYTE_LENGTH, CHAR_LENGTH, CHARACTER_LENGTH, CODE_POINTS_TO_BYTES, CODE_POINTS_TO_STRING, COLLATE, CONTAINS_SUBSTR, EDIT_DISTANCE, FORMAT, INSTR, LENGTH, LPAD, NORMALIZE, NORMALIZE_AND_CASEFOLD, OCTET_LENGTH, REGEXP_CONTAINS, REGEXP_EXTRACT, REGEXP_EXTRACT_ALL, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR, REPEAT, REPLACE, REVERSE, RPAD, SAFE_CONVERT_BYTES_TO_STRING, TO_CODE_POINTS


, NVL
, UNNEST
;
, NVL, UNNEST;
// @FORMATTER:ON


Expand Down Expand Up @@ -581,6 +578,19 @@ public void visit(Function function) {
// format flags that are not supported (each is reported with a warning and replaced by %s):
// %t the string representation of the value, e.g. '2023-12-31'
// %T the TYPE STRING representation of the value, e.g. DATE '2023-12-31'
if (parameters.get(0) instanceof StringValue) {
String s = ((StringValue) parameters.get(0)).getValue();
if (s.contains("%t")) {
warning("Format %t is not supported");
s = s.replaceAll("%t", "%s");
}
if (s.contains("%T")) {
warning("Format %T is not supported");
s = s.replaceAll("%T", "%s");
}

function.setParameters(new ExpressionList<>(new StringValue(s), parameters.get(1)));
}
break;
case INSTR:
if (parameters != null && parameters.size() == 2) {
Expand All @@ -594,7 +604,8 @@ public void visit(Function function) {
rewrittenExpression = rewriteLength(parameters);
break;
case LPAD:
rewrittenExpression = rewriteLPad(parameters);
case RPAD:
rewrittenExpression = rewritePad(function, parameters);
break;
case NORMALIZE:
if (parameters != null && parameters.size() == 2
Expand Down Expand Up @@ -648,10 +659,10 @@ THEN Instr( source_value, Regexp_Extract( source_value, reg_exp ) )
ELSE 0
END AS instr
*/
WhenClause when = new WhenClause(
new Function("REGEXP_MATCHES", parameters.get(0), parameters.get(1))
, new Function("INSTR", parameters.get(0), new Function("REGEXP_EXTRACT", parameters.get(0), parameters.get(1)))
);
WhenClause when =
new WhenClause(new Function("REGEXP_MATCHES", parameters.get(0), parameters.get(1)),
new Function("INSTR", parameters.get(0),
new Function("REGEXP_EXTRACT", parameters.get(0), parameters.get(1))));
CaseExpression caseExpression = new CaseExpression(new LongValue(0), when);
visit(caseExpression);

Expand All @@ -661,13 +672,13 @@ THEN Instr( source_value, Regexp_Extract( source_value, reg_exp ) )
// pass through
break;
case UNNEST:
if (parameters!=null) {
if (parameters != null) {
switch (parameters.size()) {
case 1:
boolean recursive = false;
if (parameters.get(0) instanceof ArrayConstructor) {
ArrayConstructor arrayConstructor = (ArrayConstructor) parameters.get(0);
for (Expression e:arrayConstructor.getExpressions()) {
for (Expression e : arrayConstructor.getExpressions()) {
if (e instanceof StructType || e instanceof ParenthesedExpressionList) {
recursive = true;
break;
Expand All @@ -676,14 +687,25 @@ THEN Instr( source_value, Regexp_Extract( source_value, reg_exp ) )
}

if (recursive) {
function.setParameters(
parameters.get(0)
, new OracleNamedFunctionParameter("recursive", new Column("TRUE"))
);
function.setParameters(parameters.get(0),
new OracleNamedFunctionParameter("recursive", new Column("TRUE")));
}
}
}
break;
case SAFE_CONVERT_BYTES_TO_STRING:
warning("SAFE_CONVERT_BYTES_TO_STRING is not supported");
function.setName("decode");
break;
case TO_CODE_POINTS:
// TO_CODE_POINTS(word) as code_points
//
// list_transform( split(word, ''), x -> unicode(x) ) as code_points

function.setName("List_Transform");
function.setParameters(new Function("Split", parameters.get(0), new StringValue("")),
new LambdaExpression(Arrays.asList("x"), new Function("Unicode", new Column("x"))));
break;
}
}
if (rewrittenExpression == null) {
Expand Down Expand Up @@ -720,15 +742,15 @@ case typeof(bytes)
return null;
}

private Expression rewriteLPad(ExpressionList<?> parameters) {
private Expression rewritePad(Function function, ExpressionList<?> parameters) {
if (parameters != null) {
Expression padding = parameters.size() == 3 ? parameters.get(2) : new StringValue(" ");
switch (parameters.size()) {
case 2:
case 3:
WhenClause whenChar =
new WhenClause().withWhenExpression(new StringValue("VARCHAR"))
.withThenExpression(new Function("LPAD$$").withParameters(
.withThenExpression(new Function(function.getName() + "$$").withParameters(
new CastExpression(parameters.get(0), "VARCHAR"), parameters.get(1),
padding));
// @todo: support bytes
Expand Down Expand Up @@ -1297,9 +1319,14 @@ public void visit(StringValue stringValue) {
stringValue.setValue(convertUnicode(stringValue.getValue()));

if ("b".equalsIgnoreCase(stringValue.getPrefix())) {
Function f = new Function().withName("encode").withParameters(stringValue.withPrefix(""));
visit(f);
// Coalesce(TRY_CAST('абвгд' AS BLOB), encode('абвгд'))
CastExpression castExpression =
new CastExpression("Try_Cast", stringValue.withPrefix(""), "BLOB");
Function encode = new Function("encode", stringValue.withPrefix(""));
Function coalesce = new Function("Coalesce", castExpression, encode);
visit(coalesce);
} else {
// @todo: handle the "r" string prefix
super.visit(stringValue.withPrefix(null));
}
}
Expand Down Expand Up @@ -1349,24 +1376,24 @@ public void visit(CastExpression castExpression) {

public void visit(StructType structType) {
if (structType.getArguments() != null && !structType.getArguments().isEmpty()) {
buffer.append("{ ");
int i = 0;
for (SelectItem<?> e : structType.getArguments()) {
if (0 < i) {
buffer.append(",");
}
if (e.getAlias()!=null) {
buffer.append(e.getAlias().getName());
} else if (structType.getParameters()!=null && i<structType.getParameters().size()) {
buffer.append(structType.getParameters().get(i).getKey());
}
buffer.append("{ ");
int i = 0;
for (SelectItem<?> e : structType.getArguments()) {
if (0 < i) {
buffer.append(",");
}
if (e.getAlias() != null) {
buffer.append(e.getAlias().getName());
} else if (structType.getParameters() != null && i < structType.getParameters().size()) {
buffer.append(structType.getParameters().get(i).getKey());
}

buffer.append(":");
buffer.append(e.getExpression());
buffer.append(":");
buffer.append(e.getExpression());

i++;
}
buffer.append(" }");
i++;
}
buffer.append(" }");
}

if (structType.getParameters() != null && !structType.getParameters().isEmpty()) {
Expand Down
Binary file modified src/main/resources/doc/JSQLTranspiler.ods
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ static Map<File, List<SQLTest>> getSqlTestMap(File[] testFiles,

if (endContent) {
if (k.equalsIgnoreCase("provided")) {
test.providedSqlStr = sanitize(stringBuilder.toString());
test.providedSqlStr = stringBuilder.toString();
} else if (k.equalsIgnoreCase("expected")) {
test.expectedSqlStr = sanitize(stringBuilder.toString());
test.expectedSqlStr = stringBuilder.toString();
} else if (k.equalsIgnoreCase("count") || k.equalsIgnoreCase("tally")) {
test.expectedTally = Integer.parseInt(stringBuilder.toString().trim());
} else if (k.startsWith("result")) {
Expand Down Expand Up @@ -403,7 +403,8 @@ void transpile(File f, int idx, SQLTest t) throws Exception {
// Assertions.assertNotNull(t.expectedSqlStr);
String transpiledSqlStr = JSQLTranspiler.transpileQuery(t.providedSqlStr, t.inputDialect);
Assertions.assertThat(transpiledSqlStr).isNotNull();
Assertions.assertThat(sanitize(transpiledSqlStr)).isEqualTo(t.expectedSqlStr);
Assertions.assertThat(sanitize(transpiledSqlStr, true))
.isEqualTo(sanitize(t.expectedSqlStr, true));

// Expect this transpiled query to succeed since DuckDB does not support `TOP <integer>`
if (t.expectedTally >= 0) {
Expand Down
13 changes: 5 additions & 8 deletions src/test/resources/com/manticore/transpiler/any/debug.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
-- provided
SELECT
DATE_DIFF('2017-12-30', '2014-12-30', YEAR) AS year_diff,
DATE_DIFF('2017-12-30', '2014-12-30', ISOYEAR) AS isoyear_diff;
SELECT SAFE_CONVERT_BYTES_TO_STRING(b'\x61') as safe_convert
;

-- expected
SELECT
DATE_DIFF('YEAR', DATE '2014-12-30', DATE '2017-12-30' ) AS year_diff,
DATE_DIFF('ISOYEAR', DATE '2014-12-30', DATE '2017-12-30') AS isoyear_diff;
SELECT DECODE(COALESCE(TRY_CAST('\x61' AS BLOB),ENCODE('\x61')))AS SAFE_CONVERT;

-- result
"year_diff","isoyear_diff"
"3","2"
"safe_convert"
"a"

Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ SELECT t, len, LPAD(t, len) AS padded FROM UNNEST([
-- expected
SELECT t, len, CASE TYPEOF(T) WHEN 'VARCHAR' THEN LPAD(T::VARCHAR, LEN,' ') END AS padded from (
select Unnest([
{t:'abc', len:5 }::STRUCT(t VARCHAR, len integer),
{ t:'abc', len:5 }::STRUCT(t VARCHAR, len integer),
('abc', 2),
('例子', 4)
], recursive => true)
Expand All @@ -31,7 +31,7 @@ SELECT t, len, LPAD(t, len) AS padded FROM UNNEST([
-- expected
SELECT t, len, CASE TYPEOF(T) WHEN 'VARCHAR' THEN LPAD(T::VARCHAR, LEN,' ') END AS padded from (
select Unnest([
{t:'abc', len:5 },
{ t:'abc', len:5 },
('abc', 2),
('例子', 4)
], recursive => true)
Expand Down
Loading

0 comments on commit 38f30a0

Please sign in to comment.