diff --git a/src/main/java/com/manticore/transpiler/ExpressionTranspiler.java b/src/main/java/com/manticore/transpiler/ExpressionTranspiler.java index 70bdb5d..4056e2a 100644 --- a/src/main/java/com/manticore/transpiler/ExpressionTranspiler.java +++ b/src/main/java/com/manticore/transpiler/ExpressionTranspiler.java @@ -17,6 +17,7 @@ */ package com.manticore.transpiler; +import net.sf.jsqlparser.expression.ArrayConstructor; import net.sf.jsqlparser.expression.CaseExpression; import net.sf.jsqlparser.expression.CastExpression; import net.sf.jsqlparser.expression.DateTimeLiteralExpression; @@ -25,8 +26,10 @@ import net.sf.jsqlparser.expression.Function; import net.sf.jsqlparser.expression.IntervalExpression; import net.sf.jsqlparser.expression.LongValue; +import net.sf.jsqlparser.expression.OracleNamedFunctionParameter; import net.sf.jsqlparser.expression.Parenthesis; import net.sf.jsqlparser.expression.StringValue; +import net.sf.jsqlparser.expression.StructType; import net.sf.jsqlparser.expression.TimezoneExpression; import net.sf.jsqlparser.expression.WhenClause; import net.sf.jsqlparser.expression.operators.arithmetic.Addition; @@ -34,6 +37,7 @@ import net.sf.jsqlparser.expression.operators.arithmetic.Multiplication; import net.sf.jsqlparser.expression.operators.relational.ExpressionList; import net.sf.jsqlparser.expression.operators.relational.LikeExpression; +import net.sf.jsqlparser.expression.operators.relational.ParenthesedExpressionList; import net.sf.jsqlparser.schema.Column; import net.sf.jsqlparser.statement.create.table.ColDataType; import net.sf.jsqlparser.statement.select.ParenthesedSelect; @@ -43,6 +47,7 @@ import net.sf.jsqlparser.statement.select.SelectVisitor; import net.sf.jsqlparser.util.deparser.ExpressionDeParser; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -73,10 +78,15 @@ enum TranspiledFunction { , PARSE_DATE, PARSE_DATETIME, PARSE_TIME, PARSE_TIMESTAMP, DATE_FROM_UNIX_DATE, UNIX_DATE, TIMESTAMP_MICROS, TIMESTAMP_MILLIS, TIMESTAMP_SECONDS, UNIX_MICROS, UNIX_MILLIS, UNIX_SECONDS - , STRING, BYTE_LENGTH, CHAR_LENGTH, CHARACTER_LENGTH, CODE_POINTS_TO_BYTES, CODE_POINTS_TO_STRING, COLLATE, CONTAINS_SUBSTR, EDIT_DISTANCE, FORMAT, INSTR, LENGTH, LPAD, NORMALIZE, NORMALIZE_AND_CASEFOLD, OCTET_LENGTH, REGEXP_CONTAINS, REGEXP_EXTRACT, REGEXP_EXTRACT_ALL, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR + , STRING, BYTE_LENGTH, CHAR_LENGTH, CHARACTER_LENGTH, CODE_POINTS_TO_BYTES, CODE_POINTS_TO_STRING, COLLATE + , CONTAINS_SUBSTR, EDIT_DISTANCE, FORMAT, INSTR, LENGTH, LPAD, NORMALIZE, NORMALIZE_AND_CASEFOLD, OCTET_LENGTH + , REGEXP_CONTAINS, REGEXP_EXTRACT, REGEXP_EXTRACT_ALL, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR, REPEAT, REPLACE + , REVERSE - , NVL; + , NVL + , UNNEST + ; // @FORMATTER:ON @@ -612,19 +622,68 @@ public void visit(Function function) { function.setName("REGEXP_MATCHES"); break; case REGEXP_EXTRACT: + case REGEXP_SUBSTR: if (parameters != null && parameters.size() > 2) { warning("REGEXP_EXTRACT supports only 2 parameters."); while (parameters.size() > 2) { parameters.remove(parameters.size() - 1); } } + function.setName("REGEXP_EXTRACT"); break; case REGEXP_EXTRACT_ALL: // pass through break; case REGEXP_INSTR: + if (parameters != null && parameters.size() > 2) { + warning("REGEXP_INSTR supports only 2 parameters."); + while (parameters.size() > 2) { + parameters.remove(parameters.size() - 1); + } + } + /* + CASE + WHEN Regexp_Matches( source_value, reg_exp ) + THEN Instr( source_value, Regexp_Extract( source_value, reg_exp ) ) + ELSE 0 + END AS instr + */ + WhenClause when = new WhenClause( + new Function("REGEXP_MATCHES", parameters.get(0), parameters.get(1)) + , new Function("INSTR", parameters.get(0), new Function("REGEXP_EXTRACT", parameters.get(0), parameters.get(1))) + ); + CaseExpression caseExpression = new CaseExpression(new LongValue(0), when); + visit(caseExpression); + + rewrittenExpression = caseExpression; + break; case REGEXP_REPLACE: - case REGEXP_SUBSTR: + // pass through + break; + case UNNEST: + if (parameters!=null) { + switch (parameters.size()) { + case 1: + boolean recursive = false; + if (parameters.get(0) instanceof ArrayConstructor) { + ArrayConstructor arrayConstructor = (ArrayConstructor) parameters.get(0); + for (Expression e:arrayConstructor.getExpressions()) { + if (e instanceof StructType || e instanceof ParenthesedExpressionList) { + recursive = true; + break; + } + } + } + + if (recursive) { + function.setParameters( + parameters.get(0) + , new OracleNamedFunctionParameter("recursive", new Column("TRUE")) + ); + } + } + } + break; } } if (rewrittenExpression == null) { @@ -1288,6 +1347,42 @@ public void visit(CastExpression castExpression) { } } + public void visit(StructType structType) { + if (structType.getArguments() != null && !structType.getArguments().isEmpty()) { + buffer.append("{ "); + int i = 0; + for (SelectItem e : structType.getArguments()) { + if (0 < i) { + buffer.append(","); + } + if (e.getAlias()!=null) { + buffer.append(e.getAlias().getName()); + } else if (structType.getParameters()!=null && i e : structType.getParameters()) { + if (0 < i++) { + buffer.append(","); + } + buffer.append(e.getKey()).append(" "); + buffer.append(e.getValue()); + } + buffer.append(")"); + } + } + public final static ColDataType rewriteType(ColDataType colDataType) { if (colDataType.getDataType().equalsIgnoreCase("BYTES")) { diff --git a/src/main/resources/doc/JSQLTranspiler.ods b/src/main/resources/doc/JSQLTranspiler.ods index df233d0..08cabef 100644 Binary files a/src/main/resources/doc/JSQLTranspiler.ods and b/src/main/resources/doc/JSQLTranspiler.ods differ diff --git a/src/site/sphinx/changelog.rst b/src/site/sphinx/changelog.rst index d1e574a..b2e9623 100644 --- a/src/site/sphinx/changelog.rst +++ b/src/site/sphinx/changelog.rst @@ -8,6 +8,42 @@ Latest Changes since |JSQLTRANSPILER_VERSION| ============================================================= + * **feature: support many more TEXT functions** + + Andreas Reichel, 2024-03-25 + * **feat: support more BigQuery Date/Time functions** + + Andreas Reichel, 2024-03-21 + * **feat: support more BigQuery Date/Time functions** + + Andreas Reichel, 2024-03-21 + * **build: Snapshot dependency** + + Andreas Reichel, 2024-03-21 + * **feat: implement a Python SQLGlot based test for comparision** + + Andreas Reichel, 2024-03-21 + * **feat: support more BigQuery Date/Time functions** + + Andreas Reichel, 2024-03-21 + * **style: fix QA exceptions** + + Andreas Reichel, 2024-03-19 + * **doc: fix the link to th Website** + + Andreas Reichel, 2024-03-19 + * **feat: many more DateTime functions** + + Andreas Reichel, 2024-03-19 + * **doc: update/fix the documentation** + + Andreas Reichel, 2024-03-19 + * **style: improve the function rewrite** + + Andreas Reichel, 2024-03-19 + * **test: fix the test template** + + Andreas Reichel, 2024-03-19 * **doc: Google BigQuery date parts and date formats** Andreas Reichel, 2024-03-18 diff --git a/src/site/sphinx/javadoc.rst b/src/site/sphinx/javadoc.rst index 5c99fc3..8cea355 100644 --- a/src/site/sphinx/javadoc.rst +++ b/src/site/sphinx/javadoc.rst @@ -44,10 +44,9 @@ ExpressionTranspiler | The type Expression transpiler. -| **ExpressionTranspiler** (selectVisitor, buffer, inputDialect) +| **ExpressionTranspiler** (selectVisitor, buffer) | SelectVisitor selectVisitor | :ref:`StringBuilder` buffer -| :ref:`Dialect` inputDialect | **isDatePart** (expression, dialect) → boolean @@ -64,11 +63,58 @@ ExpressionTranspiler +| **hasTimeZoneInfo** (timestampStr) → boolean +| :ref:`String` timestampStr +| returns boolean + + + +| **hasTimeZoneInfo** (timestamp) → boolean +| Expression timestamp +| returns boolean + + + +| **rewriteDateLiteral** (p, dateTimeType) → Expression +| Expression p +| :ref:`DateTime` dateTimeType +| returns Expression + + + | *@SuppressWarnings* | **visit** (function) | Function function +| **visit** (extractExpression) +| ExtractExpression extractExpression + + +| **visit** (stringValue) +| StringValue stringValue + + +| **convertUnicode** (input) → :ref:`String` +| :ref:`String` input +| returns :ref:`String` + + + +| **visit** (castExpression) +| CastExpression castExpression + + +| **rewriteType** (colDataType) → ColDataType +| ColDataType colDataType +| returns ColDataType + + + +| **warning** (s) +| :ref:`String` s + + .. _com.manticore.transpiler.JSQLTranspiler: @@ -80,9 +126,8 @@ JSQLTranspiler | The type Jsql transpiler. -| **JSQLTranspiler** (inputDialect) +| **JSQLTranspiler** () | Instantiates a new Jsql transpiler. -| :ref:`Dialect` inputDialect | **getAbsoluteFile** (filename) → :ref:`File` @@ -113,10 +158,9 @@ JSQLTranspiler -| **transpile** (sqlStr, inputDialect, outputFile) +| **transpile** (sqlStr, outputFile) | Transpile a query string from a file or STDIN and write the transformed query string into a file or STDOUT. | :ref:`String` sqlStr | sqlStr the original query string -| :ref:`Dialect` inputDialect | inputDialect the input dialect | :ref:`File` outputFile | outputFile the output file, writing to STDOUT when not defined @@ -171,3 +215,7 @@ JSQLTranspiler | Top top +| **visit** (tableFunction) +| TableFunction tableFunction + + diff --git a/src/test/java/com/manticore/transpiler/JSQLTranspilerTest.java b/src/test/java/com/manticore/transpiler/JSQLTranspilerTest.java index ddcfdd1..d85a572 100644 --- a/src/test/java/com/manticore/transpiler/JSQLTranspilerTest.java +++ b/src/test/java/com/manticore/transpiler/JSQLTranspilerTest.java @@ -80,7 +80,7 @@ class JSQLTranspilerTest { // Assure SPACE around Syntax Characters private static final Pattern SQL_SANITATION_PATTERN2 = - Pattern.compile("\\s*([!/,()=+\\-*|\\]<>:])\\s*", Pattern.MULTILINE); + Pattern.compile("\\s*([!/,()=+\\-*|\\{\\}\\[\\]<>:])\\s*", Pattern.MULTILINE); public final static String TEST_FOLDER_STR = "build/resources/test/com/manticore/transpiler/any"; diff --git a/src/test/resources/com/manticore/transpiler/any/debug.sql b/src/test/resources/com/manticore/transpiler/any/debug.sql index 2c18851..d942a64 100644 --- a/src/test/resources/com/manticore/transpiler/any/debug.sql +++ b/src/test/resources/com/manticore/transpiler/any/debug.sql @@ -1,6 +1,14 @@ -- provided -SELECT ASCII('abcd') as A, ASCII('a') as B, ASCII('') as C, ASCII(NULL) as D; +SELECT + DATE_DIFF('2017-12-30', '2014-12-30', YEAR) AS year_diff, + DATE_DIFF('2017-12-30', '2014-12-30', ISOYEAR) AS isoyear_diff; + +-- expected +SELECT + DATE_DIFF('YEAR', DATE '2014-12-30', DATE '2017-12-30' ) AS year_diff, + DATE_DIFF('ISOYEAR', DATE '2014-12-30', DATE '2017-12-30') AS isoyear_diff; -- result -"A","B","C","D" -"97","97","0","" \ No newline at end of file +"year_diff","isoyear_diff" +"3","2" + diff --git a/src/test/resources/com/manticore/transpiler/google_bigquery/structure.sql b/src/test/resources/com/manticore/transpiler/google_bigquery/structure.sql new file mode 100644 index 0000000..958b324 --- /dev/null +++ b/src/test/resources/com/manticore/transpiler/google_bigquery/structure.sql @@ -0,0 +1,44 @@ +-- provided +SELECT t, len, LPAD(t, len) AS padded FROM UNNEST([ + STRUCT('abc', 5 ), + ('abc', 2), + ('例子', 4) +]); + +-- expected +SELECT t, len, CASE TYPEOF(T) WHEN 'VARCHAR' THEN LPAD(T::VARCHAR, LEN,' ') END AS padded from ( +select Unnest([ + {t:'abc', len:5 }::STRUCT(t VARCHAR, len integer), + ('abc', 2), + ('例子', 4) +], recursive => true) +); + +-- result +"t","len","padded" +"abc","5"," abc" +"abc","2","ab" +"例子","4"," 例子" + + +-- provided +SELECT t, len, LPAD(t, len) AS padded FROM UNNEST([ + STRUCT('abc' AS t, 5 AS len), + ('abc', 2), + ('例子', 4) +]); + +-- expected +SELECT t, len, CASE TYPEOF(T) WHEN 'VARCHAR' THEN LPAD(T::VARCHAR, LEN,' ') END AS padded from ( +select Unnest([ + {t:'abc', len:5 }, + ('abc', 2), + ('例子', 4) +], recursive => true) +); + +-- result +"t","len","padded" +"abc","5"," abc" +"abc","2","ab" +"例子","4"," 例子" diff --git a/src/test/resources/com/manticore/transpiler/google_bigquery/text_function.sql b/src/test/resources/com/manticore/transpiler/google_bigquery/text_function.sql index f02e7b4..0eb824c 100644 --- a/src/test/resources/com/manticore/transpiler/google_bigquery/text_function.sql +++ b/src/test/resources/com/manticore/transpiler/google_bigquery/text_function.sql @@ -472,3 +472,133 @@ FROM code_markdown; "example" "[`function(x)`, `function(y)`]" + +-- provided +WITH example AS ( + SELECT 'ab@cd-ef' AS source_value, '@[^-]*' AS reg_exp UNION ALL + SELECT 'ab@d-ef', '@[^-]*' UNION ALL + SELECT 'abc@cd-ef', '@[^-]*' UNION ALL + SELECT 'abc-ef', '@[^-]*') +SELECT source_value, reg_exp, REGEXP_INSTR(source_value, reg_exp) AS instr +FROM example; + + +-- expected +WITH example AS ( + SELECT 'ab@cd-ef' AS source_value + , '@[^-]*' AS reg_exp + UNION ALL + SELECT 'ab@d-ef' + , '@[^-]*' + UNION ALL + SELECT 'abc@cd-ef' + , '@[^-]*' + UNION ALL + SELECT 'abc-ef' + , '@[^-]*' ) +SELECT source_value + , reg_exp + , CASE + WHEN Regexp_Matches( source_value, reg_exp ) + THEN Instr( source_value, Regexp_Extract( source_value, reg_exp ) ) + ELSE 0 + END AS instr +FROM example +; + +-- result +"source_value","reg_exp","instr" +"ab@cd-ef","@[^-]*","3" +"ab@d-ef","@[^-]*","3" +"abc@cd-ef","@[^-]*","4" +"abc-ef","@[^-]*","0" + + +-- provided +WITH markdown AS + (SELECT '# Heading' as heading + UNION ALL + SELECT '# Another heading' as heading) +SELECT + REGEXP_REPLACE(heading, r'^# ([a-zA-Z0-9\s]+$)', r'

\1

') + AS html +FROM markdown; + +-- expected +WITH markdown AS + (SELECT '# Heading' as heading + UNION ALL + SELECT '# Another heading' as heading) +SELECT + REGEXP_REPLACE(heading, '^# ([a-zA-Z0-9\s]+$)', '

\1

') + AS html +FROM markdown; + +--@todo: fix the CSVWriter problem with HTML tags and whitspace +-- result +"HTML" +"# Heading" +"# Another heading" + + +-- provided +WITH example AS +(SELECT 'Hello World Helloo' AS value, 'H?ello+' AS regex, 1 AS position, 1 AS +occurrence +) +SELECT value, regex, position, occurrence, REGEXP_SUBSTR(value, regex, +position, occurrence) AS regexp_value FROM example; + +-- expected +WITH example AS +(SELECT 'Hello World Helloo' AS value, 'H?ello+' AS regex, 1 AS position, 1 AS +occurrence +) +SELECT value, regex, position, occurrence, REGEXP_EXTRACT(value, regex) AS regexp_value FROM example; + +-- result +"value","regex","position","occurrence","regexp_value" +"Hello World Helloo","H?ello+","1","1","Hello" + + +-- provided +Select repeat('abc', 3) as repeated; + +-- result +"repeated" +"abcabcabc" + + +-- provided +WITH desserts AS + (SELECT 'apple pie' as dessert + UNION ALL + SELECT 'blackberry pie' as dessert + UNION ALL + SELECT 'cherry pie' as dessert) +SELECT + REPLACE (dessert, 'pie', 'cobbler') as example +FROM desserts; + +-- result +"example" +"apple cobbler" +"blackberry cobbler" +"cherry cobbler" + + +-- provided +WITH example AS ( + SELECT 'foo' AS sample_string UNION ALL + SELECT 'абвгд' AS sample_string +) +SELECT + sample_string, + REVERSE(sample_string) AS reverse_string +FROM example; + +-- expected +"sample_string","reverse_string" +"foo","oof" +"абвгд","дгвба" +