Skip to content

Commit

Permalink
feat: support BigQuery Structs, DuckDB structs and translation
Browse files Browse the repository at this point in the history
Signed-off-by: Andreas Reichel <andreas@manticore-projects.com>
  • Loading branch information
manticore-projects committed Mar 28, 2024
1 parent 40e39f4 commit 12aed9a
Show file tree
Hide file tree
Showing 8 changed files with 374 additions and 13 deletions.
101 changes: 98 additions & 3 deletions src/main/java/com/manticore/transpiler/ExpressionTranspiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package com.manticore.transpiler;

import net.sf.jsqlparser.expression.ArrayConstructor;
import net.sf.jsqlparser.expression.CaseExpression;
import net.sf.jsqlparser.expression.CastExpression;
import net.sf.jsqlparser.expression.DateTimeLiteralExpression;
Expand All @@ -25,15 +26,18 @@
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.IntervalExpression;
import net.sf.jsqlparser.expression.LongValue;
import net.sf.jsqlparser.expression.OracleNamedFunctionParameter;
import net.sf.jsqlparser.expression.Parenthesis;
import net.sf.jsqlparser.expression.StringValue;
import net.sf.jsqlparser.expression.StructType;
import net.sf.jsqlparser.expression.TimezoneExpression;
import net.sf.jsqlparser.expression.WhenClause;
import net.sf.jsqlparser.expression.operators.arithmetic.Addition;
import net.sf.jsqlparser.expression.operators.arithmetic.Concat;
import net.sf.jsqlparser.expression.operators.arithmetic.Multiplication;
import net.sf.jsqlparser.expression.operators.relational.ExpressionList;
import net.sf.jsqlparser.expression.operators.relational.LikeExpression;
import net.sf.jsqlparser.expression.operators.relational.ParenthesedExpressionList;
import net.sf.jsqlparser.schema.Column;
import net.sf.jsqlparser.statement.create.table.ColDataType;
import net.sf.jsqlparser.statement.select.ParenthesedSelect;
Expand All @@ -43,6 +47,7 @@
import net.sf.jsqlparser.statement.select.SelectVisitor;
import net.sf.jsqlparser.util.deparser.ExpressionDeParser;

import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -73,10 +78,15 @@ enum TranspiledFunction {

, PARSE_DATE, PARSE_DATETIME, PARSE_TIME, PARSE_TIMESTAMP, DATE_FROM_UNIX_DATE, UNIX_DATE, TIMESTAMP_MICROS, TIMESTAMP_MILLIS, TIMESTAMP_SECONDS, UNIX_MICROS, UNIX_MILLIS, UNIX_SECONDS

, STRING, BYTE_LENGTH, CHAR_LENGTH, CHARACTER_LENGTH, CODE_POINTS_TO_BYTES, CODE_POINTS_TO_STRING, COLLATE, CONTAINS_SUBSTR, EDIT_DISTANCE, FORMAT, INSTR, LENGTH, LPAD, NORMALIZE, NORMALIZE_AND_CASEFOLD, OCTET_LENGTH, REGEXP_CONTAINS, REGEXP_EXTRACT, REGEXP_EXTRACT_ALL, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR
, STRING, BYTE_LENGTH, CHAR_LENGTH, CHARACTER_LENGTH, CODE_POINTS_TO_BYTES, CODE_POINTS_TO_STRING, COLLATE
, CONTAINS_SUBSTR, EDIT_DISTANCE, FORMAT, INSTR, LENGTH, LPAD, NORMALIZE, NORMALIZE_AND_CASEFOLD, OCTET_LENGTH
, REGEXP_CONTAINS, REGEXP_EXTRACT, REGEXP_EXTRACT_ALL, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR, REPEAT, REPLACE
, REVERSE


, NVL;
, NVL
, UNNEST
;
// @FORMATTER:ON


Expand Down Expand Up @@ -612,19 +622,68 @@ public void visit(Function function) {
function.setName("REGEXP_MATCHES");
break;
case REGEXP_EXTRACT:
case REGEXP_SUBSTR:
if (parameters != null && parameters.size() > 2) {
warning("REGEXP_EXTRACT supports only 2 parameters.");
while (parameters.size() > 2) {
parameters.remove(parameters.size() - 1);
}
}
function.setName("REGEXP_EXTRACT");
break;
case REGEXP_EXTRACT_ALL:
// pass through
break;
case REGEXP_INSTR:
if (parameters != null && parameters.size() > 2) {
warning("REGEXP_INSTR supports only 2 parameters.");
while (parameters.size() > 2) {
parameters.remove(parameters.size() - 1);
}
}
/*
CASE
WHEN Regexp_Matches( source_value, reg_exp )
THEN Instr( source_value, Regexp_Extract( source_value, reg_exp ) )
ELSE 0
END AS instr
*/
WhenClause when = new WhenClause(
new Function("REGEXP_MATCHES", parameters.get(0), parameters.get(1))
, new Function("INSTR", parameters.get(0), new Function("REGEXP_EXTRACT", parameters.get(0), parameters.get(1)))
);
CaseExpression caseExpression = new CaseExpression(new LongValue(0), when);
visit(caseExpression);

rewrittenExpression = caseExpression;
break;
case REGEXP_REPLACE:
case REGEXP_SUBSTR:
// pass through
break;
case UNNEST:
if (parameters!=null) {
switch (parameters.size()) {
case 1:
boolean recursive = false;
if (parameters.get(0) instanceof ArrayConstructor) {
ArrayConstructor arrayConstructor = (ArrayConstructor) parameters.get(0);
for (Expression e:arrayConstructor.getExpressions()) {
if (e instanceof StructType || e instanceof ParenthesedExpressionList) {
recursive = true;
break;
}
}
}

if (recursive) {
function.setParameters(
parameters.get(0)
, new OracleNamedFunctionParameter("recursive", new Column("TRUE"))
);
}
}
}
break;
}
}
if (rewrittenExpression == null) {
Expand Down Expand Up @@ -1288,6 +1347,42 @@ public void visit(CastExpression castExpression) {
}
}

/**
 * Writes a struct expression into the output buffer as a brace-delimited literal, optionally
 * followed by a {@code ::STRUCT( ... )} cast.
 *
 * <p>
 * When the struct has arguments, they are emitted as <code>{ key:value,key:value }</code>. The
 * key is the argument's alias when one is present, otherwise the name of the declared
 * parameter at the same position (if there is one). When the struct declares parameters, a
 * trailing <code>::STRUCT( name type,name type)</code> is appended.
 *
 * <p>
 * NOTE(review): when an argument has neither an alias nor a positional parameter name, the key
 * is left empty and only {@code :value} is written. NOTE(review): the argument expression is
 * appended through its string representation and is not visited by this transpiler, so
 * functions nested inside a struct value are presumably not rewritten -- confirm whether that
 * is intended.
 *
 * @param structType the struct expression to write
 */
public void visit(StructType structType) {
    final boolean hasParameters =
            structType.getParameters() != null && !structType.getParameters().isEmpty();

    if (structType.getArguments() != null && !structType.getArguments().isEmpty()) {
        buffer.append("{ ");
        int position = 0;
        for (SelectItem<?> argument : structType.getArguments()) {
            if (position > 0) {
                buffer.append(",");
            }

            // the explicit alias wins over the positional field name of the type declaration
            if (argument.getAlias() != null) {
                buffer.append(argument.getAlias().getName());
            } else if (hasParameters && position < structType.getParameters().size()) {
                buffer.append(structType.getParameters().get(position).getKey());
            }

            buffer.append(":").append(argument.getExpression());
            position++;
        }
        buffer.append(" }");
    }

    if (hasParameters) {
        buffer.append("::STRUCT( ");
        for (int k = 0; k < structType.getParameters().size(); k++) {
            Map.Entry<String, ColDataType> field = structType.getParameters().get(k);
            if (k > 0) {
                buffer.append(",");
            }
            buffer.append(field.getKey()).append(" ").append(field.getValue());
        }
        buffer.append(")");
    }
}


public final static ColDataType rewriteType(ColDataType colDataType) {
if (colDataType.getDataType().equalsIgnoreCase("BYTES")) {
Expand Down
Binary file modified src/main/resources/doc/JSQLTranspiler.ods
Binary file not shown.
36 changes: 36 additions & 0 deletions src/site/sphinx/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,42 @@ Latest Changes since |JSQLTRANSPILER_VERSION|
=============================================================


* **feature: support many more TEXT functions**

Andreas Reichel, 2024-03-25
* **feat: support more BigQuery Date/Time functions**

Andreas Reichel, 2024-03-21
* **feat: support more BigQuery Date/Time functions**

Andreas Reichel, 2024-03-21
* **build: Snapshot dependency**

Andreas Reichel, 2024-03-21
* **feat: implement a Python SQLGlot based test for comparison**

Andreas Reichel, 2024-03-21
* **feat: support more BigQuery Date/Time functions**

Andreas Reichel, 2024-03-21
* **style: fix QA exceptions**

Andreas Reichel, 2024-03-19
* **doc: fix the link to the Website**

Andreas Reichel, 2024-03-19
* **feat: many more DateTime functions**

Andreas Reichel, 2024-03-19
* **doc: update/fix the documentation**

Andreas Reichel, 2024-03-19
* **style: improve the function rewrite**

Andreas Reichel, 2024-03-19
* **test: fix the test template**

Andreas Reichel, 2024-03-19
* **doc: Google BigQuery date parts and date formats**

Andreas Reichel, 2024-03-18
Expand Down
60 changes: 54 additions & 6 deletions src/site/sphinx/javadoc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,9 @@ ExpressionTranspiler

| The type Expression transpiler.
| **ExpressionTranspiler** (selectVisitor, buffer, inputDialect)
| **ExpressionTranspiler** (selectVisitor, buffer)
| SelectVisitor selectVisitor
| :ref:`StringBuilder<java.lang.StringBuilder>` buffer
| :ref:`Dialect<com.manticore.transpiler.JSQLTranspiler.Dialect>` inputDialect

| **isDatePart** (expression, dialect) → boolean
Expand All @@ -64,11 +63,58 @@ ExpressionTranspiler


| **hasTimeZoneInfo** (timestampStr) → boolean
| :ref:`String<java.lang.String>` timestampStr
| returns boolean


| **hasTimeZoneInfo** (timestamp) → boolean
| Expression timestamp
| returns boolean


| **rewriteDateLiteral** (p, dateTimeType) → Expression
| Expression p
| :ref:`DateTime<DateTimeLiteralExpression.DateTime>` dateTimeType
| returns Expression


| *@SuppressWarnings*
| **visit** (function)
| Function function

| **visit** (extractExpression)
| ExtractExpression extractExpression

| **visit** (stringValue)
| StringValue stringValue

| **convertUnicode** (input) → :ref:`String<java.lang.String>`
| :ref:`String<java.lang.String>` input
| returns :ref:`String<java.lang.String>`


| **visit** (castExpression)
| CastExpression castExpression

| **rewriteType** (colDataType) → ColDataType
| ColDataType colDataType
| returns ColDataType


| **warning** (s)
| :ref:`String<java.lang.String>` s


.. _com.manticore.transpiler.JSQLTranspiler:

Expand All @@ -80,9 +126,8 @@ JSQLTranspiler

| The type Jsql transpiler.
| **JSQLTranspiler** (inputDialect)
| **JSQLTranspiler** ()
| Instantiates a new Jsql transpiler.
| :ref:`Dialect<com.manticore.transpiler.JSQLTranspiler.Dialect>` inputDialect

| **getAbsoluteFile** (filename) → :ref:`File<java.io.File>`
Expand Down Expand Up @@ -113,10 +158,9 @@ JSQLTranspiler


| **transpile** (sqlStr, inputDialect, outputFile)
| **transpile** (sqlStr, outputFile)
| Transpile a query string from a file or STDIN and write the transformed query string into a file or STDOUT.
| :ref:`String<java.lang.String>` sqlStr | sqlStr the original query string
| :ref:`Dialect<com.manticore.transpiler.JSQLTranspiler.Dialect>` inputDialect | inputDialect the input dialect
| :ref:`File<java.io.File>` outputFile | outputFile the output file, writing to STDOUT when not defined

Expand Down Expand Up @@ -171,3 +215,7 @@ JSQLTranspiler
| Top top

| **visit** (tableFunction)
| TableFunction tableFunction

Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class JSQLTranspilerTest {

// Assure SPACE around Syntax Characters
private static final Pattern SQL_SANITATION_PATTERN2 =
Pattern.compile("\\s*([!/,()=+\\-*|\\]<>:])\\s*", Pattern.MULTILINE);
Pattern.compile("\\s*([!/,()=+\\-*|\\{\\}\\[\\]<>:])\\s*", Pattern.MULTILINE);

public final static String TEST_FOLDER_STR = "build/resources/test/com/manticore/transpiler/any";

Expand Down
14 changes: 11 additions & 3 deletions src/test/resources/com/manticore/transpiler/any/debug.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
-- provided
SELECT ASCII('abcd') as A, ASCII('a') as B, ASCII('') as C, ASCII(NULL) as D;
SELECT
DATE_DIFF('2017-12-30', '2014-12-30', YEAR) AS year_diff,
DATE_DIFF('2017-12-30', '2014-12-30', ISOYEAR) AS isoyear_diff;

-- expected
SELECT
DATE_DIFF('YEAR', DATE '2014-12-30', DATE '2017-12-30' ) AS year_diff,
DATE_DIFF('ISOYEAR', DATE '2014-12-30', DATE '2017-12-30') AS isoyear_diff;

-- result
"A","B","C","D"
"97","97","0",""
"year_diff","isoyear_diff"
"3","2"

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
-- provided
SELECT t, len, LPAD(t, len) AS padded FROM UNNEST([
STRUCT<t VARCHAR, len integer>('abc', 5 ),
('abc', 2),
('例子', 4)
]);

-- expected
SELECT t, len, CASE TYPEOF(T) WHEN 'VARCHAR' THEN LPAD(T::VARCHAR, LEN,' ') END AS padded from (
select Unnest([
{t:'abc', len:5 }::STRUCT(t VARCHAR, len integer),
('abc', 2),
('例子', 4)
], recursive => true)
);

-- result
"t","len","padded"
"abc","5"," abc"
"abc","2","ab"
"例子","4"," 例子"


-- provided
SELECT t, len, LPAD(t, len) AS padded FROM UNNEST([
STRUCT('abc' AS t, 5 AS len),
('abc', 2),
('例子', 4)
]);

-- expected
SELECT t, len, CASE TYPEOF(T) WHEN 'VARCHAR' THEN LPAD(T::VARCHAR, LEN,' ') END AS padded from (
select Unnest([
{t:'abc', len:5 },
('abc', 2),
('例子', 4)
], recursive => true)
);

-- result
"t","len","padded"
"abc","5"," abc"
"abc","2","ab"
"例子","4"," 例子"
Loading

0 comments on commit 12aed9a

Please sign in to comment.