diff --git a/src/main/java/ai/starlake/transpiler/databricks/DatabricksExpressionTranspiler.java b/src/main/java/ai/starlake/transpiler/databricks/DatabricksExpressionTranspiler.java index 51771ef..3e7e6b3 100644 --- a/src/main/java/ai/starlake/transpiler/databricks/DatabricksExpressionTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/databricks/DatabricksExpressionTranspiler.java @@ -59,9 +59,9 @@ enum TranspiledFunction { , ANY, APPROX_PERCENTILE, ARRAY_AGG, COLLECT_LIST, COLLECT_SET, COUNT, COUNT_IF, FIRST, FIRST_VALUE, LAST, LAST_VALUE - , PERCENTILE, PERCENTILE_APPROX, REGR_INTERCEPT, REGR_SLOPE, KURTOSIS, SKEWNESS, STD + , PERCENTILE, PERCENTILE_APPROX, REGR_INTERCEPT, REGR_SLOPE, KURTOSIS, SKEWNESS, STD, NTH_VALUE - , TRY_AVG, TRY_SUM + , TRY_AVG, TRY_SUM, PERCENT_RANK ; // @FORMATTER:ON @@ -586,9 +586,29 @@ public void visit(AnalyticExpression function) { case TRY_AVG: warning("TRY error handling not supported."); function.setName("Avg"); + break; case TRY_SUM: warning("TRY error handling not supported."); function.setName("Sum"); + break; + case NTH_VALUE: + // , ignoreNulls + if (function.getDefaultValue() != null) { + if (function.getDefaultValue().toString().equalsIgnoreCase("TRUE")) { + function.setNullHandling(Function.NullHandling.IGNORE_NULLS); + } else if (function.getDefaultValue().toString().equalsIgnoreCase("FALSE")) { + function.setNullHandling(Function.NullHandling.RESPECT_NULLS); + } + warning("ignoreNulls parameter not supported, use IGNORE/RESPECT NULLS instead."); + function.setDefaultValue(null); + } + break; + case PERCENT_RANK: + if (function.getExpression() != null) { + warning("PERCENT_RANK needs 0 parameters, got 1"); + function.setExpression(null); + } + break; } } if (rewrittenExpression == null) { diff --git a/src/main/java/ai/starlake/transpiler/schemas/SchemaProvider.java b/src/main/java/ai/starlake/transpiler/schemas/SchemaProvider.java index 2fd4d61..782df89 100644 --- a/src/main/java/ai/starlake/transpiler/schemas/SchemaProvider.java +++ b/src/main/java/ai/starlake/transpiler/schemas/SchemaProvider.java @@ -1,34 +1,36 @@ -package ai.starlake.transpiler.schemas -import java.util.Map; - - - +package ai.starlake.transpiler.schemas; +import java.util.Map; interface SchemaProvider { - /** - * Get all tables in the schema - * @return Map of tables with schema name and table name as key and map of field name and field type as value - */ - Map> getTables(); - - - /** - * Get all fields in the table - * @param schemaName schema name - * @param tableName table name - * @return Map of field name and field type - */ - Map getTable(String schemaName, String tableName); - - /** - * Get table regardless of schema name - * @param tableName table name - * @return Map of schema name where the table is found and map of field name and field type. Returning more than one key means - * the table is found in multiple schemas and the resolution is ambiguous. - * In the future, resolution may be done by jsqltranspiler based on the context. - */ - Map> getTables(String tableName); + /** + * Get all tables in the schema + * + * @return Map of tables with schema name and table name as key and map of field name and field + * type as value + */ + Map> getTables(); + + + /** + * Get all fields in the table + * + * @param schemaName schema name + * @param tableName table name + * @return Map of field name and field type + */ + Map getTable(String schemaName, String tableName); + + /** + * Get table regardless of schema name + * + * @param tableName table name + * @return Map of schema name where the table is found and map of field name and field type. + * Returning more than one key means the table is found in multiple schemas and the + * resolution is ambiguous. In the future, resolution may be done by jsqltranspiler based + * on the context. + */ + Map> getTables(String tableName); } diff --git a/src/site/sphinx/_static/JSQLTranspiler.ods b/src/site/sphinx/_static/JSQLTranspiler.ods index 0ba40e7..a1f20df 100644 Binary files a/src/site/sphinx/_static/JSQLTranspiler.ods and b/src/site/sphinx/_static/JSQLTranspiler.ods differ diff --git a/src/test/resources/ai/starlake/transpiler/databricks/aggregate_function.sql b/src/test/resources/ai/starlake/transpiler/databricks/aggregate_function.sql index 2105c46..e6b8cdc 100644 --- a/src/test/resources/ai/starlake/transpiler/databricks/aggregate_function.sql +++ b/src/test/resources/ai/starlake/transpiler/databricks/aggregate_function.sql @@ -366,4 +366,109 @@ SELECT stddev_pop(DISTINCT col) AS stddev_pop FROM VALUES (1), (2), (3), (3) AS -- result "stddev_pop" -"0.816496580927726" \ No newline at end of file +"0.816496580927726" + + +-- provided +SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) AS cume_dist + FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) + ORDER BY 1,2; + +-- result +"a","b","cume_dist" +"A1","1","0.6666666666666666" +"A1","1","0.6666666666666666" +"A1","2","1.0" +"A2","3","1.0" + + +-- provided +SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) AS lag + FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) + ORDER BY 1,2; + +-- result +"a","b","lag" +"A1","1","" +"A1","1","1" +"A1","2","1" +"A2","3","" + + +-- provided +SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) AS lead + FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) + ORDER BY 1,2 + ; + +-- result +"a","b","lead" +"A1","1","1" +"A1","1","2" +"A1","2","" +"A2","3","" + + +-- provided +SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) AS nth_value + FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) +ORDER BY 1,2; + +-- result +"a","b","nth_value" +"A1","1","1" +"A1","1","1" +"A1","2","1" +"A2","3","" + + +-- provided +SELECT a, + b, + dense_rank() OVER(PARTITION BY a ORDER BY b) AS dense_rank, + rank() OVER(PARTITION BY a ORDER BY b) AS rank, + row_number() OVER(PARTITION BY a ORDER BY b) AS row_number + FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) +ORDER BY 1,2,3; + +-- result +"a","b","dense_rank","rank","row_number" +"A1","1","1","1","1" +"A1","1","1","1","2" +"A1","2","2","3","3" +"A2","3","1","1","1" + +-- provided + SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) AS ntile + FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) + ORDER BY 1,2; + +-- result +"a","b","ntile" +"A1","1","1" +"A1","1","1" +"A1","2","2" +"A2","3","1" + + +-- provided +SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) AS percent_rank + FROM VALUES ('A1', 2), ('A1', 1), ('A1', 3), ('A1', 6), ('A1', 7), ('A1', 7), ('A2', 3), ('A1', 1) tab(a, b) + ORDER BY 1,2; + +-- expected +SELECT a, b, percent_rank() OVER (PARTITION BY a ORDER BY b) AS percent_rank + FROM VALUES ('A1', 2), ('A1', 1), ('A1', 3), ('A1', 6), ('A1', 7), ('A1', 7), ('A2', 3), ('A1', 1) tab(a, b) + ORDER BY 1,2; + + +-- result +"a","b","percent_rank" +"A1","1","0.0" +"A1","1","0.0" +"A1","2","0.3333333333333333" +"A1","3","0.5" +"A1","6","0.6666666666666666" +"A1","7","0.8333333333333334" +"A1","7","0.8333333333333334" +"A2","3","0.0" \ No newline at end of file