Skip to content

Commit

Permalink
feat: Databricks Aggregate functions
Browse files Browse the repository at this point in the history
Signed-off-by: Andreas Reichel <[email protected]>
  • Loading branch information
manticore-projects committed May 29, 2024
1 parent f5653c8 commit 8087289
Show file tree
Hide file tree
Showing 6 changed files with 285 additions and 35 deletions.
36 changes: 23 additions & 13 deletions src/main/java/ai/starlake/transpiler/JSQLExpressionTranspiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,19 +85,18 @@ public class JSQLExpressionTranspiler extends ExpressionDeParser {
{"default", "reserved"}, {"deferrable", "reserved"}, {"desc", "reserved"},
{"describe", "reserved"}, {"distinct", "reserved"}, {"do", "reserved"}, {"else", "reserved"},
{"end", "reserved"}, {"except", "reserved"}
//, {"false", "reserved"}
, {"fetch", "reserved"},
{"for", "reserved"}, {"foreign", "reserved"}, {"from", "reserved"}, {"grant", "reserved"},
{"group", "reserved"}, {"having", "reserved"}, {"in", "reserved"}, {"initially", "reserved"},
{"intersect", "reserved"}, {"into", "reserved"}, {"lateral", "reserved"},
{"leading", "reserved"}, {"limit", "reserved"}, {"not", "reserved"}, {"null", "reserved"},
{"offset", "reserved"}, {"on", "reserved"}, {"only", "reserved"}, {"or", "reserved"},
{"order", "reserved"}, {"pivot", "reserved"}, {"pivot_longer", "reserved"},
{"pivot_wider", "reserved"}, {"placing", "reserved"}, {"primary", "reserved"},
{"qualify", "reserved"}, {"references", "reserved"}, {"returning", "reserved"},
{"select", "reserved"}, {"show", "reserved"}, {"some", "reserved"}, {"summarize", "reserved"},
{"symmetric", "reserved"}, {"table", "reserved"}, {"then", "reserved"}, {"to", "reserved"},
{"trailing", "reserved"}
// , {"false", "reserved"}
, {"fetch", "reserved"}, {"for", "reserved"}, {"foreign", "reserved"}, {"from", "reserved"},
{"grant", "reserved"}, {"group", "reserved"}, {"having", "reserved"}, {"in", "reserved"},
{"initially", "reserved"}, {"intersect", "reserved"}, {"into", "reserved"},
{"lateral", "reserved"}, {"leading", "reserved"}, {"limit", "reserved"}, {"not", "reserved"},
{"null", "reserved"}, {"offset", "reserved"}, {"on", "reserved"}, {"only", "reserved"},
{"or", "reserved"}, {"order", "reserved"}, {"pivot", "reserved"},
{"pivot_longer", "reserved"}, {"pivot_wider", "reserved"}, {"placing", "reserved"},
{"primary", "reserved"}, {"qualify", "reserved"}, {"references", "reserved"},
{"returning", "reserved"}, {"select", "reserved"}, {"show", "reserved"}, {"some", "reserved"},
{"summarize", "reserved"}, {"symmetric", "reserved"}, {"table", "reserved"},
{"then", "reserved"}, {"to", "reserved"}, {"trailing", "reserved"}
// , { "true", "reserved" }
, {"union", "reserved"}, {"unique", "reserved"}, {"unpivot", "reserved"},
{"using", "reserved"}, {"variadic", "reserved"}, {"when", "reserved"}, {"where", "reserved"},
Expand Down Expand Up @@ -2202,4 +2201,15 @@ public void visit(Column column) {
}
super.visit(column);
}

/**
 * Deparses an expression list, dropping one level of redundant nesting.
 *
 * <p>When the list holds exactly one element and that element is itself a
 * {@code ParenthesedExpressionList}, the inner list is handed to the parent deparser instead of
 * the outer wrapper. This reduces obsolete parentheses such as in
 * {@code VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y)}. Every other list is passed
 * to the parent deparser unchanged.
 *
 * @param expressionList the expression list to deparse
 */
public void visit(ExpressionList expressionList) {
  boolean wrapsSingleParenthesedList =
      expressionList.size() == 1 && expressionList.get(0) instanceof ParenthesedExpressionList;

  if (!wrapsSingleParenthesedList) {
    // nothing to unwrap: regular handling
    super.visit(expressionList);
    return;
  }

  // keep the argument statically typed as ParenthesedExpressionList so the same parent overload
  // is selected as before
  ParenthesedExpressionList innerList = (ParenthesedExpressionList) expressionList.get(0);
  super.visit(innerList);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import ai.starlake.transpiler.redshift.RedshiftExpressionTranspiler;
import net.sf.jsqlparser.expression.AnalyticExpression;
import net.sf.jsqlparser.expression.AnalyticType;
import net.sf.jsqlparser.expression.ArrayConstructor;
import net.sf.jsqlparser.expression.ArrayExpression;
import net.sf.jsqlparser.expression.BinaryExpression;
import net.sf.jsqlparser.expression.CastExpression;
Expand Down Expand Up @@ -58,6 +59,10 @@ enum TranspiledFunction {

, ANY, APPROX_PERCENTILE, ARRAY_AGG, COLLECT_LIST, COLLECT_SET, COUNT, COUNT_IF, FIRST, FIRST_VALUE, LAST, LAST_VALUE

, PERCENTILE, PERCENTILE_APPROX, REGR_INTERCEPT, REGR_SLOPE, KURTOSIS, SKEWNESS, STD

, TRY_AVG, TRY_SUM

;
// @FORMATTER:ON

Expand All @@ -79,7 +84,7 @@ public static TranspiledFunction from(Function f) {
}

enum UnsupportedFunction {
CRC32, DIFFERENCE, INITCAP, SOUNDEX, STRTOL, NEXT_DAY, KURTOSIS
CRC32, DIFFERENCE, INITCAP, SOUNDEX, STRTOL, NEXT_DAY

;

Expand Down Expand Up @@ -181,7 +186,9 @@ public void visit(Function function) {
}
break;
case ARRAY:
function.setName("Array_Value");
// see fixed issue #12252
// function.setName("Array_Value");
rewrittenExpression = new ArrayConstructor(parameters, false);
break;
case ENDSWITH:
function.setName("Ends_With");
Expand Down Expand Up @@ -429,7 +436,7 @@ public void visit(Function function) {
break;
case APPROX_PERCENTILE:
function.setName("Approx_Quantile");
if (paramCount==3) {
if (paramCount == 3) {
warning("PRECISION parameter not supported");
parameters.remove(2);
}
Expand All @@ -440,16 +447,16 @@ public void visit(Function function) {
// enforce an AnalyticExpression to get access to the aggregate function syntax
rewrittenExpression = new AnalyticExpression(function).withType(AnalyticType.FILTER_ONLY);
// preserve the position in the AST
rewrittenExpression.setASTNode( function.getASTNode() );
rewrittenExpression.setASTNode(function.getASTNode());
break;

case COUNT:
// @todo: add support for multiple columns
// @todo: NULL suppression
if (paramCount>1) {
if (paramCount > 1) {
warning("Only one column supported.");
while (parameters.size()>1) {
parameters.remove( parameters.size() - 1);
while (parameters.size() > 1) {
parameters.remove(parameters.size() - 1);
}
}
break;
Expand All @@ -463,24 +470,53 @@ public void visit(Function function) {
function.setName("First");
case FIRST:
// @todo: NULL suppression
if (paramCount>1) {
if (paramCount > 1) {
warning("Ignore NULLs is not supported.");
while (parameters.size()>1) {
parameters.remove( parameters.size() - 1);
while (parameters.size() > 1) {
parameters.remove(parameters.size() - 1);
}
}
break;
case LAST_VALUE:
function.setName("Last");
case LAST:
// @todo: NULL suppression
if (paramCount>1) {
if (paramCount > 1) {
warning("Ignore NULLs is not supported.");
while (parameters.size()>1) {
parameters.remove( parameters.size() - 1);
while (parameters.size() > 1) {
parameters.remove(parameters.size() - 1);
}
}
break;
case PERCENTILE:
function.setName("Quantile_Cont");
if (paramCount == 3) {
warning("FREQUENCY not supported");
parameters.remove(2);
}
break;
case PERCENTILE_APPROX:
function.setName("Approx_Quantile");
if (paramCount == 3) {
warning("ACCURACY not supported");
parameters.remove(2);
}
break;
case REGR_INTERCEPT:
case REGR_SLOPE:
case KURTOSIS:
case SKEWNESS:
warning("Unreliable, results may differ.");
break;
case STD:
function.setName("StdDev");
break;
case TRY_AVG:
// the TRY semantics are dropped; the call is rewritten to a plain Avg
warning("TRY error handling not supported.");
function.setName("Avg");
// must not fall through: otherwise the TRY_SUM branch warns a second time and renames to Sum
break;
case TRY_SUM:
// the TRY semantics are dropped; the call is rewritten to a plain Sum
warning("TRY error handling not supported.");
function.setName("Sum");
break;
}
}
if (rewrittenExpression == null) {
Expand Down Expand Up @@ -517,25 +553,42 @@ public void visit(AnalyticExpression function) {
break;
case ARRAY_AGG:
if (isEmpty(function.getFuncOrderBy())) {
function.setFuncOrderBy(List.of( new OrderByElement().withExpression(function.getExpression())));
function.setFuncOrderBy(
List.of(new OrderByElement().withExpression(function.getExpression())));
}
break;
case COLLECT_LIST:
// todo: add FILTER( column IS NOT NULL)
function.setName("List");
if (isEmpty(function.getFuncOrderBy())) {
function.setFuncOrderBy(List.of( new OrderByElement().withExpression(function.getExpression())));
function.setFuncOrderBy(
List.of(new OrderByElement().withExpression(function.getExpression())));
}
break;
case COLLECT_SET:
// todo: add FILTER( column IS NOT NULL)
function.setDistinct(true);
function.setName("List");
if (isEmpty(function.getFuncOrderBy())) {
function.setFuncOrderBy(List.of( new OrderByElement().withExpression(function.getExpression())));
function.setFuncOrderBy(
List.of(new OrderByElement().withExpression(function.getExpression())));
}

break;
case REGR_INTERCEPT:
case REGR_SLOPE:
case KURTOSIS:
case SKEWNESS:
warning("Unreliable, results may differ.");
break;
case STD:
function.setName("StdDev");
break;
case TRY_AVG:
// the TRY semantics are dropped; the call is rewritten to a plain Avg
warning("TRY error handling not supported.");
function.setName("Avg");
// must not fall through: otherwise the TRY_SUM branch warns a second time and renames to Sum
break;
case TRY_SUM:
// the TRY semantics are dropped; the call is rewritten to a plain Sum
warning("TRY error handling not supported.");
function.setName("Sum");
break;
}
}
if (rewrittenExpression == null) {
Expand Down
Binary file modified src/site/sphinx/_static/JSQLTranspiler.ods
Binary file not shown.
2 changes: 1 addition & 1 deletion src/test/java/ai/starlake/transpiler/DebugTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public boolean accept(File dir, String name) {

static Stream<Arguments> getSqlTestMap() {
return unrollParameterMap(getSqlTestMap(new File(TEST_FOLDER_STR).listFiles(FILENAME_FILTER),
JSQLTranspiler.Dialect.SNOWFLAKE, JSQLTranspiler.Dialect.DUCK_DB));
JSQLTranspiler.Dialect.DATABRICKS, JSQLTranspiler.Dialect.DUCK_DB));
}

@ParameterizedTest(name = "{index} {0} {1}: {2}")
Expand Down
8 changes: 4 additions & 4 deletions src/test/resources/ai/starlake/transpiler/any/debug.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
-- provided
SELECT current_timezone() as tz;
SELECT max_by(x, y) AS max_by FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y);

-- expected
SELECT strftime( current_timestamp, '%Z') as tz;
SELECT max_by(x, y) AS max_by FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y);

-- result
"tz"
"Asia/Bangkok"
"max_by"
"b"
Loading

0 comments on commit 8087289

Please sign in to comment.