diff --git a/README.md b/README.md index 019e3da..3ba3922 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ A pure Java stand-alone SQL Transpiler, Column- and Lineage Resolver for transla Supports `SELECT` queries as well as `INSERT`, `UPDATE`, `DELETE` and `MERGE` statements. -Internal Functions will be rewritten based on the actual meaning and purpose of the function (since the DuckDB `Any()` function does not necessarily behaves like the RDBMS specific `Any()`). Respecting different function arguments count, order and type. +Internal Functions will be rewritten based on the actual meaning and purpose of the function (since the DuckDB `Any()` function does not necessarily behave like the RDBMS specific `Any()`). Respecting different function arguments count, order and type. Rewrite of Window- and Aggregate-Functions with full coverage of the RDBMS specific published samples. The [matrix of supported features and functions](https://docs.google.com/spreadsheets/d/1jK6E1s2c0CWcw9rFeDvALdZ5wCshztdtlAHuNDaKQt4/edit?usp=sharing) is shared on Google Sheets. 
diff --git a/src/main/java/ai/starlake/transpiler/redshift/RedshiftExpressionTranspiler.java b/src/main/java/ai/starlake/transpiler/redshift/RedshiftExpressionTranspiler.java index f1fc6fc..6fabde7 100644 --- a/src/main/java/ai/starlake/transpiler/redshift/RedshiftExpressionTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/redshift/RedshiftExpressionTranspiler.java @@ -27,8 +27,6 @@ import net.sf.jsqlparser.expression.Expression; import net.sf.jsqlparser.expression.Function; import net.sf.jsqlparser.expression.JsonExpression; -import net.sf.jsqlparser.expression.JsonFunction; -import net.sf.jsqlparser.expression.JsonFunctionExpression; import net.sf.jsqlparser.expression.LongValue; import net.sf.jsqlparser.expression.StringValue; import net.sf.jsqlparser.expression.TimezoneExpression; @@ -40,7 +38,6 @@ import net.sf.jsqlparser.expression.operators.relational.ExpressionList; import net.sf.jsqlparser.expression.operators.relational.GreaterThan; import net.sf.jsqlparser.expression.operators.relational.IsNullExpression; -import net.sf.jsqlparser.expression.operators.relational.JsonOperator; import net.sf.jsqlparser.expression.operators.relational.MinorThan; import net.sf.jsqlparser.expression.operators.relational.NotEqualsTo; import net.sf.jsqlparser.expression.operators.relational.ParenthesedExpressionList; @@ -672,34 +669,46 @@ public StringBuilder visit(Function function, S params) { break; case CAN_JSON_PARSE: if (paramCount == 1) { - rewrittenExpression = new IsNullExpression(new CastExpression("Try_Cast", parameters.get(0), "JSON")).withNot(true); + rewrittenExpression = + new IsNullExpression(new CastExpression("Try_Cast", parameters.get(0), "JSON")) + .withNot(true); } break; case IS_VALID_JSON: if (paramCount == 1) { // json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))!='ARRAY' function.setName("Json_Valid"); - rewrittenExpression = new AndExpression(function, new NotEqualsTo(new Function("Json_type", new 
CastExpression("Try_cast", parameters.get(0), "JSON")), new StringValue("ARRAY"))); + rewrittenExpression = new AndExpression(function, + new NotEqualsTo( + new Function("Json_type", + new CastExpression("Try_cast", parameters.get(0), "JSON")), + new StringValue("ARRAY"))); } break; case IS_VALID_JSON_ARRAY: if (paramCount == 1) { // json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))='ARRAY' function.setName("Json_Valid"); - rewrittenExpression = new AndExpression(function, new EqualsTo(new Function("Json_type", new CastExpression("Try_cast", parameters.get(0), "JSON")), new StringValue("ARRAY"))); + rewrittenExpression = new AndExpression(function, + new EqualsTo( + new Function("Json_type", + new CastExpression("Try_cast", parameters.get(0), "JSON")), + new StringValue("ARRAY"))); } break; case JSON_EXTRACT_ARRAY_ELEMENT_TEXT: if (paramCount == 2) { // SELECT ('[111,112,113]'::JSON)[2] e; - rewrittenExpression = new ArrayExpression( new CastExpression("Try_Cast", parameters.get(0), "JSON"), parameters.get(1) ); + rewrittenExpression = new ArrayExpression( + new CastExpression("Try_Cast", parameters.get(0), "JSON"), parameters.get(1)); } break; case JSON_EXTRACT_PATH_TEXT: - if (paramCount>1) { + if (paramCount > 1) { rewrittenExpression = new CastExpression(parameters.get(0), "JSON"); - for (int i=1; i(parameters.get(i), "->"))); + for (int i = 1; i < paramCount; i++) { + rewrittenExpression = new JsonExpression(rewrittenExpression, + List.of(new AbstractMap.SimpleEntry<>(parameters.get(i), "->"))); } } } diff --git a/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeExpressionTranspiler.java b/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeExpressionTranspiler.java index b3e2f33..4bf5744 100644 --- a/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeExpressionTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeExpressionTranspiler.java @@ -57,59 +57,45 @@ public 
SnowflakeExpressionTranspiler(SelectDeParser deParser, StringBuilder buff } enum DatePart { + //@formatter:off year, y, yy, yyy, yyyy, yr, years, yrs, month, mm, mon, mons, months, day, d, dd, days, dayofmonth, dayofweek - , weekday, dow, dw, dayofweekiso, weekday_iso, dow_iso, dw_iso, dayofyear, yearday, doy, dy, week, w, wk, weekofyear - , woy, wy, weekiso, week_iso, weekofyeariso, weekofyear_iso, quarter, q, qtr, qtrs, quarters, yearofweek, yearofweekiso + //@formatter:on } enum TimePart { + //@formatter:off hour, h, hh, hr, hours, hrs, minute, m, mi, min, minutes, mins, second, s, sec, seconds, secs, millisecond, ms - , msec, milliseconds, microsecond, us, usec, microseconds, nanosecond, ns, nsec, nanosec, nsecond, nanoseconds - , nanosecs, nseconds, epoch_second, epoch, epoch_seconds, epoch_millisecond, epoch_milliseconds, epoch_microsecond - , epoch_microseconds, epoch_nanosecond, epoch_nanoseconds, timezone_hour, tzh, timezone_minute, tzm + //@formatter:on } enum TranspiledFunction { - // @FORMATTER:OFF + //@formatter:off DATE_FROM_PARTS, DATEFROMPARTS, TIME_FROM_PARTS, TIMEFROMPARTS, TIMESTAMP_FROM_PARTS, TIMESTAMPFROMPARTS - , TIMESTAMP_TZ_FROM_PARTS, TIMESTAMPTZFROMPARTS, TIMESTAMP_LTZ_FROM_PARTS, TIMESTAMPLTZFROMPARTS - , TIMESTAMP_NTZ_FROM_PARTS, TIMESTAMPNTZFROMPARTS, DATE_PART, DAYNAME, LAST_DAY, MONTHNAME, ADD_MONTHS, DATEADD - , TIMEADD, TIMESTAMPADD, DATEDIFF, TIMEDIFF, TIMESTAMPDIFF, TIME_SLICE, TRUNC, DATE, TIME, TO_TIMESTAMP_LTZ - , TO_TIMESTAMP_NTZ, TO_TIMESTAMP_TZ, CONVERT_TIMEZONE, TO_DATE, TO_TIME, TO_TIMESTAMP - , REGEXP_COUNT, REGEXP_EXTRACT_ALL, REGEXP_SUBSTR_ALL, REGEXP_INSTR, REGEXP_SUBSTR, REGEXP_LIKE, REGEXP_REPLACE - , BIT_LENGTH, OCTET_LENGTH, CHAR, INSERT, RTRIMMED_LENGTH, SPACE, SPLIT_TO_TABLE, STRTOK_SPLIT_TO_TABLE, STRTOK - , STRTOK_TO_ARRAY, UUID_STRING, CHARINDEX, POSITION, EDITDISTANCE, ENDSWITH, STARTSWITH, JAROWINKLER_SIMILARITY - , ARRAYAGG, ARRAY_CONSTRUCT, ARRAY_COMPACT, ARRAY_CONSTRUCT_COMPACT, ARRAY_CONTAINS, 
ARRAY_EXCEPT - , ARRAY_FLATTEN, ARRAY_GENERATE_RANGE, ARRAY_INSERT, ARRAY_INTERSECTION, ARRAY_MAX, ARRAY_MIN, ARRAY_POSITION - , ARRAY_PREPEND, ARRAY_REMOVE, ARRAY_REMOVE_AT, ARRAY_SIZE, ARRAY_SLICE, ARRAY_SORT, ARRAY_TO_STRING, ARRAYS_OVERLAP - , VARIANCE_POP, VARIANCE_SAMP, BITAND_AGG, BITOR_AGG, BITXOR_AGG, BOOLAND_AGG, BOOLOR_AGG, BOOLXOR_AGG, SKEW - , GROUPING_ID, TO_VARCHAR, TO_BINARY, TRY_TO_BINARY, TO_DECIMAL, TO_NUMBER, TO_NUMERIC, TRY_TO_DECIMAL - , TRY_TO_NUMBER, TRY_TO_NUMERIC, TO_DOUBLE, TRY_TO_DOUBLE, TO_BOOLEAN, TRY_TO_BOOLEAN, TRY_TO_DATE, TRY_TO_TIME - , TRY_TO_TIMESTAMP, TRY_TO_TIMESTAMP_LTZ, TRY_TO_TIMESTAMP_NTZ, TRY_TO_TIMESTAMP_TZ - , RANDOM, DIV0, DIV0NULL, ROUND, SQUARE - + , CHECK_JSON, TRY_PARSE_JSON, GET_PATH, OBJECT_KEYS, STRIP_NULL_VALUE + , ST_GEOMFROMWKB , ST_GEOMETRYFROMEWKB , ST_GEOMFROMEWKB, ST_GEOMETRYFROMWKB, ST_GEOMFROMWKT , ST_GEOMETRYFROMEWKT + , ST_GEOMFROMEWKT , ST_GEOMETRYFROMTEXT , ST_GEOMFROMTEXT, ST_GEOMETRYFROMWKT, TO_GEOMETRY, TRY_TO_GEOMETRY ; - // @FORMATTER:ON + //@formatter:on @SuppressWarnings({"PMD.EmptyCatchBlock"}) @@ -987,6 +973,73 @@ public StringBuilder visit(Function function, S params) { function.setParameters(parameters.get(0), new LongValue(2)); } break; + case CHECK_JSON: + function.setName("JSon_Valid"); + break; + case TRY_PARSE_JSON: + if (paramCount == 1) { + rewrittenExpression = new CastExpression("Try_Cast", parameters.get(0), "JSON"); + } + break; + case GET_PATH: + function.setName("Json_Value"); + break; + case OBJECT_KEYS: + function.setName("Json_Keys"); + break; + case STRIP_NULL_VALUE: + if (paramCount == 1) { + rewrittenExpression = parameters.get(0); + } + break; + case ST_GEOMETRYFROMWKB: + case ST_GEOMFROMWKB: + case ST_GEOMETRYFROMEWKB: + case ST_GEOMFROMEWKB: + if (paramCount > 1) { + warning("SRID and ALLOW_INVALID are not supported."); + } + function.setName("If"); + function.setParameters( + new Function("REGEXP_MATCHES", parameters.get(0), new StringValue("^[0-9A-Fa-f]+$")), + new 
Function("ST_GeomFromHEXEWKB$$", parameters.get(0)), + new Function("ST_GeomFromWKB$$", new CastExpression(parameters.get(0), "BLOB"))); + break; + case ST_GEOMFROMWKT: + case ST_GEOMETRYFROMEWKT: + case ST_GEOMFROMEWKT: + case ST_GEOMETRYFROMTEXT: + case ST_GEOMFROMTEXT: + case ST_GEOMETRYFROMWKT: + function.setName("ST_GEOMFROMTEXT$$"); + if (paramCount > 1) { + warning("SRID and ALLOW_INVALID are not supported."); + } + if (parameters.get(0) instanceof StringValue) { + String regex = "(?i)SRID=\\d+;"; + String s = ((StringValue) parameters.get(0)).getValue(); + if (s.toUpperCase().contains("SRID")) { + warning("SRID is not supported"); + function.setParameters(new StringValue(s.replaceAll(regex, ""))); + } else { + function.setParameters(parameters.get(0)); + } + } else { + function.setParameters(parameters.get(0)); + } + break; + case TO_GEOMETRY: + if (paramCount > 1) { + warning("SRID and ALLOW_INVALID are not supported."); + } + rewrittenExpression = new CastExpression("Cast", parameters.get(0), "GEOMETRY"); + break; + case TRY_TO_GEOMETRY: + if (paramCount > 1) { + warning("SRID and ALLOW_INVALID are not supported."); + } + rewrittenExpression = new CastExpression("Try_Cast", parameters.get(0), "GEOMETRY"); + break; } } if (rewrittenExpression == null) { diff --git a/src/test/java/ai/starlake/transpiler/JSQLTranspilerTest.java b/src/test/java/ai/starlake/transpiler/JSQLTranspilerTest.java index 33cb8b4..ee6c0c1 100644 --- a/src/test/java/ai/starlake/transpiler/JSQLTranspilerTest.java +++ b/src/test/java/ai/starlake/transpiler/JSQLTranspilerTest.java @@ -292,6 +292,16 @@ static synchronized void init() LOGGER.log(Level.FINE, "Failed to INSTALL/LOAD the SPATIAL extension", ex); } + LOGGER.info("Preparing H3 Community Extension"); + try (Statement st = connDuck.createStatement()) { + for (String s : new String[] {"INSTALL h3 FROM community;", "LOAD h3;"}) { + LOGGER.fine("execute: " + s); + st.execute(s); + } + } catch (Exception ex) { + LOGGER.log(Level.FINE, 
"Failed to INSTALL/LOAD the H3 extension", ex); + } + String sqlStr = IOUtils.resourceToString( JSQLTranspilerTest.class.getCanonicalName().replaceAll("\\.", "/") + "_DDL.sql", Charset.defaultCharset(), JSQLTranspilerTest.class.getClassLoader()); diff --git a/src/test/resources/ai/starlake/transpiler/any/debug.sql b/src/test/resources/ai/starlake/transpiler/any/debug.sql index 6df80f7..de79175 100644 --- a/src/test/resources/ai/starlake/transpiler/any/debug.sql +++ b/src/test/resources/ai/starlake/transpiler/any/debug.sql @@ -1,8 +1,8 @@ -- provided -SELECT ST_AsBinary(ST_GeomFromText('POLYGON((0 0,0 1,1 1,1 0,0 0))',4326)) b; +SELECT 1; -- expected -SELECT ST_ASWKB(ST_GeomFromText('POLYGON((0 0,0 1,1 1,1 0,0 0))'))::BLOB b; +SELECT 1; -- count 1 \ No newline at end of file diff --git a/src/test/resources/ai/starlake/transpiler/redshift/json.sql b/src/test/resources/ai/starlake/transpiler/redshift/json.sql index a3ec2a3..2defd3a 100644 --- a/src/test/resources/ai/starlake/transpiler/redshift/json.sql +++ b/src/test/resources/ai/starlake/transpiler/redshift/json.sql @@ -133,6 +133,6 @@ SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}','f4', -- expected SELECT '{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}'::JSON -> 'f4' -> 'f6' e; --- expected +-- result "e" """star""" diff --git a/src/test/resources/ai/starlake/transpiler/snowflake/geometry.sql b/src/test/resources/ai/starlake/transpiler/snowflake/geometry.sql new file mode 100644 index 0000000..0ac96a2 --- /dev/null +++ b/src/test/resources/ai/starlake/transpiler/snowflake/geometry.sql @@ -0,0 +1,26 @@ +-- provided +SELECT ST_GEOMETRYFROMEWKB('0101000020797F000066666666A9CB17411F85EBC19E325641') g; + +-- expected +SELECT + IF( + REGEXP_MATCHES('0101000020797F000066666666A9CB17411F85EBC19E325641','^[0-9A-Fa-f]+$') + , ST_GEOMFROMHEXEWKB('0101000020797F000066666666A9CB17411F85EBC19E325641') + , ST_GEOMFROMWKB(ENCODE('0101000020797F000066666666A9CB17411F85EBC19E325641') + ) +) g; + +-- 
result +"g" +"POINT (389866.35 5819003.03)" + + +-- provided +SELECT TO_GEOMETRY('SRID=4326;POINT(1820.12 890.56)') g; + +-- expected +SELECT Cast('SRID=4326;POINT(1820.12 890.56)' AS GEOMETRY) g; + +-- result +"g" +"POINT (1820.12 890.56)" \ No newline at end of file diff --git a/src/test/resources/ai/starlake/transpiler/snowflake/json.sql b/src/test/resources/ai/starlake/transpiler/snowflake/json.sql new file mode 100644 index 0000000..6465873 --- /dev/null +++ b/src/test/resources/ai/starlake/transpiler/snowflake/json.sql @@ -0,0 +1,89 @@ +-- prolog +DROP TABLE IF EXISTS sample_json_table; +CREATE TABLE sample_json_table (ID INTEGER, varchar1 VARCHAR, variant1 BLOB); +INSERT INTO sample_json_table (ID, varchar1) VALUES + (1, '{"ValidKey1": "ValidValue1"}'), + (2, '{"Malformed -- Missing value": }'), + (3, NULL) + ; +UPDATE sample_json_table SET variant1 = varchar1::BLOB; + +-- provided +SELECT ID, CHECK_JSON(varchar1) v, varchar1 FROM sample_json_table ORDER BY ID; + +-- expected +SELECT ID, json_valid(varchar1) v, varchar1 FROM sample_json_table ORDER BY ID; + +-- result +"ID","v","varchar1" +"1","true","{""ValidKey1"": ""ValidValue1""}" +"2","false","{""Malformed -- Missing value"": }" +"3","","" + +-- epilog +DROP TABLE IF EXISTS sample_json_table; + + +-- prolog +DROP TABLE IF EXISTS demo1; +CREATE TABLE demo1 (id INTEGER, json_data VARCHAR); +INSERT INTO demo1 SELECT + 1, '{"level_1_key": "level_1_value"}'; +INSERT INTO demo1 SELECT + 2, '{"level_1_key": {"level_2_key": "level_2_value"}}'; +INSERT INTO demo1 SELECT + 3, '{"level_1_key": {"level_2_key": ["zero", "one", "two"]}}'; + +-- provided +SELECT + JSON_EXTRACT_PATH_TEXT(json_data, 'level_1_key') + AS JSON_EXTRACT_PATH_TEXT + FROM demo1 + ORDER BY id; + + +-- expected +SELECT + json_data::JSON -> 'level_1_key' + AS JSON_EXTRACT_PATH_TEXT + FROM demo1 + ORDER BY id; + +-- result +"JSON_EXTRACT_PATH_TEXT" +"""level_1_value""" +"{""level_2_key"":""level_2_value""}" 
+"{""level_2_key"":[""zero"",""one"",""two""]}" + + +-- epilog +DROP TABLE IF EXISTS demo1; + + +-- prolog +DROP TABLE IF EXISTS vartab; +CREATE OR REPLACE TABLE vartab (ID INTEGER, v VARCHAR); + +INSERT INTO vartab (id, v) VALUES + (1, '[-1, 12, 289, 2188, false,]'), + (2, '{ "x" : "abc", "y" : false, "z": 10} '), + (3, '{ "bad" : "json", "missing" : true, "close_brace": 10 '); + +-- provided +SELECT ID, TRY_PARSE_JSON(v) j + FROM vartab + ORDER BY ID; + +-- expected +SELECT ID, Try_Cast(v AS JSON) j + FROM vartab + ORDER BY ID; + +-- result +"ID","j" +"1","[-1, 12, 289, 2188, false,]" +"2","{ ""x"" : ""abc"", ""y"" : false, ""z"": 10} " +"3","" + +-- epilog +DROP TABLE IF EXISTS vartab; \ No newline at end of file