Skip to content

Commit

Permalink
feat: Snowflake JSON and Geo-Spatial functions
Browse files Browse the repository at this point in the history
Signed-off-by: Andreas Reichel <[email protected]>
  • Loading branch information
manticore-projects committed Nov 4, 2024
1 parent 8e16734 commit 35f05fa
Show file tree
Hide file tree
Showing 8 changed files with 224 additions and 37 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ A pure Java stand-alone SQL Transpiler, Column- and Lineage Resolver for transla

Supports `SELECT` queries as well as `INSERT`, `UPDATE`, `DELETE` and `MERGE` statements.

Internal Functions will be rewritten based on the actual meaning and purpose of the function (since the DuckDB `Any()` function does not necessarily behaves like the RDBMS specific `Any()`). Respecting different function arguments count, order and type.
Internal Functions will be rewritten based on the actual meaning and purpose of the function (since the DuckDB `Any()` function does not necessarily behave like the RDBMS specific `Any()`). Respecting different function arguments count, order and type.

Rewrite of Window- and Aggregate-Functions with full coverage of the RDBMS specific published samples.
The [matrix of supported features and functions](https://docs.google.com/spreadsheets/d/1jK6E1s2c0CWcw9rFeDvALdZ5wCshztdtlAHuNDaKQt4/edit?usp=sharing) is shared on Google Sheets.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.JsonExpression;
import net.sf.jsqlparser.expression.JsonFunction;
import net.sf.jsqlparser.expression.JsonFunctionExpression;
import net.sf.jsqlparser.expression.LongValue;
import net.sf.jsqlparser.expression.StringValue;
import net.sf.jsqlparser.expression.TimezoneExpression;
Expand All @@ -40,7 +38,6 @@
import net.sf.jsqlparser.expression.operators.relational.ExpressionList;
import net.sf.jsqlparser.expression.operators.relational.GreaterThan;
import net.sf.jsqlparser.expression.operators.relational.IsNullExpression;
import net.sf.jsqlparser.expression.operators.relational.JsonOperator;
import net.sf.jsqlparser.expression.operators.relational.MinorThan;
import net.sf.jsqlparser.expression.operators.relational.NotEqualsTo;
import net.sf.jsqlparser.expression.operators.relational.ParenthesedExpressionList;
Expand Down Expand Up @@ -672,34 +669,46 @@ public <S> StringBuilder visit(Function function, S params) {
break;
case CAN_JSON_PARSE:
if (paramCount == 1) {
rewrittenExpression = new IsNullExpression(new CastExpression("Try_Cast", parameters.get(0), "JSON")).withNot(true);
rewrittenExpression =
new IsNullExpression(new CastExpression("Try_Cast", parameters.get(0), "JSON"))
.withNot(true);
}
break;
case IS_VALID_JSON:
if (paramCount == 1) {
// json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))!='ARRAY'
function.setName("Json_Valid");
rewrittenExpression = new AndExpression(function, new NotEqualsTo(new Function("Json_type", new CastExpression("Try_cast", parameters.get(0), "JSON")), new StringValue("ARRAY")));
rewrittenExpression = new AndExpression(function,
new NotEqualsTo(
new Function("Json_type",
new CastExpression("Try_cast", parameters.get(0), "JSON")),
new StringValue("ARRAY")));
}
break;
case IS_VALID_JSON_ARRAY:
if (paramCount == 1) {
// json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))='ARRAY'
function.setName("Json_Valid");
rewrittenExpression = new AndExpression(function, new EqualsTo(new Function("Json_type", new CastExpression("Try_cast", parameters.get(0), "JSON")), new StringValue("ARRAY")));
rewrittenExpression = new AndExpression(function,
new EqualsTo(
new Function("Json_type",
new CastExpression("Try_cast", parameters.get(0), "JSON")),
new StringValue("ARRAY")));
}
break;
case JSON_EXTRACT_ARRAY_ELEMENT_TEXT:
if (paramCount == 2) {
// SELECT ('[111,112,113]'::JSON)[2] e;
rewrittenExpression = new ArrayExpression( new CastExpression("Try_Cast", parameters.get(0), "JSON"), parameters.get(1) );
rewrittenExpression = new ArrayExpression(
new CastExpression("Try_Cast", parameters.get(0), "JSON"), parameters.get(1));
}
break;
case JSON_EXTRACT_PATH_TEXT:
if (paramCount>1) {
if (paramCount > 1) {
rewrittenExpression = new CastExpression(parameters.get(0), "JSON");
for (int i=1; i<paramCount; i++) {
rewrittenExpression = new JsonExpression(rewrittenExpression, List.of(new AbstractMap.SimpleEntry<>(parameters.get(i), "->")));
for (int i = 1; i < paramCount; i++) {
rewrittenExpression = new JsonExpression(rewrittenExpression,
List.of(new AbstractMap.SimpleEntry<>(parameters.get(i), "->")));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,59 +57,45 @@ public SnowflakeExpressionTranspiler(SelectDeParser deParser, StringBuilder buff
}

enum DatePart {
//@formatter:off
year, y, yy, yyy, yyyy, yr, years, yrs, month, mm, mon, mons, months, day, d, dd, days, dayofmonth, dayofweek

, weekday, dow, dw, dayofweekiso, weekday_iso, dow_iso, dw_iso, dayofyear, yearday, doy, dy, week, w, wk, weekofyear

, woy, wy, weekiso, week_iso, weekofyeariso, weekofyear_iso, quarter, q, qtr, qtrs, quarters, yearofweek, yearofweekiso
//@formatter:on
}

enum TimePart {
//@formatter:off
hour, h, hh, hr, hours, hrs, minute, m, mi, min, minutes, mins, second, s, sec, seconds, secs, millisecond, ms

, msec, milliseconds, microsecond, us, usec, microseconds, nanosecond, ns, nsec, nanosec, nsecond, nanoseconds

, nanosecs, nseconds, epoch_second, epoch, epoch_seconds, epoch_millisecond, epoch_milliseconds, epoch_microsecond

, epoch_microseconds, epoch_nanosecond, epoch_nanoseconds, timezone_hour, tzh, timezone_minute, tzm
//@formatter:on
}

enum TranspiledFunction {
// @FORMATTER:OFF
//@formatter:off
DATE_FROM_PARTS, DATEFROMPARTS, TIME_FROM_PARTS, TIMEFROMPARTS, TIMESTAMP_FROM_PARTS, TIMESTAMPFROMPARTS

, TIMESTAMP_TZ_FROM_PARTS, TIMESTAMPTZFROMPARTS, TIMESTAMP_LTZ_FROM_PARTS, TIMESTAMPLTZFROMPARTS

, TIMESTAMP_NTZ_FROM_PARTS, TIMESTAMPNTZFROMPARTS, DATE_PART, DAYNAME, LAST_DAY, MONTHNAME, ADD_MONTHS, DATEADD

, TIMEADD, TIMESTAMPADD, DATEDIFF, TIMEDIFF, TIMESTAMPDIFF, TIME_SLICE, TRUNC, DATE, TIME, TO_TIMESTAMP_LTZ

, TO_TIMESTAMP_NTZ, TO_TIMESTAMP_TZ, CONVERT_TIMEZONE, TO_DATE, TO_TIME, TO_TIMESTAMP

, REGEXP_COUNT, REGEXP_EXTRACT_ALL, REGEXP_SUBSTR_ALL, REGEXP_INSTR, REGEXP_SUBSTR, REGEXP_LIKE, REGEXP_REPLACE

, BIT_LENGTH, OCTET_LENGTH, CHAR, INSERT, RTRIMMED_LENGTH, SPACE, SPLIT_TO_TABLE, STRTOK_SPLIT_TO_TABLE, STRTOK

, STRTOK_TO_ARRAY, UUID_STRING, CHARINDEX, POSITION, EDITDISTANCE, ENDSWITH, STARTSWITH, JAROWINKLER_SIMILARITY

, ARRAYAGG, ARRAY_CONSTRUCT, ARRAY_COMPACT, ARRAY_CONSTRUCT_COMPACT, ARRAY_CONTAINS, ARRAY_EXCEPT

, ARRAY_FLATTEN, ARRAY_GENERATE_RANGE, ARRAY_INSERT, ARRAY_INTERSECTION, ARRAY_MAX, ARRAY_MIN, ARRAY_POSITION

, ARRAY_PREPEND, ARRAY_REMOVE, ARRAY_REMOVE_AT, ARRAY_SIZE, ARRAY_SLICE, ARRAY_SORT, ARRAY_TO_STRING, ARRAYS_OVERLAP

, VARIANCE_POP, VARIANCE_SAMP, BITAND_AGG, BITOR_AGG, BITXOR_AGG, BOOLAND_AGG, BOOLOR_AGG, BOOLXOR_AGG, SKEW

, GROUPING_ID, TO_VARCHAR, TO_BINARY, TRY_TO_BINARY, TO_DECIMAL, TO_NUMBER, TO_NUMERIC, TRY_TO_DECIMAL

, TRY_TO_NUMBER, TRY_TO_NUMERIC, TO_DOUBLE, TRY_TO_DOUBLE, TO_BOOLEAN, TRY_TO_BOOLEAN, TRY_TO_DATE, TRY_TO_TIME

, TRY_TO_TIMESTAMP, TRY_TO_TIMESTAMP_LTZ, TRY_TO_TIMESTAMP_NTZ, TRY_TO_TIMESTAMP_TZ

, RANDOM, DIV0, DIV0NULL, ROUND, SQUARE

, CHECK_JSON, TRY_PARSE_JSON, GET_PATH, OBJECT_KEYS, STRIP_NULL_VALUE
, ST_GEOMFROMWKB , ST_GEOMETRYFROMEWKB , ST_GEOMFROMEWKB, ST_GEOMETRYFROMWKB, ST_GEOMFROMWKT , ST_GEOMETRYFROMEWKT
, ST_GEOMFROMEWKT , ST_GEOMETRYFROMTEXT , ST_GEOMFROMTEXT, ST_GEOMETRYFROMWKT, TO_GEOMETRY, TRY_TO_GEOMETRY
;
// @FORMATTER:ON
//@formatter:on


@SuppressWarnings({"PMD.EmptyCatchBlock"})
Expand Down Expand Up @@ -987,6 +973,73 @@ public <S> StringBuilder visit(Function function, S params) {
function.setParameters(parameters.get(0), new LongValue(2));
}
break;
case CHECK_JSON:
function.setName("JSon_Valid");
break;
case TRY_PARSE_JSON:
if (paramCount == 1) {
rewrittenExpression = new CastExpression("Try_Cast", parameters.get(0), "JSON");
}
break;
case GET_PATH:
function.setName("Json_Value");
break;
case OBJECT_KEYS:
function.setName("Json_Keys");
break;
case STRIP_NULL_VALUE:
if (paramCount == 1) {
rewrittenExpression = parameters.get(0);
}
break;
case ST_GEOMETRYFROMWKB:
case ST_GEOMFROMWKB:
case ST_GEOMETRYFROMEWKB:
case ST_GEOMFROMEWKB:
if (paramCount > 1) {
warning("SRID and ALLOW_INVALID are not supported.");
}
function.setName("If");
function.setParameters(
new Function("REGEXP_MATCHES", parameters.get(0), new StringValue("^[0-9A-Fa-f]+$")),
new Function("ST_GeomFromHEXEWKB$$", parameters.get(0)),
new Function("ST_GeomFromWKB$$", new CastExpression(parameters.get(0), "BLOB")));
break;
case ST_GEOMFROMWKT:
case ST_GEOMETRYFROMEWKT:
case ST_GEOMFROMEWKT:
case ST_GEOMETRYFROMTEXT:
case ST_GEOMFROMTEXT:
case ST_GEOMETRYFROMWKT:
function.setName("ST_GEOMFROMTEXT$$");
if (paramCount > 1) {
warning("SRID and ALLOW_INVALID are not supported.");
}
if (parameters.get(0) instanceof StringValue) {
String regex = "(?i)SRID=\\d+;";
String s = ((StringValue) parameters.get(0)).getValue();
if (s.toUpperCase().contains("SRID")) {
warning("SRID is not supported");
function.setParameters(new StringValue(s.replaceAll(regex, "")));
} else {
function.setParameters(parameters.get(0));
}
} else {
function.setParameters(parameters.get(0));
}
break;
case TO_GEOMETRY:
if (paramCount > 1) {
warning("SRID and ALLOW_INVALID are not supported.");
}
rewrittenExpression = new CastExpression("Cast", parameters.get(0), "GEOMETRY");
break;
case TRY_TO_GEOMETRY:
if (paramCount > 1) {
warning("SRID and ALLOW_INVALID are not supported.");
}
rewrittenExpression = new CastExpression("Try_Cast", parameters.get(0), "GEOMETRY");
break;
}
}
if (rewrittenExpression == null) {
Expand Down
10 changes: 10 additions & 0 deletions src/test/java/ai/starlake/transpiler/JSQLTranspilerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,16 @@ static synchronized void init()
LOGGER.log(Level.FINE, "Failed to INSTALL/LOAD the SPATIAL extension", ex);
}

LOGGER.info("Preparing H3 Community Extension");
try (Statement st = connDuck.createStatement()) {
for (String s : new String[] {"INSTALL h3 FROM community;", "LOAD h3;"}) {
LOGGER.fine("execute: " + s);
st.execute(s);
}
} catch (Exception ex) {
LOGGER.log(Level.FINE, "Failed to INSTALL/LOAD the SPATIAL extension", ex);
}

String sqlStr = IOUtils.resourceToString(
JSQLTranspilerTest.class.getCanonicalName().replaceAll("\\.", "/") + "_DDL.sql",
Charset.defaultCharset(), JSQLTranspilerTest.class.getClassLoader());
Expand Down
4 changes: 2 additions & 2 deletions src/test/resources/ai/starlake/transpiler/any/debug.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
-- provided
SELECT ST_AsBinary(ST_GeomFromText('POLYGON((0 0,0 1,1 1,1 0,0 0))',4326)) b;
SELECT 1;

-- expected
SELECT ST_ASWKB(ST_GeomFromText('POLYGON((0 0,0 1,1 1,1 0,0 0))'))::BLOB b;
SELECT 1;

-- count
1
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,6 @@ SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}','f4',
-- expected
SELECT '{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}'::JSON -> 'f4' -> 'f6' e;

-- expected
-- result
"e"
"""star"""
26 changes: 26 additions & 0 deletions src/test/resources/ai/starlake/transpiler/snowflake/geometry.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- provided
SELECT ST_GEOMETRYFROMEWKB('0101000020797F000066666666A9CB17411F85EBC19E325641') g;

-- expected
SELECT
IF(
REGEXP_MATCHES('0101000020797F000066666666A9CB17411F85EBC19E325641','^[0-9A-Fa-f]+$')
, ST_GEOMFROMHEXEWKB('0101000020797F000066666666A9CB17411F85EBC19E325641')
, ST_GEOMFROMWKB(ENCODE('0101000020797F000066666666A9CB17411F85EBC19E325641')
)
) g;

-- result
"g"
"POINT (389866.35 5819003.03)"


-- provided
SELECT TO_GEOMETRY('SRID=4326;POINT(1820.12 890.56)') g;

-- expected
SELECT Cast('SRID=4326;POINT(1820.12 890.56)' AS GEOMETRY) g;

-- result
"g"
"POINT (1820.12 890.56)"
89 changes: 89 additions & 0 deletions src/test/resources/ai/starlake/transpiler/snowflake/json.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- prolog
DROP TABLE IF EXISTS sample_json_table;
CREATE TABLE sample_json_table (ID INTEGER, varchar1 VARCHAR, variant1 BLOB);
INSERT INTO sample_json_table (ID, varchar1) VALUES
(1, '{"ValidKey1": "ValidValue1"}'),
(2, '{"Malformed -- Missing value": }'),
(3, NULL)
;
UPDATE sample_json_table SET variant1 = varchar1::BLOB;

-- provided
SELECT ID, CHECK_JSON(varchar1) v, varchar1 FROM sample_json_table ORDER BY ID;

-- expected
SELECT ID, json_valid(varchar1) v, varchar1 FROM sample_json_table ORDER BY ID;

-- result
"ID","v","varchar1"
"1","true","{""ValidKey1"": ""ValidValue1""}"
"2","false","{""Malformed -- Missing value"": }"
"3","",""

-- epilog
DROP TABLE IF EXISTS sample_json_table;


-- prolog
DROP TABLE IF EXISTS demo1;
CREATE TABLE demo1 (id INTEGER, json_data VARCHAR);
INSERT INTO demo1 SELECT
1, '{"level_1_key": "level_1_value"}';
INSERT INTO demo1 SELECT
2, '{"level_1_key": {"level_2_key": "level_2_value"}}';
INSERT INTO demo1 SELECT
3, '{"level_1_key": {"level_2_key": ["zero", "one", "two"]}}';

-- provided
SELECT
JSON_EXTRACT_PATH_TEXT(json_data, 'level_1_key')
AS JSON_EXTRACT_PATH_TEXT
FROM demo1
ORDER BY id;


-- expected
SELECT
json_data::JSON -> 'level_1_key'
AS JSON_EXTRACT_PATH_TEXT
FROM demo1
ORDER BY id;

-- result
"JSON_EXTRACT_PATH_TEXT"
"""level_1_value"""
"{""level_2_key"":""level_2_value""}"
"{""level_2_key"":[""zero"",""one"",""two""]}"


-- epilog
DROP TABLE IF EXISTS demo1;


-- prolog
DROP TABLE IF EXISTS vartab;
CREATE OR REPLACE TABLE vartab (ID INTEGER, v VARCHAR);

INSERT INTO vartab (id, v) VALUES
(1, '[-1, 12, 289, 2188, false,]'),
(2, '{ "x" : "abc", "y" : false, "z": 10} '),
(3, '{ "bad" : "json", "missing" : true, "close_brace": 10 ');

-- provided
SELECT ID, TRY_PARSE_JSON(v) j
FROM vartab
ORDER BY ID;

-- expected
SELECT ID, Try_Cast(v AS JSON) j
FROM vartab
ORDER BY ID;

-- result
"ID","j"
"1","[-1, 12, 289, 2188, false,]"
"2","{ ""x"" : ""abc"", ""y"" : false, ""z"": 10} "
"3",""

-- epilog
DROP TABLE IF EXISTS vartab;

0 comments on commit 35f05fa

Please sign in to comment.