Skip to content

Commit

Permalink
feat: Amazon RedShift JSON
Browse files Browse the repository at this point in the history
Signed-off-by: Andreas Reichel <[email protected]>
  • Loading branch information
manticore-projects committed Nov 3, 2024
1 parent 33d8366 commit 8e16734
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 4 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dependencies {
// for the ASCII Trees
testImplementation 'hu.webarticum:tree-printer:+'

testImplementation 'org.duckdb:duckdb_jdbc:+'
testImplementation 'org.duckdb:duckdb_jdbc:1.1.2'
testImplementation 'org.apache.commons:commons-compress:+'
testImplementation 'com.opencsv:opencsv:+'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,30 @@
import net.sf.jsqlparser.expression.DateTimeLiteralExpression;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.JsonExpression;
import net.sf.jsqlparser.expression.JsonFunction;
import net.sf.jsqlparser.expression.JsonFunctionExpression;
import net.sf.jsqlparser.expression.LongValue;
import net.sf.jsqlparser.expression.StringValue;
import net.sf.jsqlparser.expression.TimezoneExpression;
import net.sf.jsqlparser.expression.WhenClause;
import net.sf.jsqlparser.expression.operators.arithmetic.Addition;
import net.sf.jsqlparser.expression.operators.arithmetic.Subtraction;
import net.sf.jsqlparser.expression.operators.conditional.AndExpression;
import net.sf.jsqlparser.expression.operators.relational.EqualsTo;
import net.sf.jsqlparser.expression.operators.relational.ExpressionList;
import net.sf.jsqlparser.expression.operators.relational.GreaterThan;
import net.sf.jsqlparser.expression.operators.relational.IsNullExpression;
import net.sf.jsqlparser.expression.operators.relational.JsonOperator;
import net.sf.jsqlparser.expression.operators.relational.MinorThan;
import net.sf.jsqlparser.expression.operators.relational.NotEqualsTo;
import net.sf.jsqlparser.expression.operators.relational.ParenthesedExpressionList;
import net.sf.jsqlparser.schema.Column;
import net.sf.jsqlparser.statement.create.table.ColDataType;
import net.sf.jsqlparser.util.deparser.SelectDeParser;

import java.util.AbstractMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand All @@ -54,9 +63,19 @@ public RedshiftExpressionTranspiler(SelectDeParser deParser, StringBuilder buffe
}

enum TranspiledFunction {
// @FORMATTER:OFF
BPCHARCMP, BTRIM, BTTEXT_PATTERN_CMP, CHAR_LENGTH, CHARACTER_LENGTH, TEXTLEN, LEN, CHARINDEX, STRPOS, COLLATE, OCTETINDEX, REGEXP_COUNT, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR, REPLICATE, ADD_MONTHS, CONVERT_TIMEZONE, DATE_CMP, DATE_CMP_TIMESTAMP, DATE_CMP_TIMESTAMPTZ, DATEADD, DATEDIFF, DATE_PART, DATE_PART_YEAR, DATE_TRUNC, GETDATE, INTERVAL_CMP, MONTHS_BETWEEN, SYSDATE, TIMEOFDAY, TIMESTAMP_CMP, TIMESTAMP_CMP_DATE, TIMESTAMP_CMP_TIMESTAMPTZ, TIMESTAMPTZ_CMP, TIMESTAMPTZ_CMP_DATE, TIMESTAMPTZ_CMP_TIMESTAMP, TIMEZONE, TO_TIMESTAMP, ARRAY, ARRAY_FLATTEN, GET_ARRAY_LENGTH, SPLIT_TO_ARRAY, SUBARRAY, DEXP, DLOG1, DLOG10, LOG, TRUNC, TO_CHAR, TO_NUMBER, CONVERT, APPROXIMATE_PERCENTILE_DISC, APPROXIMATE_COUNT, GEOMETRYTYPE, ST_GEOMFROMTEXT, ST_GEOGFROMTEXT, ST_ASEWKB, ST_ASEWKT, ST_ASBINARY, ST_ASGEOJSON, ST_ASHEXEWKB, ST_ASTEXT, ST_BUFFER, ST_COLLECT, ST_DISTANCESPHERE, ST_FORCE3D, ST_GEOGFROMWKB, ST_GEOMFROMWKB, ST_GEOMFROMEWKB, ST_GEOMFROMEWKT, ST_LENGTHSPHERE, ST_LENGTH2D, ST_MAKEPOINT, ST_NDIMS, ST_PERIMETER2D, ST_POLYGON;
// @FORMATTER:ON
// @formatter:off
BPCHARCMP, BTRIM, BTTEXT_PATTERN_CMP, CHAR_LENGTH, CHARACTER_LENGTH, TEXTLEN, LEN, CHARINDEX, STRPOS, COLLATE
, OCTETINDEX, REGEXP_COUNT, REGEXP_INSTR, REGEXP_REPLACE, REGEXP_SUBSTR, REPLICATE, ADD_MONTHS, CONVERT_TIMEZONE
, DATE_CMP, DATE_CMP_TIMESTAMP, DATE_CMP_TIMESTAMPTZ, DATEADD, DATEDIFF, DATE_PART, DATE_PART_YEAR, DATE_TRUNC
, GETDATE, INTERVAL_CMP, MONTHS_BETWEEN, SYSDATE, TIMEOFDAY, TIMESTAMP_CMP, TIMESTAMP_CMP_DATE
, TIMESTAMP_CMP_TIMESTAMPTZ, TIMESTAMPTZ_CMP, TIMESTAMPTZ_CMP_DATE, TIMESTAMPTZ_CMP_TIMESTAMP, TIMEZONE
, TO_TIMESTAMP, ARRAY, ARRAY_FLATTEN, GET_ARRAY_LENGTH, SPLIT_TO_ARRAY, SUBARRAY, DEXP, DLOG1, DLOG10, LOG
, TRUNC, TO_CHAR, TO_NUMBER, CONVERT, APPROXIMATE_PERCENTILE_DISC, APPROXIMATE_COUNT, GEOMETRYTYPE, ST_GEOMFROMTEXT
, ST_GEOGFROMTEXT, ST_ASEWKB, ST_ASEWKT, ST_ASBINARY, ST_ASGEOJSON, ST_ASHEXEWKB, ST_ASTEXT, ST_BUFFER, ST_COLLECT
, ST_DISTANCESPHERE, ST_FORCE3D, ST_GEOGFROMWKB, ST_GEOMFROMWKB, ST_GEOMFROMEWKB, ST_GEOMFROMEWKT, ST_LENGTHSPHERE
, ST_LENGTH2D, ST_MAKEPOINT, ST_NDIMS, ST_PERIMETER2D, ST_POLYGON
, JSON_PARSE, CAN_JSON_PARSE, IS_VALID_JSON, IS_VALID_JSON_ARRAY, JSON_EXTRACT_ARRAY_ELEMENT_TEXT, JSON_EXTRACT_PATH_TEXT;
// @formatter:on


@SuppressWarnings({"PMD.EmptyCatchBlock"})
Expand Down Expand Up @@ -646,6 +665,43 @@ public <S> StringBuilder visit(Function function, S params) {
function.setName("ST_MakePolygon");
function.setParameters(parameters.get(0));
break;
case JSON_PARSE:
if (paramCount == 1) {
rewrittenExpression = new CastExpression(parameters.get(0), "JSON");
}
break;
case CAN_JSON_PARSE:
if (paramCount == 1) {
rewrittenExpression = new IsNullExpression(new CastExpression("Try_Cast", parameters.get(0), "JSON")).withNot(true);
}
break;
case IS_VALID_JSON:
if (paramCount == 1) {
// json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))!='ARRAY'
function.setName("Json_Valid");
rewrittenExpression = new AndExpression(function, new NotEqualsTo(new Function("Json_type", new CastExpression("Try_cast", parameters.get(0), "JSON")), new StringValue("ARRAY")));
}
break;
case IS_VALID_JSON_ARRAY:
if (paramCount == 1) {
// json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))='ARRAY'
function.setName("Json_Valid");
rewrittenExpression = new AndExpression(function, new EqualsTo(new Function("Json_type", new CastExpression("Try_cast", parameters.get(0), "JSON")), new StringValue("ARRAY")));
}
break;
case JSON_EXTRACT_ARRAY_ELEMENT_TEXT:
if (paramCount == 2) {
// SELECT ('[111,112,113]'::JSON)[2] e;
rewrittenExpression = new ArrayExpression( new CastExpression("Try_Cast", parameters.get(0), "JSON"), parameters.get(1) );
}
break;
case JSON_EXTRACT_PATH_TEXT:
if (paramCount>1) {
rewrittenExpression = new CastExpression(parameters.get(0), "JSON");
for (int i=1; i<paramCount; i++) {
rewrittenExpression = new JsonExpression(rewrittenExpression, List.of(new AbstractMap.SimpleEntry<>(parameters.get(i), "->")));
}
}
}
}
if (rewrittenExpression == null) {
Expand Down
138 changes: 138 additions & 0 deletions src/test/resources/ai/starlake/transpiler/redshift/json.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
-- provided
SELECT JSON_PARSE('[10001,10002,"abc"]') j;

-- expected
SELECT '[10001,10002,"abc"]'::JSON j;


-- result
"j"
"[10001,10002,""abc""]"


-- provided
SELECT CASE
WHEN CAN_JSON_PARSE('[10001,10002,"abc"]')
THEN JSON_PARSE('[10001,10002,"abc"]')
END t;

-- expected
SELECT CASE
WHEN try_cast('[10001,10002,"abc"]' AS JSON) is not null
THEN '[10001,10002,"abc"]'::JSON
END t;


-- result
"t"
"[10001,10002,""abc""]"


-- provided
SELECT CAN_JSON_PARSE('This is a string.') t;

-- expected
SELECT try_cast('This is a string.' AS JSON) is not null t;

-- result
"t"
"false"

-- prolog
drop table if exists test_json;
CREATE TABLE test_json(id int primary key, json_strings VARCHAR);

INSERT INTO test_json VALUES
(1, '{"a":2}'),
(2, '{"a":{"b":{"c":1}}}'),
(3, '{"a": [1,2,"b"]}');

INSERT INTO test_json VALUES
(4, '{{}}'),
(5, '{1:"a"}'),
(6, '[1,2,3]');

-- provided
SELECT id, json_strings, IS_VALID_JSON(json_strings) v
FROM test_json
ORDER BY id;

-- expected
SELECT id, json_strings, json_valid(json_strings) AND json_type(try_cast(json_strings AS JSON))<>'ARRAY' v
FROM test_json
ORDER BY id;

-- result
"id","json_strings","v"
"1","{""a"":2}","true"
"2","{""a"":{""b"":{""c"":1}}}","true"
"3","{""a"": [1,2,""b""]}","true"
"4","{{}}","false"
"5","{1:""a""}","false"
"6","[1,2,3]","false"

-- epilog
drop table if exists test_json;


-- prolog
DROP TABLE IF EXISTS test_json_arrays;
CREATE TABLE test_json_arrays(id int primary key, json_arrays VARCHAR);

INSERT INTO test_json_arrays
VALUES(1, '[]'),
(2, '["a","b"]'),
(3, '["a",["b",1,["c",2,3,null]]]'),
(4, '{"a":1}'),
(5, 'a'),
(6, '[1,2,]');

-- provided
SELECT json_arrays, IS_VALID_JSON_ARRAY(json_arrays) v
FROM test_json_arrays ORDER BY id;

-- expected
SELECT json_arrays, json_valid(json_arrays) AND json_type(try_cast(json_arrays AS JSON))='ARRAY' v
FROM test_json_arrays ORDER BY id;

-- result
"json_arrays","v"
"[]","true"
"[""a"",""b""]","true"
"[""a"",[""b"",1,[""c"",2,3,null]]]","true"
"{""a"":1}","false"
"a","false"
"[1,2,]","true"

-- epilog
DROP TABLE IF EXISTS test_json_arrays;


-- provided
SELECT JSON_ARRAY_LENGTH('[11,12,13,{"f1":21,"f2":[25,26]},14]') l;

-- result
"l"
"5.00"


-- provided
SELECT JSON_EXTRACT_ARRAY_ELEMENT_TEXT('[111,112,113]', 2) e;

-- expected
SELECT Try_Cast('[111,112,113]' AS JSON)[2] e;

-- result
"e"
"113"


-- provided
SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}','f4', 'f6') e;

-- expected
SELECT '{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}'::JSON -> 'f4' -> 'f6' e;

-- result
"e"
"""star"""

0 comments on commit 8e16734

Please sign in to comment.