From a18230d4202c3adc3d9829ad833cdc7baa694884 Mon Sep 17 00:00:00 2001 From: manticore-projects Date: Sat, 7 Dec 2024 20:46:37 +0700 Subject: [PATCH] feat: switch `GEO_MODE` to `GEOMETRY` or `GEOGRAPHY` Signed-off-by: Andreas Reichel Signed-off-by: manticore-projects --- README.md | 30 +++++- .../transpiler/JSQLExpressionTranspiler.java | 31 +++++- .../starlake/transpiler/JSQLTranspiler.java | 5 +- .../bigquery/BigQueryTranspiler.java | 2 +- .../databricks/DatabricksTranspiler.java | 2 +- .../redshift/RedshiftTranspiler.java | 2 +- .../snowflake/SnowflakeTranspiler.java | 2 +- .../bigquery/BigQueryTranspilerTest.java | 94 +++++++++++++++++++ 8 files changed, 156 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 3ba3922..c93848c 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,35 @@ String actual = Assertions.assertThat(actual).isEqualTo(expected); ``` + +### Geography vs. Geometry + +DuckDB currently only supports the `GEOMETRY` type. So for overloaded functions we need to decide if to interpret as `GEOMETRY` or as `GEOGRAPHY`. One can use the property `GEO_MODE` and set it either to `GEOGRAPHY` or `GEOMETRY` (with `GEOMETRY` being the default). Alternatively the Parameter Map can be used. + +```java +String expected = "SELECT ST_Area_Spheroid(ST_GEOMFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) AS area"; +String actual = JSQLTranspiler + .transpileQuery( + "select st_area(ST_GEOGFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) as area;" + , JSQLTranspiler.Dialect.GOOGLE_BIG_QUERY + , Map.of("GEO_MODE", "GEOGRAPHY") + ); +Assertions.assertEquals(expected, actual); +Assertions.assertEquals(12308778361.469452, getQueryResults(actual)[1][0]); +``` + +```java +System.setProperty("GEO_MODE", "GEOMETRY"); +String expected = "SELECT ST_Area(ST_GEOMFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) AS area"; +String actual = JSQLTranspiler + .transpileQuery( + "select ST_Area(ST_GEOGFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) as area;" + , JSQLTranspiler.Dialect.GOOGLE_BIG_QUERY + ); +Assertions.assertEquals(expected, actual); +Assertions.assertEquals(1.0, getQueryResults(actual)[1][0]); +``` + ### Error Handling In case the query refers to objects not existing in the provided database schema, the `JSQLColumnResolver` offers three modes: @@ -302,7 +331,6 @@ String lineage = Please refer to the [Feature Matrix](https://docs.google.com/spreadsheets/d/1jK6E1s2c0CWcw9rFeDvALdZ5wCshztdtlAHuNDaKQt4/edit?usp=sharing): - DuckDB's Number and Currency formatting is very limited right now -- `Geography`, `JSon` and `XML` functions have not been implemented yet, but are planned - `SELECT * REPLACE(...)` on DuckDB works very differently (replaces value instead of label) ## License diff --git a/src/main/java/ai/starlake/transpiler/JSQLExpressionTranspiler.java b/src/main/java/ai/starlake/transpiler/JSQLExpressionTranspiler.java index d71eb90..241d135 100644 --- a/src/main/java/ai/starlake/transpiler/JSQLExpressionTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/JSQLExpressionTranspiler.java @@ -89,7 +89,7 @@ public class JSQLExpressionTranspiler extends ExpressionDeParser { final private Pattern ARRAY_COLUMN_TYPE_PATTERN = Pattern.compile("ARRAY<(.+)>"); - public final HashMap parameters = new LinkedHashMap<>(); + public final HashMap parameterMap = new LinkedHashMap<>(); public JSQLExpressionTranspiler(SelectDeParser deParser, StringBuilder buffer) { super(deParser, buffer); @@ -125,6 +125,10 @@ public JSQLExpressionTranspiler(SelectDeParser deParser, StringBuilder buffer) { }; + enum GeoMode { + GEOGRAPHY, GEOMETRY; + } + enum TranspiledFunction { //@formatter:off CURRENT_DATE, CURRENT_DATETIME, CURRENT_TIME, CURRENT_TIMESTAMP, DATE, DATETIME, TIME, TIMESTAMP, DATE_ADD @@ -143,7 +147,10 @@ enum TranspiledFunction { , FLOAT64, LAX_FLOAT64, INT64, LAX_INT64, LAX_STRING, JSON_QUERY, JSON_VALUE, JSON_QUERY_ARRAY, JSON_VALUE_ARRAY , JSON_EXTRACT, JSON_EXTRACT_ARRAY, JSON_EXTRACT_SCALAR, JSON_EXTRACT_STRING_ARRAY, PARSE_JSON, TO_JSON, TO_JSON_STRING, NVL , UNNEST, ST_GEOGPOINT, ST_GEOGFROMTEXT, ST_GEOGFROMGEOJSON, ST_GEOGFROMWKB, ST_ASBINARY, ST_ASGEOJSON, ST_ASTEXT - , ST_BUFFER, ST_NUMPOINTS, ST_DISTANCE, ST_BOUNDINGBOX, ST_EXTENT, ST_PERIMETER; + , ST_BUFFER, ST_NUMPOINTS, ST_DISTANCE, ST_BOUNDINGBOX, ST_EXTENT, ST_PERIMETER + // GEO_MODE + , ST_AREA + ; //@formatter:on @@ -494,6 +501,14 @@ public StringBuilder visit(Function function, S params) { boolean hasSafePrefix = false; int paramCount = hasParameters ? function.getParameters().size() : 0; + GeoMode geoMode = GeoMode.GEOMETRY; + if ("GEOGRAPHY".equalsIgnoreCase(String.valueOf(parameterMap.get("GEO_MODE")))) { + geoMode = GeoMode.GEOGRAPHY; + } else if ("GEOGRAPHY" + .equalsIgnoreCase(String.valueOf(System.getProperties().get("GEO_MODE")))) { + geoMode = GeoMode.GEOGRAPHY; + } + if (UnsupportedFunction.from(function) != null) { throw new RuntimeException( "Unsupported: " + functionName + " is not supported by DuckDB (yet)."); @@ -1361,6 +1376,14 @@ ELSE JSON_VALUE('{"a.b":{"c":"world"}}','$."a.b".c') warning("USE_SPHEROID is not supported."); } break; + case ST_AREA: + switch (geoMode) { + case GEOMETRY: + break; + case GEOGRAPHY: + function.setName("ST_Area_Spheroid"); + break; + } } } if (rewrittenExpression == null) { @@ -2646,9 +2669,9 @@ public static Expression castInterval(IntervalExpression expression) { public StringBuilder visit(TimeKeyExpression expression, S context) { String value = expression.getStringValue().toUpperCase().replaceAll("[()]", ""); - if (parameters.containsKey(value)) { + if (parameterMap.containsKey(value)) { // @todo: Cast Date/Time types - castDateTime(parameters.get(value).toString()).accept(this, null); + castDateTime(parameterMap.get(value).toString()).accept(this, null); } else if (System.getProperties().containsKey(value)) { castDateTime(System.getProperty(value)).accept(this, null); } else if (value.equals("CURRENT_TIMEZONE")) { diff --git a/src/main/java/ai/starlake/transpiler/JSQLTranspiler.java b/src/main/java/ai/starlake/transpiler/JSQLTranspiler.java index e80fc2e..f2d0944 100644 --- a/src/main/java/ai/starlake/transpiler/JSQLTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/JSQLTranspiler.java @@ -107,7 +107,7 @@ public JSQLTranspiler(Map parameters) throws InvocationTargetExc NoSuchMethodException, InstantiationException, IllegalAccessException { this(JSQLSelectTranspiler.class, JSQLExpressionTranspiler.class); this.parameters.putAll(parameters); - this.expressionTranspiler.parameters.putAll(parameters); + this.expressionTranspiler.parameterMap.putAll(parameters); } public JSQLTranspiler() throws InvocationTargetException, NoSuchMethodException, @@ -150,8 +150,7 @@ public static String transpileQuery(String qryStr, Dialect dialect, } else /* Toggle state for double quotes */ if (c == '\"' && !inSingleQuote) { inDoubleQuote = !inDoubleQuote; sb.append('\''); // Replace outer double quotes with single quotes - } else /*Replace inner single quotes with double quotes if inside double-quoted context */ if (c == '\'' - && inDoubleQuote) { + } else /*Replace inner single quotes with double quotes if inside double-quoted context */ if (c == '\'') { sb.append('"'); } else /* Append everything else as-is */ { sb.append(c); diff --git a/src/main/java/ai/starlake/transpiler/bigquery/BigQueryTranspiler.java b/src/main/java/ai/starlake/transpiler/bigquery/BigQueryTranspiler.java index a0f9336..8a86bb9 100644 --- a/src/main/java/ai/starlake/transpiler/bigquery/BigQueryTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/bigquery/BigQueryTranspiler.java @@ -26,6 +26,6 @@ public BigQueryTranspiler(Map parameters) throws InvocationTarge NoSuchMethodException, InstantiationException, IllegalAccessException { super(BigQuerySelectTranspiler.class, BigQueryExpressionTranspiler.class); this.parameters.putAll(parameters); - this.expressionTranspiler.parameters.putAll(parameters); + this.expressionTranspiler.parameterMap.putAll(parameters); } } diff --git a/src/main/java/ai/starlake/transpiler/databricks/DatabricksTranspiler.java b/src/main/java/ai/starlake/transpiler/databricks/DatabricksTranspiler.java index ca61e44..514ba46 100644 --- a/src/main/java/ai/starlake/transpiler/databricks/DatabricksTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/databricks/DatabricksTranspiler.java @@ -27,6 +27,6 @@ public DatabricksTranspiler(Map parameters) throws InvocationTar NoSuchMethodException, InstantiationException, IllegalAccessException { super(DatabricksSelectTranspiler.class, DatabricksExpressionTranspiler.class); this.parameters.putAll(parameters); - this.expressionTranspiler.parameters.putAll(parameters); + this.expressionTranspiler.parameterMap.putAll(parameters); } } diff --git a/src/main/java/ai/starlake/transpiler/redshift/RedshiftTranspiler.java b/src/main/java/ai/starlake/transpiler/redshift/RedshiftTranspiler.java index f6f432d..bad9414 100644 --- a/src/main/java/ai/starlake/transpiler/redshift/RedshiftTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/redshift/RedshiftTranspiler.java @@ -26,6 +26,6 @@ public RedshiftTranspiler(Map parameters) throws InvocationTarge NoSuchMethodException, InstantiationException, IllegalAccessException { super(RedshiftSelectTranspiler.class, RedshiftExpressionTranspiler.class); this.parameters.putAll(parameters); - this.expressionTranspiler.parameters.putAll(parameters); + this.expressionTranspiler.parameterMap.putAll(parameters); } } diff --git a/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeTranspiler.java b/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeTranspiler.java index 6830df0..c52a72a 100644 --- a/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeTranspiler.java +++ b/src/main/java/ai/starlake/transpiler/snowflake/SnowflakeTranspiler.java @@ -26,6 +26,6 @@ public SnowflakeTranspiler(Map parameters) throws InvocationTarg NoSuchMethodException, InstantiationException, IllegalAccessException { super(SnowflakeSelectTranspiler.class, SnowflakeExpressionTranspiler.class); this.parameters.putAll(parameters); - this.expressionTranspiler.parameters.putAll(parameters); + this.expressionTranspiler.parameterMap.putAll(parameters); } } diff --git a/src/test/java/ai/starlake/transpiler/bigquery/BigQueryTranspilerTest.java b/src/test/java/ai/starlake/transpiler/bigquery/BigQueryTranspilerTest.java index 5d34576..1620355 100644 --- a/src/test/java/ai/starlake/transpiler/bigquery/BigQueryTranspilerTest.java +++ b/src/test/java/ai/starlake/transpiler/bigquery/BigQueryTranspilerTest.java @@ -18,6 +18,7 @@ import ai.starlake.transpiler.JSQLTranspiler; import ai.starlake.transpiler.JSQLTranspilerTest; +import net.sf.jsqlparser.JSQLParserException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -25,6 +26,13 @@ import org.junit.jupiter.params.provider.MethodSource; import java.io.File; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Stream; @@ -44,6 +52,36 @@ protected void transpile(File f, int idx, SQLTest t) throws Exception { super.transpile(f, idx, t); } + public static Object[][] getQueryResults(String query) throws SQLException { + try (Statement statement = connDuck.createStatement(); + ResultSet resultSet = statement.executeQuery(query)) { + + ResultSetMetaData metaData = resultSet.getMetaData(); + int columnCount = metaData.getColumnCount(); + + // Use an ArrayList to build rows dynamically + List data = new ArrayList<>(); + + // Add column headers as the first row + Object[] headers = new Object[columnCount]; + for (int i = 0; i < columnCount; i++) { + headers[i] = metaData.getColumnLabel(i + 1); + } + data.add(headers); + + // Add each row of data + while (resultSet.next()) { + Object[] row = new Object[columnCount]; + for (int colIndex = 0; colIndex < columnCount; colIndex++) { + row[colIndex] = resultSet.getObject(colIndex + 1); + } + data.add(row); + } + + // Convert List to Object[][] + return data.toArray(new Object[0][]); + } + } @Test void testRegex() { @@ -67,4 +105,60 @@ void testRegex() { Assertions.assertEquals(expected, sb.toString()); } + + @Test + void testGeoModeGeometryUsingProperty() + throws JSQLParserException, InterruptedException, SQLException { + System.setProperty("GEO_MODE", "GEOMETRY"); + + String expected = + "SELECT ST_Area(ST_GEOMFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) AS area"; + String actual = JSQLTranspiler.transpileQuery( + "select ST_Area(ST_GEOGFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) as area;", + JSQLTranspiler.Dialect.GOOGLE_BIG_QUERY); + + Assertions.assertEquals(expected, actual); + + Assertions.assertEquals(1.0, getQueryResults(actual)[1][0]); + } + + @Test + void testGeoModeGeometryUsingParameterMap() + throws JSQLParserException, InterruptedException, SQLException { + String expected = + "SELECT ST_Area(ST_GEOMFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) AS area"; + String actual = JSQLTranspiler.transpileQuery( + "select ST_Area(ST_GEOGFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) as area;", + JSQLTranspiler.Dialect.GOOGLE_BIG_QUERY, Map.of("GEO_MODE", "GEOMETRY")); + + Assertions.assertEquals(expected, actual); + + Assertions.assertEquals(1.0, getQueryResults(actual)[1][0]); + } + + @Test + void testGeoModeGeographyUsingProperty() + throws JSQLParserException, InterruptedException, SQLException { + System.setProperty("GEO_MODE", "GEOGRAPHY"); + + String expected = + "SELECT ST_Area_Spheroid(ST_GEOMFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) AS area"; + String actual = JSQLTranspiler.transpileQuery( + "select st_area(ST_GEOGFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) as area;", + JSQLTranspiler.Dialect.GOOGLE_BIG_QUERY); + Assertions.assertEquals(expected, actual); + Assertions.assertEquals(12308778361.469452, getQueryResults(actual)[1][0]); + } + + @Test + void testGeoModeGeographyUsingParameterMap() + throws JSQLParserException, InterruptedException, SQLException { + String expected = + "SELECT ST_Area_Spheroid(ST_GEOMFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) AS area"; + String actual = JSQLTranspiler.transpileQuery( + "select st_area(ST_GEOGFROMTEXT('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')) as area;", + JSQLTranspiler.Dialect.GOOGLE_BIG_QUERY, Map.of("GEO_MODE", "GEOGRAPHY")); + Assertions.assertEquals(expected, actual); + Assertions.assertEquals(12308778361.469452, getQueryResults(actual)[1][0]); + } }