From 3168ed5525cd275f83d556b7fe33e8b09666b8cd Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 3 Nov 2022 15:01:15 -0700 Subject: [PATCH] Fix incorrect results returned by `min`, `max` and `avg` (#1000) (#1022) * Fix incorrect results returned by `min`, `max` and `avg` aggregations on null/missing values. Signed-off-by: Yury-Fridlyand * Fix indentation. Signed-off-by: Yury-Fridlyand * Activate and fix integration tests. Signed-off-by: Yury-Fridlyand * Add more tests. Signed-off-by: Yury-Fridlyand Signed-off-by: Yury-Fridlyand (cherry picked from commit 4282450878005c88126d25333cad662baa1ee2ed) Co-authored-by: Yury-Fridlyand --- .../sql/legacy/SQLIntegTestCase.java | 11 +- .../opensearch/sql/legacy/TestsConstants.java | 2 + .../org/opensearch/sql/sql/AggregationIT.java | 206 +++++++++++++++++- integ-test/src/test/resources/calcs.json | 34 +++ .../calcs_index_mappings.json | 94 ++++++++ .../null_missing_index_mapping.json | 37 ++++ .../src/test/resources/null_missing.json | 18 ++ .../response/agg/SingleValueParser.java | 4 +- .../opensearch/response/agg/StatsParser.java | 4 +- .../sql/opensearch/response/agg/Utils.java | 8 +- ...enSearchAggregationResponseParserTest.java | 6 +- 11 files changed, 409 insertions(+), 15 deletions(-) create mode 100644 integ-test/src/test/resources/calcs.json create mode 100644 integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json create mode 100644 integ-test/src/test/resources/indexDefinitions/null_missing_index_mapping.json create mode 100644 integ-test/src/test/resources/null_missing.json diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 5c339cc7bb..f03acbbbfd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -48,6 +48,7 @@ import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getMappingFile; import static org.opensearch.sql.legacy.TestUtils.getNestedSimpleIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getNestedTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getOdbcIndexMapping; @@ -575,7 +576,15 @@ public enum Index { BEER(TestsConstants.TEST_INDEX_BEER, "beer", null, - "src/test/resources/beer.stackexchange.json"),; + "src/test/resources/beer.stackexchange.json"), + NULL_MISSING(TestsConstants.TEST_INDEX_NULL_MISSING, + "null_missing", + getMappingFile("null_missing_index_mapping.json"), + "src/test/resources/null_missing.json"), + CALCS(TestsConstants.TEST_INDEX_CALCS, + "calcs", + getMappingFile("calcs_index_mappings.json"), + "src/test/resources/calcs.json"),; private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index f54f079bb6..a9f81c68fe 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -51,6 +51,8 @@ public class TestsConstants { public final static String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; public final static String TEST_INDEX_DATATYPE_NONNUMERIC = TEST_INDEX + "_datatypes_nonnumeric"; public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer"; + public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; + public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java index edd78b9506..95f5b5e3e4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java @@ -6,36 +6,234 @@ package org.opensearch.sql.sql; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CALCS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NULL_MISSING; +import static org.opensearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import static org.opensearch.sql.util.MatcherUtils.verifySome; +import static org.opensearch.sql.util.TestUtils.getResponseBody; import java.io.IOException; +import java.util.List; +import java.util.Locale; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; import org.opensearch.sql.legacy.SQLIntegTestCase; public class AggregationIT extends SQLIntegTestCase { @Override protected void init() throws Exception { + super.init(); loadIndex(Index.BANK); + loadIndex(Index.NULL_MISSING); + loadIndex(Index.CALCS); } @Test - void filteredAggregatePushedDown() throws IOException { + public void testFilteredAggregatePushDown() throws IOException { JSONObject response = executeQuery( "SELECT COUNT(*) FILTER(WHERE age > 35) FROM " + TEST_INDEX_BANK); - verifySchema(response, schema("COUNT(*)", null, "integer")); + verifySchema(response, schema("COUNT(*) FILTER(WHERE age > 35)", null, "integer")); verifyDataRows(response, rows(3)); } @Test - void filteredAggregateNotPushedDown() throws IOException { + public void testFilteredAggregateNotPushDown() throws IOException { JSONObject response = executeQuery( "SELECT COUNT(*) FILTER(WHERE age > 35) FROM (SELECT * FROM " + TEST_INDEX_BANK + ") AS a"); - verifySchema(response, schema("COUNT(*)", null, "integer")); + verifySchema(response, schema("COUNT(*) FILTER(WHERE age > 35)", null, "integer")); verifyDataRows(response, rows(3)); } + + @Test + public void testPushDownAggregationOnNullValues() throws IOException { + // OpenSearch aggregation query (MetricAggregation) + var response = executeQuery(String.format( + "SELECT min(`int`), max(`int`), avg(`int`), min(`dbl`), max(`dbl`), avg(`dbl`) " + + "FROM %s WHERE `key` = 'null'", TEST_INDEX_NULL_MISSING)); + verifySchema(response, + schema("min(`int`)", null, "integer"), schema("max(`int`)", null, "integer"), + schema("avg(`int`)", null, "double"), schema("min(`dbl`)", null, "double"), + schema("max(`dbl`)", null, "double"), schema("avg(`dbl`)", null, "double")); + verifyDataRows(response, rows(null, null, null, null, null, null)); + } + + @Test + public void testPushDownAggregationOnMissingValues() throws IOException { + // OpenSearch aggregation query (MetricAggregation) + var response = executeQuery(String.format( + "SELECT min(`int`), max(`int`), avg(`int`), min(`dbl`), max(`dbl`), avg(`dbl`) " + + "FROM %s WHERE `key` = 'null'", TEST_INDEX_NULL_MISSING)); + verifySchema(response, + schema("min(`int`)", null, "integer"), schema("max(`int`)", null, "integer"), + schema("avg(`int`)", null, "double"), schema("min(`dbl`)", null, "double"), + schema("max(`dbl`)", null, "double"), schema("avg(`dbl`)", null, "double")); + verifyDataRows(response, rows(null, null, null, null, null, null)); + } + + @Test + public void testInMemoryAggregationOnNullValues() throws IOException { + // In-memory aggregation performed by the plugin + var response = executeQuery(String.format("SELECT" + + " min(`int`) over (PARTITION BY `key`), max(`int`) over (PARTITION BY `key`)," + + " avg(`int`) over (PARTITION BY `key`), min(`dbl`) over (PARTITION BY `key`)," + + " max(`dbl`) over (PARTITION BY `key`), avg(`dbl`) over (PARTITION BY `key`)" + + " FROM %s WHERE `key` = 'null'", TEST_INDEX_NULL_MISSING)); + verifySchema(response, + schema("min(`int`) over (PARTITION BY `key`)", null, "integer"), + schema("max(`int`) over (PARTITION BY `key`)", null, "integer"), + schema("avg(`int`) over (PARTITION BY `key`)", null, "double"), + schema("min(`dbl`) over (PARTITION BY `key`)", null, "double"), + schema("max(`dbl`) over (PARTITION BY `key`)", null, "double"), + schema("avg(`dbl`) over (PARTITION BY `key`)", null, "double")); + verifyDataRows(response, // 4 rows with null values + rows(null, null, null, null, null, null), + rows(null, null, null, null, null, null), + rows(null, null, null, null, null, null), + rows(null, null, null, null, null, null)); + } + + @Test + public void testInMemoryAggregationOnMissingValues() throws IOException { + // In-memory aggregation performed by the plugin + var response = executeQuery(String.format("SELECT" + + " min(`int`) over (PARTITION BY `key`), max(`int`) over (PARTITION BY `key`)," + + " avg(`int`) over (PARTITION BY `key`), min(`dbl`) over (PARTITION BY `key`)," + + " max(`dbl`) over (PARTITION BY `key`), avg(`dbl`) over (PARTITION BY `key`)" + + " FROM %s WHERE `key` = 'missing'", TEST_INDEX_NULL_MISSING)); + verifySchema(response, + schema("min(`int`) over (PARTITION BY `key`)", null, "integer"), + schema("max(`int`) over (PARTITION BY `key`)", null, "integer"), + schema("avg(`int`) over (PARTITION BY `key`)", null, "double"), + schema("min(`dbl`) over (PARTITION BY `key`)", null, "double"), + schema("max(`dbl`) over (PARTITION BY `key`)", null, "double"), + schema("avg(`dbl`) over (PARTITION BY `key`)", null, "double")); + verifyDataRows(response, // 4 rows with null values + rows(null, null, null, null, null, null), + rows(null, null, null, null, null, null), + rows(null, null, null, null, null, null), + rows(null, null, null, null, null, null)); + } + + @Test + public void testInMemoryAggregationOnNullValuesReturnsNull() throws IOException { + var response = executeQuery(String.format("SELECT " + + " max(int0) over (PARTITION BY `datetime1`)," + + " min(int0) over (PARTITION BY `datetime1`)," + + " avg(int0) over (PARTITION BY `datetime1`)" + + "from %s where int0 IS NULL;", TEST_INDEX_CALCS)); + verifySchema(response, + schema("max(int0) over (PARTITION BY `datetime1`)", null, "integer"), + schema("min(int0) over (PARTITION BY `datetime1`)", null, "integer"), + schema("avg(int0) over (PARTITION BY `datetime1`)", null, "double")); + verifySome(response.getJSONArray("datarows"), rows(null, null, null)); + } + + @Test + public void testInMemoryAggregationOnAllValuesAndOnNotNullReturnsSameResult() throws IOException { + var responseNotNulls = executeQuery(String.format("SELECT " + + " max(int0) over (PARTITION BY `datetime1`)," + + " min(int0) over (PARTITION BY `datetime1`)," + + " avg(int0) over (PARTITION BY `datetime1`)" + + "from %s where int0 IS NOT NULL;", TEST_INDEX_CALCS)); + var responseAllValues = executeQuery(String.format("SELECT " + + " max(int0) over (PARTITION BY `datetime1`)," + + " min(int0) over (PARTITION BY `datetime1`)," + + " avg(int0) over (PARTITION BY `datetime1`)" + + "from %s;", TEST_INDEX_CALCS)); + verifySchema(responseNotNulls, + schema("max(int0) over (PARTITION BY `datetime1`)", null, "integer"), + schema("min(int0) over (PARTITION BY `datetime1`)", null, "integer"), + schema("avg(int0) over (PARTITION BY `datetime1`)", null, "double")); + verifySchema(responseAllValues, + schema("max(int0) over (PARTITION BY `datetime1`)", null, "integer"), + schema("min(int0) over (PARTITION BY `datetime1`)", null, "integer"), + schema("avg(int0) over (PARTITION BY `datetime1`)", null, "double")); + assertEquals(responseNotNulls.query("/datarows/0/0"), responseAllValues.query("/datarows/0/0")); + assertEquals(responseNotNulls.query("/datarows/0/1"), responseAllValues.query("/datarows/0/1")); + assertEquals(responseNotNulls.query("/datarows/0/2"), responseAllValues.query("/datarows/0/2")); + } + + @Test + public void testPushDownAggregationOnNullValuesReturnsNull() throws IOException { + var response = executeQuery(String.format("SELECT " + + "max(int0), min(int0), avg(int0) from %s where int0 IS NULL;", TEST_INDEX_CALCS)); + verifySchema(response, + schema("max(int0)", null, "integer"), + schema("min(int0)", null, "integer"), + schema("avg(int0)", null, "double")); + verifyDataRows(response, rows(null, null, null)); + } + + @Test + public void testPushDownAggregationOnAllValuesAndOnNotNullReturnsSameResult() throws IOException { + var responseNotNulls = executeQuery(String.format("SELECT " + + "max(int0), min(int0), avg(int0) from %s where int0 IS NOT NULL;", TEST_INDEX_CALCS)); + var responseAllValues = executeQuery(String.format("SELECT " + + "max(int0), min(int0), avg(int0) from %s;", TEST_INDEX_CALCS)); + verifySchema(responseNotNulls, + schema("max(int0)", null, "integer"), + schema("min(int0)", null, "integer"), + schema("avg(int0)", null, "double")); + verifySchema(responseAllValues, + schema("max(int0)", null, "integer"), + schema("min(int0)", null, "integer"), + schema("avg(int0)", null, "double")); + assertEquals(responseNotNulls.query("/datarows/0/0"), responseAllValues.query("/datarows/0/0")); + assertEquals(responseNotNulls.query("/datarows/0/1"), responseAllValues.query("/datarows/0/1")); + assertEquals(responseNotNulls.query("/datarows/0/2"), responseAllValues.query("/datarows/0/2")); + } + + @Test + public void testPushDownAndInMemoryAggregationReturnTheSameResult() throws IOException { + // Playing with 'over (PARTITION BY `datetime1`)' - `datetime1` column has the same value for all rows + // so partitioning by this column has no sense and doesn't (shouldn't) affect the results + // Aggregations with `OVER` clause are executed in memory (in SQL plugin memory), + // Aggregations without it are performed the OpenSearch node itself (pushed down to opensearch) + // Going to compare results of `min`, `max` and `avg` aggregation on all numeric columns in `calcs` + var columns = List.of("int0", "int1", "int2", "int3", "num0", "num1", "num2", "num3", "num4"); + var aggregations = List.of("min", "max", "avg"); + var inMemoryAggregQuery = new StringBuilder("SELECT "); + var pushDownAggregQuery = new StringBuilder("SELECT "); + for (var col : columns) { + for (var aggreg : aggregations) { + inMemoryAggregQuery.append(String.format(" %s(%s) over (PARTITION BY `datetime1`),", aggreg, col)); + pushDownAggregQuery.append(String.format(" %s(%s),", aggreg, col)); + } + } + // delete last comma + inMemoryAggregQuery.deleteCharAt(inMemoryAggregQuery.length() - 1); + pushDownAggregQuery.deleteCharAt(pushDownAggregQuery.length() - 1); + + var responseInMemory = executeQuery( + inMemoryAggregQuery.append("from " + TEST_INDEX_CALCS).toString()); + var responsePushDown = executeQuery( + pushDownAggregQuery.append("from " + TEST_INDEX_CALCS).toString()); + + for (int i = 0; i < columns.size() * aggregations.size(); i++) { + assertEquals( + ((Number)responseInMemory.query("/datarows/0/" + i)).doubleValue(), + ((Number)responsePushDown.query("/datarows/0/" + i)).doubleValue(), + 0.0000001); // a minor delta is affordable + } + } + + protected JSONObject executeQuery(String query) throws IOException { + Request request = new Request("POST", QUERY_API_ENDPOINT); + request.setJsonEntity(String.format(Locale.ROOT, "{\n" + " \"query\": \"%s\"\n" + "}", query)); + + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + + Response response = client().performRequest(request); + return new JSONObject(getResponseBody(response)); + } } diff --git a/integ-test/src/test/resources/calcs.json b/integ-test/src/test/resources/calcs.json new file mode 100644 index 0000000000..c310cc8d0f --- /dev/null +++ b/integ-test/src/test/resources/calcs.json @@ -0,0 +1,34 @@ +{"index": {}} +{"key": "key00", "num0": 12.3, "num1": 8.42, "num2": 17.86, "num3": -11.52, "num4": null, "str0": "FURNITURE", "str1": "CLAMP ON LAMPS", "str2": "one", "str3": "e", "int0": 1, "int1": -3, "int2": 5, "int3": 8, "bool0": true, "bool1": true, "bool2": false, "bool3": true, "date0": "2004-04-15", "date1": "2004-04-01", "date2": "1977-04-20", "date3": "1986-03-20", "time0": "1899-12-30T21:07:32Z", "time1": "19:36:22", "datetime0": "2004-07-09T10:17:35Z", "datetime1": null, "zzz": "a"} +{"index": {}} +{"key": "key01", "num0": -12.3, "num1": 6.71, "num2": 16.73, "num3": -9.31, "num4": 10.85, "str0": "FURNITURE", "str1": "CLOCKS", "str2": "two", "str3": "e", "int0": null, "int1": -6, "int2": -4, "int3": 13, "bool0": false, "bool1": true, "bool2": false, "bool3": null, "date0": "1972-07-04", "date1": "2004-04-02", "date2": "1995-09-03", "date3": null, "time0": "1900-01-01T13:48:48Z", "time1": "02:05:25", "datetime0": "2004-07-26T12:30:34Z", "datetime1": null, "zzz": "b"} +{"index": {}} +{"key": "key02", "num0": 15.7, "num1": 9.78, "num2": null, "num3": -12.17, "num4": -13.47, "str0": "OFFICE SUPPLIES", "str1": "AIR PURIFIERS", "str2": "three", "str3": "e", "int0": null, "int1": null, "int2": 5, "int3": 2, "bool0": null, "bool1": true, "bool2": false, "bool3": null, "date0": "1975-11-12", "date1": "2004-04-03", "date2": "1997-09-19", "date3": "1997-02-02", "time0": "1900-01-01T18:21:08Z", "time1": "09:33:31", "datetime0": "2004-08-02T07:59:23Z", "datetime1": null, "zzz": "c"} +{"index": {}} +{"key": "key03", "num0": -15.7, "num1": 7.43, "num2": 8.51, "num3": -7.25, "num4": -6.05, "str0": "OFFICE SUPPLIES", "str1": "BINDER ACCESSORIES", "str2": null, "str3": "e", "int0": null, "int1": -4, "int2": -5, "int3": 5, "bool0": true, "bool1": false, "bool2": false, "bool3": null, "date0": "2004-06-04", "date1": "2004-04-04", "date2": "1980-07-26", "date3": null, "time0": "1900-01-01T18:51:48Z", "time1": "22:50:16", "datetime0": "2004-07-05T13:14:20Z", "datetime1": null, "zzz": "d"} +{"index": {}} +{"key": "key04", "num0": 3.5, "num1": 9.05, "num2": 6.46, "num3": 12.93, "num4": 8.32, "str0": "OFFICE SUPPLIES", "str1": "BINDER CLIPS", "str2": "five", "str3": null, "int0": 7, "int1": null, "int2": 3, "int3": 9, "bool0": false, "bool1": false, "bool2": true, "bool3": true, "date0": "2004-06-19", "date1": "2004-04-05", "date2": "1997-05-30", "date3": "1996-03-07", "time0": "1900-01-01T15:01:19Z", "time1": null, "datetime0": "2004-07-28T23:30:22Z", "datetime1": null, "zzz": "e"} +{"index": {}} +{"key": "key05", "num0": -3.5, "num1": 9.38, "num2": 8.98, "num3": -19.96, "num4": 10.71, "str0": "OFFICE SUPPLIES", "str1": "BINDING MACHINES", "str2": "six", "str3": null, "int0": 3, "int1": null, "int2": 2, "int3": 7, "bool0": null, "bool1": false, "bool2": true, "bool3": false, "date0": null, "date1": "2004-04-06", "date2": "1980-11-07", "date3": "1979-04-01", "time0": "1900-01-01T08:59:39Z", "time1": "19:57:33", "datetime0": "2004-07-22T00:30:23Z", "datetime1": null, "zzz": "f"} +{"index": {}} +{"key": "key06", "num0": 0, "num1": 16.42, "num2": 11.69, "num3": 10.93, "num4": null, "str0": "OFFICE SUPPLIES", "str1": "BINDING SUPPLIES", "str2": null, "str3": "e", "int0": 8, "int1": null, "int2": 9, "int3": 18, "bool0": true, "bool1": null, "bool2": false, "bool3": null, "date0": null, "date1": "2004-04-07", "date2": "1977-02-08", "date3": null, "time0": "1900-01-01T07:37:48Z", "time1": null, "datetime0": "2004-07-28T06:54:50Z", "datetime1": null, "zzz": "g"} +{"index": {}} +{"key": "key07", "num0": null, "num1": 11.38, "num2": 17.25, "num3": 3.64, "num4": -10.24, "str0": "OFFICE SUPPLIES", "str1": "BUSINESS ENVELOPES", "str2": "eight", "str3": "e", "int0": null, "int1": 2, "int2": 0, "int3": 3, "bool0": false, "bool1": null, "bool2": true, "bool3": false, "date0": null, "date1": "2004-04-08", "date2": "1974-05-03", "date3": null, "time0": "1900-01-01T19:45:54Z", "time1": "19:48:23", "datetime0": "2004-07-12T17:30:16Z", "datetime1": null, "zzz": "h"} +{"index": {}} +{"key": "key08", "num0": 10, "num1": 9.47, "num2": null, "num3": -13.38, "num4": 4.77, "str0": "TECHNOLOGY", "str1": "ANSWERING MACHINES", "str2": "nine", "str3": null, "int0": null, "int1": 3, "int2": -6, "int3": 17, "bool0": null, "bool1": null, "bool2": false, "bool3": false, "date0": null, "date1": "2004-04-09", "date2": "1976-09-09", "date3": "1983-05-22", "time0": "1900-01-01T09:00:59Z", "time1": "22:20:14", "datetime0": "2004-07-04T22:49:28Z", "datetime1": null, "zzz": "i"} +{"index": {}} +{"key": "key09", "num0": null, "num1": 12.4, "num2": 11.5, "num3": -10.56, "num4": null, "str0": "TECHNOLOGY", "str1": "BUSINESS COPIERS", "str2": "ten", "str3": "e", "int0": 8, "int1": 3, "int2": -9, "int3": 2, "bool0": null, "bool1": true, "bool2": false, "bool3": null, "date0": null, "date1": "2004-04-10", "date2": "1998-08-12", "date3": null, "time0": "1900-01-01T20:36:00Z", "time1": null, "datetime0": "2004-07-23T21:13:37Z", "datetime1": null, "zzz": "j"} +{"index": {}} +{"key": "key10", "num0": null, "num1": 10.32, "num2": 6.8, "num3": -4.79, "num4": 19.39, "str0": "TECHNOLOGY", "str1": "CD-R MEDIA", "str2": "eleven", "str3": "e", "int0": 4, "int1": null, "int2": -3, "int3": 11, "bool0": true, "bool1": true, "bool2": false, "bool3": null, "date0": null, "date1": "2004-04-11", "date2": "1974-03-17", "date3": "1999-08-20", "time0": "1900-01-01T01:31:32Z", "time1": "00:05:57", "datetime0": "2004-07-14T08:16:44Z", "datetime1": null, "zzz": "k"} +{"index": {}} +{"key": "key11", "num0": null, "num1": 2.47, "num2": 3.79, "num3": -10.81, "num4": 3.82, "str0": "TECHNOLOGY", "str1": "CONFERENCE PHONES", "str2": "twelve", "str3": null, "int0": 10, "int1": -8, "int2": -4, "int3": 2, "bool0": false, "bool1": true, "bool2": true, "bool3": null, "date0": null, "date1": "2004-04-12", "date2": "1994-04-20", "date3": null, "time0": "1899-12-30T22:15:40Z", "time1": "04:40:49", "datetime0": "2004-07-25T15:22:26Z", "datetime1": null, "zzz": "l"} +{"index": {}} +{"key": "key12", "num0": null, "num1": 12.05, "num2": null, "num3": -6.62, "num4": 3.38, "str0": "TECHNOLOGY", "str1": "CORDED KEYBOARDS", "str2": null, "str3": null, "int0": null, "int1": null, "int2": 0, "int3": 11, "bool0": null, "bool1": false, "bool2": true, "bool3": true, "date0": null, "date1": "2004-04-13", "date2": "2001-02-04", "date3": null, "time0": "1900-01-01T13:53:46Z", "time1": "04:48:07", "datetime0": "2004-07-17T14:01:56Z", "datetime1": null, "zzz": "m"} +{"index": {}} +{"key": "key13", "num0": null, "num1": 10.37, "num2": 13.04, "num3": -18.43, "num4": null, "str0": "TECHNOLOGY", "str1": "CORDLESS KEYBOARDS", "str2": "fourteen", "str3": null, "int0": 4, "int1": null, "int2": 4, "int3": 18, "bool0": null, "bool1": false, "bool2": true, "bool3": true, "date0": null, "date1": "2004-04-14", "date2": "1988-01-05", "date3": "1996-05-13", "time0": "1900-01-01T04:57:51Z", "time1": null, "datetime0": "2004-07-19T22:21:31Z", "datetime1": null, "zzz": "n"} +{"index": {}} +{"key": "key14", "num0": null, "num1": 7.1, "num2": null, "num3": 6.84, "num4": -14.21, "str0": "TECHNOLOGY", "str1": "DOT MATRIX PRINTERS", "str2": "fifteen", "str3": "e", "int0": 11, "int1": null, "int2": -8, "int3": 18, "bool0": true, "bool1": false, "bool2": true, "bool3": null, "date0": null, "date1": "2004-04-15", "date2": "1972-07-12", "date3": "1986-11-08", "time0": "1899-12-30T22:42:43Z", "time1": "18:58:41", "datetime0": "2004-07-31T11:57:52Z", "datetime1": null, "zzz": "o"} +{"index": {}} +{"key": "key15", "num0": null, "num1": 16.81, "num2": 10.98, "num3": -10.98, "num4": 6.75, "str0": "TECHNOLOGY", "str1": "DVD", "str2": "sixteen", "str3": "e", "int0": 4, "int1": null, "int2": -9, "int3": 11, "bool0": false, "bool1": null, "bool2": false, "bool3": true, "date0": null, "date1": "2004-04-16", "date2": "1995-06-04", "date3": null, "time0": "1899-12-30T22:24:08Z", "time1": null, "datetime0": "2004-07-14T07:43:00Z", "datetime1": null, "zzz": "p"} +{"index": {}} +{"key": "key16", "num0": null, "num1": 7.12, "num2": 7.87, "num3": -2.6, "num4": null, "str0": "TECHNOLOGY", "str1": "ERICSSON", "str2": null, "str3": null, "int0": 8, "int1": -9, "int2": 6, "int3": 0, "bool0": null, "bool1": null, "bool2": false, "bool3": null, "date0": null, "date1": "2004-04-17", "date2": "2002-04-27", "date3": "1992-01-18", "time0": "1900-01-01T11:58:29Z", "time1": "12:33:57", "datetime0": "2004-07-28T12:34:28Z", "datetime1": null, "zzz": "q"} diff --git a/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json new file mode 100644 index 0000000000..08a88a9d32 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json @@ -0,0 +1,94 @@ +{ + "mappings" : { + "properties" : { + "key" : { + "type" : "keyword" + }, + "num0" : { + "type" : "double" + }, + "num1" : { + "type" : "double" + }, + "num2" : { + "type" : "double" + }, + "num3" : { + "type" : "double" + }, + "num4" : { + "type" : "double" + }, + "str0" : { + "type" : "keyword" + }, + "str1" : { + "type" : "keyword" + }, + "str2" : { + "type" : "keyword" + }, + "str3" : { + "type" : "keyword" + }, + "int0" : { + "type" : "integer" + }, + "int1" : { + "type" : "integer" + }, + "int2" : { + "type" : "integer" + }, + "int3" : { + "type" : "integer" + }, + "bool0" : { + "type" : "boolean" + }, + "bool1" : { + "type" : "boolean" + }, + "bool2" : { + "type" : "boolean" + }, + "bool3" : { + "type" : "boolean" + }, + "date0" : { + "type" : "date", + "format": "yyyy-MM-dd" + }, + "date1" : { + "type" : "date", + "format": "yyyy-MM-dd" + }, + "date2" : { + "type" : "date", + "format": "yyyy-MM-dd" + }, + "date3" : { + "type" : "date", + "format": "yyyy-MM-dd" + }, + "time0" : { + "type" : "date", + "format": "date_time_no_millis" + }, + "time1" : { + "type" : "date", + "format": "hour_minute_second" + }, + "datetime0" : { + "type" : "date", + "format": "date_time_no_millis" + }, + "datetime1" : { + "type" : "date" + }, + "zzz" : { + "type" : "keyword" + } + } + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/null_missing_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/null_missing_index_mapping.json new file mode 100644 index 0000000000..52faafae93 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/null_missing_index_mapping.json @@ -0,0 +1,37 @@ +{ + "mappings" : { + "properties" : { + "key" : { + "type" : "keyword" + }, + "int" : { + "type" : "integer" + }, + "dbl" : { + "type" : "double" + }, + "bool" : { + "type" : "boolean" + }, + "str" : { + "type" : "text" + }, + "date" : { + "type" : "date", + "format": "yyyy-MM-dd" + }, + "time" : { + "type" : "date", + "format": "HH:mm:ss" + }, + "datetime" : { + "type" : "date", + "format": "yyyy-MM-dd HH:mm:ss" + }, + "timestamp" : { + "type" : "date", + "format": "yyyy-MM-dd HH:mm:ss" + } + } + } +} diff --git a/integ-test/src/test/resources/null_missing.json b/integ-test/src/test/resources/null_missing.json new file mode 100644 index 0000000000..40c7d4a131 --- /dev/null +++ b/integ-test/src/test/resources/null_missing.json @@ -0,0 +1,18 @@ +{"index":{}} +{"key" : "values", "int" : 42, "dbl" : 3.1415, "bool" : true, "str" : "pewpew", "date" : "1984-05-22", "time" : "22:13:37", "datetime" : "1984-05-22 22:13:37", "timestamp" : "2000-01-02 03:04:05" } +{"index":{}} +{"key" : "missing"} +{"index":{}} +{"key" : "missing"} +{"index":{}} +{"key" : "missing"} +{"index":{}} +{"key" : "missing"} +{"index":{}} +{"key" : "null", "int" : null, "dbl" : null, "bool" : null, "str" : null, "date" : null, "time" : null, "datetime" : null, "timestamp" : null} +{"index":{}} +{"key" : "null", "int" : null, "dbl" : null, "bool" : null, "str" : null, "date" : null, "time" : null, "datetime" : null, "timestamp" : null} +{"index":{}} +{"key" : "null", "int" : null, "dbl" : null, "bool" : null, "str" : null, "date" : null, "time" : null, "datetime" : null, "timestamp" : null} +{"index":{}} +{"key" : "null", "int" : null, "dbl" : null, "bool" : null, "str" : null, "date" : null, "time" : null, "datetime" : null, "timestamp" : null} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java index 7536a24661..88d9604137 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java @@ -13,7 +13,7 @@ package org.opensearch.sql.opensearch.response.agg; -import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanValue; +import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import java.util.Collections; import java.util.Map; @@ -34,6 +34,6 @@ public class SingleValueParser implements MetricParser { public Map parse(Aggregation agg) { return Collections.singletonMap( agg.getName(), - handleNanValue(((NumericMetricsAggregation.SingleValue) agg).value())); + handleNanInfValue(((NumericMetricsAggregation.SingleValue) agg).value())); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java index 6cac2fbdc9..5928b7efc9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java @@ -13,7 +13,7 @@ package org.opensearch.sql.opensearch.response.agg; -import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanValue; +import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import java.util.Collections; import java.util.Map; @@ -36,6 +36,6 @@ public class StatsParser implements MetricParser { @Override public Map parse(Aggregation agg) { return Collections.singletonMap( - agg.getName(), handleNanValue(valueExtractor.apply((ExtendedStats) agg))); + agg.getName(), handleNanInfValue(valueExtractor.apply((ExtendedStats) agg))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/Utils.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/Utils.java index 28b9d41e83..953f4d19b4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/Utils.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/Utils.java @@ -18,10 +18,10 @@ @UtilityClass public class Utils { /** - * Utils to handle Nan Value. - * @return null if is Nan. + * Utils to handle Nan/Infinite Value. + * @return null if is Nan or is +-Infinity. */ - public static Object handleNanValue(double value) { - return Double.isNaN(value) ? null : value; + public static Object handleNanInfValue(double value) { + return Double.isNaN(value) || Double.isInfinite(value) ? null : value; } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java index 5108e03ad9..318110bdde 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java @@ -13,7 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.opensearch.response.AggregationResponseUtils.fromJson; -import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanValue; +import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -163,7 +163,9 @@ void unsupported_aggregation_should_fail() { @Test void nan_value_should_return_null() { - assertNull(handleNanValue(Double.NaN)); + assertNull(handleNanInfValue(Double.NaN)); + assertNull(handleNanInfValue(Double.NEGATIVE_INFINITY)); + assertNull(handleNanInfValue(Double.POSITIVE_INFINITY)); } @Test