diff --git a/.github/workflows/draft-release-notes-workflow.yml b/.github/workflows/draft-release-notes-workflow.yml index 5afb4ff55e..0c6190bce1 100644 --- a/.github/workflows/draft-release-notes-workflow.yml +++ b/.github/workflows/draft-release-notes-workflow.yml @@ -3,7 +3,7 @@ name: Release Drafter on: push: branches: - - develop + - main jobs: update_release_draft: @@ -16,6 +16,6 @@ jobs: with: config-name: draft-release-notes-config.yml tag: (None) - version: 1.0.0.0-rc1 + version: 1.1.0.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml index be6f481978..10cab37f8a 100644 --- a/.github/workflows/link-checker.yml +++ b/.github/workflows/link-checker.yml @@ -16,7 +16,7 @@ jobs: id: lychee uses: lycheeverse/lychee-action@master with: - args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" --exclude "http://localhost*" "https://localhost" "https://odfe-node1:9200/" "https://community.tableau.com/docs/DOC-17978" ".*family.zzz" "https://pypi.python.org/pypi/opensearch-sql-cli/" "opensearch*" ".*@amazon.com" ".*email.com" "git@github.com" "http://timestamp.verisign.com/scripts/timstamp.dll" + args: --accept=200,403,429,999 "**/*.html" "**/*.md" "**/*.txt" --exclude "http://localhost*" "https://localhost" "https://odfe-node1:9200/" "https://community.tableau.com/docs/DOC-17978" ".*family.zzz" "https://pypi.python.org/pypi/opensearch-sql-cli/" "opensearch*" ".*@amazon.com" ".*email.com" "git@github.com" "http://timestamp.verisign.com/scripts/timstamp.dll" env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} - name: Fail if there were link errors diff --git a/.github/workflows/sql-cli-release-workflow.yml b/.github/workflows/sql-cli-release-workflow.yml index 2959df51b6..a7042bcd32 100644 --- a/.github/workflows/sql-cli-release-workflow.yml +++ b/.github/workflows/sql-cli-release-workflow.yml @@ -8,7 +8,7 @@ on: jobs: build: - runs-on: [ubuntu-16.04] + runs-on: ubuntu-latest defaults: run: working-directory: sql-cli diff --git a/.github/workflows/sql-cli-test-and-build-workflow.yml b/.github/workflows/sql-cli-test-and-build-workflow.yml index fd83a89373..876780a86c 100644 --- a/.github/workflows/sql-cli-test-and-build-workflow.yml +++ b/.github/workflows/sql-cli-test-and-build-workflow.yml @@ -5,7 +5,7 @@ on: [pull_request, push] jobs: build: - runs-on: [ubuntu-16.04] + runs-on: ubuntu-latest defaults: run: working-directory: sql-cli diff --git a/.github/workflows/sql-jdbc-push-jdbc-maven.yml b/.github/workflows/sql-jdbc-push-jdbc-maven.yml index 53f2d5d391..3c96447233 100644 --- a/.github/workflows/sql-jdbc-push-jdbc-maven.yml +++ b/.github/workflows/sql-jdbc-push-jdbc-maven.yml @@ -8,7 +8,7 @@ on: jobs: upload-jdbc-jar: - runs-on: [ubuntu-16.04] + runs-on: ubuntu-latest defaults: run: working-directory: sql-jdbc diff --git a/.github/workflows/sql-odbc-release-workflow.yml b/.github/workflows/sql-odbc-release-workflow.yml index c7901f58c8..6e471248a5 100644 --- a/.github/workflows/sql-odbc-release-workflow.yml +++ b/.github/workflows/sql-odbc-release-workflow.yml @@ -12,7 +12,7 @@ env: ODBC_BUILD_PATH: "./build/odbc/build" AWS_SDK_INSTALL_PATH: "./build/aws-sdk/install" PLUGIN_NAME: opensearch-sql-odbc - OD_VERSION: 1.0.0.0 + OD_VERSION: 1.1.0.0 jobs: build-mac: diff --git a/.github/workflows/sql-odbc-rename-and-release-workflow.yml b/.github/workflows/sql-odbc-rename-and-release-workflow.yml index 467bf99121..cf79e281ff 100644 --- a/.github/workflows/sql-odbc-rename-and-release-workflow.yml +++ b/.github/workflows/sql-odbc-rename-and-release-workflow.yml @@ -8,7 +8,7 @@ on: - rename* env: - OD_VERSION: 1.0.0.0 + OD_VERSION: 1.1.0.0 jobs: upload-odbc: diff --git a/.github/workflows/sql-release-workflow.yml b/.github/workflows/sql-release-workflow.yml index bbfbd929a8..974f801d36 100644 --- a/.github/workflows/sql-release-workflow.yml +++ b/.github/workflows/sql-release-workflow.yml @@ -12,7 +12,7 @@ jobs: java: [14] name: Build and Release SQL Plugin - runs-on: [ubuntu-16.04] + runs-on: ubuntu-latest steps: - name: Checkout SQL diff --git a/.github/workflows/sql-test-and-build-workflow.yml b/.github/workflows/sql-test-and-build-workflow.yml index 8ad294d853..5ae3f6aec4 100644 --- a/.github/workflows/sql-test-and-build-workflow.yml +++ b/.github/workflows/sql-test-and-build-workflow.yml @@ -21,14 +21,14 @@ jobs: with: repository: 'opensearch-project/OpenSearch' path: OpenSearch - ref: '1.0' + ref: '1.1' - name: Build OpenSearch working-directory: ./OpenSearch - run: ./gradlew publishToMavenLocal -Dbuild.version_qualifier=rc1 -Dbuild.snapshot=false + run: ./gradlew publishToMavenLocal - name: Build with Gradle - run: ./gradlew build assemble + run: ./gradlew build assemble -Dopensearch.version=1.1.0-SNAPSHOT - name: Create Artifact Path run: | diff --git a/.github/workflows/sql-workbench-release-workflow.yml b/.github/workflows/sql-workbench-release-workflow.yml index b22f7649b3..0e8e712690 100644 --- a/.github/workflows/sql-workbench-release-workflow.yml +++ b/.github/workflows/sql-workbench-release-workflow.yml @@ -8,7 +8,7 @@ on: env: PLUGIN_NAME: query-workbench-dashboards OPENSEARCH_VERSION: '1.0' - OPENSEARCH_PLUGIN_VERSION: 1.0.0.0-rc1 + OPENSEARCH_PLUGIN_VERSION: 1.0.0.0 jobs: @@ -38,7 +38,7 @@ jobs: - name: Setup Node uses: actions/setup-node@v1 with: - node-version: '10.23.1' + node-version: '10.24.1' - name: Move Workbench to Plugins Dir run: | diff --git a/.github/workflows/sql-workbench-test-and-build-workflow.yml b/.github/workflows/sql-workbench-test-and-build-workflow.yml index 05a6541e34..071f7dfb29 100644 --- a/.github/workflows/sql-workbench-test-and-build-workflow.yml +++ b/.github/workflows/sql-workbench-test-and-build-workflow.yml @@ -4,8 +4,8 @@ on: [pull_request, push] env: PLUGIN_NAME: query-workbench-dashboards - OPENSEARCH_VERSION: '1.0' - OPENSEARCH_PLUGIN_VERSION: 1.0.0.0-rc1 + OPENSEARCH_VERSION: '1.x' + OPENSEARCH_PLUGIN_VERSION: 1.1.0.0 jobs: @@ -27,7 +27,7 @@ jobs: - name: Setup Node uses: actions/setup-node@v1 with: - node-version: '10.23.1' + node-version: '10.24.1' - name: Move Workbench to Plugins Dir run: | diff --git a/README.md b/README.md index bf76415459..e679c64ccc 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,22 @@ [![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://discuss.opendistrocommunity.dev/c/sql/) ![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success) -# OpenSearch SQL + +- [OpenSearch SQL](#opensearch-sql) +- [Highlights](#highlights) +- [Documentation](#documentation) +- [Contributing](#contributing) +- [Attribution](#attribution) +- [Code of Conduct](#code-of-conduct) +- [Security](#security) +- [License](#license) +- [Copyright](#copyright) -OpenSearch enables you to extract insights out of OpenSearch using the familiar SQL query syntax. Use aggregations, group by, and where clauses to investigate your data. Read your data as JSON documents or CSV tables so you have the flexibility to use the format that works best for you. +# OpenSearch SQL -## SQL Related Projects +OpenSearch enables you to extract insights out of OpenSearch using the familiar SQL or Piped Processing Language (PPL) query syntax. Use aggregations, group by, and where clauses to investigate your data. Read your data as JSON documents or CSV tables so you have the flexibility to use the format that works best for you. The following projects have been merged into this repository as separate folders as of July 9, 2020. Please refer to links below for details. This document will focus on the SQL plugin for OpenSearch. @@ -21,197 +30,39 @@ The following projects have been merged into this repository as separate folders * [Query Workbench](https://github.com/opensearch-project/sql/tree/main/workbench) -## Documentation - -Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Piped Processing Language (PPL) Reference Manual](./docs/user/ppl/index.rst) and [Technical Documentation](https://docs-beta.opensearch.org/) for detailed information on installing and configuring plugin. Looking to contribute? Read the instructions on [Developer Guide](./DEVELOPER_GUIDE.rst) and then submit a patch! - -## SQL Engine V2 - -Recently we have been actively improving our query engine primarily for better correctness and extensibility. Behind the scene, the new enhanced engine has already supported the new released Piped Processing Language. However, it was experimental and disabled by default for SQL query processing. With most important features and full testing complete, now we're ready to promote it as our default SQL query engine. Please find more details in [SQL Engine V2 - Release Notes](/docs/dev/NewSQLEngine.md). - - -## Setup - -Install as plugin: build plugin from source code by following the instruction in Build section and install it to your OpenSearch. - -After doing this, you need to restart the OpenSearch server. Otherwise you may get errors like `Invalid index name [sql], must not start with '']; ","status":400}`. - - -## Build - -The package uses the [Gradle](https://docs.gradle.org/4.10.2/userguide/userguide.html) build system. - -1. Checkout this package from version control. -2. To build from command line set `JAVA_HOME` to point to a JDK >=14 -3. Run `./gradlew build` - - -## Basic Usage - -To use the feature, send requests to the `_plugins/_sql` URI. You can use a request parameter or the request body (recommended). Note that for backward compatibility, old `_opendistro/_sql` endpoint is still available, though any future API will be only accessible by new OpenSearch endpoint. - -* Simple query - -``` -POST https://:/_plugins/_sql -{ - "query": "SELECT * FROM my-index LIMIT 50" -} -``` - -* Explain SQL to OpenSearch query DSL -``` -POST _plugins/_sql/_explain -{ - "query": "SELECT * FROM my-index LIMIT 50" -} -``` - -* For a sample curl command with the OpenSearch Security plugin, try: -``` -curl -XPOST https://localhost:9200/_plugins/_sql -u admin:admin -k -d '{"query": "SELECT * FROM my-index LIMIT 10"}' -H 'Content-Type: application/json' -``` - - -## SQL Usage - -* Query - - SELECT * FROM bank WHERE age >30 AND gender = 'm' +## Highlights -* Aggregation +Besides basic filtering and aggregation, OpenSearch SQL also supports complex queries, such as querying semi-structured data, JOINs, set operations, sub-queries etc. Beyond the standard functions, OpenSearch functions are provided for better analytics and visualization. Please check our [documentation](#documentation) for more details. - SELECT COUNT(*),SUM(age),MIN(age) as m, MAX(age),AVG(age) - FROM bank - GROUP BY gender - HAVING m >= 20 - ORDER BY SUM(age), m DESC +Recently we have been actively improving our query engine primarily for better correctness and extensibility. Behind the scene, the new enhanced engine has already supported both SQL and Piped Processing Language. Please find more details in [SQL Engine V2 - Release Notes](./docs/dev/NewSQLEngine.md). -* Join - SELECT b1.firstname, b1.lastname, b2.age - FROM bank b1 - LEFT JOIN bank b2 - ON b1.age = b2.age AND b1.state = b2.state - -* Show - - SHOW TABLES LIKE ban% - DESCRIBE TABLES LIKE bank - -* Delete - - DELETE FROM bank WHERE age >30 AND gender = 'm' - - -## Beyond SQL - -* Search - - SELECT address FROM bank WHERE address = matchQuery('880 Holmes Lane') ORDER BY _score DESC LIMIT 3 - -* Nested Field - - + - - SELECT address FROM bank b, b.nestedField e WHERE b.state = 'WA' and e.name = 'test' - - + - SELECT address, nested(nestedField.name) - FROM bank - WHERE nested(nestedField, nestedField.state = 'WA' AND nestedField.name = 'test') - OR nested(nestedField.state) = 'CA' - -* Aggregations - - + range age group 20-25,25-30,30-35,35-40 - - SELECT COUNT(age) FROM bank GROUP BY range(age, 20,25,30,35,40) - - + range date group by day - - SELECT online FROM online GROUP BY date_histogram(field='insert_time','interval'='1d') - - + range date group by your config - - SELECT online FROM online GROUP BY date_range(field='insert_time','format'='yyyy-MM-dd' ,'2014-08-18','2014-08-17','now-8d','now-7d','now-6d','now') - -* OpenSearch Geographic - - SELECT * FROM locations WHERE GEO_BOUNDING_BOX(fieldname,100.0,1.0,101,0.0) - -* Select type or pattern - - SELECT * FROM indexName/type - SELECT * FROM index* - - -## SQL Features +## Documentation -* SQL Select -* SQL Delete -* SQL Where -* SQL Order By -* SQL Group By -* SQL Having -* SQL Inner Join -* SQL Left Join -* SQL Show -* SQL Describe -* SQL AND & OR -* SQL Like -* SQL COUNT distinct -* SQL In -* SQL Between -* SQL Aliases -* SQL Not Null -* SQL(OpenSearch) Date -* SQL avg() -* SQL count() -* SQL max() -* SQL min() -* SQL sum() -* SQL Nulls -* SQL isnull() -* SQL floor -* SQL trim -* SQL log -* SQL log10 -* SQL substring -* SQL round -* SQL sqrt -* SQL concat_ws -* SQL union and minus +Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Piped Processing Language (PPL) Reference Manual](./docs/user/ppl/index.rst) and [Technical Documentation](https://docs-beta.opensearch.org/) for detailed information on installing and configuring plugin. -## JDBC Support -Please check out JDBC driver repository for more details. +## Contributing -## Beyond SQL features +See [developer guide](DEVELOPER_GUIDE.rst) and [how to contribute to this project](CONTRIBUTING.md). -* OpenSearch TopHits -* OpenSearch MISSING -* OpenSearch STATS -* OpenSearch GEO_INTERSECTS -* OpenSearch GEO_BOUNDING_BOX -* OpenSearch GEO_DISTANCE -* OpenSearch GEOHASH_GRID aggregation ## Attribution This project is based on the Apache 2.0-licensed [elasticsearch-sql](https://github.com/NLPchina/elasticsearch-sql) project. Thank you [eliranmoyal](https://github.com/eliranmoyal), [shi-yuan](https://github.com/shi-yuan), [ansjsun](https://github.com/ansjsun) and everyone else who contributed great code to that project. Read this for more details [Attributions](./docs/attributions.md). + ## Code of Conduct This project has adopted an [Open Source Code of Conduct](./CODE_OF_CONDUCT.md). -## Security issue notifications +## Security If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. -## Licensing +## License See the [LICENSE](./LICENSE.txt) file for our project's licensing. We will ask you to confirm the licensing of your contribution. diff --git a/build.gradle b/build.gradle index 1b69cf810b..fce02d546e 100644 --- a/build.gradle +++ b/build.gradle @@ -26,7 +26,7 @@ buildscript { ext { - opensearch_version = "1.0.0-rc1" + opensearch_version = System.getProperty("opensearch.version", "1.1.0-SNAPSHOT") } repositories { @@ -55,12 +55,14 @@ repositories { } ext { - opensearchVersion = '1.0.0' isSnapshot = "true" == System.getProperty("build.snapshot", "true") } allprojects { - version = "${opensearchVersion}.0-rc1" + version = opensearch_version - "-SNAPSHOT" + ".0" + if (isSnapshot) { + version += "-SNAPSHOT" + } plugins.withId('java') { sourceCompatibility = targetCompatibility = "1.8" diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index d5c1538b77..933e68085a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -161,8 +161,9 @@ public Expression visitAggregateFunction(AggregateFunction node, AnalysisContext Expression arg = node.getField().accept(this, context); Aggregator aggregator = (Aggregator) repository.compile( builtinFunctionName.get().getName(), Collections.singletonList(arg)); - if (node.getCondition() != null) { - aggregator.condition(analyze(node.getCondition(), context)); + aggregator.distinct(node.getDistinct()); + if (node.condition() != null) { + aggregator.condition(analyze(node.condition(), context)); } return aggregator; } else { diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 7400ae20e6..3b78483736 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -211,7 +211,16 @@ public static UnresolvedExpression aggregate( public static UnresolvedExpression filteredAggregate( String func, UnresolvedExpression field, UnresolvedExpression condition) { - return new AggregateFunction(func, field, condition); + return new AggregateFunction(func, field).condition(condition); + } + + public static UnresolvedExpression distinctAggregate(String func, UnresolvedExpression field) { + return new AggregateFunction(func, field, true); + } + + public static UnresolvedExpression filteredDistinctCount( + String func, UnresolvedExpression field, UnresolvedExpression condition) { + return new AggregateFunction(func, field, true).condition(condition); } public static Function function(String funcName, UnresolvedExpression... funcArgs) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java index 8753e35ed9..e909c46ee7 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java @@ -28,9 +28,12 @@ import java.util.Collections; import java.util.List; +import javax.annotation.Nullable; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.experimental.Accessors; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.common.utils.StringUtils; @@ -45,7 +48,10 @@ public class AggregateFunction extends UnresolvedExpression { private final String funcName; private final UnresolvedExpression field; private final List argList; + @Setter + @Accessors(fluent = true) private UnresolvedExpression condition; + private Boolean distinct = false; /** * Constructor. @@ -62,14 +68,13 @@ public AggregateFunction(String funcName, UnresolvedExpression field) { * Constructor. * @param funcName function name. * @param field {@link UnresolvedExpression}. - * @param condition condition in aggregation filter. + * @param distinct whether distinct field is specified or not. */ - public AggregateFunction(String funcName, UnresolvedExpression field, - UnresolvedExpression condition) { + public AggregateFunction(String funcName, UnresolvedExpression field, Boolean distinct) { this.funcName = funcName; this.field = field; this.argList = Collections.emptyList(); - this.condition = condition; + this.distinct = distinct; } @Override diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java index 382ef325ff..099bd23a58 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java @@ -29,11 +29,14 @@ package org.opensearch.sql.ast.expression; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BOOLEAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BYTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DATETIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DOUBLE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_FLOAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_INT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIMESTAMP; @@ -49,6 +52,7 @@ import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.FunctionName; /** @@ -60,9 +64,11 @@ @ToString public class Cast extends UnresolvedExpression { - private static Map CONVERTED_TYPE_FUNCTION_NAME_MAP = + private static final Map CONVERTED_TYPE_FUNCTION_NAME_MAP = new ImmutableMap.Builder() .put("string", CAST_TO_STRING.getName()) + .put("byte", CAST_TO_BYTE.getName()) + .put("short", CAST_TO_SHORT.getName()) .put("int", CAST_TO_INT.getName()) .put("integer", CAST_TO_INT.getName()) .put("long", CAST_TO_LONG.getName()) @@ -72,6 +78,7 @@ public class Cast extends UnresolvedExpression { .put("date", CAST_TO_DATE.getName()) .put("time", CAST_TO_TIME.getName()) .put("timestamp", CAST_TO_TIMESTAMP.getName()) + .put("datetime", CAST_TO_DATETIME.getName()) .build(); /** @@ -84,6 +91,25 @@ public class Cast extends UnresolvedExpression { */ private final UnresolvedExpression convertedType; + /** + * Check if the given function name is a cast function or not. + * @param name function name + * @return true if cast function, otherwise false. + */ + public static boolean isCastFunction(FunctionName name) { + return CONVERTED_TYPE_FUNCTION_NAME_MAP.containsValue(name); + } + + /** + * Get the cast function name for a given target data type. + * @param targetType target data type + * @return cast function name corresponding + */ + public static FunctionName getCastFunctionName(ExprType targetType) { + String type = targetType.typeName().toLowerCase(Locale.ROOT); + return CONVERTED_TYPE_FUNCTION_NAME_MAP.get(type); + } + /** * Get the converted type. * diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index e2c5fb6a39..b2172e54f1 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -157,6 +157,14 @@ public static ExprValue fromObjectValue(Object o, ExprCoreType type) { } } + public static Byte getByteValue(ExprValue exprValue) { + return exprValue.byteValue(); + } + + public static Short getShortValue(ExprValue exprValue) { + return exprValue.shortValue(); + } + public static Integer getIntegerValue(ExprValue exprValue) { return exprValue.integerValue(); } diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java index 3b37cfbf31..4fa023bd06 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java @@ -28,12 +28,13 @@ package org.opensearch.sql.data.type; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -62,25 +63,24 @@ public enum ExprCoreType implements ExprType { FLOAT(LONG), DOUBLE(FLOAT), - /** - * Boolean. - */ - BOOLEAN(UNDEFINED), - /** * String. */ STRING(UNDEFINED), + /** + * Boolean. + */ + BOOLEAN(STRING), /** * Date. * Todo. compatible relationship. */ - TIMESTAMP(UNDEFINED), - DATE(UNDEFINED), - TIME(UNDEFINED), - DATETIME(UNDEFINED), + TIMESTAMP(STRING), + DATE(STRING), + TIME(STRING), + DATETIME(STRING), INTERVAL(UNDEFINED), /** @@ -108,6 +108,16 @@ public enum ExprCoreType implements ExprType { .put(STRING, "keyword") .build(); + private static final Set NUMBER_TYPES = + new ImmutableSet.Builder() + .add(BYTE) + .add(SHORT) + .add(INTEGER) + .add(LONG) + .add(FLOAT) + .add(DOUBLE) + .build(); + ExprCoreType(ExprCoreType... compatibleTypes) { for (ExprCoreType subType : compatibleTypes) { subType.parents.add(this); @@ -139,7 +149,7 @@ public static List coreTypes() { .collect(Collectors.toList()); } - public static List numberTypes() { - return ImmutableList.of(INTEGER, LONG, FLOAT, DOUBLE); + public static Set numberTypes() { + return NUMBER_TYPES; } } diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java index a26f758b20..97c46ca4e5 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java @@ -58,6 +58,16 @@ default boolean isCompatible(ExprType other) { } } + /** + * Should cast this type to other type or not. By default, cast is always required + * if the given type is different from this type. + * @param other other data type + * @return true if cast is required, otherwise false + */ + default boolean shouldCast(ExprType other) { + return !this.equals(other); + } + /** * Get the parent type. */ diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 560414592c..af51d0898a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -500,6 +500,10 @@ public Aggregator count(Expression... expressions) { return aggregate(BuiltinFunctionName.COUNT, expressions); } + public Aggregator distinctCount(Expression... expressions) { + return count(expressions).distinct(true); + } + public Aggregator varSamp(Expression... expressions) { return aggregate(BuiltinFunctionName.VARSAMP, expressions); } @@ -592,6 +596,16 @@ public FunctionExpression castString(Expression value) { .compile(BuiltinFunctionName.CAST_TO_STRING.getName(), Arrays.asList(value)); } + public FunctionExpression castByte(Expression value) { + return (FunctionExpression) repository + .compile(BuiltinFunctionName.CAST_TO_BYTE.getName(), Arrays.asList(value)); + } + + public FunctionExpression castShort(Expression value) { + return (FunctionExpression) repository + .compile(BuiltinFunctionName.CAST_TO_SHORT.getName(), Arrays.asList(value)); + } + public FunctionExpression castInt(Expression value) { return (FunctionExpression) repository .compile(BuiltinFunctionName.CAST_TO_INT.getName(), Arrays.asList(value)); @@ -631,4 +645,9 @@ public FunctionExpression castTimestamp(Expression value) { return (FunctionExpression) repository .compile(BuiltinFunctionName.CAST_TO_TIMESTAMP.getName(), Arrays.asList(value)); } + + public FunctionExpression castDatetime(Expression value) { + return (FunctionExpression) repository + .compile(BuiltinFunctionName.CAST_TO_DATETIME.getName(), Arrays.asList(value)); + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java index 80944172ea..5328e11aad 100644 --- a/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java +++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java @@ -64,6 +64,10 @@ public abstract class Aggregator @Getter @Accessors(fluent = true) protected Expression condition; + @Setter + @Getter + @Accessors(fluent = true) + protected Boolean distinct = false; /** * Create an {@link AggregationState} which will be used for aggregation. diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java index 3195bf3941..f1bd088967 100644 --- a/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java +++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java @@ -28,8 +28,10 @@ import static org.opensearch.sql.utils.ExpressionUtils.format; +import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Set; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; @@ -45,33 +47,51 @@ public CountAggregator(List arguments, ExprCoreType returnType) { @Override public CountAggregator.CountState create() { - return new CountState(); + return distinct ? new DistinctCountState() : new CountState(); } @Override protected CountState iterate(ExprValue value, CountState state) { - state.count++; + state.count(value); return state; } @Override public String toString() { - return String.format(Locale.ROOT, "count(%s)", format(getArguments())); + return distinct + ? String.format(Locale.ROOT, "count(distinct %s)", format(getArguments())) + : String.format(Locale.ROOT, "count(%s)", format(getArguments())); } /** * Count State. */ protected static class CountState implements AggregationState { - private int count; + protected int count; CountState() { this.count = 0; } + public void count(ExprValue value) { + count++; + } + @Override public ExprValue result() { return ExprValueUtils.integerValue(count); } } + + protected static class DistinctCountState extends CountState { + private final Set distinctValues = new HashSet<>(); + + @Override + public void count(ExprValue value) { + if (!distinctValues.contains(value)) { + distinctValues.add(value); + count++; + } + } + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java index 346bd2d28c..92176e8648 100644 --- a/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java +++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java @@ -54,8 +54,8 @@ public class NamedAggregator extends Aggregator { /** * NamedAggregator. - * The aggregator properties {@link #condition} is inherited by named aggregator - * to avoid errors introduced by the property inconsistency. + * The aggregator properties {@link #condition} and {@link #distinct} + * are inherited by named aggregator to avoid errors introduced by the property inconsistency. * * @param name name * @param delegated delegated @@ -67,6 +67,7 @@ public NamedAggregator( this.name = name; this.delegated = delegated; this.condition = delegated.condition; + this.distinct = delegated.distinct; } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 24e65d4b5d..cd66825567 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -177,6 +177,8 @@ public enum BuiltinFunctionName { * Data Type Convert Function. */ CAST_TO_STRING(FunctionName.of("cast_to_string")), + CAST_TO_BYTE(FunctionName.of("cast_to_byte")), + CAST_TO_SHORT(FunctionName.of("cast_to_short")), CAST_TO_INT(FunctionName.of("cast_to_int")), CAST_TO_LONG(FunctionName.of("cast_to_long")), CAST_TO_FLOAT(FunctionName.of("cast_to_float")), @@ -184,7 +186,8 @@ public enum BuiltinFunctionName { CAST_TO_BOOLEAN(FunctionName.of("cast_to_boolean")), CAST_TO_DATE(FunctionName.of("cast_to_date")), CAST_TO_TIME(FunctionName.of("cast_to_time")), - CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")); + CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")), + CAST_TO_DATETIME(FunctionName.of("cast_to_datetime")); private final FunctionName name; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java index ebb432d7f0..3898af6682 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java @@ -11,10 +11,19 @@ package org.opensearch.sql.expression.function; +import static org.opensearch.sql.ast.expression.Cast.getCastFunctionName; +import static org.opensearch.sql.ast.expression.Cast.isCastFunction; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.Expression; @@ -47,15 +56,70 @@ public FunctionImplementation compile(FunctionName functionName, List resolvedSignature = + functionResolverMap.get(functionName).resolve(functionSignature); + + List sourceTypes = functionSignature.getParamTypeList(); + List targetTypes = resolvedSignature.getKey().getParamTypeList(); + FunctionBuilder funcBuilder = resolvedSignature.getValue(); + if (isCastFunction(functionName) || sourceTypes.equals(targetTypes)) { + return funcBuilder; + } + return castArguments(sourceTypes, targetTypes, funcBuilder); } else { throw new ExpressionEvaluationException( String.format("unsupported function name: %s", functionName.getFunctionName())); } } + + /** + * Wrap resolved function builder's arguments by cast function to cast input expression value + * to value of target type at runtime. For example, suppose unresolved signature is + * equal(BOOL,STRING) and its resolved function builder is F with signature equal(BOOL,BOOL). + * In this case, wrap F and return equal(BOOL, cast_to_bool(STRING)). + */ + private FunctionBuilder castArguments(List sourceTypes, + List targetTypes, + FunctionBuilder funcBuilder) { + return arguments -> { + List argsCasted = new ArrayList<>(); + for (int i = 0; i < arguments.size(); i++) { + Expression arg = arguments.get(i); + ExprType sourceType = sourceTypes.get(i); + ExprType targetType = targetTypes.get(i); + + if (isCastRequired(sourceType, targetType)) { + argsCasted.add(cast(arg, targetType)); + } else { + argsCasted.add(arg); + } + } + return funcBuilder.apply(argsCasted); + }; + } + + private boolean isCastRequired(ExprType sourceType, ExprType targetType) { + // TODO: Remove this special case after fixing all failed UTs + if (ExprCoreType.numberTypes().contains(sourceType) + && ExprCoreType.numberTypes().contains(targetType)) { + return false; + } + return sourceType.shouldCast(targetType); + } + + private Expression cast(Expression arg, ExprType targetType) { + FunctionName castFunctionName = getCastFunctionName(targetType); + if (castFunctionName == null) { + throw new ExpressionEvaluationException(StringUtils.format( + "Type conversion to type %s is not supported", targetType)); + } + return (Expression) compile(castFunctionName, ImmutableList.of(arg)); + } + } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java b/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java index d9d01be891..5bd63015a5 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java @@ -20,6 +20,7 @@ import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Singular; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.exception.ExpressionEvaluationException; /** @@ -41,8 +42,10 @@ public class FunctionResolver { * If the {@link FunctionBuilder} exactly match the input {@link FunctionSignature}, return it. * If applying the widening rule, found the most match one, return it. * If nothing found, throw {@link ExpressionEvaluationException} + * + * @return function signature and its builder */ - public FunctionBuilder resolve(FunctionSignature unresolvedSignature) { + public Pair resolve(FunctionSignature unresolvedSignature) { PriorityQueue> functionMatchQueue = new PriorityQueue<>( Map.Entry.comparingByKey()); @@ -59,7 +62,8 @@ public FunctionBuilder resolve(FunctionSignature unresolvedSignature) { unresolvedSignature.formatTypes() )); } else { - return functionBundle.get(bestMatchEntry.getValue()); + FunctionSignature resolvedSignature = bestMatchEntry.getValue(); + return Pair.of(resolvedSignature, functionBundle.get(resolvedSignature)); } } diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java index df6b6f935f..c6a84985a0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java @@ -48,11 +48,14 @@ import java.util.stream.Stream; import lombok.experimental.UtilityClass; import org.opensearch.sql.data.model.ExprBooleanValue; +import org.opensearch.sql.data.model.ExprByteValue; import org.opensearch.sql.data.model.ExprDateValue; +import org.opensearch.sql.data.model.ExprDatetimeValue; import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprShortValue; import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprTimestampValue; @@ -68,6 +71,8 @@ public class TypeCastOperator { */ public static void register(BuiltinFunctionRepository repository) { repository.register(castToString()); + repository.register(castToByte()); + repository.register(castToShort()); repository.register(castToInt()); repository.register(castToLong()); repository.register(castToFloat()); @@ -76,6 +81,7 @@ public static void register(BuiltinFunctionRepository repository) { repository.register(castToDate()); repository.register(castToTime()); repository.register(castToTimestamp()); + repository.register(castToDatetime()); } @@ -92,6 +98,28 @@ private static FunctionResolver castToString() { ); } + private static FunctionResolver castToByte() { + return FunctionDSL.define(BuiltinFunctionName.CAST_TO_BYTE.getName(), + impl(nullMissingHandling( + (v) -> new ExprByteValue(Short.valueOf(v.stringValue()))), BYTE, STRING), + impl(nullMissingHandling( + (v) -> new ExprByteValue(v.shortValue())), BYTE, DOUBLE), + impl(nullMissingHandling( + (v) -> new ExprByteValue(v.booleanValue() ? 1 : 0)), BYTE, BOOLEAN) + ); + } + + private static FunctionResolver castToShort() { + return FunctionDSL.define(BuiltinFunctionName.CAST_TO_SHORT.getName(), + impl(nullMissingHandling( + (v) -> new ExprShortValue(Short.valueOf(v.stringValue()))), SHORT, STRING), + impl(nullMissingHandling( + (v) -> new ExprShortValue(v.shortValue())), SHORT, DOUBLE), + impl(nullMissingHandling( + (v) -> new ExprShortValue(v.booleanValue() ? 1 : 0)), SHORT, BOOLEAN) + ); + } + private static FunctionResolver castToInt() { return FunctionDSL.define(BuiltinFunctionName.CAST_TO_INT.getName(), impl(nullMissingHandling( @@ -179,4 +207,15 @@ private static FunctionResolver castToTimestamp() { impl(nullMissingHandling((v) -> v), TIMESTAMP, TIMESTAMP) ); } + + private static FunctionResolver castToDatetime() { + return FunctionDSL.define(BuiltinFunctionName.CAST_TO_DATETIME.getName(), + impl(nullMissingHandling( + (v) -> new ExprDatetimeValue(v.stringValue())), DATETIME, STRING), + impl(nullMissingHandling( + (v) -> new ExprDatetimeValue(v.datetimeValue())), DATETIME, TIMESTAMP), + impl(nullMissingHandling( + (v) -> new ExprDatetimeValue(v.datetimeValue())), DATETIME, DATE) + ); + } } diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index 8cb7288273..b0b1e7e773 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -160,7 +160,7 @@ public void castAnalyzer() { ); assertThrows(IllegalStateException.class, () -> analyze(AstDSL.cast(AstDSL.unresolvedAttr( - "boolean_value"), AstDSL.stringLiteral("DATETIME")))); + "boolean_value"), AstDSL.stringLiteral("INTERVAL")))); } @Test @@ -300,6 +300,24 @@ public void variance_mapto_varPop() { ); } + @Test + public void distinct_count() { + assertAnalyzeEqual( + dsl.distinctCount(DSL.ref("integer_value", INTEGER)), + AstDSL.distinctAggregate("count", qualifiedName("integer_value")) + ); + } + + @Test + public void filtered_distinct_count() { + assertAnalyzeEqual( + dsl.distinctCount(DSL.ref("integer_value", INTEGER)) + .condition(dsl.greater(DSL.ref("integer_value", INTEGER), DSL.literal(1))), + AstDSL.filteredDistinctCount("count", qualifiedName("integer_value"), function( + ">", qualifiedName("integer_value"), intLiteral(1))) + ); + } + protected Expression analyze(UnresolvedExpression unresolvedExpression) { return expressionAnalyzer.analyze(unresolvedExpression, analysisContext); } diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java index a27d90f35d..af2dbf22fc 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java @@ -96,8 +96,8 @@ public class ExprValueUtilsTest { Lists.newArrayList(Iterables.concat(numberValues, nonNumberValues)); private static List> numberValueExtractor = Arrays.asList( - ExprValue::byteValue, - ExprValue::shortValue, + ExprValueUtils::getByteValue, + ExprValueUtils::getShortValue, ExprValueUtils::getIntegerValue, ExprValueUtils::getLongValue, ExprValueUtils::getFloatValue, diff --git a/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java b/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java index 0dc8b8f4cf..dc63c7d224 100644 --- a/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java +++ b/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java @@ -33,6 +33,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -40,6 +43,8 @@ import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; @@ -58,6 +63,16 @@ public void isCompatible() { assertTrue(FLOAT.isCompatible(LONG)); assertTrue(FLOAT.isCompatible(INTEGER)); assertTrue(FLOAT.isCompatible(SHORT)); + + assertTrue(BOOLEAN.isCompatible(STRING)); + assertTrue(TIMESTAMP.isCompatible(STRING)); + assertTrue(DATE.isCompatible(STRING)); + assertTrue(TIME.isCompatible(STRING)); + assertTrue(DATETIME.isCompatible(STRING)); + } + + @Test + public void isNotCompatible() { assertFalse(INTEGER.isCompatible(DOUBLE)); assertFalse(STRING.isCompatible(DOUBLE)); assertFalse(INTEGER.isCompatible(UNKNOWN)); @@ -69,6 +84,13 @@ public void isCompatibleWithUndefined() { ExprCoreType.coreTypes().forEach(type -> assertFalse(UNDEFINED.isCompatible(type))); } + @Test + public void shouldCast() { + assertTrue(UNDEFINED.shouldCast(STRING)); + assertTrue(STRING.shouldCast(BOOLEAN)); + assertFalse(STRING.shouldCast(STRING)); + } + @Test public void getParent() { assertThat(((ExprType) () -> "test").getParent(), Matchers.contains(UNKNOWN)); diff --git a/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java b/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java index cc2825858a..1db33ac9d5 100644 --- a/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java @@ -116,6 +116,29 @@ public class AggregationTest extends ExpressionTestBase { "timestamp_value", "2040-01-01 07:00:00"))); + protected static List tuples_with_duplicates = + Arrays.asList( + ExprValueUtils.tupleValue(ImmutableMap.of( + "integer_value", 1, + "double_value", 4d, + "struct_value", ImmutableMap.of("str", 1), + "array_value", ImmutableList.of(1))), + ExprValueUtils.tupleValue(ImmutableMap.of( + "integer_value", 1, + "double_value", 3d, + "struct_value", ImmutableMap.of("str", 1), + "array_value", ImmutableList.of(1))), + ExprValueUtils.tupleValue(ImmutableMap.of( + "integer_value", 2, + "double_value", 2d, + "struct_value", ImmutableMap.of("str", 2), + "array_value", ImmutableList.of(2))), + ExprValueUtils.tupleValue(ImmutableMap.of( + "integer_value", 3, + "double_value", 1d, + "struct_value", ImmutableMap.of("str1", 1), + "array_value", ImmutableList.of(1, 2)))); + protected static List tuples_with_null_and_missing = Arrays.asList( ExprValueUtils.tupleValue( diff --git a/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java b/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java index 0fdadfc692..ee183dafce 100644 --- a/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java @@ -129,6 +129,35 @@ public void filtered_count() { assertEquals(3, result.value()); } + @Test + public void distinct_count() { + ExprValue result = aggregation(dsl.distinctCount(DSL.ref("integer_value", INTEGER)), + tuples_with_duplicates); + assertEquals(3, result.value()); + } + + @Test + public void filtered_distinct_count() { + ExprValue result = aggregation(dsl.distinctCount(DSL.ref("integer_value", INTEGER)) + .condition(dsl.greater(DSL.ref("double_value", DOUBLE), DSL.literal(1d))), + tuples_with_duplicates); + assertEquals(2, result.value()); + } + + @Test + public void distinct_count_map() { + ExprValue result = aggregation(dsl.distinctCount(DSL.ref("struct_value", STRUCT)), + tuples_with_duplicates); + assertEquals(3, result.value()); + } + + @Test + public void distinct_count_array() { + ExprValue result = aggregation(dsl.distinctCount(DSL.ref("array_value", ARRAY)), + tuples_with_duplicates); + assertEquals(3, result.value()); + } + @Test public void count_with_missing() { ExprValue result = aggregation(dsl.count(DSL.ref("integer_value", INTEGER)), @@ -166,6 +195,9 @@ public void valueOf() { public void test_to_string() { Aggregator countAggregator = dsl.count(DSL.ref("integer_value", INTEGER)); assertEquals("count(integer_value)", countAggregator.toString()); + + countAggregator = dsl.distinctCount(DSL.ref("integer_value", INTEGER)); + assertEquals("count(distinct integer_value)", countAggregator.toString()); } @Test diff --git a/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java b/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java index 6f8b3600ea..d6b372a12a 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java @@ -29,20 +29,39 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.BYTE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BOOLEAN; +import com.google.common.collect.ImmutableList; import java.util.Arrays; +import java.util.List; import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.env.Environment; @ExtendWith(MockitoExtension.class) @@ -62,6 +81,13 @@ class BuiltinFunctionRepositoryTest { @Mock private Environment emptyEnv; + private BuiltinFunctionRepository repo; + + @BeforeEach + void setUp() { + repo = new BuiltinFunctionRepository(mockMap); + } + @Test void register() { BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap); @@ -73,8 +99,11 @@ void register() { @Test void compile() { + when(mockExpression.type()).thenReturn(UNDEFINED); + when(functionSignature.getParamTypeList()).thenReturn(Arrays.asList(UNDEFINED)); when(mockfunctionResolver.getFunctionName()).thenReturn(mockFunctionName); - when(mockfunctionResolver.resolve(any())).thenReturn(functionExpressionBuilder); + when(mockfunctionResolver.resolve(any())).thenReturn( + Pair.of(functionSignature, functionExpressionBuilder)); when(mockMap.containsKey(any())).thenReturn(true); when(mockMap.get(any())).thenReturn(mockfunctionResolver); BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap); @@ -89,7 +118,8 @@ void compile() { void resolve() { when(functionSignature.getFunctionName()).thenReturn(mockFunctionName); when(mockfunctionResolver.getFunctionName()).thenReturn(mockFunctionName); - when(mockfunctionResolver.resolve(functionSignature)).thenReturn(functionExpressionBuilder); + when(mockfunctionResolver.resolve(functionSignature)).thenReturn( + Pair.of(functionSignature, functionExpressionBuilder)); when(mockMap.containsKey(mockFunctionName)).thenReturn(true); when(mockMap.get(mockFunctionName)).thenReturn(mockfunctionResolver); BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap); @@ -98,6 +128,60 @@ void resolve() { assertEquals(functionExpressionBuilder, repo.resolve(functionSignature)); } + @Test + void resolve_should_not_cast_arguments_in_cast_function() { + when(mockExpression.toString()).thenReturn("string"); + FunctionImplementation function = + repo.resolve(registerFunctionResolver(CAST_TO_BOOLEAN.getName(), DATETIME, BOOLEAN)) + .apply(ImmutableList.of(mockExpression)); + assertEquals("cast_to_boolean(string)", function.toString()); + } + + @Test + void resolve_should_not_cast_arguments_if_same_type() { + when(mockFunctionName.getFunctionName()).thenReturn("mock"); + when(mockExpression.toString()).thenReturn("string"); + FunctionImplementation function = + repo.resolve(registerFunctionResolver(mockFunctionName, STRING, STRING)) + .apply(ImmutableList.of(mockExpression)); + assertEquals("mock(string)", function.toString()); + } + + @Test + void resolve_should_not_cast_arguments_if_both_numbers() { + when(mockFunctionName.getFunctionName()).thenReturn("mock"); + when(mockExpression.toString()).thenReturn("byte"); + FunctionImplementation function = + repo.resolve(registerFunctionResolver(mockFunctionName, BYTE, INTEGER)) + .apply(ImmutableList.of(mockExpression)); + assertEquals("mock(byte)", function.toString()); + } + + @Test + void resolve_should_cast_arguments() { + when(mockFunctionName.getFunctionName()).thenReturn("mock"); + when(mockExpression.toString()).thenReturn("string"); + when(mockExpression.type()).thenReturn(STRING); + + FunctionSignature signature = + registerFunctionResolver(mockFunctionName, STRING, BOOLEAN); + registerFunctionResolver(CAST_TO_BOOLEAN.getName(), STRING, STRING); + + FunctionImplementation function = + repo.resolve(signature) + .apply(ImmutableList.of(mockExpression)); + assertEquals("mock(cast_to_boolean(string))", function.toString()); + } + + @Test + void resolve_should_throw_exception_for_unsupported_conversion() { + ExpressionEvaluationException error = + assertThrows(ExpressionEvaluationException.class, () -> + repo.resolve(registerFunctionResolver(mockFunctionName, BYTE, STRUCT)) + .apply(ImmutableList.of(mockExpression))); + assertEquals(error.getMessage(), "Type conversion to type STRUCT is not supported"); + } + @Test @DisplayName("resolve unregistered function should throw exception") void resolve_unregistered() { @@ -109,4 +193,52 @@ void resolve_unregistered() { () -> repo.resolve(new FunctionSignature(FunctionName.of("unknown"), Arrays.asList()))); assertEquals("unsupported function name: unknown", exception.getMessage()); } + + private FunctionSignature registerFunctionResolver(FunctionName funcName, + ExprType sourceType, + ExprType targetType) { + FunctionSignature unresolvedSignature = new FunctionSignature( + funcName, ImmutableList.of(sourceType)); + FunctionSignature resolvedSignature = new FunctionSignature( + funcName, ImmutableList.of(targetType)); + + FunctionResolver funcResolver = mock(FunctionResolver.class); + FunctionBuilder funcBuilder = mock(FunctionBuilder.class); + + when(mockMap.containsKey(eq(funcName))).thenReturn(true); + when(mockMap.get(eq(funcName))).thenReturn(funcResolver); + when(funcResolver.resolve(eq(unresolvedSignature))).thenReturn( + Pair.of(resolvedSignature, funcBuilder)); + repo.register(funcResolver); + + // Relax unnecessary stubbing check because error case test doesn't call this + lenient().doAnswer(invocation -> + new FakeFunctionExpression(funcName, invocation.getArgument(0)) + ).when(funcBuilder).apply(any()); + return unresolvedSignature; + } + + private static class FakeFunctionExpression extends FunctionExpression { + + public FakeFunctionExpression(FunctionName functionName, List arguments) { + super(functionName, arguments); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + return null; + } + + @Override + public ExprType type() { + return null; + } + + @Override + public String toString() { + return getFunctionName().getFunctionName() + + "(" + StringUtils.join(getArguments(), ", ") + ")"; + } + } + } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java b/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java index 1cd1e3756b..6887837b35 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java @@ -70,7 +70,7 @@ void resolve_function_signature_exactly_match() { FunctionResolver resolver = new FunctionResolver(functionName, ImmutableMap.of(exactlyMatchFS, exactlyMatchBuilder)); - assertEquals(exactlyMatchBuilder, resolver.resolve(functionSignature)); + assertEquals(exactlyMatchBuilder, resolver.resolve(functionSignature).getValue()); } @Test @@ -80,7 +80,7 @@ void resolve_function_signature_best_match() { FunctionResolver resolver = new FunctionResolver(functionName, ImmutableMap.of(bestMatchFS, bestMatchBuilder, leastMatchFS, leastMatchBuilder)); - assertEquals(bestMatchBuilder, resolver.resolve(functionSignature)); + assertEquals(bestMatchBuilder, resolver.resolve(functionSignature).getValue()); } @Test diff --git a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java index dc57a13694..745aa9cc3b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java @@ -28,12 +28,18 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.BYTE; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.data.type.WideningTypeRule.IMPOSSIBLE_WIDENING; import static org.opensearch.sql.data.type.WideningTypeRule.TYPE_EQUAL; @@ -70,6 +76,11 @@ class WideningTypeRuleTest { .put(LONG, FLOAT, 1) .put(LONG, DOUBLE, 2) .put(FLOAT, DOUBLE, 1) + .put(STRING, BOOLEAN, 1) + .put(STRING, TIMESTAMP, 1) + .put(STRING, DATE, 1) + .put(STRING, TIME, 1) + .put(STRING, DATETIME, 1) .put(UNDEFINED, BYTE, 1) .put(UNDEFINED, SHORT, 2) .put(UNDEFINED, INTEGER, 3) diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java index ffccf9a62e..31bdca1426 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java @@ -31,11 +31,14 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.BYTE; import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; +import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; @@ -103,6 +106,22 @@ void castToString(ExprValue value) { assertEquals(new ExprStringValue(value.value().toString()), expression.valueOf(null)); } + @ParameterizedTest(name = "castToByte({0})") + @MethodSource({"numberData"}) + void castToByte(ExprValue value) { + FunctionExpression expression = dsl.castByte(DSL.literal(value)); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(value.byteValue()), expression.valueOf(null)); + } + + @ParameterizedTest(name = "castToShort({0})") + @MethodSource({"numberData"}) + void castToShort(ExprValue value) { + FunctionExpression expression = dsl.castShort(DSL.literal(value)); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(value.shortValue()), expression.valueOf(null)); + } + @ParameterizedTest(name = "castToInt({0})") @MethodSource({"numberData"}) void castToInt(ExprValue value) { @@ -111,6 +130,20 @@ void castToInt(ExprValue value) { assertEquals(new ExprIntegerValue(value.integerValue()), expression.valueOf(null)); } + @Test + void castStringToByte() { + FunctionExpression expression = dsl.castByte(DSL.literal("100")); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(100), expression.valueOf(null)); + } + + @Test + void castStringToShort() { + FunctionExpression expression = dsl.castShort(DSL.literal("100")); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(100), expression.valueOf(null)); + } + @Test void castStringToInt() { FunctionExpression expression = dsl.castInt(DSL.literal("100")); @@ -124,6 +157,28 @@ void castStringToIntException() { assertThrows(RuntimeException.class, () -> expression.valueOf(null)); } + @Test + void castBooleanToByte() { + FunctionExpression expression = dsl.castByte(DSL.literal(true)); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(1), expression.valueOf(null)); + + expression = dsl.castByte(DSL.literal(false)); + assertEquals(BYTE, expression.type()); + assertEquals(new ExprByteValue(0), expression.valueOf(null)); + } + + @Test + void castBooleanToShort() { + FunctionExpression expression = dsl.castShort(DSL.literal(true)); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(1), expression.valueOf(null)); + + expression = dsl.castShort(DSL.literal(false)); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(0), expression.valueOf(null)); + } + @Test void castBooleanToInt() { FunctionExpression expression = dsl.castInt(DSL.literal(true)); @@ -312,4 +367,20 @@ void castToTimestamp() { assertEquals(TIMESTAMP, expression.type()); assertEquals(new ExprTimestampValue("2012-08-07 01:01:01"), expression.valueOf(null)); } + + @Test + void castToDatetime() { + FunctionExpression expression = dsl.castDatetime(DSL.literal("2012-08-07 01:01:01")); + assertEquals(DATETIME, expression.type()); + assertEquals(new ExprDatetimeValue("2012-08-07 01:01:01"), expression.valueOf(null)); + + expression = dsl.castDatetime(DSL.literal(new ExprTimestampValue("2012-08-07 01:01:01"))); + assertEquals(DATETIME, expression.type()); + assertEquals(new ExprDatetimeValue("2012-08-07 01:01:01"), expression.valueOf(null)); + + expression = dsl.castDatetime(DSL.literal(new ExprDateValue("2012-08-07"))); + assertEquals(DATETIME, expression.type()); + assertEquals(new ExprDatetimeValue("2012-08-07 00:00:00"), expression.valueOf(null)); + } + } diff --git a/docs/dev/NewSQLEngine.md b/docs/dev/NewSQLEngine.md index 0be3370f33..cd82e61571 100644 --- a/docs/dev/NewSQLEngine.md +++ b/docs/dev/NewSQLEngine.md @@ -12,23 +12,23 @@ The current SQL query engine provides users the basic query capability for using With the architecture and extensibility improved significantly, the following SQL features are able to be introduced in the new query engine: * **Language Structure** - * [Identifiers](/docs/user/general/identifiers.rst): added support for identifier names with special characters - * [Data types](/docs/user/general/datatypes.rst): added support for date and interval types - * [Expressions](/docs/user/dql/expressions.rst): complex nested expression support - * [SQL functions](/docs/user/dql/functions.rst): more date function support, `ADDDATE`, `DATE_ADD`, `DATE_SUB`, `DAY`, `DAYNAME`, `DAYOFMONTH`, `DAYOFWEEK`, `DAYOFYEAR`, `FROM_DAYS`, `HOUR`, `MICROSECOND`, `MINUTE`, `QUARTER`, `SECOND`, `SUBDATE`, `TIME`, `TIME_TO_SEC`, `TO_DAYS`, `WEEK` - * [Comments](/docs/user/general/comments.rst): SQL comment support + * [Identifiers](../../docs/user/general/identifiers.rst): added support for identifier names with special characters + * [Data types](../../docs/user/general/datatypes.rst): added support for date and interval types + * [Expressions](../../docs/user/dql/expressions.rst): complex nested expression support + * [SQL functions](../../docs/user/dql/functions.rst): more date function support, `ADDDATE`, `DATE_ADD`, `DATE_SUB`, `DAY`, `DAYNAME`, `DAYOFMONTH`, `DAYOFWEEK`, `DAYOFYEAR`, `FROM_DAYS`, `HOUR`, `MICROSECOND`, `MINUTE`, `QUARTER`, `SECOND`, `SUBDATE`, `TIME`, `TIME_TO_SEC`, `TO_DAYS`, `WEEK` + * [Comments](../../docs/user/general/comments.rst): SQL comment support * **Basic queries** - * [HAVING without GROUP BY clause](/docs/user/dql/aggregations.rst#having-without-group-by) - * [Aggregate over arbitrary expression](/docs/user/dql/aggregations.rst#expression) - * [Ordering by NULLS FIRST/LAST](/docs/user/dql/basics.rst#example-2-specifying-order-for-null) - * [Ordering by aggregate function](/docs/user/dql/basics.rst#example-3-ordering-by-aggregate-functions) + * [HAVING without GROUP BY clause](../../docs/user/dql/aggregations.rst#having-without-group-by) + * [Aggregate over arbitrary expression](../../docs/user/dql/aggregations.rst#expression) + * [Ordering by NULLS FIRST/LAST](../../docs/user/dql/basics.rst#example-2-specifying-order-for-null) + * [Ordering by aggregate function](../../docs/user/dql/basics.rst#example-3-ordering-by-aggregate-functions) * **Complex queries** - * [Subqueries in FROM clause](/docs/user/dql/complex.rst#example-2-subquery-in-from-clause): support arbitrary nesting level and aggregation + * [Subqueries in FROM clause](../../docs/user/dql/complex.rst#example-2-subquery-in-from-clause): support arbitrary nesting level and aggregation * **Advanced Features** - * [Window functions](/docs/user/dql/window.rst): ranking and aggregate window functions - * [Selective aggregation](/docs/user/dql/aggregations.rst#filter-clause): by standard `FILTER` function + * [Window functions](../../docs/user/dql/window.rst): ranking and aggregate window functions + * [Selective aggregation](../../docs/user/dql/aggregations.rst#filter-clause): by standard `FILTER` function * **Beyond SQL** - * [Semi-structured data query](/docs/user/beyond/partiql.rst#example-2-selecting-deeper-levels): support querying OpenSearch object fields on arbitrary level + * [Semi-structured data query](../../docs/user/beyond/partiql.rst#example-2-selecting-deeper-levels): support querying OpenSearch object fields on arbitrary level * OpenSearch multi-field: handled automatically and users won't have the access, ex. `text` is converted to `text.keyword` if it’s a multi-field As for correctness, besides full coverage of unit and integration test, we developed a new comparison test framework to ensure correctness by comparing with other databases. Please find more details in [Testing](./Testing.md). @@ -57,7 +57,7 @@ For the following features unsupported in the new engine, the query will be forw ### 3.3 Limitations -You can find all the limitations in [Limitations](/docs/user/limitations/limitations.rst). +You can find all the limitations in [Limitations](../../docs/user/limitations/limitations.rst). --- diff --git a/docs/dev/Pagination.md b/docs/dev/Pagination.md index 38ab9d1793..4982b13d7f 100644 --- a/docs/dev/Pagination.md +++ b/docs/dev/Pagination.md @@ -149,7 +149,7 @@ POST _plugins/_sql/close ### 3.3 Support in JDBC -To use the pagination functionality programmatically using the[JDBC 4.1 specification](https://download.oracle.com/otn-pub/jcp/jdbc-4_1-mrel-spec/jdbc4.1-fr-spec.pdf?AuthParam=1574798710_305327d63d91e91e19dd80953454597a), page size is being used as performance hint given by `Statement.setFetchSize()` and “applied to each result set produced by the statement”. We need to re-implement `Statement.executequery()` and `ResultSet.next()` to take advantage of cursor. +To use the pagination functionality programmatically using the[JDBC 4.1 specification](https://download.oracle.com/otn-pub/jcp/jdbc-4_1-mrel-spec/jdbc4.1-fr-spec.pdf), page size is being used as performance hint given by `Statement.setFetchSize()` and “applied to each result set produced by the statement”. We need to re-implement `Statement.executequery()` and `ResultSet.next()` to take advantage of cursor. We will not support backward scroll on result set. The `Statement` must be created with a `ResultSet` type of `ResultSet.TYPE_FORWARD_ONLY`. Attempt to scroll backwards or otherwise jump around in the `ResultSet` should throw an exception. diff --git a/docs/dev/TypeConversion.md b/docs/dev/TypeConversion.md new file mode 100644 index 0000000000..07697fed07 --- /dev/null +++ b/docs/dev/TypeConversion.md @@ -0,0 +1,172 @@ +# Data Type Conversion in SQL/PPL + +## 1.Overview + +### 1.1 Type Conversion + +Type conversion means conversion from one data type to another which has two aspects to consider: + +1. Whether the conversion is implicit or explicit (implicit conversion is often called coercion) +2. Whether the data is converted within the family or reinterpreted as another data type outside + +It’s common that strong typed language only supports little implicit conversions and no data reinterpretation. While languages with weak typing allows many implicit conversions and flexible reinterpretation. + +### 1.2 Problem Statement + +Currently, there are only 2 implicit conversions allowed which are defined by type hierarchy tree: + +1. Numeric type coercion: narrower numeric types are closer to the root on the top. For example, an integer is converted to a long integer automatically similar as in JAVA. +2. NULL literals: `UNDEFINED` type can be converted to any other so that NULL literal can be accepted by any expression at runtime. + +![Current type hierarchy](img/type-hierarchy-tree-old.png) + +However, more general conversions for non-numeric types are missing, such as conversions between string, bool and date types. The strict type check causes inconvenience and other problems discussed below. + + +--- +## 2.Requirements + +### 2.1 Use Cases + +The common use case and motivation include: + +1. *User-friendly*: Although it doesn’t matter for application or BI tool which can always follow the strict grammar rule, it’s more friendly and accessible to human by implicit type conversion, ex. `date > DATE('2020-06-01') => date > '2020-06-01'` +2. *Schema-on-read*: More importantly, implicit conversion from string is required for schema on read (stored as raw string on write and extract field(s) on read), ex. `regex ‘...’ | abs(a)` + +### 2.2 Functionalities + +Immediate: + +1. Implicit conversion between bool and string: https://github.com/opendistro-for-elasticsearch/sql/issues/1061 +2. Implicit conversion between date and string: https://github.com/opendistro-for-elasticsearch/sql/issues/1056 + +Future: + +1. Implicit conversion between string and more other types for regex command support + + +--- +## 3.Design + +### 3.1 Type Precedence + +Type precedence determines the direction of conversion when fields involved in an expression has different type from resolved signature. Before introducing it into our type system, let’s check how an expression is resolved to a function implementation and why type precedence is required. + +``` +Compiling time: + Expression: 1 = 1.0 + Unresolved signature: equal(INT, DOUBLE) + Resovled signature: equal(DOUBLE, DOUBLE) , distance=1 + Function builder: returns equal(DOUBLE, DOUBLE) impl +``` + +Now let’s follow the same idea to add support for conversion from `BOOLEAN` to `STRING`. Because all boolean values can be converted to a string (in other word string is “wider”), String type is made the parent of Boolean. However, this leads to wrong semantic as the following expression `false = ‘FALSE’` for example: + +``` +Compiling time: + Expression: false = 'FALSE' + Unresolved signature: equal(BOOL, STRING) + Resovled signature: equal(STRING, STRING) + Function builder: returns equal(STRING, STRING) impl + +Runtime: + Function impl: String.value(false).equals('FALSE') + Evaluation result: *false* +``` + +Therefore type precedence is supposed to be defined based on semantic expected rather than intuitive “width” of type. Now let’s reverse the direction and make Boolean the parent of String type. + +![New type hierarchy](img/type-hierarchy-tree-with-implicit-cast.png) + +``` +Compiling time: + Expression: false = 'FALSE' + Unresolved signature: equal(BOOL, STRING) + Resovled signature: equal(BOOL, BOOL) + Function builder: 1) returns equal(BOOL, cast_to_bool(STRING)) impl + 2) returns equal(BOOL, BOOL) impl +Runtime: + equal impl: false.equals(cast_to_bool('FALSE')) + cast_to_bool impl: Boolean.valueOf('FALSE') + Evaluation result: *true* +``` + +### 3.2 General Rules + +1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. +2. Explicit conversion defines the complete set of conversion allowed. If no explicit conversion defined, implicit conversion should be impossible too. +3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too. +4. Conversion within a data type family is considered as conversion between different data representation and should be supported as much as possible. +5. Conversion across 2 data type families is considered as data reinterpretation and should be enabled with strong motivation. + +--- +## 4.Implementation + +### 4.1 Explicit Conversion + +Explicit conversion is defined as the set of `CAST` function implementation which includes all the conversions allowed between data types. Same as before, missing cast function is added and implemented by the conversion logic in `ExprType` class. + +```java +public class Cast extends UnresolvedExpression { + + private static final Map CONVERTED_TYPE_FUNCTION_NAME_MAP = + new ImmutableMap.Builder() + .put("string", CAST_TO_STRING.getName()) + .put("byte", CAST_TO_BYTE.getName()) + .put("short", CAST_TO_SHORT.getName()) + .put("int", CAST_TO_INT.getName()) + .put("integer", CAST_TO_INT.getName()) + .put("long", CAST_TO_LONG.getName()) + .put("float", CAST_TO_FLOAT.getName()) + .put("double", CAST_TO_DOUBLE.getName()) + .put("boolean", CAST_TO_BOOLEAN.getName()) + .put("date", CAST_TO_DATE.getName()) + .put("time", CAST_TO_TIME.getName()) + .put("timestamp", CAST_TO_TIMESTAMP.getName()) + .build(); +} +``` + +### 4.2 Implicit Conversion + +Implicit conversion and precedence are defined by the type hierarchy tree. The data type at the head of an arrow has higher precedence than the type at the tail. + +```java +public enum ExprCoreType implements ExprType { + UNKNOWN, + UNDEFINED, + + /** + * Numbers. + */ + BYTE(UNDEFINED), + SHORT(BYTE), + INTEGER(SHORT), + LONG(INTEGER), + FLOAT(LONG), + DOUBLE(FLOAT), + + STRING(UNDEFINED), + BOOLEAN(STRING), // PR: change STRING's parent to BOOLEAN + + /** + * Date. + */ + TIMESTAMP(UNDEFINED), + DATE(UNDEFINED), + TIME(UNDEFINED), + DATETIME(UNDEFINED), + INTERVAL(UNDEFINED), + + STRUCT(UNDEFINED), + ARRAY(UNDEFINED); +} +``` + +### 4.3 Type Casting Logic + +As with examples in section 3.1, the implementation is: + +1. Define all possible conversions in CAST function family. +2. Define implicit conversions by type hierarchy tree (auto implicit cast from child to parent) +3. During compile time, wrap original function builder by a new one which cast arguments to target type. diff --git a/docs/dev/img/type-hierarchy-tree-old.png b/docs/dev/img/type-hierarchy-tree-old.png new file mode 100644 index 0000000000..7add83f286 Binary files /dev/null and b/docs/dev/img/type-hierarchy-tree-old.png differ diff --git a/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png b/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png new file mode 100644 index 0000000000..b729e23ab9 Binary files /dev/null and b/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png differ diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst index 1d6d172981..275666e7ba 100644 --- a/docs/user/dql/aggregations.rst +++ b/docs/user/dql/aggregations.rst @@ -357,6 +357,19 @@ Example:: | 2.8613807855648994 | +--------------------+ +DISTINCT COUNT Aggregation +-------------------------- + +To get the count of distinct values of a field, you can add a keyword ``DISTINCT`` before the field in the count aggregation. Example:: + + os> SELECT COUNT(DISTINCT gender), COUNT(gender) FROM accounts; + fetched rows / total rows = 1/1 + +--------------------------+-----------------+ + | COUNT(DISTINCT gender) | COUNT(gender) | + |--------------------------+-----------------| + | 2 | 4 | + +--------------------------+-----------------+ + HAVING Clause ============= @@ -456,3 +469,16 @@ The ``FILTER`` clause can be used in aggregation functions without GROUP BY as w | 4 | 1 | +--------------+------------+ +Distinct count aggregate with FILTER +------------------------------------ + +The ``FILTER`` clause is also used in distinct count to do the filtering before count the distinct values of specific field. For example:: + + os> SELECT COUNT(DISTINCT firstname) FILTER(WHERE age > 30) AS distinct_count FROM accounts + fetched rows / total rows = 1/1 + +------------------+ + | distinct_count | + |------------------| + | 3 | + +------------------+ + diff --git a/docs/user/general/datatypes.rst b/docs/user/general/datatypes.rst index 0077422a74..c0a3bf62ac 100644 --- a/docs/user/general/datatypes.rst +++ b/docs/user/general/datatypes.rst @@ -1,4 +1,3 @@ - ========== Data Types ========== @@ -105,6 +104,106 @@ The table below list the mapping between OpenSearch Data Type, OpenSearch SQL Da Notes: Not all the OpenSearch SQL Type has correspond OpenSearch Type. e.g. data and time. To use function which required such data type, user should explicitly convert the data type. +Data Type Conversion +==================== + +A data type can be converted to another, implicitly or explicitly or impossibly, according to type precedence defined and whether the conversion is supported by query engine. + +The general rules and design tenets for data type conversion include: + +1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. See `Data Type Conversion in SQL/PPL `_ for more details. +2. Explicit conversion defines the complete set of conversion allowed. If no explicit conversion defined, implicit conversion should be impossible too. +3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too. +4. Conversion within a data type family is considered as conversion between different data representation and should be supported as much as possible. +5. Conversion across two data type families is considered as data reinterpretation and should be enabled with strong motivation. + +Type Conversion Matrix +---------------------- + +The following matrix illustrates the conversions allowed by our query engine for all the built-in data types as well as types provided by OpenSearch storage engine. + ++--------------+------------------------------------------------+---------+------------------------------+-----------------------------------------------+--------------------------+---------------------+ +| Data Types | Numeric Type Family | BOOLEAN | String Type Family | Datetime Type Family | OpenSearch Type Family | Complex Type Family | +| +------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| | BYTE | SHORT | INTEGER | LONG | FLOAT | DOUBLE | BOOLEAN | TEXT_KEYWORD | TEXT | STRING | TIMESTAMP | DATE | TIME | DATETIME | INTERVAL | GEO_POINT | IP | BINARY | STRUCT | ARRAY | ++==============+======+=======+=========+======+=======+========+=========+==============+======+========+===========+======+======+==========+==========+===========+=====+========+===========+=========+ +| UNDEFINED | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| BYTE | N/A | IE | IE | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| SHORT | E | N/A | IE | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| INTEGER | E | E | N/A | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| LONG | E | E | E | N/A | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| FLOAT | E | E | E | E | N/A | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| DOUBLE | E | E | E | E | E | N/A | X | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| BOOLEAN | E | E | E | E | E | E | N/A | X | X | E | X | X | X | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TEXT_KEYWORD | | | | | | | | N/A | | IE | | | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TEXT | | | | | | | | | N/A | IE | | | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| STRING | E | E | E | E | E | E | IE | X | X | N/A | IE | IE | IE | IE | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TIMESTAMP | X | X | X | X | X | X | X | X | X | E | N/A | | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| DATE | X | X | X | X | X | X | X | X | X | E | | N/A | | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| TIME | X | X | X | X | X | X | X | X | X | E | | | N/A | X | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| DATETIME | X | X | X | X | X | X | X | X | X | E | | | | N/A | X | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| INTERVAL | X | X | X | X | X | X | X | X | X | E | | | | X | N/A | X | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| GEO_POINT | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | N/A | X | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| IP | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | N/A | X | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| BINARY | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | N/A | X | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| STRUCT | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | X | N/A | X | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ +| ARRAY | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | X | X | N/A | ++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+ + +Note that: + +1. ``I`` means if implicit conversion will occur automatically. ``E`` stands for explicit conversion by ``CAST`` function. ``X`` for impossible to convert. Empty means not clear and need more test. +2. There is no ``UNDEFINED`` column because it's only for ``NULL`` literal at runtime and should not be present in function signature definition. +3. OpenSearch and complex types are not supported by ``CAST`` function, so it's impossible to convert a type to it for now. + +Examples +-------- + +Here are a few examples for implicit type conversion:: + + os> SELECT + ... 1 = 1.0, + ... 'True' = true, + ... DATE('2021-06-10') < '2021-06-11'; + fetched rows / total rows = 1/1 + +-----------+-----------------+-------------------------------------+ + | 1 = 1.0 | 'True' = true | DATE('2021-06-10') < '2021-06-11' | + |-----------+-----------------+-------------------------------------| + | True | True | True | + +-----------+-----------------+-------------------------------------+ + +Here are a few examples for explicit type conversion:: + + os> SELECT + ... CAST(true AS INT), + ... CAST(1.2 AS STRING), + ... CAST('2021-06-10 00:00:00' AS TIMESTAMP); + fetched rows / total rows = 1/1 + +---------------------+-----------------------+--------------------------------------------+ + | CAST(true AS INT) | CAST(1.2 AS STRING) | CAST('2021-06-10 00:00:00' AS TIMESTAMP) | + |---------------------+-----------------------+--------------------------------------------| + | 1 | 1.2 | 2021-06-10 00:00:00 | + +---------------------+-----------------------+--------------------------------------------+ Undefined Data Type =================== @@ -233,6 +332,21 @@ Conversion from TIMESTAMP - Conversion from timestamp is much more straightforward. To convert it to date is to extract the date value, and conversion to time is to extract the time value. Conversion to datetime, it will extracts the datetime value and leave the timezone information over. For example, the result to convert datetime '2020-08-17 14:09:00' UTC to date is date '2020-08-17', to time is '14:09:00' and to datetime is datetime '2020-08-17 14:09:00'. +Conversion from string to date and time types +--------------------------------------------- + +A string can also represent and be converted to date and time types (except to interval type). As long as the string value is of valid format required by the target date and time types, the conversion can happen implicitly or explicitly as follows:: + + os> SELECT + ... TIMESTAMP('2021-06-17 00:00:00') = '2021-06-17 00:00:00', + ... '2021-06-18' < DATE('2021-06-17'), + ... '10:20:00' <= TIME('11:00:00'); + fetched rows / total rows = 1/1 + +------------------------------------------------------------+-------------------------------------+----------------------------------+ + | TIMESTAMP('2021-06-17 00:00:00') = '2021-06-17 00:00:00' | '2021-06-18' < DATE('2021-06-17') | '10:20:00' <= TIME('11:00:00') | + |------------------------------------------------------------+-------------------------------------+----------------------------------| + | True | False | True | + +------------------------------------------------------------+-------------------------------------+----------------------------------+ String Data Types ================= @@ -248,7 +362,17 @@ A string is a sequence of characters enclosed in either single or double quotes. +-----------+-----------+-------------+-------------+ +Boolean Data Types +================== +A boolean can be represented by constant value ``TRUE`` or ``FALSE``. Besides, certain string representation is also accepted by function with boolean input. For example, string 'true', 'TRUE', 'false', 'FALSE' are all valid representation and can be converted to boolean implicitly or explicitly:: - - + os> SELECT + ... true, FALSE, + ... CAST('TRUE' AS boolean), CAST('false' AS boolean); + fetched rows / total rows = 1/1 + +--------+---------+---------------------------+----------------------------+ + | true | FALSE | CAST('TRUE' AS boolean) | CAST('false' AS boolean) | + |--------+---------+---------------------------+----------------------------| + | True | False | True | False | + +--------+---------+---------------------------+----------------------------+ diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index f6dad255ef..ee91d86d14 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -302,3 +302,18 @@ PPL query:: | 36 | 32 | M | +------------+------------+----------+ +Example 7: Calculate the distinct count of a field +================================================== + +To get the count of distinct values of a field, you can use ``DISTINCT_COUNT`` (or ``DC``) function instead of ``COUNT``. The example calculates both the count and the distinct count of gender field of all the accounts. + +PPL query:: + + os> source=accounts | stats count(gender), distinct_count(gender); + fetched rows / total rows = 1/1 + +-----------------+--------------------------+ + | count(gender) | distinct_count(gender) | + |-----------------+--------------------------| + | 4 | 2 | + +-----------------+--------------------------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java index ff3ad2a6c8..4a9603fe6b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java @@ -77,6 +77,19 @@ public void testStatsCountAll() throws IOException { verifyDataRows(response, rows(1000)); } + @Test + public void testStatsDistinctCount() throws IOException { + JSONObject response = + executeQuery(String.format("source=%s | stats distinct_count(gender)", TEST_INDEX_ACCOUNT)); + verifySchema(response, schema("distinct_count(gender)", null, "integer")); + verifyDataRows(response, rows(2)); + + response = + executeQuery(String.format("source=%s | stats dc(age)", TEST_INDEX_ACCOUNT)); + verifySchema(response, schema("dc(age)", null, "integer")); + verifyDataRows(response, rows(21)); + } + @Test public void testStatsMin() throws IOException { JSONObject response = executeQuery(String.format( diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java index 3cbb222afe..33cddc6f1f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java @@ -30,7 +30,15 @@ protected void init() throws Exception { } @Test - void filteredAggregateWithSubquery() throws IOException { + void filteredAggregatePushedDown() throws IOException { + JSONObject response = executeQuery( + "SELECT COUNT(*) FILTER(WHERE age > 35) FROM " + TEST_INDEX_BANK); + verifySchema(response, schema("COUNT(*)", null, "integer")); + verifyDataRows(response, rows(3)); + } + + @Test + void filteredAggregateNotPushedDown() throws IOException { JSONObject response = executeQuery( "SELECT COUNT(*) FILTER(WHERE age > 35) FROM (SELECT * FROM " + TEST_INDEX_BANK + ") AS a"); diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java index b92ca17238..52373a72e3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java @@ -29,6 +29,8 @@ import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; + import org.json.JSONObject; import org.junit.Test; @@ -40,6 +42,7 @@ public class WindowFunctionIT extends SQLIntegTestCase { @Override protected void init() throws Exception { loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.BANK); } @Test @@ -74,4 +77,49 @@ public void testOrderByNullLast() { rows(null, 7)); } + @Test + public void testDistinctCountOverNull() { + JSONObject response = new JSONObject(executeQuery( + "SELECT lastname, COUNT(DISTINCT gender) OVER() " + + "FROM " + TestsConstants.TEST_INDEX_BANK, "jdbc")); + verifyDataRows(response, + rows("Duke Willmington", 2), + rows("Bond", 2), + rows("Bates", 2), + rows("Adams", 2), + rows("Ratliff", 2), + rows("Ayala", 2), + rows("Mcpherson", 2)); + } + + @Test + public void testDistinctCountOver() { + JSONObject response = new JSONObject(executeQuery( + "SELECT lastname, COUNT(DISTINCT gender) OVER(ORDER BY lastname) " + + "FROM " + TestsConstants.TEST_INDEX_BANK, "jdbc")); + verifyDataRowsInOrder(response, + rows("Adams", 1), + rows("Ayala", 2), + rows("Bates", 2), + rows("Bond", 2), + rows("Duke Willmington", 2), + rows("Mcpherson", 2), + rows("Ratliff", 2)); + } + + @Test + public void testDistinctCountPartition() { + JSONObject response = new JSONObject(executeQuery( + "SELECT lastname, COUNT(DISTINCT gender) OVER(PARTITION BY gender ORDER BY lastname) " + + "FROM " + TestsConstants.TEST_INDEX_BANK, "jdbc")); + verifyDataRowsInOrder(response, + rows("Ayala", 1), + rows("Bates", 1), + rows("Mcpherson", 1), + rows("Adams", 1), + rows("Bond", 1), + rows("Duke Willmington", 1), + rows("Ratliff", 1)); + } + } diff --git a/integ-test/src/test/resources/correctness/expressions/cast.txt b/integ-test/src/test/resources/correctness/expressions/cast.txt index 4018a73f09..2d313203b4 100644 --- a/integ-test/src/test/resources/correctness/expressions/cast.txt +++ b/integ-test/src/test/resources/correctness/expressions/cast.txt @@ -17,3 +17,10 @@ cast('01:01:01' as time) as castTime cast('true' as boolean) as castBool cast(1 as boolean) as castBool cast(cast(1 as string) as int) castCombine +false = 'False' as implicitCast +false = 'true' as implicitCast +'TRUE' = true as implicitCast +'false' = true as implicitCast +CAST('2021-06-17 00:00:00' AS TIMESTAMP) = '2021-06-17 00:00:00' as implicitCast +'2021-06-18' < CAST('2021-06-17' AS DATE) as implicitCast +'10:20:00' <= CAST('11:00:00' AS TIME) as implicitCast diff --git a/integ-test/src/test/resources/correctness/queries/aggregation.txt b/integ-test/src/test/resources/correctness/queries/aggregation.txt index 45aa658783..0c0648a937 100644 --- a/integ-test/src/test/resources/correctness/queries/aggregation.txt +++ b/integ-test/src/test/resources/correctness/queries/aggregation.txt @@ -9,4 +9,6 @@ SELECT MIN(timestamp) FROM opensearch_dashboards_sample_data_flights SELECT VAR_POP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights SELECT VAR_SAMP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights SELECT STDDEV_POP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights -SELECT STDDEV_SAMP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights \ No newline at end of file +SELECT STDDEV_SAMP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights +SELECT COUNT(DISTINCT Origin), COUNT(DISTINCT Dest) FROM opensearch_dashboards_sample_data_flights +SELECT COUNT(DISTINCT Origin) FROM (SELECT * FROM opensearch_dashboards_sample_data_flights) AS flights \ No newline at end of file diff --git a/integtest.sh b/integtest.sh deleted file mode 100755 index 8be8831a60..0000000000 --- a/integtest.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -set -e - -function usage() { - echo "" - echo "This script is used to run integration tests for plugin installed on a remote OpenSearch/Dashboards cluster." - echo "--------------------------------------------------------------------------" - echo "Usage: $0 [args]" - echo "" - echo "Required arguments:" - echo "None" - echo "" - echo "Optional arguments:" - echo -e "-b BIND_ADDRESS\t, defaults to localhost | 127.0.0.1, can be changed to any IP or domain name for the cluster location." - echo -e "-p BIND_PORT\t, defaults to 9200 or 5601 depends on OpenSearch or Dashboards, can be changed to any port for the cluster location." - echo -e "-s SECURITY_ENABLED\t(true | false), defaults to true. Specify the OpenSearch/Dashboards have security enabled or not." - echo -e "-c CREDENTIAL\t(usename:password), no defaults, effective when SECURITY_ENABLED=true." - echo -e "-h\tPrint this message." - echo "--------------------------------------------------------------------------" -} - -while getopts ":hb:p:s:c:" arg; do - case $arg in - h) - usage - exit 1 - ;; - b) - BIND_ADDRESS=$OPTARG - ;; - p) - BIND_PORT=$OPTARG - ;; - s) - SECURITY_ENABLED=$OPTARG - ;; - c) - CREDENTIAL=$OPTARG - ;; - :) - echo "-${OPTARG} requires an argument" - usage - exit 1 - ;; - ?) - echo "Invalid option: -${OPTARG}" - exit 1 - ;; - esac -done - - -if [ -z "$BIND_ADDRESS" ] -then - BIND_ADDRESS="localhost" -fi - -if [ -z "$BIND_PORT" ] -then - BIND_PORT="9200" -fi - -if [ -z "$SECURITY_ENABLED" ] -then - SECURITY_ENABLED="true" -fi - -if [ -z "$CREDENTIAL" ] -then - CREDENTIAL="admin:admin" - USERNAME=`echo $CREDENTIAL | awk -F ':' '{print $1}'` - PASSWORD=`echo $CREDENTIAL | awk -F ':' '{print $2}'` -fi - -./gradlew integTest -Dtests.rest.cluster="$BIND_ADDRESS:$BIND_PORT" -Dtests.cluster="$BIND_ADDRESS:$BIND_PORT" -Dtests.clustername="opensearch-integrationtest" -Dhttps=$SECURITY_ENABLED -Duser=$USERNAME -Dpassword=$PASSWORD --console=plain - diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index 60d7f8c684..2d40218688 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -45,16 +45,27 @@ @RequiredArgsConstructor public enum OpenSearchDataType implements ExprType { /** - * OpenSearch Text. + * OpenSearch Text. Rather than cast text to other types (STRING), leave it alone to prevent + * cast_to_string(OPENSEARCH_TEXT). * Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/text.html */ - OPENSEARCH_TEXT(Collections.singletonList(STRING), "string"), + OPENSEARCH_TEXT(Collections.singletonList(STRING), "string") { + @Override + public boolean shouldCast(ExprType other) { + return false; + } + }, /** * OpenSearch multi-fields which has text and keyword. * Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-fields.html */ - OPENSEARCH_TEXT_KEYWORD(Arrays.asList(STRING, OPENSEARCH_TEXT), "string"), + OPENSEARCH_TEXT_KEYWORD(Arrays.asList(STRING, OPENSEARCH_TEXT), "string") { + @Override + public boolean shouldCast(ExprType other) { + return false; + } + }, OPENSEARCH_IP(Arrays.asList(UNKNOWN), "ip"), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java index 73d58d793e..cd793c9046 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java @@ -43,11 +43,9 @@ /** * Abstract Aggregation Builder. - * - * @param type of the actual AggregationBuilder to be built. */ @RequiredArgsConstructor -public class AggregationBuilderHelper { +public class AggregationBuilderHelper { private final ExpressionSerializer serializer; @@ -57,7 +55,7 @@ public class AggregationBuilderHelper { * @param expression Expression * @return AggregationBuilder */ - public T build(Expression expression, Function fieldBuilder, + public T build(Expression expression, Function fieldBuilder, Function scriptBuilder) { if (expression instanceof ReferenceExpression) { String fieldName = ((ReferenceExpression) expression).getAttr(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java index b1aff2c5b4..d137cce75d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java @@ -42,11 +42,11 @@ */ public class BucketAggregationBuilder { - private final AggregationBuilderHelper> helper; + private final AggregationBuilderHelper helper; public BucketAggregationBuilder( ExpressionSerializer serializer) { - this.helper = new AggregationBuilderHelper<>(serializer); + this.helper = new AggregationBuilderHelper(serializer); } /** @@ -62,8 +62,8 @@ public List> build( .missingBucket(true) .order(groupPair.getRight()); resultBuilder - .add(helper.build(groupPair.getLeft().getDelegated(), valuesSourceBuilder::field, - valuesSourceBuilder::script)); + .add((CompositeValuesSourceBuilder) helper.build(groupPair.getLeft().getDelegated(), + valuesSourceBuilder::field, valuesSourceBuilder::script)); } return resultBuilder.build(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java index 3d40258288..754da49862 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java @@ -32,11 +32,13 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.bucket.filter.FilterAggregationBuilder; +import org.opensearch.search.aggregations.metrics.CardinalityAggregationBuilder; import org.opensearch.search.aggregations.metrics.ExtendedStats; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.sql.expression.Expression; @@ -57,11 +59,14 @@ public class MetricAggregationBuilder extends ExpressionNodeVisitor, Object> { - private final AggregationBuilderHelper> helper; + private final AggregationBuilderHelper helper; private final FilterQueryBuilder filterBuilder; + /** + * Constructor. + */ public MetricAggregationBuilder(ExpressionSerializer serializer) { - this.helper = new AggregationBuilderHelper<>(serializer); + this.helper = new AggregationBuilderHelper(serializer); this.filterBuilder = new FilterQueryBuilder(serializer); } @@ -88,9 +93,26 @@ public Pair visitNamedAggregator( NamedAggregator node, Object context) { Expression expression = node.getArguments().get(0); Expression condition = node.getDelegated().condition(); + Boolean distinct = node.getDelegated().distinct(); String name = node.getName(); + String functionName = node.getFunctionName().getFunctionName().toLowerCase(Locale.ROOT); + + if (distinct) { + switch (functionName) { + case "count": + return make( + AggregationBuilders.cardinality(name), + expression, + condition, + name, + new SingleValueParser(name)); + default: + throw new IllegalStateException(String.format( + "unsupported distinct aggregator %s", node.getFunctionName().getFunctionName())); + } + } - switch (node.getFunctionName().getFunctionName()) { + switch (functionName) { case "avg": return make( AggregationBuilders.avg(name), @@ -176,6 +198,24 @@ private Pair make( return Pair.of(aggregationBuilder, parser); } + /** + * Make {@link CardinalityAggregationBuilder} for distinct count aggregations. + */ + private Pair make(CardinalityAggregationBuilder builder, + Expression expression, + Expression condition, + String name, + MetricParser parser) { + CardinalityAggregationBuilder aggregationBuilder = + helper.build(expression, builder::field, builder::script); + if (condition != null) { + return Pair.of( + makeFilterAggregation(aggregationBuilder, condition, name), + FilterParser.builder().name(name).metricsParser(parser).build()); + } + return Pair.of(aggregationBuilder, parser); + } + /** * Replace star or literal with OpenSearch metadata field "_index". Because: 1) Analyzer already * converts * to string literal, literal check here can handle both COUNT(*) and COUNT(1). 2) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java index 825200ce00..49af26eb46 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java @@ -58,4 +58,10 @@ public void legacyTypeName() { assertEquals("text", OPENSEARCH_TEXT.legacyTypeName()); assertEquals("text", OPENSEARCH_TEXT_KEYWORD.legacyTypeName()); } + + @Test + public void testShouldCast() { + assertFalse(OPENSEARCH_TEXT.shouldCast(STRING)); + assertFalse(OPENSEARCH_TEXT_KEYWORD.shouldCast(STRING)); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java index 95a2383475..129814d45f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java @@ -32,6 +32,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.when; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.expression.DSL.literal; import static org.opensearch.sql.expression.DSL.named; import static org.opensearch.sql.expression.DSL.ref; @@ -42,6 +43,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import java.util.Arrays; +import java.util.Collections; import java.util.List; import lombok.SneakyThrows; import org.junit.jupiter.api.BeforeEach; @@ -51,19 +53,21 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.aggregation.AvgAggregator; import org.opensearch.sql.expression.aggregation.CountAggregator; import org.opensearch.sql.expression.aggregation.MaxAggregator; import org.opensearch.sql.expression.aggregation.MinAggregator; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.expression.aggregation.SumAggregator; -import org.opensearch.sql.expression.aggregation.VarianceAggregator; +import org.opensearch.sql.expression.config.ExpressionConfig; import org.opensearch.sql.expression.function.FunctionName; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @ExtendWith(MockitoExtension.class) class MetricAggregationBuilderTest { + private final DSL dsl = new ExpressionConfig().dsl(new ExpressionConfig().functionRepository()); @Mock private ExpressionSerializer serializer; @@ -258,6 +262,61 @@ void should_build_stddevSamp_aggregation() { stddevSample(Arrays.asList(ref("age", INTEGER)), INTEGER))))); } + @Test + void should_build_cardinality_aggregation() { + assertEquals( + "{\n" + + " \"count(distinct name)\" : {\n" + + " \"cardinality\" : {\n" + + " \"field\" : \"name\"\n" + + " }\n" + + " }\n" + + "}", + buildQuery( + Collections.singletonList(named("count(distinct name)", new CountAggregator( + Collections.singletonList(ref("name", STRING)), INTEGER).distinct(true))))); + } + + @Test + void should_build_filtered_cardinality_aggregation() { + assertEquals( + "{\n" + + " \"count(distinct name) filter(where age > 30)\" : {\n" + + " \"filter\" : {\n" + + " \"range\" : {\n" + + " \"age\" : {\n" + + " \"from\" : 30,\n" + + " \"to\" : null,\n" + + " \"include_lower\" : false,\n" + + " \"include_upper\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " },\n" + + " \"aggregations\" : {\n" + + " \"count(distinct name) filter(where age > 30)\" : {\n" + + " \"cardinality\" : {\n" + + " \"field\" : \"name\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", + buildQuery(Collections.singletonList(named( + "count(distinct name) filter(where age > 30)", + new CountAggregator(Collections.singletonList(ref("name", STRING)), INTEGER) + .condition(dsl.greater(ref("age", INTEGER), literal(30))) + .distinct(true))))); + } + + @Test + void should_throw_exception_for_unsupported_distinct_aggregator() { + assertThrows(IllegalStateException.class, + () -> buildQuery(Collections.singletonList(named("avg(distinct age)", new AvgAggregator( + Collections.singletonList(ref("name", STRING)), STRING).distinct(true)))), + "unsupported distinct aggregator avg"); + } + @Test void should_throw_exception_for_unsupported_aggregator() { when(aggregator.getFunctionName()).thenReturn(new FunctionName("unsupported_agg")); diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java index 4a5f35bddb..5a230b8d05 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java @@ -97,8 +97,6 @@ public class RestPPLQueryAction extends BaseRestHandler { private final Supplier pplEnabled; - private PPLQueryRequest pplRequest; - /** * Constructor of RestPPLQueryAction. */ @@ -155,12 +153,12 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient nod } PPLService pplService = createPPLService(nodeClient); - pplRequest = PPLQueryRequestFactory.getPPLRequest(request); + PPLQueryRequest pplRequest = PPLQueryRequestFactory.getPPLRequest(request); if (pplRequest.isExplainRequest()) { return channel -> pplService.explain(pplRequest, createExplainResponseListener(channel)); } - return channel -> pplService.execute(pplRequest, createListener(channel)); + return channel -> pplService.execute(pplRequest, createListener(channel, pplRequest)); } /** @@ -215,7 +213,8 @@ public void onFailure(Exception e) { }; } - private ResponseListener createListener(RestChannel channel) { + private ResponseListener createListener(RestChannel channel, + PPLQueryRequest pplRequest) { Format format = pplRequest.format(); ResponseFormatter formatter; if (format.equals(Format.CSV)) { diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index d552ad0756..fa3c5b64b7 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -135,6 +135,7 @@ statsAggTerm statsFunction : statsFunctionName LT_PRTHS valueExpression RT_PRTHS #statsFunctionCall | COUNT LT_PRTHS RT_PRTHS #countAllFunctionCall + | (DISTINCT_COUNT | DC) LT_PRTHS valueExpression RT_PRTHS #distinctCountFunctionCall | percentileAggFunction #percentileAggFunctionCall ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 9fdf8d636d..7da4f90cf0 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -35,6 +35,7 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.CompareExprContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.CountAllFunctionCallContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DistinctCountFunctionCallContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EvalClauseContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EvalFunctionCallContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldExpressionContext; @@ -203,6 +204,11 @@ public UnresolvedExpression visitCountAllFunctionCall(CountAllFunctionCallContex return new AggregateFunction("count", AllFields.of()); } + @Override + public UnresolvedExpression visitDistinctCountFunctionCall(DistinctCountFunctionCallContext ctx) { + return new AggregateFunction("count", visit(ctx.valueExpression()), true); + } + @Override public UnresolvedExpression visitPercentileAggFunction(PercentileAggFunctionContext ctx) { return new AggregateFunction(ctx.PERCENTILE().getText(), visit(ctx.aggField), diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 71ef692abf..bfb975ba74 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -37,6 +37,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.defaultFieldsArgs; import static org.opensearch.sql.ast.dsl.AstDSL.defaultSortFieldArgs; import static org.opensearch.sql.ast.dsl.AstDSL.defaultStatsArgs; +import static org.opensearch.sql.ast.dsl.AstDSL.distinctAggregate; import static org.opensearch.sql.ast.dsl.AstDSL.doubleLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.equalTo; import static org.opensearch.sql.ast.dsl.AstDSL.eval; @@ -460,6 +461,19 @@ public void testCountFuncCallExpr() { )); } + @Test + public void testDistinctCount() { + assertEqual("source=t | stats distinct_count(a)", + agg( + relation("t"), + exprList( + alias("distinct_count(a)", + distinctAggregate("count", field("a")))), + emptyList(), + emptyList(), + defaultStatsArgs())); + } + @Test public void testEvalFuncCallExpr() { assertEqual("source=t | eval f=abs(a)", diff --git a/release-notes/opensearch-sql.release-notes-1.0.0.0.md b/release-notes/opensearch-sql.release-notes-1.0.0.0.md new file mode 100644 index 0000000000..9f4ba69278 --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-1.0.0.0.md @@ -0,0 +1,54 @@ +## 2021-07-12 Version 1.0.0.0 + +Compatible with OpenSearch and OpenSearch Dashboards Version 1.0.0 + +### Enhancements + +* Support querying a data stream ([#56](https://github.com/opensearch-project/sql/pull/56)) + +### Bug Fixes + +* Bug Fix: Enable legacy settings in new setting action ([#97](https://github.com/opensearch-project/sql/pull/97)) +* Fix NPE for SHOW statement without filter ([#150](https://github.com/opensearch-project/sql/pull/150)) + +### OpenSearch Migration + +* Remove debug logging in ODBC driver ([#27](https://github.com/opensearch-project/sql/pull/27)) +* Update workbench nav category to opensearch ([#28](https://github.com/opensearch-project/sql/pull/28)) +* fix opendistro related renaming for sql-cli ([#29](https://github.com/opensearch-project/sql/pull/29)) +* Fix issue of workbench not outputting errors ([#32](https://github.com/opensearch-project/sql/pull/32)) +* Update issue template with multiple labels ([#34](https://github.com/opensearch-project/sql/pull/34)) +* SQL/PPL and JDBC package renaming ([#54](https://github.com/opensearch-project/sql/pull/54)) +* Upgrade dependencies to address high severity CVE-2021-20270 ([#61](https://github.com/opensearch-project/sql/pull/61)) +* ODBC folder, file and code renaming ([#62](https://github.com/opensearch-project/sql/pull/62)) +* Update workbench documentation links, complete renaming ([#67](https://github.com/opensearch-project/sql/pull/67)) +* Update sqli-cli documentation links to OpenSearch ([#72](https://github.com/opensearch-project/sql/pull/72)) +* Remove opensearch.sql.engine.new.enabled setting ([#70](https://github.com/opensearch-project/sql/pull/70)) +* SQL/PPL API endpoint backward compatibility ([#66](https://github.com/opensearch-project/sql/pull/66)) +* Remove opensearch.sql.query.analysis.* related settings ([#76](https://github.com/opensearch-project/sql/pull/76)) +* Remove opensearch.sql.query.response.format setting ([#77](https://github.com/opensearch-project/sql/pull/77)) +* Migrate #1097: Adding support to NOT REGEXP_QUERY ([#79](https://github.com/opensearch-project/sql/pull/79)) +* Migrate #1083: Support long literals in SQL/PPL ([#80](https://github.com/opensearch-project/sql/pull/80)) +* Change strategy to test connectivity between ODBC driver and SQL plugin ([#69](https://github.com/opensearch-project/sql/pull/69)) +* Remove cursor enabling and fetch size setting ([#75](https://github.com/opensearch-project/sql/pull/75)) +* Disable DELETE clause by defaut and add opensearch.sql.delete.enabled setting ([#81](https://github.com/opensearch-project/sql/pull/81)) +* Support Plugin Settings Backwards Compatibility ([#82](https://github.com/opensearch-project/sql/pull/82)) +* Updated icon and background images in ODBC installers ([#84](https://github.com/opensearch-project/sql/pull/84)) +* Build SQL/PPL against OpenSearch rc1 and rename artifacts ([#83](https://github.com/opensearch-project/sql/pull/83)) +* Support text functions ASCII, LEFT, LOCATE, REPLACE in new engine ([#88](https://github.com/opensearch-project/sql/pull/88)) +* Update PowerBI custom connector .mez file for ODBC driver ([#90](https://github.com/opensearch-project/sql/pull/90)) +* Rename remaining beta1 references in sql-cli/workbench ([#91](https://github.com/opensearch-project/sql/pull/91)) +* Build SQL/PPL against OpenSearch 1.0 branch ([#94](https://github.com/opensearch-project/sql/pull/94)) +* Bump OpenSearch Dashboards version to 1.0 in Workbench ([#98](https://github.com/opensearch-project/sql/pull/98)) +* Add Integtest.sh for OpenSearch integtest setups ([#128](https://github.com/opensearch-project/sql/pull/128)) +* Merge develop into main ([#142](https://github.com/opensearch-project/sql/pull/142)) +* Build against OpenSearch 1.0.0 and bump artifact version to 1.0.0.0 ([#146](https://github.com/opensearch-project/sql/pull/146)) + +### Documentation + +* Migrate SQL/PPL, JDBC, ODBC docs to OpenSearch ([#68](https://github.com/opensearch-project/sql/pull/68)) +* Level up README markdown ([#148](https://github.com/opensearch-project/sql/pull/148)) + +### Infrastructure + +* Bump glob-parent from 5.1.1 to 5.1.2 in /workbench ([#125](https://github.com/opensearch-project/sql/pull/125)) diff --git a/release-notes/opensearch-sql.release-notes-1.1.0.0.md b/release-notes/opensearch-sql.release-notes-1.1.0.0.md new file mode 100644 index 0000000000..a99aa7b56a --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-1.1.0.0.md @@ -0,0 +1,25 @@ +## 2021-09-02 Version 1.1.0.0 + +Compatible with OpenSearch and OpenSearch Dashboards Version 1.1.0 + +### Enhancements + +* Support implicit type conversion from string to boolean ([#166](https://github.com/opensearch-project/sql/pull/166)) +* Support distinct count aggregation ([#167](https://github.com/opensearch-project/sql/pull/167)) +* Support implicit type conversion from string to temporal ([#171](https://github.com/opensearch-project/sql/pull/171)) +* Workbench: auto dump cypress test data, support security ([#199](https://github.com/opensearch-project/sql/pull/199)) + +### Bug Fixes + +* Fix for SQL-ODBC AWS Init and Shutdown Behaviour ([#163](https://github.com/opensearch-project/sql/pull/163)) +* Fix import path for cypress constant ([#201](https://github.com/opensearch-project/sql/pull/201)) + + +### Infrastructure + +* Add Integtest.sh for OpenSearch integtest setups (workbench) ([#157](https://github.com/opensearch-project/sql/pull/157)) +* Bump path-parse from 1.0.6 to 1.0.7 in /workbench ([#178](https://github.com/opensearch-project/sql/pull/178)) +* Use externally-defined OpenSearch version when specified. ([#179](https://github.com/opensearch-project/sql/pull/179)) +* Use OpenSearch 1.1 and build snapshot by default in CI. ([#181](https://github.com/opensearch-project/sql/pull/181)) +* Workbench: remove curl commands in integtest.sh ([#200](https://github.com/opensearch-project/sql/pull/200)) +* Bump opensearch ref to 1.1 in CI ([#205](https://github.com/opensearch-project/dashboards-reports/pull/205)) diff --git a/sql-cli/CONTRIBUTING.md b/sql-cli/CONTRIBUTING.md index a0c928b87c..231b2be86a 100644 --- a/sql-cli/CONTRIBUTING.md +++ b/sql-cli/CONTRIBUTING.md @@ -58,11 +58,11 @@ If you've thought of a way that OpenSearch could be better, we want to hear abou As with other types of contributions, the first step is to [**open an issue on GitHub**](https://github.com/opensearch-project/OpenSearch/issues/new/choose). Opening an issue before you make changes makes sure that someone else isn't already working on that particular problem. It also lets us all work together to find the right approach before you spend a bunch of time on a PR. So again, when in doubt, open an issue. -Once you've opened an issue, check out our [Developer Guide](./DEVELOPER_GUIDE.md) for instructions on how to get started. +Once you've opened an issue, check out our [Developer Guide](../DEVELOPER_GUIDE.rst) for instructions on how to get started. ## Developer Certificate of Origin -OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](./LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. +OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](../LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. We respect intellectual property rights of others and we want to make sure all incoming contributions are correctly attributed and licensed. A Developer Certificate of Origin (DCO) is a lightweight mechanism to do that. diff --git a/sql-cli/README.md b/sql-cli/README.md index 89f8ba5977..3e690c2a9c 100644 --- a/sql-cli/README.md +++ b/sql-cli/README.md @@ -123,7 +123,7 @@ Run single query from command line with options ## Code of Conduct -This project has adopted an [Open Source Code of Conduct](/CODE_OF_CONDUCT.md). +This project has adopted an [Open Source Code of Conduct](./CODE_OF_CONDUCT.md). @@ -133,7 +133,7 @@ If you discover a potential security issue in this project we ask that you notif ## Licensing -See the [LICENSE](/LICENSE.TXT) file for our project's licensing. We will ask you to confirm the licensing of your contribution. +See the [LICENSE](./LICENSE.TXT) file for our project's licensing. We will ask you to confirm the licensing of your contribution. diff --git a/sql-cli/src/opensearch_sql_cli/__init__.py b/sql-cli/src/opensearch_sql_cli/__init__.py index 2ffe495725..d4f6ce3d73 100644 --- a/sql-cli/src/opensearch_sql_cli/__init__.py +++ b/sql-cli/src/opensearch_sql_cli/__init__.py @@ -22,4 +22,4 @@ express or implied. See the License for the specific language governing permissions and limitations under the License. """ -__version__ = "1.0.0.0-rc1" +__version__ = "1.1.0.0" diff --git a/sql-jdbc/CONTRIBUTING.md b/sql-jdbc/CONTRIBUTING.md index 1461eb76c9..4fb91b5283 100644 --- a/sql-jdbc/CONTRIBUTING.md +++ b/sql-jdbc/CONTRIBUTING.md @@ -20,7 +20,7 @@ reported the issue. Please try to include as much information as you can. Detail * Anything unusual about your environment or deployment ## Sign your work -OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](./LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. +OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](../LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. We respect intellectual property rights of others and we want to make sure all incoming contributions are correctly attributed and licensed. A Developer Certificate of Origin (DCO) is a lightweight mechanism to do that. diff --git a/sql-jdbc/build.gradle b/sql-jdbc/build.gradle index 955a75e366..516d3f5175 100644 --- a/sql-jdbc/build.gradle +++ b/sql-jdbc/build.gradle @@ -43,7 +43,7 @@ plugins { group 'org.opensearch.client' // keep version in sync with version in Driver source -version '1.0.0.0-rc1' +version '1.1.0.0' boolean snapshot = "true".equals(System.getProperty("build.snapshot", "false")); if (snapshot) { diff --git a/sql-odbc/CONTRIBUTING.md b/sql-odbc/CONTRIBUTING.md index bd03ee53a7..c83884fa50 100644 --- a/sql-odbc/CONTRIBUTING.md +++ b/sql-odbc/CONTRIBUTING.md @@ -20,7 +20,7 @@ reported the issue. Please try to include as much information as you can. Detail * Anything unusual about your environment or deployment ## Sign your work -OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](./LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. +OpenSearch is an open source product released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](../LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. We respect intellectual property rights of others and we want to make sure all incoming contributions are correctly attributed and licensed. A Developer Certificate of Origin (DCO) is a lightweight mechanism to do that. diff --git a/sql-odbc/src/CMakeLists.txt b/sql-odbc/src/CMakeLists.txt index fd24ea9e8f..ae5db3b98c 100644 --- a/sql-odbc/src/CMakeLists.txt +++ b/sql-odbc/src/CMakeLists.txt @@ -87,8 +87,8 @@ set(INSTALL_SRC "${CMAKE_CURRENT_SOURCE_DIR}/installer") set(DSN_INSTALLER_SRC "${CMAKE_CURRENT_SOURCE_DIR}/DSNInstaller") # ODBC Driver version -set(DRIVER_PACKAGE_VERSION "1.0.0.0") -set(DRIVER_PACKAGE_VERSION_COMMA_SEPARATED "1,0,0,0") +set(DRIVER_PACKAGE_VERSION "1.1.0.0") +set(DRIVER_PACKAGE_VERSION_COMMA_SEPARATED "1,1,0,0") add_compile_definitions( OPENSEARCH_ODBC_VERSION="${DRIVER_PACKAGE_VERSION}" # Comma separated version is required for odbc administrator's driver file. OPENSEARCH_ODBC_DRVFILE_VERSION=${DRIVER_PACKAGE_VERSION_COMMA_SEPARATED} ) diff --git a/sql-odbc/src/TableauConnector/opensearch_sql_odbc/manifest.xml b/sql-odbc/src/TableauConnector/opensearch_sql_odbc/manifest.xml index 077e7d01d2..be2e73897c 100644 --- a/sql-odbc/src/TableauConnector/opensearch_sql_odbc/manifest.xml +++ b/sql-odbc/src/TableauConnector/opensearch_sql_odbc/manifest.xml @@ -1,6 +1,6 @@ - + diff --git a/sql-odbc/src/TableauConnector/opensearch_sql_odbc_dev/manifest.xml b/sql-odbc/src/TableauConnector/opensearch_sql_odbc_dev/manifest.xml index 0f470f3a8d..c59a0e74b0 100644 --- a/sql-odbc/src/TableauConnector/opensearch_sql_odbc_dev/manifest.xml +++ b/sql-odbc/src/TableauConnector/opensearch_sql_odbc_dev/manifest.xml @@ -1,6 +1,6 @@ - + diff --git a/sql-odbc/src/sqlodbc/opensearch_communication.cpp b/sql-odbc/src/sqlodbc/opensearch_communication.cpp index 66ca0e03a6..8fdfcae4f2 100644 --- a/sql-odbc/src/sqlodbc/opensearch_communication.cpp +++ b/sql-odbc/src/sqlodbc/opensearch_communication.cpp @@ -32,6 +32,8 @@ // clang-format off #include "opensearch_odbc.h" #include "mylog.h" +#include +#include #include #include #include @@ -121,6 +123,40 @@ static const std::string ERROR_RESPONSE_SCHEMA = R"EOF( } )EOF"; +namespace { + /** + * A helper class to initialize/shutdown AWS API once per DLL load/unload. + */ + class AwsSdkHelper { + public: + AwsSdkHelper() : + m_reference_count(0) { + } + + AwsSdkHelper& operator++() { + if (1 == ++m_reference_count) { + std::scoped_lock lock(m_mutex); + Aws::InitAPI(m_sdk_options); + } + return *this; + } + + AwsSdkHelper& operator--() { + if (0 == --m_reference_count) { + std::scoped_lock lock(m_mutex); + Aws::ShutdownAPI(m_sdk_options); + } + return *this; + } + + Aws::SDKOptions m_sdk_options; + std::atomic m_reference_count; + std::mutex m_mutex; + }; + + AwsSdkHelper AWS_SDK_HELPER; +} + void OpenSearchCommunication::AwsHttpResponseToString( std::shared_ptr< Aws::Http::HttpResponse > response, std::string& output) { // This function has some unconventional stream operations because we need @@ -237,13 +273,11 @@ OpenSearchCommunication::OpenSearchCommunication() #pragma clang diagnostic pop #endif // __APPLE__ { - LogMsg(OPENSEARCH_ALL, "Initializing Aws API."); - Aws::InitAPI(m_options); + ++AWS_SDK_HELPER; } OpenSearchCommunication::~OpenSearchCommunication() { - LogMsg(OPENSEARCH_ALL, "Shutting down Aws API."); - Aws::ShutdownAPI(m_options); + --AWS_SDK_HELPER; } std::string OpenSearchCommunication::GetErrorMessage() { diff --git a/sql-odbc/src/sqlodbc/opensearch_communication.h b/sql-odbc/src/sqlodbc/opensearch_communication.h index 6c141bfe08..4a328ece3b 100644 --- a/sql-odbc/src/sqlodbc/opensearch_communication.h +++ b/sql-odbc/src/sqlodbc/opensearch_communication.h @@ -116,7 +116,6 @@ class OpenSearchCommunication { OpenSearchResultQueue m_result_queue; runtime_options m_rt_opts; std::string m_client_encoding; - Aws::SDKOptions m_options; std::string m_response_str; std::shared_ptr< Aws::Http::HttpClient > m_http_client; std::string m_error_message_to_user; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 18c75b94ff..61d2f5990f 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -72,7 +72,7 @@ adminStatement ; showStatement - : SHOW TABLES tableFilter? + : SHOW TABLES tableFilter ; describeStatement @@ -336,8 +336,10 @@ caseFuncAlternative ; aggregateFunction - : functionName=aggregationFunctionName LR_BRACKET functionArg RR_BRACKET #regularAggregateFunctionCall - | COUNT LR_BRACKET STAR RR_BRACKET #countStarFunctionCall + : functionName=aggregationFunctionName LR_BRACKET functionArg RR_BRACKET + #regularAggregateFunctionCall + | COUNT LR_BRACKET STAR RR_BRACKET #countStarFunctionCall + | COUNT LR_BRACKET DISTINCT functionArg RR_BRACKET #distinctCountFunctionCall ; filterClause diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index b1630aed50..8dda63b750 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -43,6 +43,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.CountStarFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.DataTypeFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.DateLiteralContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.DistinctCountFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IsNullPredicateContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.LikePredicateContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MathExpressionAtomContext; @@ -171,7 +172,7 @@ public UnresolvedExpression visitShowDescribePattern( public UnresolvedExpression visitFilteredAggregationFunctionCall( OpenSearchSQLParser.FilteredAggregationFunctionCallContext ctx) { AggregateFunction agg = (AggregateFunction) visit(ctx.aggregateFunction()); - return new AggregateFunction(agg.getFuncName(), agg.getField(), visit(ctx.filterClause())); + return agg.condition(visit(ctx.filterClause())); } @Override @@ -212,6 +213,14 @@ public UnresolvedExpression visitRegularAggregateFunctionCall( visitFunctionArg(ctx.functionArg())); } + @Override + public UnresolvedExpression visitDistinctCountFunctionCall(DistinctCountFunctionCallContext ctx) { + return new AggregateFunction( + ctx.COUNT().getText(), + visitFunctionArg(ctx.functionArg()), + true); + } + @Override public UnresolvedExpression visitCountStarFunctionCall(CountStarFunctionCallContext ctx) { return new AggregateFunction("COUNT", AllFields.of()); diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 53de19a0fd..cf1ca36f02 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -160,4 +160,9 @@ public void canParseOrderByClause() { "SELECT name, age FROM test ORDER BY name ASC NULLS FIRST, age DESC NULLS LAST")); } + @Test + public void canNotParseShowStatementWithoutFilterClause() { + assertThrows(SyntaxCheckException.class, () -> parser.parse("SHOW TABLES")); + } + } diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstAggregationBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstAggregationBuilderTest.java index 1d9516f816..44c84495c2 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstAggregationBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstAggregationBuilderTest.java @@ -36,6 +36,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static org.opensearch.sql.ast.dsl.AstDSL.distinctAggregate; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; @@ -50,6 +51,7 @@ import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.UnresolvedPlan; @@ -167,6 +169,17 @@ void can_build_implicit_group_by_for_aggregator_in_having_clause() { alias("AVG(age)", aggregate("AVG", qualifiedName("age")))))); } + @Test + void can_build_distinct_aggregator() { + assertThat( + buildAggregation("SELECT COUNT(DISTINCT name) FROM test group by age"), + allOf( + hasGroupByItems(alias("age", qualifiedName("age"))), + hasAggregators( + alias("COUNT(DISTINCT name)", distinctAggregate("COUNT", qualifiedName( + "name")))))); + } + @Test void should_build_nothing_if_no_group_by_and_no_aggregators_in_select() { assertNull(buildAggregation("SELECT name FROM test")); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index e4e8028f05..e101eb9404 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -431,6 +431,23 @@ public void canBuildVariance() { buildExprAst("variance(age)")); } + @Test + public void distinctCount() { + assertEquals( + AstDSL.distinctAggregate("count", qualifiedName("name")), + buildExprAst("count(distinct name)") + ); + } + + @Test + public void filteredDistinctCount() { + assertEquals( + AstDSL.filteredDistinctCount("count", qualifiedName("name"), function( + ">", qualifiedName("age"), intLiteral(30))), + buildExprAst("count(distinct name) filter(where age > 30)") + ); + } + private Node buildExprAst(String expr) { OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(expr)); OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer)); diff --git a/workbench/.cypress/integration/ui.spec.js b/workbench/.cypress/integration/ui.spec.js index aa32ab1824..abb5ce1bdb 100644 --- a/workbench/.cypress/integration/ui.spec.js +++ b/workbench/.cypress/integration/ui.spec.js @@ -26,9 +26,32 @@ /// -import { edit } from "brace"; -import { delay, testQueries, verifyDownloadData, files } from "../utils/constants"; +import { edit } from 'brace'; +import { delay, files, testDataSet, testQueries, verifyDownloadData } from '../utils/constants'; +describe('Dump test data', () => { + it('Indexes test data for SQL and PPL', () => { + const dumpDataSet = (url, index) => + cy.request(url).then((response) => { + cy.request({ + method: 'POST', + form: true, + url: 'api/console/proxy', + headers: { + 'content-type': 'application/json;charset=UTF-8', + 'osd-xsrf': true, + }, + qs: { + path: `${index}/_bulk`, + method: 'POST', + }, + body: response.body, + }); + }); + + testDataSet.forEach(({url, index}) => dumpDataSet(url, index)); + }); +}); describe('Test PPL UI', () => { beforeEach(() => { @@ -183,13 +206,12 @@ describe('Test and verify SQL downloads', () => { 'osd-xsrf': true, }, body: { - 'query': 'select * from accounts where balance > 49500' - } + query: 'select * from accounts where balance > 49500', + }, }).then((response) => { if (title === 'Download and verify CSV' || title === 'Download and verify Text') { expect(response.body.data.body).to.have.string(files[file]); - } - else { + } else { expect(response.body.data.resp).to.have.string(files[file]); } }); diff --git a/workbench/.cypress/support/commands.js b/workbench/.cypress/support/commands.js index e4d90a3918..2525085abb 100644 --- a/workbench/.cypress/support/commands.js +++ b/workbench/.cypress/support/commands.js @@ -49,3 +49,44 @@ // // -- This will overwrite an existing command -- // Cypress.Commands.overwrite("visit", (originalFn, url, options) => { ... }) + +const { ADMIN_AUTH } = require('./constants'); + +Cypress.Commands.overwrite('visit', (originalFn, url, options) => { + // Add the basic auth header when security enabled in the OpenSearch cluster + // https://github.com/cypress-io/cypress/issues/1288 + if (Cypress.env('security_enabled')) { + if (options) { + options.auth = ADMIN_AUTH; + } else { + options = { auth: ADMIN_AUTH }; + } + // Add query parameters - select the default OpenSearch Dashboards tenant + options.qs = { security_tenant: 'private' }; + return originalFn(url, options); + } else { + return originalFn(url, options); + } +}); + +// Be able to add default options to cy.request(), https://github.com/cypress-io/cypress/issues/726 +Cypress.Commands.overwrite('request', (originalFn, ...args) => { + let defaults = {}; + // Add the basic authentication header when security enabled in the OpenSearch cluster + if (Cypress.env('security_enabled')) { + defaults.auth = ADMIN_AUTH; + } + + let options = {}; + if (typeof args[0] === 'object' && args[0] !== null) { + options = Object.assign({}, args[0]); + } else if (args.length === 1) { + [options.url] = args; + } else if (args.length === 2) { + [options.method, options.url] = args; + } else if (args.length === 3) { + [options.method, options.url, options.body] = args; + } + + return originalFn(Object.assign({}, defaults, options)); +}); diff --git a/workbench/.cypress/support/constants.js b/workbench/.cypress/support/constants.js new file mode 100644 index 0000000000..48ee4ff9ff --- /dev/null +++ b/workbench/.cypress/support/constants.js @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +export const ADMIN_AUTH = { + username: 'admin', + password: 'admin', +}; diff --git a/workbench/.cypress/support/index.js b/workbench/.cypress/support/index.js index a7af4e66b9..a3ce8c563c 100644 --- a/workbench/.cypress/support/index.js +++ b/workbench/.cypress/support/index.js @@ -44,3 +44,8 @@ import './commands'; // Alternatively you can use CommonJS syntax: // require('./commands') + +// Switch the base URL of OpenSearch when security enabled in the cluster +if (Cypress.env('security_enabled')) { + Cypress.env('opensearch', 'https://localhost:9200'); +} diff --git a/workbench/.cypress/tsconfig.json b/workbench/.cypress/tsconfig.json new file mode 100644 index 0000000000..36de33deef --- /dev/null +++ b/workbench/.cypress/tsconfig.json @@ -0,0 +1,8 @@ +{ + "compilerOptions": { + "allowJs": true, + "baseUrl": "../node_modules", + "types": ["cypress"] + }, + "include": ["**/*.*"] +} diff --git a/workbench/.cypress/utils/constants.js b/workbench/.cypress/utils/constants.js index cc5211a444..b83e780572 100644 --- a/workbench/.cypress/utils/constants.js +++ b/workbench/.cypress/utils/constants.js @@ -26,6 +26,17 @@ export const delay = 1000; +export const testDataSet = [ + { + url: 'https://raw.githubusercontent.com/opensearch-project/sql/main/integ-test/src/test/resources/accounts.json', + index: 'accounts', + }, + { + url: 'https://raw.githubusercontent.com/opensearch-project/sql/main/integ-test/src/test/resources/employee_nested.json', + index: 'employee_nested' + } +] + export const verifyDownloadData = [ { title: 'Download and verify JSON', diff --git a/workbench/README.md b/workbench/README.md index e8c50e6b58..de371a8479 100644 --- a/workbench/README.md +++ b/workbench/README.md @@ -28,7 +28,7 @@ If you discover a potential security issue in this project we ask that you notif ## License -This project is licensed under the [Apache v2.0 License](LICENSE.txt). +This project is licensed under the [Apache v2.0 License](../LICENSE.txt). ## Copyright diff --git a/workbench/cypress.json b/workbench/cypress.json index e5441904ad..53c4ba96d8 100644 --- a/workbench/cypress.json +++ b/workbench/cypress.json @@ -9,5 +9,10 @@ "videosFolder": ".cypress/videos", "requestTimeout": 60000, "responseTimeout": 60000, - "defaultCommandTimeout": 60000 + "defaultCommandTimeout": 60000, + "env": { + "opensearch": "localhost:9200", + "opensearchDashboards": "localhost:5601", + "security_enabled": true + } } diff --git a/workbench/opensearch_dashboards.json b/workbench/opensearch_dashboards.json index a873b67a9f..c1b6ab2677 100644 --- a/workbench/opensearch_dashboards.json +++ b/workbench/opensearch_dashboards.json @@ -1,7 +1,7 @@ { "id": "queryWorkbenchDashboards", - "version": "1.0.0.0-rc1", - "opensearchDashboardsVersion": "1.0.0-rc1", + "version": "1.1.0.0", + "opensearchDashboardsVersion": "1.1.0", "server": true, "ui": true, "requiredPlugins": ["navigation"], diff --git a/workbench/package.json b/workbench/package.json index 54d009704e..3d9af584a9 100644 --- a/workbench/package.json +++ b/workbench/package.json @@ -1,13 +1,13 @@ { "name": "opensearch-query-workbench", - "version": "1.0.0.0-rc1", + "version": "1.1.0.0", "description": "Query Workbench", "main": "index.js", "license": "Apache-2.0", "homepage": "https://github.com/opensearch-project/sql/tree/main/workbench", "opensearchDashboards": { - "version": "1.0.0-rc1", - "templateVersion": "1.0.0-rc1" + "version": "1.1.0", + "templateVersion": "1.0.0" }, "repository": { "type": "git", @@ -47,7 +47,7 @@ "tslint-plugin-prettier": "^2.0.1" }, "engines": { - "node": "10.23.1", + "node": "10.24.1", "yarn": "^1.21.1" }, "resolutions": { diff --git a/workbench/release-notes/sql-workbench.release-notes-1.7.0.0.md b/workbench/release-notes/sql-workbench.release-notes-1.7.0.0.md index c9976a8600..34c0ae444e 100644 --- a/workbench/release-notes/sql-workbench.release-notes-1.7.0.0.md +++ b/workbench/release-notes/sql-workbench.release-notes-1.7.0.0.md @@ -18,7 +18,7 @@ To use the SQL Workbench, you will need the [Open Distro SQL plugin](https://git - Updated configureation for v7.3 compatibility ([#7](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/7)) - Opendistro-1.3 compatible with ES and Kibana 7.3 ([#8](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/8)) - Changed kibana version to v7.3.2 ([#9](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/9)) -- Support v7.1.1 Compatibility ([#13](v13)) +- Support v7.1.1 Compatibility ([#13](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/13)) - Bump pr-branch to v1.3 ([#21](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/21)) - Support v7.4 compatibility for kibana and sql-plugin ([#23](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/23)) - Improve the performance by ridding of sending redundant requests ([#24](https://github.com/opendistro-for-elasticsearch/sql-workbench/pull/24)) diff --git a/workbench/yarn.lock b/workbench/yarn.lock index a342b782cc..7436adc700 100644 --- a/workbench/yarn.lock +++ b/workbench/yarn.lock @@ -2063,9 +2063,9 @@ path-key@^3.0.0, path-key@^3.1.0: integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-type@^4.0.0: version "4.0.0"