diff --git a/.github/workflows/draft-release-notes-workflow.yml b/.github/workflows/draft-release-notes-workflow.yml
index 5afb4ff55e..0c6190bce1 100644
--- a/.github/workflows/draft-release-notes-workflow.yml
+++ b/.github/workflows/draft-release-notes-workflow.yml
@@ -3,7 +3,7 @@ name: Release Drafter
on:
push:
branches:
- - develop
+ - main
jobs:
update_release_draft:
@@ -16,6 +16,6 @@ jobs:
with:
config-name: draft-release-notes-config.yml
tag: (None)
- version: 1.0.0.0-rc1
+ version: 1.1.0.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml
index be6f481978..10cab37f8a 100644
--- a/.github/workflows/link-checker.yml
+++ b/.github/workflows/link-checker.yml
@@ -16,7 +16,7 @@ jobs:
id: lychee
uses: lycheeverse/lychee-action@master
with:
- args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" --exclude "http://localhost*" "https://localhost" "https://odfe-node1:9200/" "https://community.tableau.com/docs/DOC-17978" ".*family.zzz" "https://pypi.python.org/pypi/opensearch-sql-cli/" "opensearch*" ".*@amazon.com" ".*email.com" "git@github.com" "http://timestamp.verisign.com/scripts/timstamp.dll"
+ args: --accept=200,403,429,999 "**/*.html" "**/*.md" "**/*.txt" --exclude "http://localhost*" "https://localhost" "https://odfe-node1:9200/" "https://community.tableau.com/docs/DOC-17978" ".*family.zzz" "https://pypi.python.org/pypi/opensearch-sql-cli/" "opensearch*" ".*@amazon.com" ".*email.com" "git@github.com" "http://timestamp.verisign.com/scripts/timstamp.dll"
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
- name: Fail if there were link errors
diff --git a/.github/workflows/sql-cli-release-workflow.yml b/.github/workflows/sql-cli-release-workflow.yml
index 2959df51b6..a7042bcd32 100644
--- a/.github/workflows/sql-cli-release-workflow.yml
+++ b/.github/workflows/sql-cli-release-workflow.yml
@@ -8,7 +8,7 @@ on:
jobs:
build:
- runs-on: [ubuntu-16.04]
+ runs-on: ubuntu-latest
defaults:
run:
working-directory: sql-cli
diff --git a/.github/workflows/sql-cli-test-and-build-workflow.yml b/.github/workflows/sql-cli-test-and-build-workflow.yml
index fd83a89373..876780a86c 100644
--- a/.github/workflows/sql-cli-test-and-build-workflow.yml
+++ b/.github/workflows/sql-cli-test-and-build-workflow.yml
@@ -5,7 +5,7 @@ on: [pull_request, push]
jobs:
build:
- runs-on: [ubuntu-16.04]
+ runs-on: ubuntu-latest
defaults:
run:
working-directory: sql-cli
diff --git a/.github/workflows/sql-jdbc-push-jdbc-maven.yml b/.github/workflows/sql-jdbc-push-jdbc-maven.yml
index 53f2d5d391..3c96447233 100644
--- a/.github/workflows/sql-jdbc-push-jdbc-maven.yml
+++ b/.github/workflows/sql-jdbc-push-jdbc-maven.yml
@@ -8,7 +8,7 @@ on:
jobs:
upload-jdbc-jar:
- runs-on: [ubuntu-16.04]
+ runs-on: ubuntu-latest
defaults:
run:
working-directory: sql-jdbc
diff --git a/.github/workflows/sql-odbc-release-workflow.yml b/.github/workflows/sql-odbc-release-workflow.yml
index c7901f58c8..6e471248a5 100644
--- a/.github/workflows/sql-odbc-release-workflow.yml
+++ b/.github/workflows/sql-odbc-release-workflow.yml
@@ -12,7 +12,7 @@ env:
ODBC_BUILD_PATH: "./build/odbc/build"
AWS_SDK_INSTALL_PATH: "./build/aws-sdk/install"
PLUGIN_NAME: opensearch-sql-odbc
- OD_VERSION: 1.0.0.0
+ OD_VERSION: 1.1.0.0
jobs:
build-mac:
diff --git a/.github/workflows/sql-odbc-rename-and-release-workflow.yml b/.github/workflows/sql-odbc-rename-and-release-workflow.yml
index 467bf99121..cf79e281ff 100644
--- a/.github/workflows/sql-odbc-rename-and-release-workflow.yml
+++ b/.github/workflows/sql-odbc-rename-and-release-workflow.yml
@@ -8,7 +8,7 @@ on:
- rename*
env:
- OD_VERSION: 1.0.0.0
+ OD_VERSION: 1.1.0.0
jobs:
upload-odbc:
diff --git a/.github/workflows/sql-release-workflow.yml b/.github/workflows/sql-release-workflow.yml
index bbfbd929a8..974f801d36 100644
--- a/.github/workflows/sql-release-workflow.yml
+++ b/.github/workflows/sql-release-workflow.yml
@@ -12,7 +12,7 @@ jobs:
java: [14]
name: Build and Release SQL Plugin
- runs-on: [ubuntu-16.04]
+ runs-on: ubuntu-latest
steps:
- name: Checkout SQL
diff --git a/.github/workflows/sql-test-and-build-workflow.yml b/.github/workflows/sql-test-and-build-workflow.yml
index 8ad294d853..5ae3f6aec4 100644
--- a/.github/workflows/sql-test-and-build-workflow.yml
+++ b/.github/workflows/sql-test-and-build-workflow.yml
@@ -21,14 +21,14 @@ jobs:
with:
repository: 'opensearch-project/OpenSearch'
path: OpenSearch
- ref: '1.0'
+ ref: '1.1'
- name: Build OpenSearch
working-directory: ./OpenSearch
- run: ./gradlew publishToMavenLocal -Dbuild.version_qualifier=rc1 -Dbuild.snapshot=false
+ run: ./gradlew publishToMavenLocal
- name: Build with Gradle
- run: ./gradlew build assemble
+ run: ./gradlew build assemble -Dopensearch.version=1.1.0-SNAPSHOT
- name: Create Artifact Path
run: |
diff --git a/.github/workflows/sql-workbench-release-workflow.yml b/.github/workflows/sql-workbench-release-workflow.yml
index b22f7649b3..0e8e712690 100644
--- a/.github/workflows/sql-workbench-release-workflow.yml
+++ b/.github/workflows/sql-workbench-release-workflow.yml
@@ -8,7 +8,7 @@ on:
env:
PLUGIN_NAME: query-workbench-dashboards
OPENSEARCH_VERSION: '1.0'
- OPENSEARCH_PLUGIN_VERSION: 1.0.0.0-rc1
+ OPENSEARCH_PLUGIN_VERSION: 1.0.0.0
jobs:
@@ -38,7 +38,7 @@ jobs:
- name: Setup Node
uses: actions/setup-node@v1
with:
- node-version: '10.23.1'
+ node-version: '10.24.1'
- name: Move Workbench to Plugins Dir
run: |
diff --git a/.github/workflows/sql-workbench-test-and-build-workflow.yml b/.github/workflows/sql-workbench-test-and-build-workflow.yml
index 05a6541e34..071f7dfb29 100644
--- a/.github/workflows/sql-workbench-test-and-build-workflow.yml
+++ b/.github/workflows/sql-workbench-test-and-build-workflow.yml
@@ -4,8 +4,8 @@ on: [pull_request, push]
env:
PLUGIN_NAME: query-workbench-dashboards
- OPENSEARCH_VERSION: '1.0'
- OPENSEARCH_PLUGIN_VERSION: 1.0.0.0-rc1
+ OPENSEARCH_VERSION: '1.x'
+ OPENSEARCH_PLUGIN_VERSION: 1.1.0.0
jobs:
@@ -27,7 +27,7 @@ jobs:
- name: Setup Node
uses: actions/setup-node@v1
with:
- node-version: '10.23.1'
+ node-version: '10.24.1'
- name: Move Workbench to Plugins Dir
run: |
diff --git a/README.md b/README.md
index bf76415459..e679c64ccc 100644
--- a/README.md
+++ b/README.md
@@ -5,13 +5,22 @@
[![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://discuss.opendistrocommunity.dev/c/sql/)
![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success)
-# OpenSearch SQL
+
+- [OpenSearch SQL](#opensearch-sql)
+- [Highlights](#highlights)
+- [Documentation](#documentation)
+- [Contributing](#contributing)
+- [Attribution](#attribution)
+- [Code of Conduct](#code-of-conduct)
+- [Security](#security)
+- [License](#license)
+- [Copyright](#copyright)
-OpenSearch enables you to extract insights out of OpenSearch using the familiar SQL query syntax. Use aggregations, group by, and where clauses to investigate your data. Read your data as JSON documents or CSV tables so you have the flexibility to use the format that works best for you.
+# OpenSearch SQL
-## SQL Related Projects
+OpenSearch enables you to extract insights out of OpenSearch using the familiar SQL or Piped Processing Language (PPL) query syntax. Use aggregations, group by, and where clauses to investigate your data. Read your data as JSON documents or CSV tables so you have the flexibility to use the format that works best for you.
The following projects have been merged into this repository as separate folders as of July 9, 2020. Please refer to links below for details. This document will focus on the SQL plugin for OpenSearch.
@@ -21,197 +30,39 @@ The following projects have been merged into this repository as separate folders
* [Query Workbench](https://github.com/opensearch-project/sql/tree/main/workbench)
-## Documentation
-
-Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Piped Processing Language (PPL) Reference Manual](./docs/user/ppl/index.rst) and [Technical Documentation](https://docs-beta.opensearch.org/) for detailed information on installing and configuring plugin. Looking to contribute? Read the instructions on [Developer Guide](./DEVELOPER_GUIDE.rst) and then submit a patch!
-
-## SQL Engine V2
-
-Recently we have been actively improving our query engine primarily for better correctness and extensibility. Behind the scene, the new enhanced engine has already supported the new released Piped Processing Language. However, it was experimental and disabled by default for SQL query processing. With most important features and full testing complete, now we're ready to promote it as our default SQL query engine. Please find more details in [SQL Engine V2 - Release Notes](/docs/dev/NewSQLEngine.md).
-
-
-## Setup
-
-Install as plugin: build plugin from source code by following the instruction in Build section and install it to your OpenSearch.
-
-After doing this, you need to restart the OpenSearch server. Otherwise you may get errors like `Invalid index name [sql], must not start with '']; ","status":400}`.
-
-
-## Build
-
-The package uses the [Gradle](https://docs.gradle.org/4.10.2/userguide/userguide.html) build system.
-
-1. Checkout this package from version control.
-2. To build from command line set `JAVA_HOME` to point to a JDK >=14
-3. Run `./gradlew build`
-
-
-## Basic Usage
-
-To use the feature, send requests to the `_plugins/_sql` URI. You can use a request parameter or the request body (recommended). Note that for backward compatibility, old `_opendistro/_sql` endpoint is still available, though any future API will be only accessible by new OpenSearch endpoint.
-
-* Simple query
-
-```
-POST https://:/_plugins/_sql
-{
- "query": "SELECT * FROM my-index LIMIT 50"
-}
-```
-
-* Explain SQL to OpenSearch query DSL
-```
-POST _plugins/_sql/_explain
-{
- "query": "SELECT * FROM my-index LIMIT 50"
-}
-```
-
-* For a sample curl command with the OpenSearch Security plugin, try:
-```
-curl -XPOST https://localhost:9200/_plugins/_sql -u admin:admin -k -d '{"query": "SELECT * FROM my-index LIMIT 10"}' -H 'Content-Type: application/json'
-```
-
-
-## SQL Usage
-
-* Query
-
- SELECT * FROM bank WHERE age >30 AND gender = 'm'
+## Highlights
-* Aggregation
+Besides basic filtering and aggregation, OpenSearch SQL also supports complex queries, such as querying semi-structured data, JOINs, set operations, sub-queries etc. Beyond the standard functions, OpenSearch functions are provided for better analytics and visualization. Please check our [documentation](#documentation) for more details.
- SELECT COUNT(*),SUM(age),MIN(age) as m, MAX(age),AVG(age)
- FROM bank
- GROUP BY gender
- HAVING m >= 20
- ORDER BY SUM(age), m DESC
+Recently we have been actively improving our query engine primarily for better correctness and extensibility. Behind the scene, the new enhanced engine has already supported both SQL and Piped Processing Language. Please find more details in [SQL Engine V2 - Release Notes](./docs/dev/NewSQLEngine.md).
-* Join
- SELECT b1.firstname, b1.lastname, b2.age
- FROM bank b1
- LEFT JOIN bank b2
- ON b1.age = b2.age AND b1.state = b2.state
-
-* Show
-
- SHOW TABLES LIKE ban%
- DESCRIBE TABLES LIKE bank
-
-* Delete
-
- DELETE FROM bank WHERE age >30 AND gender = 'm'
-
-
-## Beyond SQL
-
-* Search
-
- SELECT address FROM bank WHERE address = matchQuery('880 Holmes Lane') ORDER BY _score DESC LIMIT 3
-
-* Nested Field
-
- +
-
- SELECT address FROM bank b, b.nestedField e WHERE b.state = 'WA' and e.name = 'test'
-
- +
- SELECT address, nested(nestedField.name)
- FROM bank
- WHERE nested(nestedField, nestedField.state = 'WA' AND nestedField.name = 'test')
- OR nested(nestedField.state) = 'CA'
-
-* Aggregations
-
- + range age group 20-25,25-30,30-35,35-40
-
- SELECT COUNT(age) FROM bank GROUP BY range(age, 20,25,30,35,40)
-
- + range date group by day
-
- SELECT online FROM online GROUP BY date_histogram(field='insert_time','interval'='1d')
-
- + range date group by your config
-
- SELECT online FROM online GROUP BY date_range(field='insert_time','format'='yyyy-MM-dd' ,'2014-08-18','2014-08-17','now-8d','now-7d','now-6d','now')
-
-* OpenSearch Geographic
-
- SELECT * FROM locations WHERE GEO_BOUNDING_BOX(fieldname,100.0,1.0,101,0.0)
-
-* Select type or pattern
-
- SELECT * FROM indexName/type
- SELECT * FROM index*
-
-
-## SQL Features
+## Documentation
-* SQL Select
-* SQL Delete
-* SQL Where
-* SQL Order By
-* SQL Group By
-* SQL Having
-* SQL Inner Join
-* SQL Left Join
-* SQL Show
-* SQL Describe
-* SQL AND & OR
-* SQL Like
-* SQL COUNT distinct
-* SQL In
-* SQL Between
-* SQL Aliases
-* SQL Not Null
-* SQL(OpenSearch) Date
-* SQL avg()
-* SQL count()
-* SQL max()
-* SQL min()
-* SQL sum()
-* SQL Nulls
-* SQL isnull()
-* SQL floor
-* SQL trim
-* SQL log
-* SQL log10
-* SQL substring
-* SQL round
-* SQL sqrt
-* SQL concat_ws
-* SQL union and minus
+Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Piped Processing Language (PPL) Reference Manual](./docs/user/ppl/index.rst) and [Technical Documentation](https://docs-beta.opensearch.org/) for detailed information on installing and configuring plugin.
-## JDBC Support
-Please check out JDBC driver repository for more details.
+## Contributing
-## Beyond SQL features
+See [developer guide](DEVELOPER_GUIDE.rst) and [how to contribute to this project](CONTRIBUTING.md).
-* OpenSearch TopHits
-* OpenSearch MISSING
-* OpenSearch STATS
-* OpenSearch GEO_INTERSECTS
-* OpenSearch GEO_BOUNDING_BOX
-* OpenSearch GEO_DISTANCE
-* OpenSearch GEOHASH_GRID aggregation
## Attribution
This project is based on the Apache 2.0-licensed [elasticsearch-sql](https://github.com/NLPchina/elasticsearch-sql) project. Thank you [eliranmoyal](https://github.com/eliranmoyal), [shi-yuan](https://github.com/shi-yuan), [ansjsun](https://github.com/ansjsun) and everyone else who contributed great code to that project. Read this for more details [Attributions](./docs/attributions.md).
+
## Code of Conduct
This project has adopted an [Open Source Code of Conduct](./CODE_OF_CONDUCT.md).
-## Security issue notifications
+## Security
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
-## Licensing
+## License
See the [LICENSE](./LICENSE.txt) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
diff --git a/build.gradle b/build.gradle
index 1b69cf810b..fce02d546e 100644
--- a/build.gradle
+++ b/build.gradle
@@ -26,7 +26,7 @@
buildscript {
ext {
- opensearch_version = "1.0.0-rc1"
+ opensearch_version = System.getProperty("opensearch.version", "1.1.0-SNAPSHOT")
}
repositories {
@@ -55,12 +55,14 @@ repositories {
}
ext {
- opensearchVersion = '1.0.0'
isSnapshot = "true" == System.getProperty("build.snapshot", "true")
}
allprojects {
- version = "${opensearchVersion}.0-rc1"
+ version = opensearch_version - "-SNAPSHOT" + ".0"
+ if (isSnapshot) {
+ version += "-SNAPSHOT"
+ }
plugins.withId('java') {
sourceCompatibility = targetCompatibility = "1.8"
diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java
index d5c1538b77..933e68085a 100644
--- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java
+++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java
@@ -161,8 +161,9 @@ public Expression visitAggregateFunction(AggregateFunction node, AnalysisContext
Expression arg = node.getField().accept(this, context);
Aggregator aggregator = (Aggregator) repository.compile(
builtinFunctionName.get().getName(), Collections.singletonList(arg));
- if (node.getCondition() != null) {
- aggregator.condition(analyze(node.getCondition(), context));
+ aggregator.distinct(node.getDistinct());
+ if (node.condition() != null) {
+ aggregator.condition(analyze(node.condition(), context));
}
return aggregator;
} else {
diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
index 7400ae20e6..3b78483736 100644
--- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
+++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
@@ -211,7 +211,16 @@ public static UnresolvedExpression aggregate(
public static UnresolvedExpression filteredAggregate(
String func, UnresolvedExpression field, UnresolvedExpression condition) {
- return new AggregateFunction(func, field, condition);
+ return new AggregateFunction(func, field).condition(condition);
+ }
+
+ public static UnresolvedExpression distinctAggregate(String func, UnresolvedExpression field) {
+ return new AggregateFunction(func, field, true);
+ }
+
+ public static UnresolvedExpression filteredDistinctCount(
+ String func, UnresolvedExpression field, UnresolvedExpression condition) {
+ return new AggregateFunction(func, field, true).condition(condition);
}
public static Function function(String funcName, UnresolvedExpression... funcArgs) {
diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java
index 8753e35ed9..e909c46ee7 100644
--- a/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java
+++ b/core/src/main/java/org/opensearch/sql/ast/expression/AggregateFunction.java
@@ -28,9 +28,12 @@
import java.util.Collections;
import java.util.List;
+import javax.annotation.Nullable;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
+import lombok.Setter;
+import lombok.experimental.Accessors;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.common.utils.StringUtils;
@@ -45,7 +48,10 @@ public class AggregateFunction extends UnresolvedExpression {
private final String funcName;
private final UnresolvedExpression field;
private final List argList;
+ @Setter
+ @Accessors(fluent = true)
private UnresolvedExpression condition;
+ private Boolean distinct = false;
/**
* Constructor.
@@ -62,14 +68,13 @@ public AggregateFunction(String funcName, UnresolvedExpression field) {
* Constructor.
* @param funcName function name.
* @param field {@link UnresolvedExpression}.
- * @param condition condition in aggregation filter.
+ * @param distinct whether distinct field is specified or not.
*/
- public AggregateFunction(String funcName, UnresolvedExpression field,
- UnresolvedExpression condition) {
+ public AggregateFunction(String funcName, UnresolvedExpression field, Boolean distinct) {
this.funcName = funcName;
this.field = field;
this.argList = Collections.emptyList();
- this.condition = condition;
+ this.distinct = distinct;
}
@Override
diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java
index 382ef325ff..099bd23a58 100644
--- a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java
+++ b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java
@@ -29,11 +29,14 @@
package org.opensearch.sql.ast.expression;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BOOLEAN;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BYTE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DATE;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DATETIME;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DOUBLE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_FLOAT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_INT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIME;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIMESTAMP;
@@ -49,6 +52,7 @@
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.Node;
+import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.function.FunctionName;
/**
@@ -60,9 +64,11 @@
@ToString
public class Cast extends UnresolvedExpression {
- private static Map CONVERTED_TYPE_FUNCTION_NAME_MAP =
+ private static final Map CONVERTED_TYPE_FUNCTION_NAME_MAP =
new ImmutableMap.Builder()
.put("string", CAST_TO_STRING.getName())
+ .put("byte", CAST_TO_BYTE.getName())
+ .put("short", CAST_TO_SHORT.getName())
.put("int", CAST_TO_INT.getName())
.put("integer", CAST_TO_INT.getName())
.put("long", CAST_TO_LONG.getName())
@@ -72,6 +78,7 @@ public class Cast extends UnresolvedExpression {
.put("date", CAST_TO_DATE.getName())
.put("time", CAST_TO_TIME.getName())
.put("timestamp", CAST_TO_TIMESTAMP.getName())
+ .put("datetime", CAST_TO_DATETIME.getName())
.build();
/**
@@ -84,6 +91,25 @@ public class Cast extends UnresolvedExpression {
*/
private final UnresolvedExpression convertedType;
+ /**
+ * Check if the given function name is a cast function or not.
+ * @param name function name
+ * @return true if cast function, otherwise false.
+ */
+ public static boolean isCastFunction(FunctionName name) {
+ return CONVERTED_TYPE_FUNCTION_NAME_MAP.containsValue(name);
+ }
+
+ /**
+ * Get the cast function name for a given target data type.
+ * @param targetType target data type
+ * @return cast function name corresponding
+ */
+ public static FunctionName getCastFunctionName(ExprType targetType) {
+ String type = targetType.typeName().toLowerCase(Locale.ROOT);
+ return CONVERTED_TYPE_FUNCTION_NAME_MAP.get(type);
+ }
+
/**
* Get the converted type.
*
diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java
index e2c5fb6a39..b2172e54f1 100644
--- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java
+++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java
@@ -157,6 +157,14 @@ public static ExprValue fromObjectValue(Object o, ExprCoreType type) {
}
}
+ public static Byte getByteValue(ExprValue exprValue) {
+ return exprValue.byteValue();
+ }
+
+ public static Short getShortValue(ExprValue exprValue) {
+ return exprValue.shortValue();
+ }
+
public static Integer getIntegerValue(ExprValue exprValue) {
return exprValue.integerValue();
}
diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java
index 3b37cfbf31..4fa023bd06 100644
--- a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java
+++ b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java
@@ -28,12 +28,13 @@
package org.opensearch.sql.data.type;
-import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -62,25 +63,24 @@ public enum ExprCoreType implements ExprType {
FLOAT(LONG),
DOUBLE(FLOAT),
- /**
- * Boolean.
- */
- BOOLEAN(UNDEFINED),
-
/**
* String.
*/
STRING(UNDEFINED),
+ /**
+ * Boolean.
+ */
+ BOOLEAN(STRING),
/**
* Date.
* Todo. compatible relationship.
*/
- TIMESTAMP(UNDEFINED),
- DATE(UNDEFINED),
- TIME(UNDEFINED),
- DATETIME(UNDEFINED),
+ TIMESTAMP(STRING),
+ DATE(STRING),
+ TIME(STRING),
+ DATETIME(STRING),
INTERVAL(UNDEFINED),
/**
@@ -108,6 +108,16 @@ public enum ExprCoreType implements ExprType {
.put(STRING, "keyword")
.build();
+ private static final Set NUMBER_TYPES =
+ new ImmutableSet.Builder()
+ .add(BYTE)
+ .add(SHORT)
+ .add(INTEGER)
+ .add(LONG)
+ .add(FLOAT)
+ .add(DOUBLE)
+ .build();
+
ExprCoreType(ExprCoreType... compatibleTypes) {
for (ExprCoreType subType : compatibleTypes) {
subType.parents.add(this);
@@ -139,7 +149,7 @@ public static List coreTypes() {
.collect(Collectors.toList());
}
- public static List numberTypes() {
- return ImmutableList.of(INTEGER, LONG, FLOAT, DOUBLE);
+ public static Set numberTypes() {
+ return NUMBER_TYPES;
}
}
diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java
index a26f758b20..97c46ca4e5 100644
--- a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java
+++ b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java
@@ -58,6 +58,16 @@ default boolean isCompatible(ExprType other) {
}
}
+ /**
+ * Should cast this type to other type or not. By default, cast is always required
+ * if the given type is different from this type.
+ * @param other other data type
+ * @return true if cast is required, otherwise false
+ */
+ default boolean shouldCast(ExprType other) {
+ return !this.equals(other);
+ }
+
/**
* Get the parent type.
*/
diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java
index 560414592c..af51d0898a 100644
--- a/core/src/main/java/org/opensearch/sql/expression/DSL.java
+++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java
@@ -500,6 +500,10 @@ public Aggregator count(Expression... expressions) {
return aggregate(BuiltinFunctionName.COUNT, expressions);
}
+ public Aggregator distinctCount(Expression... expressions) {
+ return count(expressions).distinct(true);
+ }
+
public Aggregator varSamp(Expression... expressions) {
return aggregate(BuiltinFunctionName.VARSAMP, expressions);
}
@@ -592,6 +596,16 @@ public FunctionExpression castString(Expression value) {
.compile(BuiltinFunctionName.CAST_TO_STRING.getName(), Arrays.asList(value));
}
+ public FunctionExpression castByte(Expression value) {
+ return (FunctionExpression) repository
+ .compile(BuiltinFunctionName.CAST_TO_BYTE.getName(), Arrays.asList(value));
+ }
+
+ public FunctionExpression castShort(Expression value) {
+ return (FunctionExpression) repository
+ .compile(BuiltinFunctionName.CAST_TO_SHORT.getName(), Arrays.asList(value));
+ }
+
public FunctionExpression castInt(Expression value) {
return (FunctionExpression) repository
.compile(BuiltinFunctionName.CAST_TO_INT.getName(), Arrays.asList(value));
@@ -631,4 +645,9 @@ public FunctionExpression castTimestamp(Expression value) {
return (FunctionExpression) repository
.compile(BuiltinFunctionName.CAST_TO_TIMESTAMP.getName(), Arrays.asList(value));
}
+
+ public FunctionExpression castDatetime(Expression value) {
+ return (FunctionExpression) repository
+ .compile(BuiltinFunctionName.CAST_TO_DATETIME.getName(), Arrays.asList(value));
+ }
}
diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java
index 80944172ea..5328e11aad 100644
--- a/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java
+++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/Aggregator.java
@@ -64,6 +64,10 @@ public abstract class Aggregator
@Getter
@Accessors(fluent = true)
protected Expression condition;
+ @Setter
+ @Getter
+ @Accessors(fluent = true)
+ protected Boolean distinct = false;
/**
* Create an {@link AggregationState} which will be used for aggregation.
diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java
index 3195bf3941..f1bd088967 100644
--- a/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java
+++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/CountAggregator.java
@@ -28,8 +28,10 @@
import static org.opensearch.sql.utils.ExpressionUtils.format;
+import java.util.HashSet;
import java.util.List;
import java.util.Locale;
+import java.util.Set;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
@@ -45,33 +47,51 @@ public CountAggregator(List arguments, ExprCoreType returnType) {
@Override
public CountAggregator.CountState create() {
- return new CountState();
+ return distinct ? new DistinctCountState() : new CountState();
}
@Override
protected CountState iterate(ExprValue value, CountState state) {
- state.count++;
+ state.count(value);
return state;
}
@Override
public String toString() {
- return String.format(Locale.ROOT, "count(%s)", format(getArguments()));
+ return distinct
+ ? String.format(Locale.ROOT, "count(distinct %s)", format(getArguments()))
+ : String.format(Locale.ROOT, "count(%s)", format(getArguments()));
}
/**
* Count State.
*/
protected static class CountState implements AggregationState {
- private int count;
+ protected int count;
CountState() {
this.count = 0;
}
+ public void count(ExprValue value) {
+ count++;
+ }
+
@Override
public ExprValue result() {
return ExprValueUtils.integerValue(count);
}
}
+
+ protected static class DistinctCountState extends CountState {
+ private final Set distinctValues = new HashSet<>();
+
+ @Override
+ public void count(ExprValue value) {
+ if (!distinctValues.contains(value)) {
+ distinctValues.add(value);
+ count++;
+ }
+ }
+ }
}
diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java
index 346bd2d28c..92176e8648 100644
--- a/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java
+++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/NamedAggregator.java
@@ -54,8 +54,8 @@ public class NamedAggregator extends Aggregator {
/**
* NamedAggregator.
- * The aggregator properties {@link #condition} is inherited by named aggregator
- * to avoid errors introduced by the property inconsistency.
+ * The aggregator properties {@link #condition} and {@link #distinct}
+ * are inherited by named aggregator to avoid errors introduced by the property inconsistency.
*
* @param name name
* @param delegated delegated
@@ -67,6 +67,7 @@ public NamedAggregator(
this.name = name;
this.delegated = delegated;
this.condition = delegated.condition;
+ this.distinct = delegated.distinct;
}
@Override
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
index 24e65d4b5d..cd66825567 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
@@ -177,6 +177,8 @@ public enum BuiltinFunctionName {
* Data Type Convert Function.
*/
CAST_TO_STRING(FunctionName.of("cast_to_string")),
+ CAST_TO_BYTE(FunctionName.of("cast_to_byte")),
+ CAST_TO_SHORT(FunctionName.of("cast_to_short")),
CAST_TO_INT(FunctionName.of("cast_to_int")),
CAST_TO_LONG(FunctionName.of("cast_to_long")),
CAST_TO_FLOAT(FunctionName.of("cast_to_float")),
@@ -184,7 +186,8 @@ public enum BuiltinFunctionName {
CAST_TO_BOOLEAN(FunctionName.of("cast_to_boolean")),
CAST_TO_DATE(FunctionName.of("cast_to_date")),
CAST_TO_TIME(FunctionName.of("cast_to_time")),
- CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp"));
+ CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")),
+ CAST_TO_DATETIME(FunctionName.of("cast_to_datetime"));
private final FunctionName name;
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java
index ebb432d7f0..3898af6682 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java
@@ -11,10 +11,19 @@
package org.opensearch.sql.expression.function;
+import static org.opensearch.sql.ast.expression.Cast.getCastFunctionName;
+import static org.opensearch.sql.ast.expression.Cast.isCastFunction;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
+import org.apache.commons.lang3.tuple.Pair;
+import org.opensearch.sql.common.utils.StringUtils;
+import org.opensearch.sql.data.type.ExprCoreType;
+import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.ExpressionEvaluationException;
import org.opensearch.sql.expression.Expression;
@@ -47,15 +56,70 @@ public FunctionImplementation compile(FunctionName functionName, List resolvedSignature =
+ functionResolverMap.get(functionName).resolve(functionSignature);
+
+ List sourceTypes = functionSignature.getParamTypeList();
+ List targetTypes = resolvedSignature.getKey().getParamTypeList();
+ FunctionBuilder funcBuilder = resolvedSignature.getValue();
+ if (isCastFunction(functionName) || sourceTypes.equals(targetTypes)) {
+ return funcBuilder;
+ }
+ return castArguments(sourceTypes, targetTypes, funcBuilder);
} else {
throw new ExpressionEvaluationException(
String.format("unsupported function name: %s", functionName.getFunctionName()));
}
}
+
+ /**
+ * Wrap resolved function builder's arguments by cast function to cast input expression value
+ * to value of target type at runtime. For example, suppose unresolved signature is
+ * equal(BOOL,STRING) and its resolved function builder is F with signature equal(BOOL,BOOL).
+ * In this case, wrap F and return equal(BOOL, cast_to_bool(STRING)).
+ */
+ private FunctionBuilder castArguments(List sourceTypes,
+ List targetTypes,
+ FunctionBuilder funcBuilder) {
+ return arguments -> {
+ List argsCasted = new ArrayList<>();
+ for (int i = 0; i < arguments.size(); i++) {
+ Expression arg = arguments.get(i);
+ ExprType sourceType = sourceTypes.get(i);
+ ExprType targetType = targetTypes.get(i);
+
+ if (isCastRequired(sourceType, targetType)) {
+ argsCasted.add(cast(arg, targetType));
+ } else {
+ argsCasted.add(arg);
+ }
+ }
+ return funcBuilder.apply(argsCasted);
+ };
+ }
+
+ private boolean isCastRequired(ExprType sourceType, ExprType targetType) {
+ // TODO: Remove this special case after fixing all failed UTs
+ if (ExprCoreType.numberTypes().contains(sourceType)
+ && ExprCoreType.numberTypes().contains(targetType)) {
+ return false;
+ }
+ return sourceType.shouldCast(targetType);
+ }
+
+ private Expression cast(Expression arg, ExprType targetType) {
+ FunctionName castFunctionName = getCastFunctionName(targetType);
+ if (castFunctionName == null) {
+ throw new ExpressionEvaluationException(StringUtils.format(
+ "Type conversion to type %s is not supported", targetType));
+ }
+ return (Expression) compile(castFunctionName, ImmutableList.of(arg));
+ }
+
}
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java b/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java
index d9d01be891..5bd63015a5 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/FunctionResolver.java
@@ -20,6 +20,7 @@
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Singular;
+import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.exception.ExpressionEvaluationException;
/**
@@ -41,8 +42,10 @@ public class FunctionResolver {
* If the {@link FunctionBuilder} exactly match the input {@link FunctionSignature}, return it.
* If applying the widening rule, found the most match one, return it.
* If nothing found, throw {@link ExpressionEvaluationException}
+ *
+ * @return function signature and its builder
*/
- public FunctionBuilder resolve(FunctionSignature unresolvedSignature) {
+ public Pair resolve(FunctionSignature unresolvedSignature) {
PriorityQueue> functionMatchQueue = new PriorityQueue<>(
Map.Entry.comparingByKey());
@@ -59,7 +62,8 @@ public FunctionBuilder resolve(FunctionSignature unresolvedSignature) {
unresolvedSignature.formatTypes()
));
} else {
- return functionBundle.get(bestMatchEntry.getValue());
+ FunctionSignature resolvedSignature = bestMatchEntry.getValue();
+ return Pair.of(resolvedSignature, functionBundle.get(resolvedSignature));
}
}
diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java
index df6b6f935f..c6a84985a0 100644
--- a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java
+++ b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java
@@ -48,11 +48,14 @@
import java.util.stream.Stream;
import lombok.experimental.UtilityClass;
import org.opensearch.sql.data.model.ExprBooleanValue;
+import org.opensearch.sql.data.model.ExprByteValue;
import org.opensearch.sql.data.model.ExprDateValue;
+import org.opensearch.sql.data.model.ExprDatetimeValue;
import org.opensearch.sql.data.model.ExprDoubleValue;
import org.opensearch.sql.data.model.ExprFloatValue;
import org.opensearch.sql.data.model.ExprIntegerValue;
import org.opensearch.sql.data.model.ExprLongValue;
+import org.opensearch.sql.data.model.ExprShortValue;
import org.opensearch.sql.data.model.ExprStringValue;
import org.opensearch.sql.data.model.ExprTimeValue;
import org.opensearch.sql.data.model.ExprTimestampValue;
@@ -68,6 +71,8 @@ public class TypeCastOperator {
*/
public static void register(BuiltinFunctionRepository repository) {
repository.register(castToString());
+ repository.register(castToByte());
+ repository.register(castToShort());
repository.register(castToInt());
repository.register(castToLong());
repository.register(castToFloat());
@@ -76,6 +81,7 @@ public static void register(BuiltinFunctionRepository repository) {
repository.register(castToDate());
repository.register(castToTime());
repository.register(castToTimestamp());
+ repository.register(castToDatetime());
}
@@ -92,6 +98,28 @@ private static FunctionResolver castToString() {
);
}
+ private static FunctionResolver castToByte() {
+ return FunctionDSL.define(BuiltinFunctionName.CAST_TO_BYTE.getName(),
+ impl(nullMissingHandling(
+ (v) -> new ExprByteValue(Short.valueOf(v.stringValue()))), BYTE, STRING),
+ impl(nullMissingHandling(
+ (v) -> new ExprByteValue(v.shortValue())), BYTE, DOUBLE),
+ impl(nullMissingHandling(
+ (v) -> new ExprByteValue(v.booleanValue() ? 1 : 0)), BYTE, BOOLEAN)
+ );
+ }
+
+ private static FunctionResolver castToShort() {
+ return FunctionDSL.define(BuiltinFunctionName.CAST_TO_SHORT.getName(),
+ impl(nullMissingHandling(
+ (v) -> new ExprShortValue(Short.valueOf(v.stringValue()))), SHORT, STRING),
+ impl(nullMissingHandling(
+ (v) -> new ExprShortValue(v.shortValue())), SHORT, DOUBLE),
+ impl(nullMissingHandling(
+ (v) -> new ExprShortValue(v.booleanValue() ? 1 : 0)), SHORT, BOOLEAN)
+ );
+ }
+
private static FunctionResolver castToInt() {
return FunctionDSL.define(BuiltinFunctionName.CAST_TO_INT.getName(),
impl(nullMissingHandling(
@@ -179,4 +207,15 @@ private static FunctionResolver castToTimestamp() {
impl(nullMissingHandling((v) -> v), TIMESTAMP, TIMESTAMP)
);
}
+
+ private static FunctionResolver castToDatetime() {
+ return FunctionDSL.define(BuiltinFunctionName.CAST_TO_DATETIME.getName(),
+ impl(nullMissingHandling(
+ (v) -> new ExprDatetimeValue(v.stringValue())), DATETIME, STRING),
+ impl(nullMissingHandling(
+ (v) -> new ExprDatetimeValue(v.datetimeValue())), DATETIME, TIMESTAMP),
+ impl(nullMissingHandling(
+ (v) -> new ExprDatetimeValue(v.datetimeValue())), DATETIME, DATE)
+ );
+ }
}
diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java
index 8cb7288273..b0b1e7e773 100644
--- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java
+++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java
@@ -160,7 +160,7 @@ public void castAnalyzer() {
);
assertThrows(IllegalStateException.class, () -> analyze(AstDSL.cast(AstDSL.unresolvedAttr(
- "boolean_value"), AstDSL.stringLiteral("DATETIME"))));
+ "boolean_value"), AstDSL.stringLiteral("INTERVAL"))));
}
@Test
@@ -300,6 +300,24 @@ public void variance_mapto_varPop() {
);
}
+ @Test
+ public void distinct_count() {
+ assertAnalyzeEqual(
+ dsl.distinctCount(DSL.ref("integer_value", INTEGER)),
+ AstDSL.distinctAggregate("count", qualifiedName("integer_value"))
+ );
+ }
+
+ @Test
+ public void filtered_distinct_count() {
+ assertAnalyzeEqual(
+ dsl.distinctCount(DSL.ref("integer_value", INTEGER))
+ .condition(dsl.greater(DSL.ref("integer_value", INTEGER), DSL.literal(1))),
+ AstDSL.filteredDistinctCount("count", qualifiedName("integer_value"), function(
+ ">", qualifiedName("integer_value"), intLiteral(1)))
+ );
+ }
+
protected Expression analyze(UnresolvedExpression unresolvedExpression) {
return expressionAnalyzer.analyze(unresolvedExpression, analysisContext);
}
diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java
index a27d90f35d..af2dbf22fc 100644
--- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java
+++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java
@@ -96,8 +96,8 @@ public class ExprValueUtilsTest {
Lists.newArrayList(Iterables.concat(numberValues, nonNumberValues));
private static List> numberValueExtractor = Arrays.asList(
- ExprValue::byteValue,
- ExprValue::shortValue,
+ ExprValueUtils::getByteValue,
+ ExprValueUtils::getShortValue,
ExprValueUtils::getIntegerValue,
ExprValueUtils::getLongValue,
ExprValueUtils::getFloatValue,
diff --git a/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java b/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java
index 0dc8b8f4cf..dc63c7d224 100644
--- a/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java
+++ b/core/src/test/java/org/opensearch/sql/data/type/ExprTypeTest.java
@@ -33,6 +33,9 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.opensearch.sql.data.type.ExprCoreType.ARRAY;
+import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
+import static org.opensearch.sql.data.type.ExprCoreType.DATE;
+import static org.opensearch.sql.data.type.ExprCoreType.DATETIME;
import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE;
import static org.opensearch.sql.data.type.ExprCoreType.FLOAT;
import static org.opensearch.sql.data.type.ExprCoreType.INTEGER;
@@ -40,6 +43,8 @@
import static org.opensearch.sql.data.type.ExprCoreType.SHORT;
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
+import static org.opensearch.sql.data.type.ExprCoreType.TIME;
+import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED;
import static org.opensearch.sql.data.type.ExprCoreType.UNKNOWN;
@@ -58,6 +63,16 @@ public void isCompatible() {
assertTrue(FLOAT.isCompatible(LONG));
assertTrue(FLOAT.isCompatible(INTEGER));
assertTrue(FLOAT.isCompatible(SHORT));
+
+ assertTrue(BOOLEAN.isCompatible(STRING));
+ assertTrue(TIMESTAMP.isCompatible(STRING));
+ assertTrue(DATE.isCompatible(STRING));
+ assertTrue(TIME.isCompatible(STRING));
+ assertTrue(DATETIME.isCompatible(STRING));
+ }
+
+ @Test
+ public void isNotCompatible() {
assertFalse(INTEGER.isCompatible(DOUBLE));
assertFalse(STRING.isCompatible(DOUBLE));
assertFalse(INTEGER.isCompatible(UNKNOWN));
@@ -69,6 +84,13 @@ public void isCompatibleWithUndefined() {
ExprCoreType.coreTypes().forEach(type -> assertFalse(UNDEFINED.isCompatible(type)));
}
+ @Test
+ public void shouldCast() {
+ assertTrue(UNDEFINED.shouldCast(STRING));
+ assertTrue(STRING.shouldCast(BOOLEAN));
+ assertFalse(STRING.shouldCast(STRING));
+ }
+
@Test
public void getParent() {
assertThat(((ExprType) () -> "test").getParent(), Matchers.contains(UNKNOWN));
diff --git a/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java b/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java
index cc2825858a..1db33ac9d5 100644
--- a/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java
@@ -116,6 +116,29 @@ public class AggregationTest extends ExpressionTestBase {
"timestamp_value",
"2040-01-01 07:00:00")));
+ protected static List tuples_with_duplicates =
+ Arrays.asList(
+ ExprValueUtils.tupleValue(ImmutableMap.of(
+ "integer_value", 1,
+ "double_value", 4d,
+ "struct_value", ImmutableMap.of("str", 1),
+ "array_value", ImmutableList.of(1))),
+ ExprValueUtils.tupleValue(ImmutableMap.of(
+ "integer_value", 1,
+ "double_value", 3d,
+ "struct_value", ImmutableMap.of("str", 1),
+ "array_value", ImmutableList.of(1))),
+ ExprValueUtils.tupleValue(ImmutableMap.of(
+ "integer_value", 2,
+ "double_value", 2d,
+ "struct_value", ImmutableMap.of("str", 2),
+ "array_value", ImmutableList.of(2))),
+ ExprValueUtils.tupleValue(ImmutableMap.of(
+ "integer_value", 3,
+ "double_value", 1d,
+ "struct_value", ImmutableMap.of("str1", 1),
+ "array_value", ImmutableList.of(1, 2))));
+
protected static List tuples_with_null_and_missing =
Arrays.asList(
ExprValueUtils.tupleValue(
diff --git a/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java b/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java
index 0fdadfc692..ee183dafce 100644
--- a/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/aggregation/CountAggregatorTest.java
@@ -129,6 +129,35 @@ public void filtered_count() {
assertEquals(3, result.value());
}
+ @Test
+ public void distinct_count() {
+ ExprValue result = aggregation(dsl.distinctCount(DSL.ref("integer_value", INTEGER)),
+ tuples_with_duplicates);
+ assertEquals(3, result.value());
+ }
+
+ @Test
+ public void filtered_distinct_count() {
+ ExprValue result = aggregation(dsl.distinctCount(DSL.ref("integer_value", INTEGER))
+ .condition(dsl.greater(DSL.ref("double_value", DOUBLE), DSL.literal(1d))),
+ tuples_with_duplicates);
+ assertEquals(2, result.value());
+ }
+
+ @Test
+ public void distinct_count_map() {
+ ExprValue result = aggregation(dsl.distinctCount(DSL.ref("struct_value", STRUCT)),
+ tuples_with_duplicates);
+ assertEquals(3, result.value());
+ }
+
+ @Test
+ public void distinct_count_array() {
+ ExprValue result = aggregation(dsl.distinctCount(DSL.ref("array_value", ARRAY)),
+ tuples_with_duplicates);
+ assertEquals(3, result.value());
+ }
+
@Test
public void count_with_missing() {
ExprValue result = aggregation(dsl.count(DSL.ref("integer_value", INTEGER)),
@@ -166,6 +195,9 @@ public void valueOf() {
public void test_to_string() {
Aggregator countAggregator = dsl.count(DSL.ref("integer_value", INTEGER));
assertEquals("count(integer_value)", countAggregator.toString());
+
+ countAggregator = dsl.distinctCount(DSL.ref("integer_value", INTEGER));
+ assertEquals("count(distinct integer_value)", countAggregator.toString());
}
@Test
diff --git a/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java b/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java
index 6f8b3600ea..d6b372a12a 100644
--- a/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/function/BuiltinFunctionRepositoryTest.java
@@ -29,20 +29,39 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.lenient;
+import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
+import static org.opensearch.sql.data.type.ExprCoreType.BYTE;
+import static org.opensearch.sql.data.type.ExprCoreType.DATETIME;
+import static org.opensearch.sql.data.type.ExprCoreType.INTEGER;
+import static org.opensearch.sql.data.type.ExprCoreType.STRING;
+import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
+import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_BOOLEAN;
+import com.google.common.collect.ImmutableList;
import java.util.Arrays;
+import java.util.List;
import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
+import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
+import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.ExpressionEvaluationException;
import org.opensearch.sql.expression.Expression;
+import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.env.Environment;
@ExtendWith(MockitoExtension.class)
@@ -62,6 +81,13 @@ class BuiltinFunctionRepositoryTest {
@Mock
private Environment emptyEnv;
+ private BuiltinFunctionRepository repo;
+
+ @BeforeEach
+ void setUp() {
+ repo = new BuiltinFunctionRepository(mockMap);
+ }
+
@Test
void register() {
BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap);
@@ -73,8 +99,11 @@ void register() {
@Test
void compile() {
+ when(mockExpression.type()).thenReturn(UNDEFINED);
+ when(functionSignature.getParamTypeList()).thenReturn(Arrays.asList(UNDEFINED));
when(mockfunctionResolver.getFunctionName()).thenReturn(mockFunctionName);
- when(mockfunctionResolver.resolve(any())).thenReturn(functionExpressionBuilder);
+ when(mockfunctionResolver.resolve(any())).thenReturn(
+ Pair.of(functionSignature, functionExpressionBuilder));
when(mockMap.containsKey(any())).thenReturn(true);
when(mockMap.get(any())).thenReturn(mockfunctionResolver);
BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap);
@@ -89,7 +118,8 @@ void compile() {
void resolve() {
when(functionSignature.getFunctionName()).thenReturn(mockFunctionName);
when(mockfunctionResolver.getFunctionName()).thenReturn(mockFunctionName);
- when(mockfunctionResolver.resolve(functionSignature)).thenReturn(functionExpressionBuilder);
+ when(mockfunctionResolver.resolve(functionSignature)).thenReturn(
+ Pair.of(functionSignature, functionExpressionBuilder));
when(mockMap.containsKey(mockFunctionName)).thenReturn(true);
when(mockMap.get(mockFunctionName)).thenReturn(mockfunctionResolver);
BuiltinFunctionRepository repo = new BuiltinFunctionRepository(mockMap);
@@ -98,6 +128,60 @@ void resolve() {
assertEquals(functionExpressionBuilder, repo.resolve(functionSignature));
}
+ @Test
+ void resolve_should_not_cast_arguments_in_cast_function() {
+ when(mockExpression.toString()).thenReturn("string");
+ FunctionImplementation function =
+ repo.resolve(registerFunctionResolver(CAST_TO_BOOLEAN.getName(), DATETIME, BOOLEAN))
+ .apply(ImmutableList.of(mockExpression));
+ assertEquals("cast_to_boolean(string)", function.toString());
+ }
+
+ @Test
+ void resolve_should_not_cast_arguments_if_same_type() {
+ when(mockFunctionName.getFunctionName()).thenReturn("mock");
+ when(mockExpression.toString()).thenReturn("string");
+ FunctionImplementation function =
+ repo.resolve(registerFunctionResolver(mockFunctionName, STRING, STRING))
+ .apply(ImmutableList.of(mockExpression));
+ assertEquals("mock(string)", function.toString());
+ }
+
+ @Test
+ void resolve_should_not_cast_arguments_if_both_numbers() {
+ when(mockFunctionName.getFunctionName()).thenReturn("mock");
+ when(mockExpression.toString()).thenReturn("byte");
+ FunctionImplementation function =
+ repo.resolve(registerFunctionResolver(mockFunctionName, BYTE, INTEGER))
+ .apply(ImmutableList.of(mockExpression));
+ assertEquals("mock(byte)", function.toString());
+ }
+
+ @Test
+ void resolve_should_cast_arguments() {
+ when(mockFunctionName.getFunctionName()).thenReturn("mock");
+ when(mockExpression.toString()).thenReturn("string");
+ when(mockExpression.type()).thenReturn(STRING);
+
+ FunctionSignature signature =
+ registerFunctionResolver(mockFunctionName, STRING, BOOLEAN);
+ registerFunctionResolver(CAST_TO_BOOLEAN.getName(), STRING, STRING);
+
+ FunctionImplementation function =
+ repo.resolve(signature)
+ .apply(ImmutableList.of(mockExpression));
+ assertEquals("mock(cast_to_boolean(string))", function.toString());
+ }
+
+ @Test
+ void resolve_should_throw_exception_for_unsupported_conversion() {
+ ExpressionEvaluationException error =
+ assertThrows(ExpressionEvaluationException.class, () ->
+ repo.resolve(registerFunctionResolver(mockFunctionName, BYTE, STRUCT))
+ .apply(ImmutableList.of(mockExpression)));
+ assertEquals(error.getMessage(), "Type conversion to type STRUCT is not supported");
+ }
+
@Test
@DisplayName("resolve unregistered function should throw exception")
void resolve_unregistered() {
@@ -109,4 +193,52 @@ void resolve_unregistered() {
() -> repo.resolve(new FunctionSignature(FunctionName.of("unknown"), Arrays.asList())));
assertEquals("unsupported function name: unknown", exception.getMessage());
}
+
+ private FunctionSignature registerFunctionResolver(FunctionName funcName,
+ ExprType sourceType,
+ ExprType targetType) {
+ FunctionSignature unresolvedSignature = new FunctionSignature(
+ funcName, ImmutableList.of(sourceType));
+ FunctionSignature resolvedSignature = new FunctionSignature(
+ funcName, ImmutableList.of(targetType));
+
+ FunctionResolver funcResolver = mock(FunctionResolver.class);
+ FunctionBuilder funcBuilder = mock(FunctionBuilder.class);
+
+ when(mockMap.containsKey(eq(funcName))).thenReturn(true);
+ when(mockMap.get(eq(funcName))).thenReturn(funcResolver);
+ when(funcResolver.resolve(eq(unresolvedSignature))).thenReturn(
+ Pair.of(resolvedSignature, funcBuilder));
+ repo.register(funcResolver);
+
+ // Relax unnecessary stubbing check because error case test doesn't call this
+ lenient().doAnswer(invocation ->
+ new FakeFunctionExpression(funcName, invocation.getArgument(0))
+ ).when(funcBuilder).apply(any());
+ return unresolvedSignature;
+ }
+
+ private static class FakeFunctionExpression extends FunctionExpression {
+
+ public FakeFunctionExpression(FunctionName functionName, List arguments) {
+ super(functionName, arguments);
+ }
+
+ @Override
+ public ExprValue valueOf(Environment valueEnv) {
+ return null;
+ }
+
+ @Override
+ public ExprType type() {
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return getFunctionName().getFunctionName()
+ + "(" + StringUtils.join(getArguments(), ", ") + ")";
+ }
+ }
+
}
diff --git a/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java b/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java
index 1cd1e3756b..6887837b35 100644
--- a/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/function/FunctionResolverTest.java
@@ -70,7 +70,7 @@ void resolve_function_signature_exactly_match() {
FunctionResolver resolver = new FunctionResolver(functionName,
ImmutableMap.of(exactlyMatchFS, exactlyMatchBuilder));
- assertEquals(exactlyMatchBuilder, resolver.resolve(functionSignature));
+ assertEquals(exactlyMatchBuilder, resolver.resolve(functionSignature).getValue());
}
@Test
@@ -80,7 +80,7 @@ void resolve_function_signature_best_match() {
FunctionResolver resolver = new FunctionResolver(functionName,
ImmutableMap.of(bestMatchFS, bestMatchBuilder, leastMatchFS, leastMatchBuilder));
- assertEquals(bestMatchBuilder, resolver.resolve(functionSignature));
+ assertEquals(bestMatchBuilder, resolver.resolve(functionSignature).getValue());
}
@Test
diff --git a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java
index dc57a13694..745aa9cc3b 100644
--- a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java
@@ -28,12 +28,18 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
import static org.opensearch.sql.data.type.ExprCoreType.BYTE;
+import static org.opensearch.sql.data.type.ExprCoreType.DATE;
+import static org.opensearch.sql.data.type.ExprCoreType.DATETIME;
import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE;
import static org.opensearch.sql.data.type.ExprCoreType.FLOAT;
import static org.opensearch.sql.data.type.ExprCoreType.INTEGER;
import static org.opensearch.sql.data.type.ExprCoreType.LONG;
import static org.opensearch.sql.data.type.ExprCoreType.SHORT;
+import static org.opensearch.sql.data.type.ExprCoreType.STRING;
+import static org.opensearch.sql.data.type.ExprCoreType.TIME;
+import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED;
import static org.opensearch.sql.data.type.WideningTypeRule.IMPOSSIBLE_WIDENING;
import static org.opensearch.sql.data.type.WideningTypeRule.TYPE_EQUAL;
@@ -70,6 +76,11 @@ class WideningTypeRuleTest {
.put(LONG, FLOAT, 1)
.put(LONG, DOUBLE, 2)
.put(FLOAT, DOUBLE, 1)
+ .put(STRING, BOOLEAN, 1)
+ .put(STRING, TIMESTAMP, 1)
+ .put(STRING, DATE, 1)
+ .put(STRING, TIME, 1)
+ .put(STRING, DATETIME, 1)
.put(UNDEFINED, BYTE, 1)
.put(UNDEFINED, SHORT, 2)
.put(UNDEFINED, INTEGER, 3)
diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java
index ffccf9a62e..31bdca1426 100644
--- a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java
@@ -31,11 +31,14 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
+import static org.opensearch.sql.data.type.ExprCoreType.BYTE;
import static org.opensearch.sql.data.type.ExprCoreType.DATE;
+import static org.opensearch.sql.data.type.ExprCoreType.DATETIME;
import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE;
import static org.opensearch.sql.data.type.ExprCoreType.FLOAT;
import static org.opensearch.sql.data.type.ExprCoreType.INTEGER;
import static org.opensearch.sql.data.type.ExprCoreType.LONG;
+import static org.opensearch.sql.data.type.ExprCoreType.SHORT;
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
@@ -103,6 +106,22 @@ void castToString(ExprValue value) {
assertEquals(new ExprStringValue(value.value().toString()), expression.valueOf(null));
}
+ @ParameterizedTest(name = "castToByte({0})")
+ @MethodSource({"numberData"})
+ void castToByte(ExprValue value) {
+ FunctionExpression expression = dsl.castByte(DSL.literal(value));
+ assertEquals(BYTE, expression.type());
+ assertEquals(new ExprByteValue(value.byteValue()), expression.valueOf(null));
+ }
+
+ @ParameterizedTest(name = "castToShort({0})")
+ @MethodSource({"numberData"})
+ void castToShort(ExprValue value) {
+ FunctionExpression expression = dsl.castShort(DSL.literal(value));
+ assertEquals(SHORT, expression.type());
+ assertEquals(new ExprShortValue(value.shortValue()), expression.valueOf(null));
+ }
+
@ParameterizedTest(name = "castToInt({0})")
@MethodSource({"numberData"})
void castToInt(ExprValue value) {
@@ -111,6 +130,20 @@ void castToInt(ExprValue value) {
assertEquals(new ExprIntegerValue(value.integerValue()), expression.valueOf(null));
}
+ @Test
+ void castStringToByte() {
+ FunctionExpression expression = dsl.castByte(DSL.literal("100"));
+ assertEquals(BYTE, expression.type());
+ assertEquals(new ExprByteValue(100), expression.valueOf(null));
+ }
+
+ @Test
+ void castStringToShort() {
+ FunctionExpression expression = dsl.castShort(DSL.literal("100"));
+ assertEquals(SHORT, expression.type());
+ assertEquals(new ExprShortValue(100), expression.valueOf(null));
+ }
+
@Test
void castStringToInt() {
FunctionExpression expression = dsl.castInt(DSL.literal("100"));
@@ -124,6 +157,28 @@ void castStringToIntException() {
assertThrows(RuntimeException.class, () -> expression.valueOf(null));
}
+ @Test
+ void castBooleanToByte() {
+ FunctionExpression expression = dsl.castByte(DSL.literal(true));
+ assertEquals(BYTE, expression.type());
+ assertEquals(new ExprByteValue(1), expression.valueOf(null));
+
+ expression = dsl.castByte(DSL.literal(false));
+ assertEquals(BYTE, expression.type());
+ assertEquals(new ExprByteValue(0), expression.valueOf(null));
+ }
+
+ @Test
+ void castBooleanToShort() {
+ FunctionExpression expression = dsl.castShort(DSL.literal(true));
+ assertEquals(SHORT, expression.type());
+ assertEquals(new ExprShortValue(1), expression.valueOf(null));
+
+ expression = dsl.castShort(DSL.literal(false));
+ assertEquals(SHORT, expression.type());
+ assertEquals(new ExprShortValue(0), expression.valueOf(null));
+ }
+
@Test
void castBooleanToInt() {
FunctionExpression expression = dsl.castInt(DSL.literal(true));
@@ -312,4 +367,20 @@ void castToTimestamp() {
assertEquals(TIMESTAMP, expression.type());
assertEquals(new ExprTimestampValue("2012-08-07 01:01:01"), expression.valueOf(null));
}
+
+ @Test
+ void castToDatetime() {
+ FunctionExpression expression = dsl.castDatetime(DSL.literal("2012-08-07 01:01:01"));
+ assertEquals(DATETIME, expression.type());
+ assertEquals(new ExprDatetimeValue("2012-08-07 01:01:01"), expression.valueOf(null));
+
+ expression = dsl.castDatetime(DSL.literal(new ExprTimestampValue("2012-08-07 01:01:01")));
+ assertEquals(DATETIME, expression.type());
+ assertEquals(new ExprDatetimeValue("2012-08-07 01:01:01"), expression.valueOf(null));
+
+ expression = dsl.castDatetime(DSL.literal(new ExprDateValue("2012-08-07")));
+ assertEquals(DATETIME, expression.type());
+ assertEquals(new ExprDatetimeValue("2012-08-07 00:00:00"), expression.valueOf(null));
+ }
+
}
diff --git a/docs/dev/NewSQLEngine.md b/docs/dev/NewSQLEngine.md
index 0be3370f33..cd82e61571 100644
--- a/docs/dev/NewSQLEngine.md
+++ b/docs/dev/NewSQLEngine.md
@@ -12,23 +12,23 @@ The current SQL query engine provides users the basic query capability for using
With the architecture and extensibility improved significantly, the following SQL features are able to be introduced in the new query engine:
* **Language Structure**
- * [Identifiers](/docs/user/general/identifiers.rst): added support for identifier names with special characters
- * [Data types](/docs/user/general/datatypes.rst): added support for date and interval types
- * [Expressions](/docs/user/dql/expressions.rst): complex nested expression support
- * [SQL functions](/docs/user/dql/functions.rst): more date function support, `ADDDATE`, `DATE_ADD`, `DATE_SUB`, `DAY`, `DAYNAME`, `DAYOFMONTH`, `DAYOFWEEK`, `DAYOFYEAR`, `FROM_DAYS`, `HOUR`, `MICROSECOND`, `MINUTE`, `QUARTER`, `SECOND`, `SUBDATE`, `TIME`, `TIME_TO_SEC`, `TO_DAYS`, `WEEK`
- * [Comments](/docs/user/general/comments.rst): SQL comment support
+ * [Identifiers](../../docs/user/general/identifiers.rst): added support for identifier names with special characters
+ * [Data types](../../docs/user/general/datatypes.rst): added support for date and interval types
+ * [Expressions](../../docs/user/dql/expressions.rst): complex nested expression support
+ * [SQL functions](../../docs/user/dql/functions.rst): more date function support, `ADDDATE`, `DATE_ADD`, `DATE_SUB`, `DAY`, `DAYNAME`, `DAYOFMONTH`, `DAYOFWEEK`, `DAYOFYEAR`, `FROM_DAYS`, `HOUR`, `MICROSECOND`, `MINUTE`, `QUARTER`, `SECOND`, `SUBDATE`, `TIME`, `TIME_TO_SEC`, `TO_DAYS`, `WEEK`
+ * [Comments](../../docs/user/general/comments.rst): SQL comment support
* **Basic queries**
- * [HAVING without GROUP BY clause](/docs/user/dql/aggregations.rst#having-without-group-by)
- * [Aggregate over arbitrary expression](/docs/user/dql/aggregations.rst#expression)
- * [Ordering by NULLS FIRST/LAST](/docs/user/dql/basics.rst#example-2-specifying-order-for-null)
- * [Ordering by aggregate function](/docs/user/dql/basics.rst#example-3-ordering-by-aggregate-functions)
+ * [HAVING without GROUP BY clause](../../docs/user/dql/aggregations.rst#having-without-group-by)
+ * [Aggregate over arbitrary expression](../../docs/user/dql/aggregations.rst#expression)
+ * [Ordering by NULLS FIRST/LAST](../../docs/user/dql/basics.rst#example-2-specifying-order-for-null)
+ * [Ordering by aggregate function](../../docs/user/dql/basics.rst#example-3-ordering-by-aggregate-functions)
* **Complex queries**
- * [Subqueries in FROM clause](/docs/user/dql/complex.rst#example-2-subquery-in-from-clause): support arbitrary nesting level and aggregation
+ * [Subqueries in FROM clause](../../docs/user/dql/complex.rst#example-2-subquery-in-from-clause): support arbitrary nesting level and aggregation
* **Advanced Features**
- * [Window functions](/docs/user/dql/window.rst): ranking and aggregate window functions
- * [Selective aggregation](/docs/user/dql/aggregations.rst#filter-clause): by standard `FILTER` function
+ * [Window functions](../../docs/user/dql/window.rst): ranking and aggregate window functions
+ * [Selective aggregation](../../docs/user/dql/aggregations.rst#filter-clause): by standard `FILTER` function
* **Beyond SQL**
- * [Semi-structured data query](/docs/user/beyond/partiql.rst#example-2-selecting-deeper-levels): support querying OpenSearch object fields on arbitrary level
+ * [Semi-structured data query](../../docs/user/beyond/partiql.rst#example-2-selecting-deeper-levels): support querying OpenSearch object fields on arbitrary level
* OpenSearch multi-field: handled automatically and users won't have the access, ex. `text` is converted to `text.keyword` if it’s a multi-field
As for correctness, besides full coverage of unit and integration test, we developed a new comparison test framework to ensure correctness by comparing with other databases. Please find more details in [Testing](./Testing.md).
@@ -57,7 +57,7 @@ For the following features unsupported in the new engine, the query will be forw
### 3.3 Limitations
-You can find all the limitations in [Limitations](/docs/user/limitations/limitations.rst).
+You can find all the limitations in [Limitations](../../docs/user/limitations/limitations.rst).
---
diff --git a/docs/dev/Pagination.md b/docs/dev/Pagination.md
index 38ab9d1793..4982b13d7f 100644
--- a/docs/dev/Pagination.md
+++ b/docs/dev/Pagination.md
@@ -149,7 +149,7 @@ POST _plugins/_sql/close
### 3.3 Support in JDBC
-To use the pagination functionality programmatically using the[JDBC 4.1 specification](https://download.oracle.com/otn-pub/jcp/jdbc-4_1-mrel-spec/jdbc4.1-fr-spec.pdf?AuthParam=1574798710_305327d63d91e91e19dd80953454597a), page size is being used as performance hint given by `Statement.setFetchSize()` and “applied to each result set produced by the statement”. We need to re-implement `Statement.executequery()` and `ResultSet.next()` to take advantage of cursor.
+To use the pagination functionality programmatically using the[JDBC 4.1 specification](https://download.oracle.com/otn-pub/jcp/jdbc-4_1-mrel-spec/jdbc4.1-fr-spec.pdf), page size is being used as performance hint given by `Statement.setFetchSize()` and “applied to each result set produced by the statement”. We need to re-implement `Statement.executequery()` and `ResultSet.next()` to take advantage of cursor.
We will not support backward scroll on result set. The `Statement` must be created with a `ResultSet` type of `ResultSet.TYPE_FORWARD_ONLY`. Attempt to scroll backwards or otherwise jump around in the `ResultSet` should throw an exception.
diff --git a/docs/dev/TypeConversion.md b/docs/dev/TypeConversion.md
new file mode 100644
index 0000000000..07697fed07
--- /dev/null
+++ b/docs/dev/TypeConversion.md
@@ -0,0 +1,172 @@
+# Data Type Conversion in SQL/PPL
+
+## 1.Overview
+
+### 1.1 Type Conversion
+
+Type conversion means conversion from one data type to another which has two aspects to consider:
+
+1. Whether the conversion is implicit or explicit (implicit conversion is often called coercion)
+2. Whether the data is converted within the family or reinterpreted as another data type outside
+
+It’s common that strong typed language only supports little implicit conversions and no data reinterpretation. While languages with weak typing allows many implicit conversions and flexible reinterpretation.
+
+### 1.2 Problem Statement
+
+Currently, there are only 2 implicit conversions allowed which are defined by type hierarchy tree:
+
+1. Numeric type coercion: narrower numeric types are closer to the root on the top. For example, an integer is converted to a long integer automatically similar as in JAVA.
+2. NULL literals: `UNDEFINED` type can be converted to any other so that NULL literal can be accepted by any expression at runtime.
+
+![Current type hierarchy](img/type-hierarchy-tree-old.png)
+
+However, more general conversions for non-numeric types are missing, such as conversions between string, bool and date types. The strict type check causes inconvenience and other problems discussed below.
+
+
+---
+## 2.Requirements
+
+### 2.1 Use Cases
+
+The common use case and motivation include:
+
+1. *User-friendly*: Although it doesn’t matter for application or BI tool which can always follow the strict grammar rule, it’s more friendly and accessible to human by implicit type conversion, ex. `date > DATE('2020-06-01') => date > '2020-06-01'`
+2. *Schema-on-read*: More importantly, implicit conversion from string is required for schema on read (stored as raw string on write and extract field(s) on read), ex. `regex ‘...’ | abs(a)`
+
+### 2.2 Functionalities
+
+Immediate:
+
+1. Implicit conversion between bool and string: https://github.com/opendistro-for-elasticsearch/sql/issues/1061
+2. Implicit conversion between date and string: https://github.com/opendistro-for-elasticsearch/sql/issues/1056
+
+Future:
+
+1. Implicit conversion between string and more other types for regex command support
+
+
+---
+## 3.Design
+
+### 3.1 Type Precedence
+
+Type precedence determines the direction of conversion when fields involved in an expression has different type from resolved signature. Before introducing it into our type system, let’s check how an expression is resolved to a function implementation and why type precedence is required.
+
+```
+Compiling time:
+ Expression: 1 = 1.0
+ Unresolved signature: equal(INT, DOUBLE)
+ Resovled signature: equal(DOUBLE, DOUBLE) , distance=1
+ Function builder: returns equal(DOUBLE, DOUBLE) impl
+```
+
+Now let’s follow the same idea to add support for conversion from `BOOLEAN` to `STRING`. Because all boolean values can be converted to a string (in other word string is “wider”), String type is made the parent of Boolean. However, this leads to wrong semantic as the following expression `false = ‘FALSE’` for example:
+
+```
+Compiling time:
+ Expression: false = 'FALSE'
+ Unresolved signature: equal(BOOL, STRING)
+ Resovled signature: equal(STRING, STRING)
+ Function builder: returns equal(STRING, STRING) impl
+
+Runtime:
+ Function impl: String.value(false).equals('FALSE')
+ Evaluation result: *false*
+```
+
+Therefore type precedence is supposed to be defined based on semantic expected rather than intuitive “width” of type. Now let’s reverse the direction and make Boolean the parent of String type.
+
+![New type hierarchy](img/type-hierarchy-tree-with-implicit-cast.png)
+
+```
+Compiling time:
+ Expression: false = 'FALSE'
+ Unresolved signature: equal(BOOL, STRING)
+ Resovled signature: equal(BOOL, BOOL)
+ Function builder: 1) returns equal(BOOL, cast_to_bool(STRING)) impl
+ 2) returns equal(BOOL, BOOL) impl
+Runtime:
+ equal impl: false.equals(cast_to_bool('FALSE'))
+ cast_to_bool impl: Boolean.valueOf('FALSE')
+ Evaluation result: *true*
+```
+
+### 3.2 General Rules
+
+1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree.
+2. Explicit conversion defines the complete set of conversion allowed. If no explicit conversion defined, implicit conversion should be impossible too.
+3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too.
+4. Conversion within a data type family is considered as conversion between different data representation and should be supported as much as possible.
+5. Conversion across 2 data type families is considered as data reinterpretation and should be enabled with strong motivation.
+
+---
+## 4.Implementation
+
+### 4.1 Explicit Conversion
+
+Explicit conversion is defined as the set of `CAST` function implementation which includes all the conversions allowed between data types. Same as before, missing cast function is added and implemented by the conversion logic in `ExprType` class.
+
+```java
+public class Cast extends UnresolvedExpression {
+
+ private static final Map CONVERTED_TYPE_FUNCTION_NAME_MAP =
+ new ImmutableMap.Builder()
+ .put("string", CAST_TO_STRING.getName())
+ .put("byte", CAST_TO_BYTE.getName())
+ .put("short", CAST_TO_SHORT.getName())
+ .put("int", CAST_TO_INT.getName())
+ .put("integer", CAST_TO_INT.getName())
+ .put("long", CAST_TO_LONG.getName())
+ .put("float", CAST_TO_FLOAT.getName())
+ .put("double", CAST_TO_DOUBLE.getName())
+ .put("boolean", CAST_TO_BOOLEAN.getName())
+ .put("date", CAST_TO_DATE.getName())
+ .put("time", CAST_TO_TIME.getName())
+ .put("timestamp", CAST_TO_TIMESTAMP.getName())
+ .build();
+}
+```
+
+### 4.2 Implicit Conversion
+
+Implicit conversion and precedence are defined by the type hierarchy tree. The data type at the head of an arrow has higher precedence than the type at the tail.
+
+```java
+public enum ExprCoreType implements ExprType {
+ UNKNOWN,
+ UNDEFINED,
+
+ /**
+ * Numbers.
+ */
+ BYTE(UNDEFINED),
+ SHORT(BYTE),
+ INTEGER(SHORT),
+ LONG(INTEGER),
+ FLOAT(LONG),
+ DOUBLE(FLOAT),
+
+ STRING(UNDEFINED),
+ BOOLEAN(STRING), // PR: change STRING's parent to BOOLEAN
+
+ /**
+ * Date.
+ */
+ TIMESTAMP(UNDEFINED),
+ DATE(UNDEFINED),
+ TIME(UNDEFINED),
+ DATETIME(UNDEFINED),
+ INTERVAL(UNDEFINED),
+
+ STRUCT(UNDEFINED),
+ ARRAY(UNDEFINED);
+}
+```
+
+### 4.3 Type Casting Logic
+
+As with examples in section 3.1, the implementation is:
+
+1. Define all possible conversions in CAST function family.
+2. Define implicit conversions by type hierarchy tree (auto implicit cast from child to parent)
+3. During compile time, wrap original function builder by a new one which cast arguments to target type.
diff --git a/docs/dev/img/type-hierarchy-tree-old.png b/docs/dev/img/type-hierarchy-tree-old.png
new file mode 100644
index 0000000000..7add83f286
Binary files /dev/null and b/docs/dev/img/type-hierarchy-tree-old.png differ
diff --git a/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png b/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png
new file mode 100644
index 0000000000..b729e23ab9
Binary files /dev/null and b/docs/dev/img/type-hierarchy-tree-with-implicit-cast.png differ
diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst
index 1d6d172981..275666e7ba 100644
--- a/docs/user/dql/aggregations.rst
+++ b/docs/user/dql/aggregations.rst
@@ -357,6 +357,19 @@ Example::
| 2.8613807855648994 |
+--------------------+
+DISTINCT COUNT Aggregation
+--------------------------
+
+To get the count of distinct values of a field, you can add a keyword ``DISTINCT`` before the field in the count aggregation. Example::
+
+ os> SELECT COUNT(DISTINCT gender), COUNT(gender) FROM accounts;
+ fetched rows / total rows = 1/1
+ +--------------------------+-----------------+
+ | COUNT(DISTINCT gender) | COUNT(gender) |
+ |--------------------------+-----------------|
+ | 2 | 4 |
+ +--------------------------+-----------------+
+
HAVING Clause
=============
@@ -456,3 +469,16 @@ The ``FILTER`` clause can be used in aggregation functions without GROUP BY as w
| 4 | 1 |
+--------------+------------+
+Distinct count aggregate with FILTER
+------------------------------------
+
+The ``FILTER`` clause is also used in distinct count to do the filtering before count the distinct values of specific field. For example::
+
+ os> SELECT COUNT(DISTINCT firstname) FILTER(WHERE age > 30) AS distinct_count FROM accounts
+ fetched rows / total rows = 1/1
+ +------------------+
+ | distinct_count |
+ |------------------|
+ | 3 |
+ +------------------+
+
diff --git a/docs/user/general/datatypes.rst b/docs/user/general/datatypes.rst
index 0077422a74..c0a3bf62ac 100644
--- a/docs/user/general/datatypes.rst
+++ b/docs/user/general/datatypes.rst
@@ -1,4 +1,3 @@
-
==========
Data Types
==========
@@ -105,6 +104,106 @@ The table below list the mapping between OpenSearch Data Type, OpenSearch SQL Da
Notes: Not all the OpenSearch SQL Type has correspond OpenSearch Type. e.g. data and time. To use function which required such data type, user should explicitly convert the data type.
+Data Type Conversion
+====================
+
+A data type can be converted to another, implicitly or explicitly or impossibly, according to type precedence defined and whether the conversion is supported by query engine.
+
+The general rules and design tenets for data type conversion include:
+
+1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. See `Data Type Conversion in SQL/PPL `_ for more details.
+2. Explicit conversion defines the complete set of conversion allowed. If no explicit conversion defined, implicit conversion should be impossible too.
+3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too.
+4. Conversion within a data type family is considered as conversion between different data representation and should be supported as much as possible.
+5. Conversion across two data type families is considered as data reinterpretation and should be enabled with strong motivation.
+
+Type Conversion Matrix
+----------------------
+
+The following matrix illustrates the conversions allowed by our query engine for all the built-in data types as well as types provided by OpenSearch storage engine.
+
++--------------+------------------------------------------------+---------+------------------------------+-----------------------------------------------+--------------------------+---------------------+
+| Data Types | Numeric Type Family | BOOLEAN | String Type Family | Datetime Type Family | OpenSearch Type Family | Complex Type Family |
+| +------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| | BYTE | SHORT | INTEGER | LONG | FLOAT | DOUBLE | BOOLEAN | TEXT_KEYWORD | TEXT | STRING | TIMESTAMP | DATE | TIME | DATETIME | INTERVAL | GEO_POINT | IP | BINARY | STRUCT | ARRAY |
++==============+======+=======+=========+======+=======+========+=========+==============+======+========+===========+======+======+==========+==========+===========+=====+========+===========+=========+
+| UNDEFINED | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE | IE |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| BYTE | N/A | IE | IE | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| SHORT | E | N/A | IE | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| INTEGER | E | E | N/A | IE | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| LONG | E | E | E | N/A | IE | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| FLOAT | E | E | E | E | N/A | IE | X | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| DOUBLE | E | E | E | E | E | N/A | X | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| BOOLEAN | E | E | E | E | E | E | N/A | X | X | E | X | X | X | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| TEXT_KEYWORD | | | | | | | | N/A | | IE | | | | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| TEXT | | | | | | | | | N/A | IE | | | | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| STRING | E | E | E | E | E | E | IE | X | X | N/A | IE | IE | IE | IE | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| TIMESTAMP | X | X | X | X | X | X | X | X | X | E | N/A | | | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| DATE | X | X | X | X | X | X | X | X | X | E | | N/A | | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| TIME | X | X | X | X | X | X | X | X | X | E | | | N/A | X | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| DATETIME | X | X | X | X | X | X | X | X | X | E | | | | N/A | X | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| INTERVAL | X | X | X | X | X | X | X | X | X | E | | | | X | N/A | X | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| GEO_POINT | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | N/A | X | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| IP | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | N/A | X | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| BINARY | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | N/A | X | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| STRUCT | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | X | N/A | X |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+| ARRAY | X | X | X | X | X | X | X | X | X | | X | X | X | X | X | X | X | X | X | N/A |
++--------------+------+-------+---------+------+-------+--------+---------+--------------+------+--------+-----------+------+------+----------+----------+-----------+-----+--------+-----------+---------+
+
+Note that:
+
+1. ``I`` means if implicit conversion will occur automatically. ``E`` stands for explicit conversion by ``CAST`` function. ``X`` for impossible to convert. Empty means not clear and need more test.
+2. There is no ``UNDEFINED`` column because it's only for ``NULL`` literal at runtime and should not be present in function signature definition.
+3. OpenSearch and complex types are not supported by ``CAST`` function, so it's impossible to convert a type to it for now.
+
+Examples
+--------
+
+Here are a few examples for implicit type conversion::
+
+ os> SELECT
+ ... 1 = 1.0,
+ ... 'True' = true,
+ ... DATE('2021-06-10') < '2021-06-11';
+ fetched rows / total rows = 1/1
+ +-----------+-----------------+-------------------------------------+
+ | 1 = 1.0 | 'True' = true | DATE('2021-06-10') < '2021-06-11' |
+ |-----------+-----------------+-------------------------------------|
+ | True | True | True |
+ +-----------+-----------------+-------------------------------------+
+
+Here are a few examples for explicit type conversion::
+
+ os> SELECT
+ ... CAST(true AS INT),
+ ... CAST(1.2 AS STRING),
+ ... CAST('2021-06-10 00:00:00' AS TIMESTAMP);
+ fetched rows / total rows = 1/1
+ +---------------------+-----------------------+--------------------------------------------+
+ | CAST(true AS INT) | CAST(1.2 AS STRING) | CAST('2021-06-10 00:00:00' AS TIMESTAMP) |
+ |---------------------+-----------------------+--------------------------------------------|
+ | 1 | 1.2 | 2021-06-10 00:00:00 |
+ +---------------------+-----------------------+--------------------------------------------+
Undefined Data Type
===================
@@ -233,6 +332,21 @@ Conversion from TIMESTAMP
- Conversion from timestamp is much more straightforward. To convert it to date is to extract the date value, and conversion to time is to extract the time value. Conversion to datetime, it will extracts the datetime value and leave the timezone information over. For example, the result to convert datetime '2020-08-17 14:09:00' UTC to date is date '2020-08-17', to time is '14:09:00' and to datetime is datetime '2020-08-17 14:09:00'.
+Conversion from string to date and time types
+---------------------------------------------
+
+A string can also represent and be converted to date and time types (except to interval type). As long as the string value is of valid format required by the target date and time types, the conversion can happen implicitly or explicitly as follows::
+
+ os> SELECT
+ ... TIMESTAMP('2021-06-17 00:00:00') = '2021-06-17 00:00:00',
+ ... '2021-06-18' < DATE('2021-06-17'),
+ ... '10:20:00' <= TIME('11:00:00');
+ fetched rows / total rows = 1/1
+ +------------------------------------------------------------+-------------------------------------+----------------------------------+
+ | TIMESTAMP('2021-06-17 00:00:00') = '2021-06-17 00:00:00' | '2021-06-18' < DATE('2021-06-17') | '10:20:00' <= TIME('11:00:00') |
+ |------------------------------------------------------------+-------------------------------------+----------------------------------|
+ | True | False | True |
+ +------------------------------------------------------------+-------------------------------------+----------------------------------+
String Data Types
=================
@@ -248,7 +362,17 @@ A string is a sequence of characters enclosed in either single or double quotes.
+-----------+-----------+-------------+-------------+
+Boolean Data Types
+==================
+A boolean can be represented by constant value ``TRUE`` or ``FALSE``. Besides, certain string representation is also accepted by function with boolean input. For example, string 'true', 'TRUE', 'false', 'FALSE' are all valid representation and can be converted to boolean implicitly or explicitly::
-
-
+ os> SELECT
+ ... true, FALSE,
+ ... CAST('TRUE' AS boolean), CAST('false' AS boolean);
+ fetched rows / total rows = 1/1
+ +--------+---------+---------------------------+----------------------------+
+ | true | FALSE | CAST('TRUE' AS boolean) | CAST('false' AS boolean) |
+ |--------+---------+---------------------------+----------------------------|
+ | True | False | True | False |
+ +--------+---------+---------------------------+----------------------------+
diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst
index f6dad255ef..ee91d86d14 100644
--- a/docs/user/ppl/cmd/stats.rst
+++ b/docs/user/ppl/cmd/stats.rst
@@ -302,3 +302,18 @@ PPL query::
| 36 | 32 | M |
+------------+------------+----------+
+Example 7: Calculate the distinct count of a field
+==================================================
+
+To get the count of distinct values of a field, you can use ``DISTINCT_COUNT`` (or ``DC``) function instead of ``COUNT``. The example calculates both the count and the distinct count of gender field of all the accounts.
+
+PPL query::
+
+ os> source=accounts | stats count(gender), distinct_count(gender);
+ fetched rows / total rows = 1/1
+ +-----------------+--------------------------+
+ | count(gender) | distinct_count(gender) |
+ |-----------------+--------------------------|
+ | 4 | 2 |
+ +-----------------+--------------------------+
+
diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java
index ff3ad2a6c8..4a9603fe6b 100644
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java
@@ -77,6 +77,19 @@ public void testStatsCountAll() throws IOException {
verifyDataRows(response, rows(1000));
}
+ @Test
+ public void testStatsDistinctCount() throws IOException {
+ JSONObject response =
+ executeQuery(String.format("source=%s | stats distinct_count(gender)", TEST_INDEX_ACCOUNT));
+ verifySchema(response, schema("distinct_count(gender)", null, "integer"));
+ verifyDataRows(response, rows(2));
+
+ response =
+ executeQuery(String.format("source=%s | stats dc(age)", TEST_INDEX_ACCOUNT));
+ verifySchema(response, schema("dc(age)", null, "integer"));
+ verifyDataRows(response, rows(21));
+ }
+
@Test
public void testStatsMin() throws IOException {
JSONObject response = executeQuery(String.format(
diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java
index 3cbb222afe..33cddc6f1f 100644
--- a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java
@@ -30,7 +30,15 @@ protected void init() throws Exception {
}
@Test
- void filteredAggregateWithSubquery() throws IOException {
+ void filteredAggregatePushedDown() throws IOException {
+ JSONObject response = executeQuery(
+ "SELECT COUNT(*) FILTER(WHERE age > 35) FROM " + TEST_INDEX_BANK);
+ verifySchema(response, schema("COUNT(*)", null, "integer"));
+ verifyDataRows(response, rows(3));
+ }
+
+ @Test
+ void filteredAggregateNotPushedDown() throws IOException {
JSONObject response = executeQuery(
"SELECT COUNT(*) FILTER(WHERE age > 35) FROM (SELECT * FROM " + TEST_INDEX_BANK
+ ") AS a");
diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java
index b92ca17238..52373a72e3 100644
--- a/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/sql/WindowFunctionIT.java
@@ -29,6 +29,8 @@
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder;
+
import org.json.JSONObject;
import org.junit.Test;
@@ -40,6 +42,7 @@ public class WindowFunctionIT extends SQLIntegTestCase {
@Override
protected void init() throws Exception {
loadIndex(Index.BANK_WITH_NULL_VALUES);
+ loadIndex(Index.BANK);
}
@Test
@@ -74,4 +77,49 @@ public void testOrderByNullLast() {
rows(null, 7));
}
+ @Test
+ public void testDistinctCountOverNull() {
+ JSONObject response = new JSONObject(executeQuery(
+ "SELECT lastname, COUNT(DISTINCT gender) OVER() "
+ + "FROM " + TestsConstants.TEST_INDEX_BANK, "jdbc"));
+ verifyDataRows(response,
+ rows("Duke Willmington", 2),
+ rows("Bond", 2),
+ rows("Bates", 2),
+ rows("Adams", 2),
+ rows("Ratliff", 2),
+ rows("Ayala", 2),
+ rows("Mcpherson", 2));
+ }
+
+ @Test
+ public void testDistinctCountOver() {
+ JSONObject response = new JSONObject(executeQuery(
+ "SELECT lastname, COUNT(DISTINCT gender) OVER(ORDER BY lastname) "
+ + "FROM " + TestsConstants.TEST_INDEX_BANK, "jdbc"));
+ verifyDataRowsInOrder(response,
+ rows("Adams", 1),
+ rows("Ayala", 2),
+ rows("Bates", 2),
+ rows("Bond", 2),
+ rows("Duke Willmington", 2),
+ rows("Mcpherson", 2),
+ rows("Ratliff", 2));
+ }
+
+ @Test
+ public void testDistinctCountPartition() {
+ JSONObject response = new JSONObject(executeQuery(
+ "SELECT lastname, COUNT(DISTINCT gender) OVER(PARTITION BY gender ORDER BY lastname) "
+ + "FROM " + TestsConstants.TEST_INDEX_BANK, "jdbc"));
+ verifyDataRowsInOrder(response,
+ rows("Ayala", 1),
+ rows("Bates", 1),
+ rows("Mcpherson", 1),
+ rows("Adams", 1),
+ rows("Bond", 1),
+ rows("Duke Willmington", 1),
+ rows("Ratliff", 1));
+ }
+
}
diff --git a/integ-test/src/test/resources/correctness/expressions/cast.txt b/integ-test/src/test/resources/correctness/expressions/cast.txt
index 4018a73f09..2d313203b4 100644
--- a/integ-test/src/test/resources/correctness/expressions/cast.txt
+++ b/integ-test/src/test/resources/correctness/expressions/cast.txt
@@ -17,3 +17,10 @@ cast('01:01:01' as time) as castTime
cast('true' as boolean) as castBool
cast(1 as boolean) as castBool
cast(cast(1 as string) as int) castCombine
+false = 'False' as implicitCast
+false = 'true' as implicitCast
+'TRUE' = true as implicitCast
+'false' = true as implicitCast
+CAST('2021-06-17 00:00:00' AS TIMESTAMP) = '2021-06-17 00:00:00' as implicitCast
+'2021-06-18' < CAST('2021-06-17' AS DATE) as implicitCast
+'10:20:00' <= CAST('11:00:00' AS TIME) as implicitCast
diff --git a/integ-test/src/test/resources/correctness/queries/aggregation.txt b/integ-test/src/test/resources/correctness/queries/aggregation.txt
index 45aa658783..0c0648a937 100644
--- a/integ-test/src/test/resources/correctness/queries/aggregation.txt
+++ b/integ-test/src/test/resources/correctness/queries/aggregation.txt
@@ -9,4 +9,6 @@ SELECT MIN(timestamp) FROM opensearch_dashboards_sample_data_flights
SELECT VAR_POP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights
SELECT VAR_SAMP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights
SELECT STDDEV_POP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights
-SELECT STDDEV_SAMP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights
\ No newline at end of file
+SELECT STDDEV_SAMP(AvgTicketPrice) FROM opensearch_dashboards_sample_data_flights
+SELECT COUNT(DISTINCT Origin), COUNT(DISTINCT Dest) FROM opensearch_dashboards_sample_data_flights
+SELECT COUNT(DISTINCT Origin) FROM (SELECT * FROM opensearch_dashboards_sample_data_flights) AS flights
\ No newline at end of file
diff --git a/integtest.sh b/integtest.sh
deleted file mode 100755
index 8be8831a60..0000000000
--- a/integtest.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function usage() {
- echo ""
- echo "This script is used to run integration tests for plugin installed on a remote OpenSearch/Dashboards cluster."
- echo "--------------------------------------------------------------------------"
- echo "Usage: $0 [args]"
- echo ""
- echo "Required arguments:"
- echo "None"
- echo ""
- echo "Optional arguments:"
- echo -e "-b BIND_ADDRESS\t, defaults to localhost | 127.0.0.1, can be changed to any IP or domain name for the cluster location."
- echo -e "-p BIND_PORT\t, defaults to 9200 or 5601 depends on OpenSearch or Dashboards, can be changed to any port for the cluster location."
- echo -e "-s SECURITY_ENABLED\t(true | false), defaults to true. Specify the OpenSearch/Dashboards have security enabled or not."
- echo -e "-c CREDENTIAL\t(usename:password), no defaults, effective when SECURITY_ENABLED=true."
- echo -e "-h\tPrint this message."
- echo "--------------------------------------------------------------------------"
-}
-
-while getopts ":hb:p:s:c:" arg; do
- case $arg in
- h)
- usage
- exit 1
- ;;
- b)
- BIND_ADDRESS=$OPTARG
- ;;
- p)
- BIND_PORT=$OPTARG
- ;;
- s)
- SECURITY_ENABLED=$OPTARG
- ;;
- c)
- CREDENTIAL=$OPTARG
- ;;
- :)
- echo "-${OPTARG} requires an argument"
- usage
- exit 1
- ;;
- ?)
- echo "Invalid option: -${OPTARG}"
- exit 1
- ;;
- esac
-done
-
-
-if [ -z "$BIND_ADDRESS" ]
-then
- BIND_ADDRESS="localhost"
-fi
-
-if [ -z "$BIND_PORT" ]
-then
- BIND_PORT="9200"
-fi
-
-if [ -z "$SECURITY_ENABLED" ]
-then
- SECURITY_ENABLED="true"
-fi
-
-if [ -z "$CREDENTIAL" ]
-then
- CREDENTIAL="admin:admin"
- USERNAME=`echo $CREDENTIAL | awk -F ':' '{print $1}'`
- PASSWORD=`echo $CREDENTIAL | awk -F ':' '{print $2}'`
-fi
-
-./gradlew integTest -Dtests.rest.cluster="$BIND_ADDRESS:$BIND_PORT" -Dtests.cluster="$BIND_ADDRESS:$BIND_PORT" -Dtests.clustername="opensearch-integrationtest" -Dhttps=$SECURITY_ENABLED -Duser=$USERNAME -Dpassword=$PASSWORD --console=plain
-
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java
index 60d7f8c684..2d40218688 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java
@@ -45,16 +45,27 @@
@RequiredArgsConstructor
public enum OpenSearchDataType implements ExprType {
/**
- * OpenSearch Text.
+ * OpenSearch Text. Rather than cast text to other types (STRING), leave it alone to prevent
+ * cast_to_string(OPENSEARCH_TEXT).
* Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/text.html
*/
- OPENSEARCH_TEXT(Collections.singletonList(STRING), "string"),
+ OPENSEARCH_TEXT(Collections.singletonList(STRING), "string") {
+ @Override
+ public boolean shouldCast(ExprType other) {
+ return false;
+ }
+ },
/**
* OpenSearch multi-fields which has text and keyword.
* Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-fields.html
*/
- OPENSEARCH_TEXT_KEYWORD(Arrays.asList(STRING, OPENSEARCH_TEXT), "string"),
+ OPENSEARCH_TEXT_KEYWORD(Arrays.asList(STRING, OPENSEARCH_TEXT), "string") {
+ @Override
+ public boolean shouldCast(ExprType other) {
+ return false;
+ }
+ },
OPENSEARCH_IP(Arrays.asList(UNKNOWN), "ip"),
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java
index 73d58d793e..cd793c9046 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/AggregationBuilderHelper.java
@@ -43,11 +43,9 @@
/**
* Abstract Aggregation Builder.
- *
- * @param type of the actual AggregationBuilder to be built.
*/
@RequiredArgsConstructor
-public class AggregationBuilderHelper {
+public class AggregationBuilderHelper {
private final ExpressionSerializer serializer;
@@ -57,7 +55,7 @@ public class AggregationBuilderHelper {
* @param expression Expression
* @return AggregationBuilder
*/
- public T build(Expression expression, Function fieldBuilder,
+ public T build(Expression expression, Function fieldBuilder,
Function