From 8c8e08c248f229e5bb96d8e5882714f68c4abebe Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Fri, 14 Jul 2023 10:05:48 -0700 Subject: [PATCH] Added new datetime functions and aliases to PPL [Part 2] (#283) (#1852) * Added new datetime functions, documentation, and tests Signed-off-by: Matthew Wells --- docs/user/ppl/functions/datetime.rst | 289 +++++++++++++++++- .../sql/ppl/DateTimeFunctionIT.java | 81 +++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 10 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 64 ++++ .../sql/ppl/parser/AstExpressionBuilder.java | 54 ++++ .../sql/ppl/antlr/PPLSyntaxParserTest.java | 61 +++- .../ppl/parser/AstExpressionBuilderTest.java | 60 ++++ 7 files changed, 615 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/functions/datetime.rst b/docs/user/ppl/functions/datetime.rst index fccfefca6b..8f844c75a2 100644 --- a/docs/user/ppl/functions/datetime.rst +++ b/docs/user/ppl/functions/datetime.rst @@ -813,6 +813,77 @@ Example:: +-----------------------------------+ +EXTRACT +------- + +Description +>>>>>>>>>>> + +Usage: extract(part FROM date) returns a LONG with digits in order according to the given 'part' arguments. +The specific format of the returned long is determined by the table below. + +Argument type: PART, where PART is one of the following tokens in the table below. + +The format specifiers found in this table are the same as those found in the `DATE_FORMAT`_ function. + +.. list-table:: The following table describes the mapping of a 'part' to a particular format. 
+ :widths: 20 80 + :header-rows: 1 + + * - Part + - Format + * - MICROSECOND + - %f + * - SECOND + - %s + * - MINUTE + - %i + * - HOUR + - %H + * - DAY + - %d + * - WEEK + - %X + * - MONTH + - %m + * - YEAR + - %V + * - SECOND_MICROSECOND + - %s%f + * - MINUTE_MICROSECOND + - %i%s%f + * - MINUTE_SECOND + - %i%s + * - HOUR_MICROSECOND + - %H%i%s%f + * - HOUR_SECOND + - %H%i%s + * - HOUR_MINUTE + - %H%i + * - DAY_MICROSECOND + - %d%H%i%s%f + * - DAY_SECOND + - %d%H%i%s + * - DAY_MINUTE + - %d%H%i + * - DAY_HOUR + - %d%H + * - YEAR_MONTH + - %V%m + +Return type: LONG + +Example:: + + os> source=people | eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` + fetched rows / total rows = 1/1 + +--------------------------------------------------+ + | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | + |--------------------------------------------------| + | 202302 | + +--------------------------------------------------+ + + FROM_DAYS --------- @@ -872,6 +943,28 @@ Examples:: +-----------------------------------+ +GET_FORMAT +---------- + +Description +>>>>>>>>>>> + +Usage: Returns a string value containing string format specifiers based on the input arguments. + +Argument type: TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, DATETIME, TIMESTAMP], and +STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by '). + +Examples:: + + os> source=people | eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA') | fields `GET_FORMAT(DATE, 'USA')` + fetched rows / total rows = 1/1 + +---------------------------+ + | GET_FORMAT(DATE, 'USA') | + |---------------------------| + | %m.%d.%Y | + +---------------------------+ + + HOUR ---- @@ -922,6 +1015,26 @@ Example:: +---------------------------------+ +LAST_DAY +-------- + +Usage: Returns the last day of the month as a DATE for a valid argument. 
+ +Argument type: DATE/DATETIME/STRING/TIMESTAMP/TIME + +Return type: DATE + +Example:: + + os> source=people | eval `last_day('2023-02-06')` = last_day('2023-02-06') | fields `last_day('2023-02-06')` + fetched rows / total rows = 1/1 + +--------------------------+ + | last_day('2023-02-06') | + |--------------------------| + | 2023-02-28 | + +--------------------------+ + + LOCALTIMESTAMP -------------- @@ -1136,7 +1249,6 @@ Return type: INTEGER Synonyms: `MONTH_OF_YEAR`_ - Example:: os> source=people | eval `MONTH(DATE('2020-08-26'))` = MONTH(DATE('2020-08-26')) | fields `MONTH(DATE('2020-08-26'))` @@ -1289,6 +1401,32 @@ Example:: +-------------------------------+ +SEC_TO_TIME +----------- + +Description +>>>>>>>>>>> + +Usage: sec_to_time(number) returns the time in HH:mm:ss[.nnnnnn] format. +Note that the function returns a time between 00:00:00 and 23:59:59. +If an input value is too large (greater than 86399), the function will wrap around and begin returning outputs starting from 00:00:00. +If an input value is too small (less than 0), the function will wrap around and begin returning outputs counting down from 23:59:59. + +Argument type: INTEGER, LONG, DOUBLE, FLOAT + +Return type: TIME + +Example:: + + os> source=people | eval `SEC_TO_TIME(3601)` = SEC_TO_TIME(3601) | eval `SEC_TO_TIME(1234.123)` = SEC_TO_TIME(1234.123) | fields `SEC_TO_TIME(3601)`, `SEC_TO_TIME(1234.123)` + fetched rows / total rows = 1/1 + +---------------------+-------------------------+ + | SEC_TO_TIME(3601) | SEC_TO_TIME(1234.123) | + |---------------------+-------------------------| + | 01:00:01 | 00:20:34.123 | + +---------------------+-------------------------+ + + SECOND ------ @@ -1339,6 +1477,32 @@ Example:: +--------------------------------------+ +STR_TO_DATE +----------- + +Description +>>>>>>>>>>> + +Usage: str_to_date(string, string) is used to extract a DATETIME from the first argument string using the formats specified in the second argument string. 
+The input argument must have enough information to be parsed as a DATE, DATETIME, or TIME. +Acceptable string format specifiers are the same as those used in the `DATE_FORMAT`_ function. +It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. Otherwise, it will return a DATETIME with the parsed values (as well as default values for any field that was not parsed). + +Argument type: STRING, STRING + +Return type: DATETIME + +Example:: + + OS> source=people | eval `str_to_date("01,5,2013", "%d,%m,%Y")` = str_to_date("01,5,2013", "%d,%m,%Y") | fields `str_to_date("01,5,2013", "%d,%m,%Y")` + fetched rows / total rows = 1/1 + +----------------------------------------+ + | str_to_date("01,5,2013", "%d,%m,%Y") | + |----------------------------------------| + | 2013-05-01 00:00:00 | + +----------------------------------------+ + + SUBDATE ------- @@ -1637,6 +1801,57 @@ Example:: +------------------------------------+------------------------------------------------------+ +TIMESTAMPADD +------------ + +Description +>>>>>>>>>>> + +Usage: Returns a DATETIME value based on a passed in DATE/DATETIME/TIME/TIMESTAMP/STRING argument and an INTERVAL and INTEGER argument which determine the amount of time to be added. +If the third argument is a STRING, it must be formatted as a valid DATETIME. If only a TIME is provided, a DATETIME is still returned with the DATE portion filled in using the current date. +If the third argument is a DATE, it will be automatically converted to a DATETIME. 
+ +Argument type: INTERVAL, INTEGER, DATE/DATETIME/TIME/TIMESTAMP/STRING + +INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] + +Examples:: + + os> source=people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` + fetched rows / total rows = 1/1 + +------------------------------------------------+----------------------------------------------------+ + | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | + |------------------------------------------------+----------------------------------------------------| + | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | + +------------------------------------------------+----------------------------------------------------+ + + +TIMESTAMPDIFF +------------- + +Description +>>>>>>>>>>> + +Usage: TIMESTAMPDIFF(interval, start, end) returns the difference between the start and end date/times in interval units. +If a TIME is provided as an argument, it will be converted to a DATETIME with the DATE portion filled in using the current date. +Arguments will be automatically converted to a DATETIME/TIME/TIMESTAMP when appropriate. +Any argument that is a STRING must be formatted as a valid DATETIME. 
+ +Argument type: INTERVAL, DATE/DATETIME/TIME/TIMESTAMP/STRING, DATE/DATETIME/TIME/TIMESTAMP/STRING + +INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] + +Examples:: + + os> source=people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` + fetched rows / total rows = 1/1 + +---------------------------------------------------------------------+-------------------------------------------------------------+ + | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | + |---------------------------------------------------------------------+-------------------------------------------------------------| + | 4 | -23 | + +---------------------------------------------------------------------+-------------------------------------------------------------+ + + TO_DAYS ------- @@ -1660,6 +1875,30 @@ Example:: +-------------------------------+ +TO_SECONDS +---------- + +Description +>>>>>>>>>>> + +Usage: to_seconds(date) returns the number of seconds since the year 0 of the given value. Returns NULL if value is invalid. +An argument of a LONG type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD or YYYYMMDD. Note that a LONG type argument cannot have leading 0s as it will be parsed using an octal numbering system. 
+ +Argument type: STRING/LONG/DATE/DATETIME/TIME/TIMESTAMP + +Return type: LONG + +Example:: + + os> source=people | eval `TO_SECONDS(DATE('2008-10-07'))` = TO_SECONDS(DATE('2008-10-07')) | eval `TO_SECONDS(950228)` = TO_SECONDS(950228) | fields `TO_SECONDS(DATE('2008-10-07'))`, `TO_SECONDS(950228)` + fetched rows / total rows = 1/1 + +----------------------------------+----------------------+ + | TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) | + |----------------------------------+----------------------| + | 63390556800 | 62961148800 | + +----------------------------------+----------------------+ + + UNIX_TIMESTAMP -------------- @@ -1821,6 +2060,31 @@ Example:: +----------------------------+-------------------------------+ +WEEKDAY +------- + +Description +>>>>>>>>>>> + +Usage: weekday(date) returns the weekday index for date (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). + +It is similar to the `dayofweek`_ function, but returns different indexes for each day. + +Argument type: STRING/DATE/DATETIME/TIME/TIMESTAMP + +Return type: INTEGER + +Example:: + + os> source=people | eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26')) | eval `weekday(DATE('2020-08-27'))` = weekday(DATE('2020-08-27')) | fields `weekday(DATE('2020-08-26'))`, `weekday(DATE('2020-08-27'))` + fetched rows / total rows = 1/1 + +-------------------------------+-------------------------------+ + | weekday(DATE('2020-08-26')) | weekday(DATE('2020-08-27')) | + |-------------------------------+-------------------------------| + | 2 | 3 | + +-------------------------------+-------------------------------+ + + WEEK_OF_YEAR ------------ @@ -1910,3 +2174,26 @@ Example:: +----------------------------+ +YEARWEEK +-------- + +Description +>>>>>>>>>>> + +Usage: yearweek(date) returns the year and week for date as an integer. It accepts an optional mode argument aligned with those available for the `WEEK`_ function. 
+ +Argument type: STRING/DATE/DATETIME/TIME/TIMESTAMP + +Return type: INTEGER + +Example:: + + os> source=people | eval `YEARWEEK('2020-08-26')` = YEARWEEK('2020-08-26') | eval `YEARWEEK('2019-01-05', 1)` = YEARWEEK('2019-01-05', 1) | fields `YEARWEEK('2020-08-26')`, `YEARWEEK('2019-01-05', 1)` + fetched rows / total rows = 1/1 + +--------------------------+-----------------------------+ + | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) | + |--------------------------+-----------------------------| + | 202034 | 201901 | + +--------------------------+-----------------------------+ + + diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeFunctionIT.java index a10d7f3771..b75b0ecaef 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeFunctionIT.java @@ -1062,4 +1062,85 @@ public void testTimeDiff() throws IOException { verifySchema(result, schema("f", null, "time")); verifySome(result.getJSONArray("datarows"), rows("10:59:59")); } + + @Test + public void testGetFormat() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f = date_format('2003-10-03', get_format(DATE,'USA')) | fields f", TEST_INDEX_DATE)); + verifySchema(result, schema("f", null, "string")); + verifySome(result.getJSONArray("datarows"), rows("10.03.2003")); + } + + @Test + public void testLastDay() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f = last_day('2003-10-03') | fields f", TEST_INDEX_DATE)); + verifySchema(result, schema("f", null, "date")); + verifySome(result.getJSONArray("datarows"), rows("2003-10-31")); + } + + @Test + public void testSecToTime() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f = sec_to_time(123456) | fields f", TEST_INDEX_DATE)); + verifySchema(result, schema("f", null, "time")); + 
verifySome(result.getJSONArray("datarows"), rows("10:17:36")); + } + + @Test + public void testYearWeek() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f1 = yearweek('2003-10-03') | eval f2 = yearweek('2003-10-03', 3) | fields f1, f2", TEST_INDEX_DATE)); + verifySchema(result, + schema("f1", null, "integer"), + schema("f2", null, "integer")); + verifySome(result.getJSONArray("datarows"), rows(200339, 200340)); + } + + @Test + public void testWeekDay() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f = weekday('2003-10-03') | fields f", TEST_INDEX_DATE)); + verifySchema(result, schema("f", null, "integer")); + verifySome(result.getJSONArray("datarows"), rows(4)); + } + + @Test + public void testToSeconds() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f1 = to_seconds(date('2008-10-07')) | " + + "eval f2 = to_seconds('2020-09-16 07:40:00') | " + + "eval f3 = to_seconds(DATETIME('2020-09-16 07:40:00')) | fields f1, f2, f3", TEST_INDEX_DATE)); + verifySchema(result, + schema("f1", null, "long"), + schema("f2", null, "long"), + schema("f3", null, "long")); + verifySome(result.getJSONArray("datarows"), rows(63390556800L, 63767461200L, 63767461200L)); + } + + @Test + public void testStrToDate() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f = str_to_date('01,5,2013', '%s') | fields f", TEST_INDEX_DATE, "%d,%m,%Y")); + verifySchema(result, schema("f", null, "datetime")); + verifySome(result.getJSONArray("datarows"), rows("2013-05-01 00:00:00")); + } + + @Test + public void testTimeStampAdd() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f = timestampadd(YEAR, 15, '2001-03-06 00:00:00') | fields f", TEST_INDEX_DATE)); + verifySchema(result, schema("f", null, "datetime")); + verifySome(result.getJSONArray("datarows"), rows("2016-03-06 00:00:00")); + } + + @Test + public void testTimestampDiff() throws 
IOException{ + var result = executeQuery(String.format("source=%s | eval f = timestampdiff(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | fields f", TEST_INDEX_DATE)); + verifySchema(result, schema("f", null, "datetime")); + verifySome(result.getJSONArray("datarows"), rows(4)); + } + + @Test + public void testExtract() throws IOException{ + var result = executeQuery(String.format("source=%s | eval f1 = extract(YEAR FROM '1997-01-01 00:00:00') | eval f2 = extract(MINUTE FROM time('10:17:36')) | fields f1, f2", TEST_INDEX_DATE)); + verifySchema(result, + schema("f1", null, "long"), + schema("f2", null, "long")); + verifySome(result.getJSONArray("datarows"), rows(1997L, 17L)); + } + + } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 908dbe5262..e74aed30eb 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -262,8 +262,11 @@ DAYOFWEEK: 'DAYOFWEEK'; DAYOFYEAR: 'DAYOFYEAR'; DAY_OF_MONTH: 'DAY_OF_MONTH'; DAY_OF_WEEK: 'DAY_OF_WEEK'; +EXTRACT: 'EXTRACT'; FROM_DAYS: 'FROM_DAYS'; FROM_UNIXTIME: 'FROM_UNIXTIME'; +GET_FORMAT: 'GET_FORMAT'; +LAST_DAY: 'LAST_DAY'; LOCALTIME: 'LOCALTIME'; LOCALTIMESTAMP: 'LOCALTIMESTAMP'; MAKEDATE: 'MAKEDATE'; @@ -272,19 +275,26 @@ MONTHNAME: 'MONTHNAME'; NOW: 'NOW'; PERIOD_ADD: 'PERIOD_ADD'; PERIOD_DIFF: 'PERIOD_DIFF'; +SEC_TO_TIME: 'SEC_TO_TIME'; +STR_TO_DATE: 'STR_TO_DATE'; SUBDATE: 'SUBDATE'; SUBTIME: 'SUBTIME'; SYSDATE: 'SYSDATE'; TIME: 'TIME'; TIMEDIFF: 'TIMEDIFF'; TIMESTAMP: 'TIMESTAMP'; +TIMESTAMPADD: 'TIMESTAMPADD'; +TIMESTAMPDIFF: 'TIMESTAMPDIFF'; TIME_FORMAT: 'TIME_FORMAT'; TIME_TO_SEC: 'TIME_TO_SEC'; TO_DAYS: 'TO_DAYS'; +TO_SECONDS: 'TO_SECONDS'; UNIX_TIMESTAMP: 'UNIX_TIMESTAMP'; UTC_DATE: 'UTC_DATE'; UTC_TIME: 'UTC_TIME'; UTC_TIMESTAMP: 'UTC_TIMESTAMP'; +WEEKDAY: 'WEEKDAY'; +YEARWEEK: 'YEARWEEK'; // TEXT FUNCTIONS SUBSTR: 'SUBSTR'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 
b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 853af31443..9cde1bfbb8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -287,6 +287,9 @@ valueExpression right=valueExpression #binaryArithmetic | primaryExpression #valueExpressionDefault | positionFunction #positionFunctionCall + | extractFunction #extractFunctionCall + | getFormatFunction #getFormatFunctionCall + | timestampFunction #timestampFunctionCall | LT_PRTHS valueExpression RT_PRTHS #parentheticValueExpr ; @@ -538,6 +541,7 @@ dateTimeFunctionName | FROM_UNIXTIME | HOUR | HOUR_OF_DAY + | LAST_DAY | LOCALTIME | LOCALTIMESTAMP | MAKEDATE @@ -555,6 +559,8 @@ dateTimeFunctionName | QUARTER | SECOND | SECOND_OF_MINUTE + | SEC_TO_TIME + | STR_TO_DATE | SUBDATE | SUBTIME | SYSDATE @@ -564,13 +570,71 @@ dateTimeFunctionName | TIME_FORMAT | TIME_TO_SEC | TO_DAYS + | TO_SECONDS | UNIX_TIMESTAMP | UTC_DATE | UTC_TIME | UTC_TIMESTAMP | WEEK + | WEEKDAY | WEEK_OF_YEAR | YEAR + | YEARWEEK + ; + +getFormatFunction + : GET_FORMAT LT_PRTHS getFormatType COMMA functionArg RT_PRTHS + ; + +getFormatType + : DATE + | DATETIME + | TIME + | TIMESTAMP + ; + +extractFunction + : EXTRACT LT_PRTHS datetimePart FROM functionArg RT_PRTHS + ; + +simpleDateTimePart + : MICROSECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + ; + +complexDateTimePart + : SECOND_MICROSECOND + | MINUTE_MICROSECOND + | MINUTE_SECOND + | HOUR_MICROSECOND + | HOUR_SECOND + | HOUR_MINUTE + | DAY_MICROSECOND + | DAY_SECOND + | DAY_MINUTE + | DAY_HOUR + | YEAR_MONTH + ; + +datetimePart + : simpleDateTimePart + | complexDateTimePart + ; + +timestampFunction + : timestampFunctionName LT_PRTHS simpleDateTimePart COMMA firstArg=functionArg COMMA secondArg=functionArg RT_PRTHS + ; + +timestampFunctionName + : TIMESTAMPADD + | TIMESTAMPDIFF ; /** condition function return boolean value */ diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java 
b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index eddee3064e..c775747ec4 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -297,6 +297,60 @@ public UnresolvedExpression visitPositionFunction( visitFunctionArg(ctx.functionArg(1)))); } + @Override + public UnresolvedExpression visitExtractFunctionCall( + OpenSearchPPLParser.ExtractFunctionCallContext ctx) { + return new Function( + ctx.extractFunction().EXTRACT().toString(), + getExtractFunctionArguments(ctx)); + } + + private List<UnresolvedExpression> getExtractFunctionArguments( + OpenSearchPPLParser.ExtractFunctionCallContext ctx) { + List<UnresolvedExpression> args = Arrays.asList( + new Literal(ctx.extractFunction().datetimePart().getText(), DataType.STRING), + visitFunctionArg(ctx.extractFunction().functionArg()) + ); + return args; + } + + @Override + public UnresolvedExpression visitGetFormatFunctionCall( + OpenSearchPPLParser.GetFormatFunctionCallContext ctx) { + return new Function( + ctx.getFormatFunction().GET_FORMAT().toString(), + getFormatFunctionArguments(ctx)); + } + + private List<UnresolvedExpression> getFormatFunctionArguments( + OpenSearchPPLParser.GetFormatFunctionCallContext ctx) { + List<UnresolvedExpression> args = Arrays.asList( + new Literal(ctx.getFormatFunction().getFormatType().getText(), DataType.STRING), + visitFunctionArg(ctx.getFormatFunction().functionArg()) + ); + return args; + } + + @Override + public UnresolvedExpression visitTimestampFunctionCall( + OpenSearchPPLParser.TimestampFunctionCallContext ctx) { + return new Function( + ctx.timestampFunction().timestampFunctionName().getText(), + timestampFunctionArguments(ctx)); + } + + private List<UnresolvedExpression> timestampFunctionArguments( + OpenSearchPPLParser.TimestampFunctionCallContext ctx) { + List<UnresolvedExpression> args = Arrays.asList( + new Literal( + ctx.timestampFunction().simpleDateTimePart().getText(), + DataType.STRING), + visitFunctionArg(ctx.timestampFunction().firstArg), + 
visitFunctionArg(ctx.timestampFunction().secondArg) + ); + return args; + } + /** * Literal and value. */ diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index bbc566e2ba..8ca6c5c84e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -7,11 +7,15 @@ package org.opensearch.sql.ppl.antlr; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; +import java.util.List; import org.antlr.v4.runtime.tree.ParseTree; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.opensearch.sql.common.antlr.SyntaxCheckException; public class PPLSyntaxParserTest { @@ -135,7 +139,7 @@ public void testTopCommandWithoutNAndGroupByShouldPass() { } @Test - public void can_parse_multi_match_relevance_function() { + public void testCanParseMultiMatchRelevanceFunction() { assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address'], 'query')")); assertNotEquals(null, new PPLSyntaxParser().parse( @@ -168,7 +172,7 @@ public void can_parse_multi_match_relevance_function() { } @Test - public void can_parse_simple_query_string_relevance_function() { + public void testCanParseSimpleQueryStringRelevanceFunction() { assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address'], 'query')")); assertNotEquals(null, new PPLSyntaxParser().parse( @@ -201,7 +205,7 @@ public void can_parse_simple_query_string_relevance_function() { } @Test - public void can_parse_query_string_relevance_function() { + public void testCanParseQueryStringRelevanceFunction() { assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE query_string(['address'], 'query')")); 
assertNotEquals(null, new PPLSyntaxParser().parse( @@ -268,5 +272,56 @@ public void testDescribeCommandWithSourceShouldFail() { new PPLSyntaxParser().parse("describe source=t"); } + + @Test + public void testCanParseExtractFunction() { + String[] parts = List.of("MICROSECOND", "SECOND", "MINUTE", "HOUR", "DAY", + "WEEK", "MONTH", "QUARTER", "YEAR", "SECOND_MICROSECOND", + "MINUTE_MICROSECOND", "MINUTE_SECOND", "HOUR_MICROSECOND", + "HOUR_SECOND", "HOUR_MINUTE", "DAY_MICROSECOND", + "DAY_SECOND", "DAY_MINUTE", "DAY_HOUR", "YEAR_MONTH").toArray(new String[0]); + + for (String part : parts) { + assertNotNull(new PPLSyntaxParser().parse( + String.format("SOURCE=test | eval k = extract(%s FROM \"2023-02-06\")", part))); + } + } + + @Test + public void testCanParseGetFormatFunction() { + String[] types = {"DATE", "DATETIME", "TIME", "TIMESTAMP"}; + String[] formats = {"'USA'", "'JIS'", "'ISO'", "'EUR'", "'INTERNAL'"}; + + for (String type : types) { + for (String format : formats) { + assertNotNull(new PPLSyntaxParser().parse( + String.format("SOURCE=test | eval k = get_format(%s, %s)", type, format))); + } + } + } + + @Test + public void testCannotParseGetFormatFunctionWithBadArg() { + assertThrows( + SyntaxCheckException.class, + () -> new PPLSyntaxParser().parse( + "SOURCE=test | eval k = GET_FORMAT(NONSENSE_ARG,'INTERNAL')")); + } + + @Test + public void testCanParseTimestampaddFunction() { + assertNotNull(new PPLSyntaxParser().parse( + "SOURCE=test | eval k = TIMESTAMPADD(MINUTE, 1, '2003-01-02')")); + assertNotNull(new PPLSyntaxParser().parse( + "SOURCE=test | eval k = TIMESTAMPADD(WEEK,1,'2003-01-02')")); + } + + @Test + public void testCanParseTimestampdiffFunction() { + assertNotNull(new PPLSyntaxParser().parse( + "SOURCE=test | eval k = TIMESTAMPDIFF(MINUTE, '2003-01-02', '2003-01-02')")); + assertNotNull(new PPLSyntaxParser().parse( + "SOURCE=test | eval k = TIMESTAMPDIFF(WEEK,'2003-01-02','2003-01-02')")); + } } diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 3f3c0e50ba..a6e130eed3 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.cast; import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.dsl.AstDSL.dateLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.defaultFieldsArgs; import static org.opensearch.sql.ast.dsl.AstDSL.defaultSortFieldArgs; import static org.opensearch.sql.ast.dsl.AstDSL.defaultStatsArgs; @@ -40,6 +41,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.relation; import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.timestampLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; import static org.opensearch.sql.ast.dsl.AstDSL.xor; @@ -869,4 +871,62 @@ public void indexCanBeId() { defaultStatsArgs() )); } + + @Test + public void testExtractFunctionExpr() { + assertEqual("source=t | eval f=extract(day from '2001-05-07 10:11:12')", + eval( + relation("t"), + let( + field("f"), + function("extract", + stringLiteral("day"), stringLiteral("2001-05-07 10:11:12")) + ) + )); + } + + + @Test + public void testGet_FormatFunctionExpr() { + assertEqual("source=t | eval f=get_format(DATE,'USA')", + eval( + relation("t"), + let( + field("f"), + function("get_format", + stringLiteral("DATE"), stringLiteral("USA")) + ) + )); + } + + @Test + public void testTimeStampAddFunctionExpr() { + assertEqual("source=t | eval f=timestampadd(YEAR, 15, '2001-03-06 00:00:00')", + eval( + relation("t"), + let( + field("f"), + 
function("timestampadd", + stringLiteral("YEAR"), + intLiteral(15), + stringLiteral("2001-03-06 00:00:00")) + ) + )); + } + + @Test + public void testTimeStampDiffFunctionExpr() { + assertEqual("source=t | eval f=timestampdiff(" + + "YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')", + eval( + relation("t"), + let( + field("f"), + function("timestampdiff", + stringLiteral("YEAR"), + stringLiteral("1997-01-01 00:00:00"), + stringLiteral("2001-03-06 00:00:00")) + ) + )); + } }