From f0cc08b8de4f75cfa5c6ec64ca4483ad20aa511c Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 18 Nov 2024 12:56:01 +0800 Subject: [PATCH 1/5] refactor ppl docs to keep consistent look Signed-off-by: Lantao Jin --- docs/ppl-lang/README.md | 2 +- .../{ppl-lambda.md => ppl-collection.md} | 80 +- docs/ppl-lang/functions/ppl-json.md | 214 ++++- docs/ppl-lang/ppl-dedup-command.md | 8 +- docs/ppl-lang/ppl-eval-command.md | 6 +- docs/ppl-lang/ppl-fields-command.md | 6 +- docs/ppl-lang/ppl-fieldsummary-command.md | 4 +- docs/ppl-lang/ppl-grok-command.md | 2 +- docs/ppl-lang/ppl-join-command.md | 256 +++--- docs/ppl-lang/ppl-lookup-command.md | 83 +- docs/ppl-lang/ppl-rare-command.md | 4 +- docs/ppl-lang/ppl-subquery-command.md | 401 +++------ docs/ppl-lang/ppl-top-command.md | 2 +- docs/ppl-lang/ppl-trendline-command.md | 2 +- .../src/main/antlr4/OpenSearchPPLLexer.tokens | 798 ++++++++++++++++++ 15 files changed, 1364 insertions(+), 504 deletions(-) rename docs/ppl-lang/functions/{ppl-lambda.md => ppl-collection.md} (57%) create mode 100644 ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index 9df9f5986..19e1a6ee0 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -94,7 +94,7 @@ For additional examples see the next [documentation](PPL-Example-Commands.md). - [`IP Address Functions`](functions/ppl-ip.md) - - [`Lambda Functions`](functions/ppl-lambda.md) + - [`Collection Functions`](functions/ppl-collection) --- ### PPL On Spark diff --git a/docs/ppl-lang/functions/ppl-lambda.md b/docs/ppl-lang/functions/ppl-collection.md similarity index 57% rename from docs/ppl-lang/functions/ppl-lambda.md rename to docs/ppl-lang/functions/ppl-collection.md index cdb6f9e8f..b98f5f5ca 100644 --- a/docs/ppl-lang/functions/ppl-lambda.md +++ b/docs/ppl-lang/functions/ppl-collection.md @@ -1,4 +1,56 @@ -## Lambda Functions +## PPL Collection Functions + +### `ARRAY` + +**Description** + +`array(...)` Returns an array with the given elements. + +**Argument type:** +- A \ can be any kind of value such as string, number, or boolean. + +**Return type:** ARRAY + +Example: + + os> source=people | eval `array` = array(1, 2, 0, -1, 1.1, -0.11) + fetched rows / total rows = 1/1 + +------------------------------+ + | array | + +------------------------------+ + | [1.0,2.0,0.0,-1.0,1.1,-0.11] | + +------------------------------+ + os> source=people | eval `array` = array(true, false, true, true) + fetched rows / total rows = 1/1 + +------------------------------+ + | array | + +------------------------------+ + | [true, false, true, true] | + +------------------------------+ + + +### `ARRAY_LENGTH` + +**Description** + +`array_length(array)` Returns the number of elements in the outermost array. + +**Argument type:** ARRAY + +ARRAY or JSON_ARRAY object. 
+ +**Return type:** INTEGER + +Example: + + os> source=people | eval `array` = array_length(array(1,2,3,4)), `empty_array` = array_length(array()) + fetched rows / total rows = 1/1 + +---------+---------------+ + | array | empty_array | + +---------+---------------+ + | 4 | 0 | + +---------+---------------+ + ### `FORALL` @@ -14,7 +66,7 @@ Returns `TRUE` if all elements in the array satisfy the lambda predicate, otherw Example: - os> source=people | eval array = json_array(1, -1, 2), result = forall(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, -1, 2), result = forall(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -22,7 +74,7 @@ Example: | false | +-----------+ - os> source=people | eval array = json_array(1, 3, 2), result = forall(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, 3, 2), result = forall(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -41,7 +93,7 @@ Consider constructing the following array: and perform lambda functions against the nested fields `a` or `b`. See the examples: - os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result + os> source=people | eval array = array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -49,7 +101,7 @@ and perform lambda functions against the nested fields `a` or `b`. See the examp | false | +-----------+ - os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result + os> source=people | eval array = array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -71,7 +123,7 @@ Returns `TRUE` if at least one element in the array satisfies the lambda predica Example: - os> source=people | eval array = json_array(1, -1, 2), result = exists(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, -1, 2), result = exists(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -79,7 +131,7 @@ Example: | true | +-----------+ - os> source=people | eval array = json_array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result + os> source=people | eval array = array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -102,7 +154,7 @@ An ARRAY that contains all elements in the input array that satisfy the lambda p Example: - os> source=people | eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, -1, 2), result = filter(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -110,7 +162,7 @@ Example: | [1, 2] | +-----------+ - os> source=people | eval array = json_array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result + os> source=people | eval array = array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -132,7 +184,7 @@ An ARRAY that contains the result of applying the lambda transform function to e Example: - os> 
source=people | eval array = json_array(1, 2, 3), result = transform(array, x -> x + 1) | fields result + os> source=people | eval array = array(1, 2, 3), result = transform(array, x -> x + 1) | fields result fetched rows / total rows = 1/1 +--------------+ | result | @@ -140,7 +192,7 @@ Example: | [2, 3, 4] | +--------------+ - os> source=people | eval array = json_array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result + os> source=people | eval array = array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result fetched rows / total rows = 1/1 +--------------+ | result | @@ -162,7 +214,7 @@ The final result of applying the lambda functions to the start value and the inp Example: - os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result + os> source=people | eval array = array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -170,7 +222,7 @@ Example: | 6 | +-----------+ - os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result + os> source=people | eval array = array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -178,7 +230,7 @@ Example: | 16 | +-----------+ - os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result + os> source=people | eval array = array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result fetched rows / total rows = 1/1 +-----------+ | result | diff --git a/docs/ppl-lang/functions/ppl-json.md b/docs/ppl-lang/functions/ppl-json.md index 5b26ee427..2c0c0ca67 100644 --- a/docs/ppl-lang/functions/ppl-json.md +++ b/docs/ppl-lang/functions/ppl-json.md @@ -95,6 +95,11 @@ Example: | {"array":[1.0,2.0,0.0,-1.0,1.1,-0.11]} | +----------------------------------------+ +**Limitation** + +The list of parameters of `json_array` should all be the same type. +`json_array('this', 'is', 1.1, -0.11, true, false)` throws exception. + ### `TO_JSON_STRING` **Description** @@ -149,29 +154,6 @@ Example: +-----------+-----------+-------------+ -### `ARRAY_LENGTH` - -**Description** - -`array_length(jsonArray)` Returns the number of elements in the outermost array. - -**Argument type:** ARRAY - -ARRAY or JSON_ARRAY object. - -**Return type:** INTEGER - -Example: - - os> source=people | eval `json_array` = json_array_length(json_array(1,2,3,4)), `empty_array` = json_array_length(json_array()) - fetched rows / total rows = 1/1 - +--------------+---------------+ - | json_array | empty_array | - +--------------+---------------+ - | 4 | 0 | - +--------------+---------------+ - - ### `JSON_EXTRACT` **Description** @@ -280,3 +262,189 @@ Example: |------------------+---------| | 13 | null | +------------------+---------+ + +### `FORALL` + +**Description** + +`forall(json_array, lambda)` Evaluates whether a lambda predicate holds for all elements in the json_array. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** BOOLEAN + +Returns `TRUE` if all elements in the array satisfy the lambda predicate, otherwise `FALSE`. 
+ +Example: + + os> source=people | eval array = json_array(1, -1, 2), result = forall(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | false | + +-----------+ + + os> source=people | eval array = json_array(1, 3, 2), result = forall(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | true | + +-----------+ + +**Note:** The lambda expression can access the nested fields of the array elements. This applies to all lambda functions introduced in this document. + +Consider constructing the following array: + + array = [ + {"a":1, "b":1}, + {"a":-1, "b":2} + ] + +and perform lambda functions against the nested fields `a` or `b`. See the examples: + + os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | false | + +-----------+ + + os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | true | + +-----------+ + +### `EXISTS` + +**Description** + +`exists(json_array, lambda)` Evaluates whether a lambda predicate holds for one or more elements in the json_array. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** BOOLEAN + +Returns `TRUE` if at least one element in the array satisfies the lambda predicate, otherwise `FALSE`. + +Example: + + os> source=people | eval array = json_array(1, -1, 2), result = exists(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | true | + +-----------+ + + os> source=people | eval array = json_array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | false | + +-----------+ + + +### `FILTER` + +**Description** + +`filter(json_array, lambda)` Filters the input json_array using the given lambda function. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** ARRAY + +An ARRAY that contains all elements in the input json_array that satisfy the lambda predicate. + +Example: + + os> source=people | eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | [1, 2] | + +-----------+ + + os> source=people | eval array = json_array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | [] | + +-----------+ + +### `TRANSFORM` + +**Description** + +`transform(json_array, lambda)` Transform elements in a json_array using the lambda transform function. The second argument implies the index of the element if using binary lambda function. This is similar to a `map` in functional programming. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** ARRAY + +An ARRAY that contains the result of applying the lambda transform function to each element in the input array. 
+ +Example: + + os> source=people | eval array = json_array(1, 2, 3), result = transform(array, x -> x + 1) | fields result + fetched rows / total rows = 1/1 + +--------------+ + | result | + +--------------+ + | [2, 3, 4] | + +--------------+ + + os> source=people | eval array = json_array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result + fetched rows / total rows = 1/1 + +--------------+ + | result | + +--------------+ + | [1, 3, 5] | + +--------------+ + +### `REDUCE` + +**Description** + +`reduce(json_array, start, merge_lambda, finish_lambda)` Applies a binary merge lambda function to a start value and all elements in the json_array, and reduces this to a single state. The final state is converted into the final result by applying a finish lambda function. + +**Argument type:** ARRAY, ANY, LAMBDA, LAMBDA + +**Return type:** ANY + +The final result of applying the lambda functions to the start value and the input json_array. + +Example: + + os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | 6 | + +-----------+ + + os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | 16 | + +-----------+ + + os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | 60 | + +-----------+ diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md index 28fe7f4a4..831c4926f 100644 --- a/docs/ppl-lang/ppl-dedup-command.md +++ b/docs/ppl-lang/ppl-dedup-command.md @@ -1,6 +1,6 @@ -# PPL dedup command +## PPL dedup command -## Table of contents +### Table of contents - [Description](#description) - [Syntax](#syntax) @@ -11,11 +11,11 @@ - [Example 4: Dedup in consecutive document](#example-4-dedup-in-consecutive-document) - [Limitation](#limitation) -## Description +### Description Using `dedup` command to remove identical document defined by field from the search result. -## Syntax +### Syntax ```sql dedup [int] [keepempty=] [consecutive=] diff --git a/docs/ppl-lang/ppl-eval-command.md b/docs/ppl-lang/ppl-eval-command.md index 1908c087c..e98d4d4f2 100644 --- a/docs/ppl-lang/ppl-eval-command.md +++ b/docs/ppl-lang/ppl-eval-command.md @@ -1,10 +1,10 @@ -# PPL `eval` command +## PPL `eval` command -## Description +### Description The ``eval`` command evaluate the expression and append the result to the search result. -## Syntax +### Syntax ```sql eval = ["," = ]... ``` diff --git a/docs/ppl-lang/ppl-fields-command.md b/docs/ppl-lang/ppl-fields-command.md index e37fc644f..4ef041ee2 100644 --- a/docs/ppl-lang/ppl-fields-command.md +++ b/docs/ppl-lang/ppl-fields-command.md @@ -1,12 +1,12 @@ ## PPL `fields` command -**Description** +### Description Using ``field`` command to keep or remove fields from the search result. -**Syntax** +### Syntax -field [+|-] +`field [+|-] ` * index: optional. if the plus (+) is used, only the fields specified in the field list will be keep. if the minus (-) is used, all the fields specified in the field list will be removed. **Default** + * field list: mandatory. comma-delimited keep or remove fields. 
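+
+As an illustration, a minimal sketch assuming a hypothetical `accounts` index with `account_number`, `firstname`, `lastname` and `balance` fields (names not taken from this change):
+
+```
+source=accounts | fields account_number, firstname, lastname // keep only the listed fields
+source=accounts | fields - balance // remove `balance` from the result
+```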
diff --git a/docs/ppl-lang/ppl-fieldsummary-command.md b/docs/ppl-lang/ppl-fieldsummary-command.md index 468c2046b..2015cf815 100644 --- a/docs/ppl-lang/ppl-fieldsummary-command.md +++ b/docs/ppl-lang/ppl-fieldsummary-command.md @@ -1,11 +1,11 @@ ## PPL `fieldsummary` command -**Description** +### Description Using `fieldsummary` command to : - Calculate basic statistics for each field (count, distinct count, min, max, avg, stddev, mean ) - Determine the data type of each field -**Syntax** +### Syntax `... | fieldsummary (nulls=true/false)` diff --git a/docs/ppl-lang/ppl-grok-command.md b/docs/ppl-lang/ppl-grok-command.md index 06028109b..a9b5645c5 100644 --- a/docs/ppl-lang/ppl-grok-command.md +++ b/docs/ppl-lang/ppl-grok-command.md @@ -1,4 +1,4 @@ -## PPL Correlation Command +## PPL Grok Command ### Description diff --git a/docs/ppl-lang/ppl-join-command.md b/docs/ppl-lang/ppl-join-command.md index b374bce5f..95b375e0a 100644 --- a/docs/ppl-lang/ppl-join-command.md +++ b/docs/ppl-lang/ppl-join-command.md @@ -1,10 +1,115 @@ ## PPL Join Command -## Overview +### Description -[Trace analytics](https://opensearch.org/docs/latest/observability-plugin/trace/ta-dashboards/) considered using SQL/PPL for its queries, but some graphs rely on joining two indices (span index and service map index) together which is not supported by SQL/PPL. Trace analytics was implemented with DSL + javascript, would be good if `join` being added to SQL could support this use case. +`JOIN` command combines two datasets together. The left side could be an index or results from a piped commands, the right side could be either an index or a subquery. -### Schema +### Syntax + +`[joinType] join [leftAlias] [rightAlias] [joinHints] on ` + +**joinType** +- Syntax: `[INNER] | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | CROSS | [LEFT] SEMI | [LEFT] ANTI` +- Optional +- Description: The type of join to perform. The default is `INNER` if not specified. + +**leftAlias** +- Syntax: `left = ` +- Optional +- Description: The subquery alias to use with the left join side, to avoid ambiguous naming. + +**rightAlias** +- Syntax: `right = ` +- Optional +- Description: The subquery alias to use with the right join side, to avoid ambiguous naming. + +**joinHints** +- Syntax: `[hint.left.key1 = value1 hint.right.key2 = value2]` +- Optional +- Description: Zero or more space-separated join hints in the form of `Key` = `Value`. The key must start with `hint.left.` or `hint.right.` + +**joinCriteria** +- Syntax: `` +- Required +- Description: The syntax starts with `ON`. It could be any comparison expression. Generally, the join criteria looks like `.=.`. For example: `l.id = r.id`. If the join criteria contains multiple conditions, you can specify `AND` and `OR` operator between each comparison expression. For example, `l.id = r.id AND l.email = r.email AND (r.age > 65 OR r.age < 18)`. + +**right-dataset** +- Required +- Description: Right dataset could be either an index or a subquery with/without alias. 
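+
+As a quick illustration of the syntax above, a minimal sketch with aliases and a compound join criteria (the `employees`/`departments` indices and their fields are hypothetical, unlike the datasets used in the examples below):
+
+```
+source = employees // hypothetical left side
+| left outer join left = e right = d
+    ON e.dept_id = d.dept_id AND (d.region = 'EU' OR d.region = 'US')
+    departments // hypothetical right side
+| fields e.name, d.dept_name
+```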
+ +### Example 1: two indices join + +PPL query: + + os> source=customer | join ON c_custkey = o_custkey orders + | fields c_custkey, c_nationkey, c_mktsegment, o_orderkey, o_orderstatus, o_totalprice | head 10 + fetched rows / total rows = 10/10 + +----------+-------------+-------------+------------+---------------+-------------+ + | c_custkey| c_nationkey | c_mktsegment| o_orderkey | o_orderstatus | o_totalprice| + +----------+-------------+-------------+------------+---------------+-------------+ + | 36901 | 13 | AUTOMOBILE | 1 | O | 173665.47 | + | 78002 | 10 | AUTOMOBILE | 2 | O | 46929.18 | + | 123314 | 15 | MACHINERY | 3 | F | 193846.25 | + | 136777 | 10 | HOUSEHOLD | 4 | O | 32151.78 | + | 44485 | 20 | FURNITURE | 5 | F | 144659.2 | + | 55624 | 7 | AUTOMOBILE | 6 | F | 58749.59 | + | 39136 | 5 | FURNITURE | 7 | O | 252004.18 | + | 130057 | 9 | FURNITURE | 32 | O | 208660.75 | + | 66958 | 18 | MACHINERY | 33 | F | 163243.98 | + | 61001 | 3 | FURNITURE | 34 | O | 58949.67 | + +----------+-------------+-------------+------------+---------------+-------------+ + +### Example 2: three indices join + +PPL query: + + os> source=customer | join ON c_custkey = o_custkey orders | join ON c_nationkey = n_nationkey nation + | fields c_custkey, c_mktsegment, o_orderkey, o_orderstatus, o_totalprice, n_name | head 10 + fetched rows / total rows = 10/10 + +----------+-------------+------------+---------------+-------------+--------------+ + | c_custkey| c_mktsegment| o_orderkey | o_orderstatus | o_totalprice| n_name | + +----------+-------------+------------+---------------+-------------+--------------+ + | 36901 | AUTOMOBILE | 1 | O | 173665.47 | JORDAN | + | 78002 | AUTOMOBILE | 2 | O | 46929.18 | IRAN | + | 123314 | MACHINERY | 3 | F | 193846.25 | MOROCCO | + | 136777 | HOUSEHOLD | 4 | O | 32151.78 | IRAN | + | 44485 | FURNITURE | 5 | F | 144659.2 | SAUDI ARABIA | + | 55624 | AUTOMOBILE | 6 | F | 58749.59 | GERMANY | + | 39136 | FURNITURE | 7 | O | 252004.18 | ETHIOPIA | + | 130057 | FURNITURE | 32 | O | 208660.75 | INDONESIA | + | 66958 | MACHINERY | 33 | F | 163243.98 | CHINA | + | 61001 | FURNITURE | 34 | O | 58949.67 | CANADA | + +----------+-------------+------------+---------------+-------------+--------------+ + +### Example 3: join a subquery in right side + +PPL query: + + os>source=supplier| join right = revenue0 ON s_suppkey = supplier_no + [ + source=lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) + | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no + ] + | fields s_name, s_phone, total_revenue, supplier_no | head 10 + fetched rows / total rows = 10/10 + +---------------------+----------------+-------------------+-------------+ + | s_name | s_phone | total_revenue | supplier_no | + +---------------------+----------------+-------------------+-------------+ + | Supplier#000007747 | 24-911-546-3505| 636204.0279 | 7747 | + | Supplier#000007748 | 29-535-184-2277| 538311.8099 | 7748 | + | Supplier#000007749 | 18-225-478-7489| 743462.4473000001 | 7749 | + | Supplier#000007750 | 28-680-484-7044| 616828.2220999999 | 7750 | + | Supplier#000007751 | 20-990-606-7343| 1092975.1925 | 7751 | + | Supplier#000007752 | 12-936-258-6650| 1090399.9666 | 7752 | + | Supplier#000007753 | 22-394-329-1153| 777130.7457000001 | 7753 | + | Supplier#000007754 | 26-941-591-5320| 866600.0501 | 7754 | + | Supplier#000007755 | 32-138-467-4225| 702256.7030000001 | 7755 | + | Supplier#000007756 
| 29-860-205-8019| 1304979.0511999999| 7756 | + +---------------------+----------------+-------------------+-------------+ + +### Example 4: complex example in OTEL + +**Schema** There will be at least 2 indices, `otel-v1-apm-span-*` (large) and `otel-v1-apm-service-map` (small). @@ -30,154 +135,47 @@ Relevant fields from indices: Full schemas are defined in data-prepper repo: [`otel-v1-apm-span-*`](https://github.com/opensearch-project/data-prepper/blob/04dd7bd18977294800cf4b77d7f01914def75f23/docs/schemas/trace-analytics/otel-v1-apm-span-index-template.md), [`otel-v1-apm-service-map`](https://github.com/opensearch-project/data-prepper/blob/4e5f83814c4a0eed2a1ca9bab0693b9e32240c97/docs/schemas/trace-analytics/otel-v1-apm-service-map-index-template.md) -### Requirement - -Support `join` to calculate the following: +**Requirement** For each service, join span index on service map index to calculate metrics under different type of filters. ![image](https://user-images.githubusercontent.com/28062824/194170062-f0dd1d57-c5eb-44db-95e0-6b3b4e52f25a.png) -This sample query calculates latency when filtered by trace group `client_cancel_order` for the `order` service. I only have a subquery example, don't have the join version of the query.. - -```sql -SELECT avg(durationInNanos) -FROM `otel-v1-apm-span-000001` t1 -WHERE t1.serviceName = `order` - AND ((t1.name in - (SELECT target.resource - FROM `otel-v1-apm-service-map` - WHERE serviceName = `order` - AND traceGroupName = `client_cancel_order`) - AND t1.parentSpanId != NULL) - OR (t1.parentSpanId = NULL - AND t1.name = `client_cancel_order`)) - AND t1.traceId in - (SELECT traceId - FROM `otel-v1-apm-span-000001` - WHERE serviceName = `order`) -``` -## Migrate to PPL - -### Syntax of Join Command - -```sql -SEARCH source= -| -| [joinType] JOIN - [leftAlias] - [rightAlias] - [joinHints] - ON joinCriteria - -| -``` -**joinType** -- Syntax: `[INNER] | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | CROSS | [LEFT] SEMI | [LEFT] ANTI` -- Optional -- Description: The type of join to perform. The default is `INNER` if not specified. +This sample query calculates latency when filtered by trace group `client_cancel_order` for the `order` service. I only have a subquery example, don't have the join version of the query. -**leftAlias** -- Syntax: `left = ` -- Optional -- Description: The subquery alias to use with the left join side, to avoid ambiguous naming. - -**rightAlias** -- Syntax: `right = ` -- Optional -- Description: The subquery alias to use with the right join side, to avoid ambiguous naming. - -**joinHints** -- Syntax: `[hint.left.key1 = value1 hint.right.key2 = value2]` -- Optional -- Description: Zero or more space-separated join hints in the form of `Key` = `Value`. The key must start with `hint.left.` or `hint.right.` - -**joinCriteria** -- Syntax: `` -- Required -- Description: The syntax starts with `ON`. It could be any comparison expression. Generally, the join criteria looks like `.=.`. For example: `l.id = r.id`. If the join criteria contains multiple conditions, you can specify `AND` and `OR` operator between each comparison expression. For example, `l.id = r.id AND l.email = r.email AND (r.age > 65 OR r.age < 18)`. - -**right-table** -- Required -- Description: The index or table name of join right-side. Sub-search is unsupported in join right side for now. 
- -### Rewriting -```sql -SEARCH source=otel-v1-apm-span-000001 +PPL query: +``` +source=otel-v1-apm-span-000001 | WHERE serviceName = 'order' | JOIN left=t1 right=t2 ON t1.traceId = t2.traceId AND t2.serviceName = 'order' - otel-v1-apm-span-000001 -- self inner join -| EVAL s_name = t1.name -- rename to avoid ambiguous -| EVAL s_parentSpanId = t1.parentSpanId -- RENAME command would be better when it is supported -| EVAL s_durationInNanos = t1.durationInNanos -| FIELDS s_name, s_parentSpanId, s_durationInNanos -- reduce colunms in join + otel-v1-apm-span-000001 // self inner join +| RENAME s_name as t1.name +| RENAME s_parentSpanId as t1.parentSpanId +| RENAME s_durationInNanos as t1.durationInNanos +| FIELDS s_name, s_parentSpanId, s_durationInNanos // reduce colunms in join | LEFT JOIN left=s1 right=t3 ON s_name = t3.target.resource AND t3.serviceName = 'order' AND t3.traceGroupName = 'client_cancel_order' otel-v1-apm-service-map | WHERE (s_parentSpanId IS NOT NULL OR (s_parentSpanId IS NULL AND s_name = 'client_cancel_order')) -| STATS avg(s_durationInNanos) -- no need to add alias if there is no ambiguous -``` - - -### More examples - -Migration from SQL query (TPC-H Q13): -```sql -SELECT c_count, COUNT(*) AS custdist -FROM - ( SELECT c_custkey, COUNT(o_orderkey) c_count - FROM customer LEFT OUTER JOIN orders ON c_custkey = o_custkey - AND o_comment NOT LIKE '%unusual%packages%' - GROUP BY c_custkey - ) AS c_orders -GROUP BY c_count -ORDER BY custdist DESC, c_count DESC; -``` -Rewritten by PPL Join query: -```sql -SEARCH source=customer -| FIELDS c_custkey -| LEFT OUTER JOIN - ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%' - orders -| STATS count(o_orderkey) AS c_count BY c_custkey -| STATS count() AS custdist BY c_count -| SORT - custdist, - c_count -``` -_- **Limitation: sub-searches is unsupported in join right side**_ - -If sub-searches is supported, above ppl query could be rewritten as: -```sql -SEARCH source=customer -| FIELDS c_custkey -| LEFT OUTER JOIN - ON c_custkey = o_custkey - [ - SEARCH source=orders - | WHERE o_comment NOT LIKE '%unusual%packages%' - | FIELDS o_orderkey, o_custkey - ] -| STATS count(o_orderkey) AS c_count BY c_custkey -| STATS count() AS custdist BY c_count -| SORT - custdist, - c_count +| STATS avg(s_durationInNanos) ``` ### Comparison with [Correlation](ppl-correlation-command) A primary difference between `correlate` and `join` is that both sides of `correlate` are tables, but both sides of `join` are subqueries. For example: -```sql +``` source = testTable1 - | where country = 'Canada' OR country = 'England' - | eval cname = lower(name) - | fields cname, country, year, month - | inner join left=l, right=r - ON l.cname = r.name AND l.country = r.country AND l.year = 2023 AND r.month = 4 - testTable2s +| where country = 'Canada' OR country = 'England' +| eval cname = lower(name) +| fields cname, country, year, month +| inner join left=l right=r + ON l.cname = r.name AND l.country = r.country AND l.year = 2023 AND r.month = 4 + testTable2s ``` The subquery alias `l` does not represent the `testTable1` table itself. 
Instead, it represents the subquery: -```sql +``` source = testTable1 | where country = 'Canada' OR country = 'England' | eval cname = lower(name) diff --git a/docs/ppl-lang/ppl-lookup-command.md b/docs/ppl-lang/ppl-lookup-command.md index 1b8350533..6768cdcaf 100644 --- a/docs/ppl-lang/ppl-lookup-command.md +++ b/docs/ppl-lang/ppl-lookup-command.md @@ -1,20 +1,18 @@ ## PPL Lookup Command -## Overview +### Description Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table). You can extend fields of an index with values from a dimension table, append or replace values when lookup condition is matched. As an alternative of [Join command](ppl-join-command), lookup command is more suitable for enriching the source data with a static dataset. -### Syntax of Lookup Command +### Syntax -```sql -SEARCH source= -| -| LOOKUP ( [AS ])... - [(REPLACE | APPEND) ( [AS ])...] -| ``` +LOOKUP ( [AS ])... + [(REPLACE | APPEND) ( [AS ])...] +``` + **lookupIndex** - Required - Description: the name of lookup index (dimension table) @@ -44,26 +42,49 @@ SEARCH source= - Description: If you specify REPLACE, matched values in \ field overwrite the values in result. If you specify APPEND, matched values in \ field only append to the missing values in result. ### Usage -> LOOKUP id AS cid REPLACE mail AS email
-> LOOKUP name REPLACE mail AS email
-> LOOKUP id AS cid, name APPEND address, mail AS email
-> LOOKUP id
- -### Example -```sql -SEARCH source= -| WHERE orderType = 'Cancelled' -| LOOKUP account_list, mkt_id AS mkt_code REPLACE amount, account_name AS name -| STATS count(mkt_code), avg(amount) BY name -``` -```sql -SEARCH source= -| DEDUP market_id -| EVAL category=replace(category, "-", ".") -| EVAL category=ltrim(category, "dvp.") -| LOOKUP bounce_category category AS category APPEND classification -``` -```sql -SEARCH source= -| LOOKUP bounce_category category -``` +- `LOOKUP id AS cid REPLACE mail AS email` +- `LOOKUP name REPLACE mail AS email` +- `LOOKUP id AS cid, name APPEND address, mail AS email` +- `LOOKUP id` + +### Examples 1: replace + +PPL query: + + os>source=people | LOOKUP work_info uid AS id REPLACE department | head 10 + fetched rows / total rows = 10/10 + +------+-----------+-------------+-----------+--------+------------------+ + | id | name | occupation | country | salary | department | + +------+-----------+-------------+-----------+--------+------------------+ + | 1000 | Daniel | Teacher | Canada | 56486 | CUSTOMER_SERVICE | + | 1001 | Joseph | Lawyer | Denmark | 135943 | FINANCE | + | 1002 | David | Artist | Finland | 60391 | DATA | + | 1003 | Charlotte | Lawyer | Denmark | 42173 | LEGAL | + | 1004 | Isabella | Veterinarian| Australia | 117699 | MARKETING | + | 1005 | Lily | Engineer | Italy | 37526 | IT | + | 1006 | Emily | Dentist | Denmark | 125340 | MARKETING | + | 1007 | James | Lawyer | Germany | 56532 | LEGAL | + | 1008 | Lucas | Lawyer | Japan | 87782 | DATA | + | 1009 | Sophia | Architect | Sweden | 37597 | MARKETING | + +------+-----------+-------------+-----------+--------+------------------+ + +### Examples 2: append + +PPL query: + + os>source=people| LOOKUP work_info uid AS ID, name APPEND department | where isnotnull(department) | head 10 + fetched rows / total rows = 10/10 + +------+---------+-------------+-------------+--------+------------+ + | id | name | occupation | country | salary | department | + +------+---------+-------------+-------------+--------+------------+ + | 1018 | Emma | Architect | USA | 72400 | IT | + | 1032 | James | Pilot | Netherlands | 71698 | SALES | + | 1043 | Jane | Nurse | Brazil | 45016 | FINANCE | + | 1046 | Joseph | Pharmacist | Mexico | 109152 | OPERATIONS | + | 1064 | Joseph | Electrician | New Zealand | 50253 | LEGAL | + | 1090 | Matthew | Psychologist| Germany | 73396 | DATA | + | 1103 | Emily | Electrician | Switzerland | 98391 | DATA | + | 1114 | Jake | Nurse | Denmark | 53418 | SALES | + | 1115 | Sofia | Engineer | Mexico | 64829 | OPERATIONS | + | 1122 | Oliver | Scientist | Netherlands | 31146 | DATA | + +------+---------+-------------+-------------+--------+------------+ diff --git a/docs/ppl-lang/ppl-rare-command.md b/docs/ppl-lang/ppl-rare-command.md index e3ad21f4e..8a2ca640f 100644 --- a/docs/ppl-lang/ppl-rare-command.md +++ b/docs/ppl-lang/ppl-rare-command.md @@ -1,11 +1,11 @@ ## PPL rare Command -**Description** +### Description Using ``rare`` command to find the least common tuple of values of all fields in the field list. **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. 
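+
+For instance, using the syntax described below, a minimal sketch assuming a hypothetical `accounts` index with `age` and `gender` fields:
+
+```
+source=accounts | rare 5 age by gender // hypothetical index and fields
+```
+would return up to five of the least common `age` values for each distinct `gender` value.
+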
-**Syntax** +### Syntax `rare [N] [by-clause]` `rare_approx [N] [by-clause]` diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md index c4a0c337c..b36eb1c80 100644 --- a/docs/ppl-lang/ppl-subquery-command.md +++ b/docs/ppl-lang/ppl-subquery-command.md @@ -1,27 +1,27 @@ -## PPL SubQuery Commands: +## PPL SubQuery Commands -### Syntax -The subquery command should be implemented using a clean, logical syntax that integrates with existing PPL structure. +### Description +The subquery command has 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`. +`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are subquery expressions, their common usage is in Where clause(`where `) and Search filter(`search source=* `). -```sql -source=logs | where field in [ subquery source=events | where condition | fields field ] +For example, a subquery expression could be used in boolean expression: ``` - -In this example, the primary search (`source=logs`) is filtered by results from the subquery (`source=events`). - -The subquery command should allow nested queries to be as complex as necessary, supporting multiple levels of nesting. - -Example: - -```sql - source=logs | where id in [ subquery source=users | where user in [ subquery source=actions | where action="login" | fields user] | fields uid ] +| where orders.order_id in [ source=returns | where return_reason="damaged" | field order_id ] ``` +The `orders.order_id in [ source=... ]` is a ``. -For additional info See [Issue](https://github.com/opensearch-project/opensearch-spark/issues/661) - ---- +But `RelationSubquery` is not a subquery expression, it is a subquery plan. +[Recall the join command doc](ppl-join-command.md), the example is a subquery/subsearch **plan**, rather than a **expression**. -### InSubquery usage +### Syntax +- `where [not] in [ source=... | ... | ... ]` (InSubquery) +- `where [not] exists [ source=... | ... | ... ]` (ExistsSubquery) +- `where = [ source=... | ... | ... ]` (ScalarSubquery) +- `source=[ source= ...]` (RelationSubquery) +- `| join ON condition [ source= ]` (RelationSubquery in join right side) + +### Usage +InSubquery: - `source = outer | where a in [ source = inner | fields b ]` - `source = outer | where (a) in [ source = inner | fields b ]` - `source = outer | where (a,b,c) in [ source = inner | fields d,e,f ]` @@ -33,92 +33,9 @@ For additional info See [Issue](https://github.com/opensearch-project/opensearch - `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested) - `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter) -**_SQL Migration examples with IN-Subquery PPL:_** -1. 
tpch q4 (in-subquery with aggregation) -```sql -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and o_orderkey in ( - select - l_orderkey - from - lineitem - where l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority -``` -Rewritten by PPL InSubquery query: -```sql -source = orders -| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" and o_orderkey IN - [ source = lineitem - | where l_commitdate < l_receiptdate - | fields l_orderkey - ] -| stats count(1) as order_count by o_orderpriority -| sort o_orderpriority -| fields o_orderpriority, order_count -``` -2.tpch q20 (nested in-subquery) -```sql -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - ) - and s_nationkey = n_nationkey - and n_name = 'CANADA' -order by - s_name -``` -Rewritten by PPL InSubquery query: -```sql -source = supplier -| where s_suppkey IN [ - source = partsupp - | where ps_partkey IN [ - source = part - | where like(p_name, "forest%") - | fields p_partkey - ] - | fields ps_suppkey - ] -| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA' - nation -| sort s_name -``` ---- - -### ExistsSubquery usage - -Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table inner2 +ExistsSubquery: +(Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table inner2) - `source = outer | where exists [ source = inner | where a = c ]` - `source = outer | where not exists [ source = inner | where a = c ]` - `source = outer | where exists [ source = inner | where a = c and b = d ]` @@ -132,48 +49,9 @@ Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table in - `source = outer | where not exists [ source = inner | where c > 10 ]` (uncorrelated exists) - `source = outer | where exists [ source = inner ] | eval l = "nonEmpty" | fields l` (special uncorrelated exists) -**_SQL Migration examples with Exists-Subquery PPL:_** - -tpch q4 (exists subquery with aggregation) -```sql -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and exists ( - select - l_orderkey - from - lineitem - where l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority -``` -Rewritten by PPL ExistsSubquery query: -```sql -source = orders -| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" - and exists [ - source = lineitem - | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate - ] -| stats count(1) as order_count by o_orderpriority -| sort o_orderpriority -| fields o_orderpriority, order_count -``` ---- - -### ScalarSubquery usage +ScalarSubquery: -Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested +(Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested) **Uncorrelated scalar subquery in Select** - `source = outer | eval m = [ source = inner | stats max(c) ] | fields m, a` @@ -203,146 +81,98 @@ Assumptions: `a`, `b` are fields of table 
outer, `c`, `d` are fields of table in - `source = outer | where a = [ source = inner | stats max(c) | sort c ] OR b = [ source = inner | where c = 1 | stats min(d) | sort d ]` - `source = outer | where a = [ source = inner | where c = [ source = nested | stats max(e) by f | sort f ] | stats max(d) by c | sort c | head 1 ]` -_SQL Migration examples with Scalar-Subquery PPL:_ -Example 1 -```sql -SELECT * -FROM outer -WHERE a = (SELECT max(c) - FROM inner1 - WHERE c = (SELECT max(e) - FROM inner2 - GROUP BY f - ORDER BY f - ) - GROUP BY c - ORDER BY c - LIMIT 1) -``` -Rewritten by PPL ScalarSubquery query: -```sql -source = spark_catalog.default.outer -| where a = [ - source = spark_catalog.default.inner1 - | where c = [ - source = spark_catalog.default.inner2 - | stats max(e) by f - | sort f - ] - | stats max(d) by c - | sort c - | head 1 - ] -``` -Example 2 -```sql -SELECT * FROM outer -WHERE a = (SELECT max(c) - FROM inner - ORDER BY c) -OR b = (SELECT min(d) - FROM inner - WHERE c = 1 - ORDER BY d) -``` -Rewritten by PPL ScalarSubquery query: -```sql -source = spark_catalog.default.outer -| where a = [ - source = spark_catalog.default.inner | stats max(c) | sort c - ] OR b = [ - source = spark_catalog.default.inner | where c = 1 | stats min(d) | sort d - ] -``` ---- - -### (Relation) Subquery -`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are all subquery expressions. But `RelationSubquery` is not a subquery expression, it is a subquery plan which is common used in Join or From clause. - -- `source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ]` (subquery in join right side) +RelationSubquery: +- `source = table1 | join left = l right = r on condition [ source = table2 | where d > 10 | head 5 ]` (subquery in join right side) - `source = [ source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] | stats count(a) by b ] as outer | head 1` -**_SQL Migration examples with Subquery PPL:_** - -tpch q13 -```sql -select - c_count, - count(*) as custdist -from - ( - select - c_custkey, - count(o_orderkey) as c_count - from - customer left outer join orders on - c_custkey = o_custkey - and o_comment not like '%special%requests%' - group by - c_custkey - ) as c_orders -group by - c_count -order by - custdist desc, - c_count desc -``` -Rewritten by PPL (Relation) Subquery: -```sql -SEARCH source = [ - SEARCH source = customer - | LEFT OUTER JOIN left = c right = o ON c_custkey = o_custkey - [ - SEARCH source = orders - | WHERE not like(o_comment, '%special%requests%') - ] - | STATS COUNT(o_orderkey) AS c_count BY c_custkey -] AS c_orders -| STATS COUNT(o_orderkey) AS c_count BY c_custkey -| STATS COUNT(1) AS custdist BY c_count -| SORT - custdist, - c_count -``` ---- +### Examples 1: TPC-H q20 + +PPL query: + + os> source=supplier + | join ON s_nationkey = n_nationkey nation + | where n_name = 'CANADA' + and s_suppkey in [ // InSubquery + source = partsupp + | where ps_partkey in [ // InSubquery + source = part + | where like(p_name, 'forest%') + | fields p_partkey + ] + and ps_availqty > [ // ScalarSubquery + source = lineitem + | where l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date('1994-01-01') + and l_shipdate < date_add(date('1994-01-01'), interval 1 year) + | stats sum(l_quantity) as sum_l_quantity + | eval half_sum_l_quantity = 0.5 * sum_l_quantity // Stats and Eval commands can combine when issues/819 resolved + | fields half_sum_l_quantity + ] + | fields ps_suppkey + ] + | fields s_suppkey, 
s_name, s_phone, s_acctbal, n_name | head 10 + fetched rows / total rows = 10/10 + +-----------+---------------------+----------------+----------+---------+ + | s_suppkey | s_name | s_phone | s_acctbal| n_name | + +-----------+---------------------+----------------+----------+---------+ + | 8243 | Supplier#000008243 | 13-707-547-1386| 9067.07 | CANADA | + | 736 | Supplier#000000736 | 13-681-806-8650| 5700.83 | CANADA | + | 9032 | Supplier#000009032 | 13-441-662-5539| 3982.32 | CANADA | + | 3201 | Supplier#000003201 | 13-600-413-7165| 3799.41 | CANADA | + | 3849 | Supplier#000003849 | 13-582-965-9117| 52.33 | CANADA | + | 5505 | Supplier#000005505 | 13-531-190-6523| 2023.4 | CANADA | + | 5195 | Supplier#000005195 | 13-622-661-2956| 3717.34 | CANADA | + | 9753 | Supplier#000009753 | 13-724-256-7877| 4406.93 | CANADA | + | 7135 | Supplier#000007135 | 13-367-994-6705| 4950.29 | CANADA | + | 5256 | Supplier#000005256 | 13-180-538-8836| 5624.79 | CANADA | + +-----------+---------------------+----------------+----------+---------+ + + +### Examples 2: TPC-H q22 + +PPL query: + + os> source = [ + source = customer + | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > [ + source = customer + | where c_acctbal > 0.00 + and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + | stats avg(c_acctbal) + ] + and not exists [ + source = orders + | where o_custkey = c_custkey + ] + | eval cntrycode = substring(c_phone, 1, 2) + | fields cntrycode, c_acctbal + ] as custsale + | stats count() as numcust, sum(c_acctbal) as totacctbal by cntrycode + | sort cntrycode + fetched rows / total rows = 10/10 + +---------+--------------------+------------+ + | numcust | totacctbal | cntrycode | + +---------+--------------------+------------+ + | 888 | 6737713.989999999 | 13 | + | 861 | 6460573.72 | 17 | + | 964 | 7236687.4 | 18 | + | 892 | 6701457.950000001 | 23 | + | 948 | 7158866.630000001 | 29 | + | 909 | 6808436.129999999 | 30 | + | 922 | 6806670.179999999 | 31 | + +---------+--------------------+------------+ ### Additional Context -`InSubquery`, `ExistsSubquery` and `ScalarSubquery` as subquery expressions, their common usage is in `where` clause and `search filter`. - -Where command: -``` -| where | ... -``` -Search filter: -``` -search source=* | ... -``` -A subquery expression could be used in boolean expression, for example - -```sql -| where orders.order_id in [ source=returns | where return_reason="damaged" | field order_id ] -``` - -The `orders.order_id in [ source=... ]` is a ``. - -In general, we name this kind of subquery clause the `InSubquery` expression, it is a ``. - -**Subquery with Different Join Types** +#### RelationSubquery -In issue description is a `ScalarSubquery`: - -```sql -source=employees -| join source=sales on employees.employee_id = sales.employee_id -| where sales.sale_amount > [ source=targets | where target_met="true" | fields target_value ] +RelationSubquery is plan instead of expression, for example ``` - -But `RelationSubquery` is not a subquery expression, it is a subquery plan. -[Recall the join command doc](ppl-join-command.md), the example is a subquery/subsearch **plan**, rather than a **expression**. 
- -```sql -SEARCH source=customer +source=customer | FIELDS c_custkey -| LEFT OUTER JOIN left = c, right = o ON c.c_custkey = o.o_custkey +| LEFT OUTER JOIN left = c right = o ON c.c_custkey = o.o_custkey [ SEARCH source=orders | WHERE o_comment NOT LIKE '%unusual%packages%' @@ -351,7 +181,7 @@ SEARCH source=customer | STATS ... ``` simply into -```sql +``` SEARCH | LEFT OUTER JOIN ON [ @@ -359,21 +189,14 @@ SEARCH ] | STATS ... ``` -Apply the syntax here and simply into - -```sql -search | left join on [ search ... ] -``` - -The `[ search ...]` is not a `expression`, it's `plan`, similar to the `relation` plan -**Uncorrelated Subquery** +#### Uncorrelated Subquery An uncorrelated subquery is independent of the outer query. It is executed once, and the result is used by the outer query. It's **less common** when using `ExistsSubquery` because `ExistsSubquery` typically checks for the presence of rows that are dependent on the outer query’s row. There is a very special exists subquery which highlight by `(special uncorrelated exists)`: -```sql +``` SELECT 'nonEmpty' FROM outer WHERE EXISTS ( @@ -382,7 +205,7 @@ FROM outer ); ``` Rewritten by PPL ExistsSubquery query: -```sql +``` source = outer | where exists [ source = inner @@ -392,11 +215,11 @@ source = outer ``` This query just print "nonEmpty" if the inner table is not empty. -**Table alias in subquery** +#### Table alias in subquery Table alias is useful in query which contains a subquery, for example -```sql +``` select a, ( select sum(b) from catalog.schema.table1 as t1 diff --git a/docs/ppl-lang/ppl-top-command.md b/docs/ppl-lang/ppl-top-command.md index 93d3a7148..012457fe2 100644 --- a/docs/ppl-lang/ppl-top-command.md +++ b/docs/ppl-lang/ppl-top-command.md @@ -1,6 +1,6 @@ ## PPL top Command -**Description** +### Description Using ``top`` command to find the most common tuple of values of all fields in the field list. diff --git a/docs/ppl-lang/ppl-trendline-command.md b/docs/ppl-lang/ppl-trendline-command.md index b466e2e8f..44b8c999f 100644 --- a/docs/ppl-lang/ppl-trendline-command.md +++ b/docs/ppl-lang/ppl-trendline-command.md @@ -1,6 +1,6 @@ ## PPL trendline Command -**Description** +### Description Using ``trendline`` command to calculate moving averages of fields. 
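+
+For example, a minimal sketch (assuming a hypothetical `accounts` index with a numeric `balance` field, and the SMA syntax described below):
+
+```
+source=accounts | trendline sma(2, balance) as balance_trend // hypothetical index and field
+```
+computes a two-point simple moving average of `balance` into a new `balance_trend` field.
+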
### Syntax - SMA (Simple Moving Average) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens new file mode 100644 index 000000000..5f976453e --- /dev/null +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens @@ -0,0 +1,798 @@ +SEARCH=1 +DESCRIBE=2 +SHOW=3 +FROM=4 +WHERE=5 +FIELDS=6 +RENAME=7 +STATS=8 +EVENTSTATS=9 +DEDUP=10 +SORT=11 +EVAL=12 +HEAD=13 +TOP_APPROX=14 +TOP=15 +RARE_APPROX=16 +RARE=17 +PARSE=18 +METHOD=19 +REGEX=20 +PUNCT=21 +GROK=22 +PATTERN=23 +PATTERNS=24 +NEW_FIELD=25 +KMEANS=26 +AD=27 +ML=28 +FILLNULL=29 +EXPAND=30 +FLATTEN=31 +TRENDLINE=32 +JOIN=33 +ON=34 +INNER=35 +OUTER=36 +FULL=37 +SEMI=38 +ANTI=39 +CROSS=40 +LEFT_HINT=41 +RIGHT_HINT=42 +CORRELATE=43 +SELF=44 +EXACT=45 +APPROXIMATE=46 +SCOPE=47 +MAPPING=48 +EXPLAIN=49 +FORMATTED=50 +COST=51 +CODEGEN=52 +EXTENDED=53 +SIMPLE=54 +AS=55 +BY=56 +SOURCE=57 +INDEX=58 +D=59 +DESC=60 +DATASOURCES=61 +USING=62 +WITH=63 +AUTO=64 +STR=65 +IP=66 +NUM=67 +FIELDSUMMARY=68 +INCLUDEFIELDS=69 +NULLS=70 +SMA=71 +WMA=72 +KEEPEMPTY=73 +CONSECUTIVE=74 +DEDUP_SPLITVALUES=75 +PARTITIONS=76 +ALLNUM=77 +DELIM=78 +CENTROIDS=79 +ITERATIONS=80 +DISTANCE_TYPE=81 +NUMBER_OF_TREES=82 +SHINGLE_SIZE=83 +SAMPLE_SIZE=84 +OUTPUT_AFTER=85 +TIME_DECAY=86 +ANOMALY_RATE=87 +CATEGORY_FIELD=88 +TIME_FIELD=89 +TIME_ZONE=90 +TRAINING_DATA_SIZE=91 +ANOMALY_SCORE_THRESHOLD=92 +APPEND=93 +CASE=94 +ELSE=95 +IN=96 +EXISTS=97 +NOT=98 +OR=99 +AND=100 +XOR=101 +TRUE=102 +FALSE=103 +REGEXP=104 +CONVERT_TZ=105 +DATETIME=106 +DAY=107 +DAY_HOUR=108 +DAY_MICROSECOND=109 +DAY_MINUTE=110 +DAY_OF_YEAR=111 +DAY_SECOND=112 +HOUR=113 +HOUR_MICROSECOND=114 +HOUR_MINUTE=115 +HOUR_OF_DAY=116 +HOUR_SECOND=117 +INTERVAL=118 +MICROSECOND=119 +MILLISECOND=120 +MINUTE=121 +MINUTE_MICROSECOND=122 +MINUTE_OF_DAY=123 +MINUTE_OF_HOUR=124 +MINUTE_SECOND=125 +MONTH=126 +MONTH_OF_YEAR=127 +QUARTER=128 +SECOND=129 +SECOND_MICROSECOND=130 +SECOND_OF_MINUTE=131 +WEEK=132 +WEEK_OF_YEAR=133 +YEAR=134 +YEAR_MONTH=135 +DATAMODEL=136 +LOOKUP=137 +SAVEDSEARCH=138 +INT=139 +INTEGER=140 +DOUBLE=141 +LONG=142 +FLOAT=143 +STRING=144 +BOOLEAN=145 +PIPE=146 +COMMA=147 +DOT=148 +EQUAL=149 +GREATER=150 +LESS=151 +NOT_GREATER=152 +NOT_LESS=153 +NOT_EQUAL=154 +PLUS=155 +MINUS=156 +STAR=157 +DIVIDE=158 +MODULE=159 +EXCLAMATION_SYMBOL=160 +COLON=161 +LT_PRTHS=162 +RT_PRTHS=163 +LT_SQR_PRTHS=164 +RT_SQR_PRTHS=165 +SINGLE_QUOTE=166 +DOUBLE_QUOTE=167 +BACKTICK=168 +ARROW=169 +BIT_NOT_OP=170 +BIT_AND_OP=171 +BIT_XOR_OP=172 +AVG=173 +COUNT=174 +DISTINCT_COUNT=175 +DISTINCT_COUNT_APPROX=176 +ESTDC=177 +ESTDC_ERROR=178 +MAX=179 +MEAN=180 +MEDIAN=181 +MIN=182 +MODE=183 +RANGE=184 +STDEV=185 +STDEVP=186 +SUM=187 +SUMSQ=188 +VAR_SAMP=189 +VAR_POP=190 +STDDEV_SAMP=191 +STDDEV_POP=192 +PERCENTILE=193 +PERCENTILE_APPROX=194 +TAKE=195 +FIRST=196 +LAST=197 +LIST=198 +VALUES=199 +EARLIEST=200 +EARLIEST_TIME=201 +LATEST=202 +LATEST_TIME=203 +PER_DAY=204 +PER_HOUR=205 +PER_MINUTE=206 +PER_SECOND=207 +RATE=208 +SPARKLINE=209 +C=210 +DC=211 +ABS=212 +CBRT=213 +CEIL=214 +CEILING=215 +CONV=216 +CRC32=217 +E=218 +EXP=219 +FLOOR=220 +LN=221 +LOG=222 +LOG10=223 +LOG2=224 +MOD=225 +PI=226 +POSITION=227 +POW=228 +POWER=229 +RAND=230 +ROUND=231 +SIGN=232 +SIGNUM=233 +SQRT=234 +TRUNCATE=235 +ACOS=236 +ASIN=237 +ATAN=238 +ATAN2=239 +COS=240 +COT=241 +DEGREES=242 +RADIANS=243 +SIN=244 +TAN=245 +MD5=246 +SHA1=247 +SHA2=248 +ADDDATE=249 +ADDTIME=250 +CURDATE=251 +CURRENT_DATE=252 +CURRENT_TIME=253 +CURRENT_TIMESTAMP=254 +CURRENT_TIMEZONE=255 
+CURTIME=256 +DATE=257 +DATEDIFF=258 +DATE_ADD=259 +DATE_FORMAT=260 +DATE_SUB=261 +DAYNAME=262 +DAYOFMONTH=263 +DAYOFWEEK=264 +DAYOFYEAR=265 +DAY_OF_MONTH=266 +DAY_OF_WEEK=267 +DURATION=268 +EXTRACT=269 +FROM_DAYS=270 +FROM_UNIXTIME=271 +GET_FORMAT=272 +LAST_DAY=273 +LOCALTIME=274 +LOCALTIMESTAMP=275 +MAKEDATE=276 +MAKE_DATE=277 +MAKETIME=278 +MONTHNAME=279 +NOW=280 +PERIOD_ADD=281 +PERIOD_DIFF=282 +SEC_TO_TIME=283 +STR_TO_DATE=284 +SUBDATE=285 +SUBTIME=286 +SYSDATE=287 +TIME=288 +TIMEDIFF=289 +TIMESTAMP=290 +TIMESTAMPADD=291 +TIMESTAMPDIFF=292 +TIME_FORMAT=293 +TIME_TO_SEC=294 +TO_DAYS=295 +TO_SECONDS=296 +UNIX_TIMESTAMP=297 +UTC_DATE=298 +UTC_TIME=299 +UTC_TIMESTAMP=300 +WEEKDAY=301 +YEARWEEK=302 +SUBSTR=303 +SUBSTRING=304 +LTRIM=305 +RTRIM=306 +TRIM=307 +TO=308 +LOWER=309 +UPPER=310 +CONCAT=311 +CONCAT_WS=312 +LENGTH=313 +STRCMP=314 +RIGHT=315 +LEFT=316 +ASCII=317 +LOCATE=318 +REPLACE=319 +REVERSE=320 +CAST=321 +ISEMPTY=322 +ISBLANK=323 +JSON=324 +JSON_OBJECT=325 +JSON_ARRAY=326 +JSON_ARRAY_LENGTH=327 +TO_JSON_STRING=328 +JSON_EXTRACT=329 +JSON_KEYS=330 +JSON_VALID=331 +ARRAY=332 +ARRAY_LENGTH=333 +FORALL=334 +FILTER=335 +TRANSFORM=336 +REDUCE=337 +LIKE=338 +ISNULL=339 +ISNOTNULL=340 +ISPRESENT=341 +BETWEEN=342 +CIDRMATCH=343 +GEOIP=344 +IFNULL=345 +NULLIF=346 +IF=347 +TYPEOF=348 +COALESCE=349 +MATCH=350 +MATCH_PHRASE=351 +MATCH_PHRASE_PREFIX=352 +MATCH_BOOL_PREFIX=353 +SIMPLE_QUERY_STRING=354 +MULTI_MATCH=355 +QUERY_STRING=356 +ALLOW_LEADING_WILDCARD=357 +ANALYZE_WILDCARD=358 +ANALYZER=359 +AUTO_GENERATE_SYNONYMS_PHRASE_QUERY=360 +BOOST=361 +CUTOFF_FREQUENCY=362 +DEFAULT_FIELD=363 +DEFAULT_OPERATOR=364 +ENABLE_POSITION_INCREMENTS=365 +ESCAPE=366 +FLAGS=367 +FUZZY_MAX_EXPANSIONS=368 +FUZZY_PREFIX_LENGTH=369 +FUZZY_TRANSPOSITIONS=370 +FUZZY_REWRITE=371 +FUZZINESS=372 +LENIENT=373 +LOW_FREQ_OPERATOR=374 +MAX_DETERMINIZED_STATES=375 +MAX_EXPANSIONS=376 +MINIMUM_SHOULD_MATCH=377 +OPERATOR=378 +PHRASE_SLOP=379 +PREFIX_LENGTH=380 +QUOTE_ANALYZER=381 +QUOTE_FIELD_SUFFIX=382 +REWRITE=383 +SLOP=384 +TIE_BREAKER=385 +TYPE=386 +ZERO_TERMS_QUERY=387 +SPAN=388 +MS=389 +S=390 +M=391 +H=392 +W=393 +Q=394 +Y=395 +ID=396 +CLUSTER=397 +INTEGER_LITERAL=398 +DECIMAL_LITERAL=399 +ID_DATE_SUFFIX=400 +DQUOTA_STRING=401 +SQUOTA_STRING=402 +BQUOTA_STRING=403 +LINE_COMMENT=404 +BLOCK_COMMENT=405 +ERROR_RECOGNITION=406 +'SEARCH'=1 +'DESCRIBE'=2 +'SHOW'=3 +'FROM'=4 +'WHERE'=5 +'FIELDS'=6 +'RENAME'=7 +'STATS'=8 +'EVENTSTATS'=9 +'DEDUP'=10 +'SORT'=11 +'EVAL'=12 +'HEAD'=13 +'TOP_APPROX'=14 +'TOP'=15 +'RARE_APPROX'=16 +'RARE'=17 +'PARSE'=18 +'METHOD'=19 +'REGEX'=20 +'PUNCT'=21 +'GROK'=22 +'PATTERN'=23 +'PATTERNS'=24 +'NEW_FIELD'=25 +'KMEANS'=26 +'AD'=27 +'ML'=28 +'FILLNULL'=29 +'EXPAND'=30 +'FLATTEN'=31 +'TRENDLINE'=32 +'JOIN'=33 +'ON'=34 +'INNER'=35 +'OUTER'=36 +'FULL'=37 +'SEMI'=38 +'ANTI'=39 +'CROSS'=40 +'HINT.LEFT'=41 +'HINT.RIGHT'=42 +'CORRELATE'=43 +'SELF'=44 +'EXACT'=45 +'APPROXIMATE'=46 +'SCOPE'=47 +'MAPPING'=48 +'EXPLAIN'=49 +'FORMATTED'=50 +'COST'=51 +'CODEGEN'=52 +'EXTENDED'=53 +'SIMPLE'=54 +'AS'=55 +'BY'=56 +'SOURCE'=57 +'INDEX'=58 +'D'=59 +'DESC'=60 +'DATASOURCES'=61 +'USING'=62 +'WITH'=63 +'AUTO'=64 +'STR'=65 +'IP'=66 +'NUM'=67 +'FIELDSUMMARY'=68 +'INCLUDEFIELDS'=69 +'NULLS'=70 +'SMA'=71 +'WMA'=72 +'KEEPEMPTY'=73 +'CONSECUTIVE'=74 +'DEDUP_SPLITVALUES'=75 +'PARTITIONS'=76 +'ALLNUM'=77 +'DELIM'=78 +'CENTROIDS'=79 +'ITERATIONS'=80 +'DISTANCE_TYPE'=81 +'NUMBER_OF_TREES'=82 +'SHINGLE_SIZE'=83 +'SAMPLE_SIZE'=84 +'OUTPUT_AFTER'=85 +'TIME_DECAY'=86 +'ANOMALY_RATE'=87 +'CATEGORY_FIELD'=88 +'TIME_FIELD'=89 
+'TIME_ZONE'=90 +'TRAINING_DATA_SIZE'=91 +'ANOMALY_SCORE_THRESHOLD'=92 +'APPEND'=93 +'CASE'=94 +'ELSE'=95 +'IN'=96 +'EXISTS'=97 +'NOT'=98 +'OR'=99 +'AND'=100 +'XOR'=101 +'TRUE'=102 +'FALSE'=103 +'REGEXP'=104 +'CONVERT_TZ'=105 +'DATETIME'=106 +'DAY'=107 +'DAY_HOUR'=108 +'DAY_MICROSECOND'=109 +'DAY_MINUTE'=110 +'DAY_OF_YEAR'=111 +'DAY_SECOND'=112 +'HOUR'=113 +'HOUR_MICROSECOND'=114 +'HOUR_MINUTE'=115 +'HOUR_OF_DAY'=116 +'HOUR_SECOND'=117 +'INTERVAL'=118 +'MICROSECOND'=119 +'MILLISECOND'=120 +'MINUTE'=121 +'MINUTE_MICROSECOND'=122 +'MINUTE_OF_DAY'=123 +'MINUTE_OF_HOUR'=124 +'MINUTE_SECOND'=125 +'MONTH'=126 +'MONTH_OF_YEAR'=127 +'QUARTER'=128 +'SECOND'=129 +'SECOND_MICROSECOND'=130 +'SECOND_OF_MINUTE'=131 +'WEEK'=132 +'WEEK_OF_YEAR'=133 +'YEAR'=134 +'YEAR_MONTH'=135 +'DATAMODEL'=136 +'LOOKUP'=137 +'SAVEDSEARCH'=138 +'INT'=139 +'INTEGER'=140 +'DOUBLE'=141 +'LONG'=142 +'FLOAT'=143 +'STRING'=144 +'BOOLEAN'=145 +'|'=146 +','=147 +'.'=148 +'='=149 +'>'=150 +'<'=151 +'+'=155 +'-'=156 +'*'=157 +'/'=158 +'%'=159 +'!'=160 +':'=161 +'('=162 +')'=163 +'['=164 +']'=165 +'\''=166 +'"'=167 +'`'=168 +'->'=169 +'~'=170 +'&'=171 +'^'=172 +'AVG'=173 +'COUNT'=174 +'DISTINCT_COUNT'=175 +'DISTINCT_COUNT_APPROX'=176 +'ESTDC'=177 +'ESTDC_ERROR'=178 +'MAX'=179 +'MEAN'=180 +'MEDIAN'=181 +'MIN'=182 +'MODE'=183 +'RANGE'=184 +'STDEV'=185 +'STDEVP'=186 +'SUM'=187 +'SUMSQ'=188 +'VAR_SAMP'=189 +'VAR_POP'=190 +'STDDEV_SAMP'=191 +'STDDEV_POP'=192 +'PERCENTILE'=193 +'PERCENTILE_APPROX'=194 +'TAKE'=195 +'FIRST'=196 +'LAST'=197 +'LIST'=198 +'VALUES'=199 +'EARLIEST'=200 +'EARLIEST_TIME'=201 +'LATEST'=202 +'LATEST_TIME'=203 +'PER_DAY'=204 +'PER_HOUR'=205 +'PER_MINUTE'=206 +'PER_SECOND'=207 +'RATE'=208 +'SPARKLINE'=209 +'C'=210 +'DC'=211 +'ABS'=212 +'CBRT'=213 +'CEIL'=214 +'CEILING'=215 +'CONV'=216 +'CRC32'=217 +'E'=218 +'EXP'=219 +'FLOOR'=220 +'LN'=221 +'LOG'=222 +'LOG10'=223 +'LOG2'=224 +'MOD'=225 +'PI'=226 +'POSITION'=227 +'POW'=228 +'POWER'=229 +'RAND'=230 +'ROUND'=231 +'SIGN'=232 +'SIGNUM'=233 +'SQRT'=234 +'TRUNCATE'=235 +'ACOS'=236 +'ASIN'=237 +'ATAN'=238 +'ATAN2'=239 +'COS'=240 +'COT'=241 +'DEGREES'=242 +'RADIANS'=243 +'SIN'=244 +'TAN'=245 +'MD5'=246 +'SHA1'=247 +'SHA2'=248 +'ADDDATE'=249 +'ADDTIME'=250 +'CURDATE'=251 +'CURRENT_DATE'=252 +'CURRENT_TIME'=253 +'CURRENT_TIMESTAMP'=254 +'CURRENT_TIMEZONE'=255 +'CURTIME'=256 +'DATE'=257 +'DATEDIFF'=258 +'DATE_ADD'=259 +'DATE_FORMAT'=260 +'DATE_SUB'=261 +'DAYNAME'=262 +'DAYOFMONTH'=263 +'DAYOFWEEK'=264 +'DAYOFYEAR'=265 +'DAY_OF_MONTH'=266 +'DAY_OF_WEEK'=267 +'DURATION'=268 +'EXTRACT'=269 +'FROM_DAYS'=270 +'FROM_UNIXTIME'=271 +'GET_FORMAT'=272 +'LAST_DAY'=273 +'LOCALTIME'=274 +'LOCALTIMESTAMP'=275 +'MAKEDATE'=276 +'MAKE_DATE'=277 +'MAKETIME'=278 +'MONTHNAME'=279 +'NOW'=280 +'PERIOD_ADD'=281 +'PERIOD_DIFF'=282 +'SEC_TO_TIME'=283 +'STR_TO_DATE'=284 +'SUBDATE'=285 +'SUBTIME'=286 +'SYSDATE'=287 +'TIME'=288 +'TIMEDIFF'=289 +'TIMESTAMP'=290 +'TIMESTAMPADD'=291 +'TIMESTAMPDIFF'=292 +'TIME_FORMAT'=293 +'TIME_TO_SEC'=294 +'TO_DAYS'=295 +'TO_SECONDS'=296 +'UNIX_TIMESTAMP'=297 +'UTC_DATE'=298 +'UTC_TIME'=299 +'UTC_TIMESTAMP'=300 +'WEEKDAY'=301 +'YEARWEEK'=302 +'SUBSTR'=303 +'SUBSTRING'=304 +'LTRIM'=305 +'RTRIM'=306 +'TRIM'=307 +'TO'=308 +'LOWER'=309 +'UPPER'=310 +'CONCAT'=311 +'CONCAT_WS'=312 +'LENGTH'=313 +'STRCMP'=314 +'RIGHT'=315 +'LEFT'=316 +'ASCII'=317 +'LOCATE'=318 +'REPLACE'=319 +'REVERSE'=320 +'CAST'=321 +'ISEMPTY'=322 +'ISBLANK'=323 +'JSON'=324 +'JSON_OBJECT'=325 +'JSON_ARRAY'=326 +'JSON_ARRAY_LENGTH'=327 +'TO_JSON_STRING'=328 +'JSON_EXTRACT'=329 +'JSON_KEYS'=330 +'JSON_VALID'=331 
+'ARRAY'=332 +'ARRAY_LENGTH'=333 +'FORALL'=334 +'FILTER'=335 +'TRANSFORM'=336 +'REDUCE'=337 +'LIKE'=338 +'ISNULL'=339 +'ISNOTNULL'=340 +'ISPRESENT'=341 +'BETWEEN'=342 +'CIDRMATCH'=343 +'GEOIP'=344 +'IFNULL'=345 +'NULLIF'=346 +'IF'=347 +'TYPEOF'=348 +'COALESCE'=349 +'MATCH'=350 +'MATCH_PHRASE'=351 +'MATCH_PHRASE_PREFIX'=352 +'MATCH_BOOL_PREFIX'=353 +'SIMPLE_QUERY_STRING'=354 +'MULTI_MATCH'=355 +'QUERY_STRING'=356 +'ALLOW_LEADING_WILDCARD'=357 +'ANALYZE_WILDCARD'=358 +'ANALYZER'=359 +'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'=360 +'BOOST'=361 +'CUTOFF_FREQUENCY'=362 +'DEFAULT_FIELD'=363 +'DEFAULT_OPERATOR'=364 +'ENABLE_POSITION_INCREMENTS'=365 +'ESCAPE'=366 +'FLAGS'=367 +'FUZZY_MAX_EXPANSIONS'=368 +'FUZZY_PREFIX_LENGTH'=369 +'FUZZY_TRANSPOSITIONS'=370 +'FUZZY_REWRITE'=371 +'FUZZINESS'=372 +'LENIENT'=373 +'LOW_FREQ_OPERATOR'=374 +'MAX_DETERMINIZED_STATES'=375 +'MAX_EXPANSIONS'=376 +'MINIMUM_SHOULD_MATCH'=377 +'OPERATOR'=378 +'PHRASE_SLOP'=379 +'PREFIX_LENGTH'=380 +'QUOTE_ANALYZER'=381 +'QUOTE_FIELD_SUFFIX'=382 +'REWRITE'=383 +'SLOP'=384 +'TIE_BREAKER'=385 +'TYPE'=386 +'ZERO_TERMS_QUERY'=387 +'SPAN'=388 +'MS'=389 +'S'=390 +'M'=391 +'H'=392 +'W'=393 +'Q'=394 +'Y'=395 From 08e1df99b8ec2e4672708cc5b0b6492ada0587d9 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 18 Nov 2024 13:00:20 +0800 Subject: [PATCH 2/5] remove auto generated file Signed-off-by: Lantao Jin --- .../src/main/antlr4/OpenSearchPPLLexer.tokens | 798 ------------------ 1 file changed, 798 deletions(-) delete mode 100644 ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens deleted file mode 100644 index 5f976453e..000000000 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens +++ /dev/null @@ -1,798 +0,0 @@ -SEARCH=1 -DESCRIBE=2 -SHOW=3 -FROM=4 -WHERE=5 -FIELDS=6 -RENAME=7 -STATS=8 -EVENTSTATS=9 -DEDUP=10 -SORT=11 -EVAL=12 -HEAD=13 -TOP_APPROX=14 -TOP=15 -RARE_APPROX=16 -RARE=17 -PARSE=18 -METHOD=19 -REGEX=20 -PUNCT=21 -GROK=22 -PATTERN=23 -PATTERNS=24 -NEW_FIELD=25 -KMEANS=26 -AD=27 -ML=28 -FILLNULL=29 -EXPAND=30 -FLATTEN=31 -TRENDLINE=32 -JOIN=33 -ON=34 -INNER=35 -OUTER=36 -FULL=37 -SEMI=38 -ANTI=39 -CROSS=40 -LEFT_HINT=41 -RIGHT_HINT=42 -CORRELATE=43 -SELF=44 -EXACT=45 -APPROXIMATE=46 -SCOPE=47 -MAPPING=48 -EXPLAIN=49 -FORMATTED=50 -COST=51 -CODEGEN=52 -EXTENDED=53 -SIMPLE=54 -AS=55 -BY=56 -SOURCE=57 -INDEX=58 -D=59 -DESC=60 -DATASOURCES=61 -USING=62 -WITH=63 -AUTO=64 -STR=65 -IP=66 -NUM=67 -FIELDSUMMARY=68 -INCLUDEFIELDS=69 -NULLS=70 -SMA=71 -WMA=72 -KEEPEMPTY=73 -CONSECUTIVE=74 -DEDUP_SPLITVALUES=75 -PARTITIONS=76 -ALLNUM=77 -DELIM=78 -CENTROIDS=79 -ITERATIONS=80 -DISTANCE_TYPE=81 -NUMBER_OF_TREES=82 -SHINGLE_SIZE=83 -SAMPLE_SIZE=84 -OUTPUT_AFTER=85 -TIME_DECAY=86 -ANOMALY_RATE=87 -CATEGORY_FIELD=88 -TIME_FIELD=89 -TIME_ZONE=90 -TRAINING_DATA_SIZE=91 -ANOMALY_SCORE_THRESHOLD=92 -APPEND=93 -CASE=94 -ELSE=95 -IN=96 -EXISTS=97 -NOT=98 -OR=99 -AND=100 -XOR=101 -TRUE=102 -FALSE=103 -REGEXP=104 -CONVERT_TZ=105 -DATETIME=106 -DAY=107 -DAY_HOUR=108 -DAY_MICROSECOND=109 -DAY_MINUTE=110 -DAY_OF_YEAR=111 -DAY_SECOND=112 -HOUR=113 -HOUR_MICROSECOND=114 -HOUR_MINUTE=115 -HOUR_OF_DAY=116 -HOUR_SECOND=117 -INTERVAL=118 -MICROSECOND=119 -MILLISECOND=120 -MINUTE=121 -MINUTE_MICROSECOND=122 -MINUTE_OF_DAY=123 -MINUTE_OF_HOUR=124 -MINUTE_SECOND=125 -MONTH=126 -MONTH_OF_YEAR=127 -QUARTER=128 -SECOND=129 -SECOND_MICROSECOND=130 -SECOND_OF_MINUTE=131 -WEEK=132 -WEEK_OF_YEAR=133 
-YEAR=134 -YEAR_MONTH=135 -DATAMODEL=136 -LOOKUP=137 -SAVEDSEARCH=138 -INT=139 -INTEGER=140 -DOUBLE=141 -LONG=142 -FLOAT=143 -STRING=144 -BOOLEAN=145 -PIPE=146 -COMMA=147 -DOT=148 -EQUAL=149 -GREATER=150 -LESS=151 -NOT_GREATER=152 -NOT_LESS=153 -NOT_EQUAL=154 -PLUS=155 -MINUS=156 -STAR=157 -DIVIDE=158 -MODULE=159 -EXCLAMATION_SYMBOL=160 -COLON=161 -LT_PRTHS=162 -RT_PRTHS=163 -LT_SQR_PRTHS=164 -RT_SQR_PRTHS=165 -SINGLE_QUOTE=166 -DOUBLE_QUOTE=167 -BACKTICK=168 -ARROW=169 -BIT_NOT_OP=170 -BIT_AND_OP=171 -BIT_XOR_OP=172 -AVG=173 -COUNT=174 -DISTINCT_COUNT=175 -DISTINCT_COUNT_APPROX=176 -ESTDC=177 -ESTDC_ERROR=178 -MAX=179 -MEAN=180 -MEDIAN=181 -MIN=182 -MODE=183 -RANGE=184 -STDEV=185 -STDEVP=186 -SUM=187 -SUMSQ=188 -VAR_SAMP=189 -VAR_POP=190 -STDDEV_SAMP=191 -STDDEV_POP=192 -PERCENTILE=193 -PERCENTILE_APPROX=194 -TAKE=195 -FIRST=196 -LAST=197 -LIST=198 -VALUES=199 -EARLIEST=200 -EARLIEST_TIME=201 -LATEST=202 -LATEST_TIME=203 -PER_DAY=204 -PER_HOUR=205 -PER_MINUTE=206 -PER_SECOND=207 -RATE=208 -SPARKLINE=209 -C=210 -DC=211 -ABS=212 -CBRT=213 -CEIL=214 -CEILING=215 -CONV=216 -CRC32=217 -E=218 -EXP=219 -FLOOR=220 -LN=221 -LOG=222 -LOG10=223 -LOG2=224 -MOD=225 -PI=226 -POSITION=227 -POW=228 -POWER=229 -RAND=230 -ROUND=231 -SIGN=232 -SIGNUM=233 -SQRT=234 -TRUNCATE=235 -ACOS=236 -ASIN=237 -ATAN=238 -ATAN2=239 -COS=240 -COT=241 -DEGREES=242 -RADIANS=243 -SIN=244 -TAN=245 -MD5=246 -SHA1=247 -SHA2=248 -ADDDATE=249 -ADDTIME=250 -CURDATE=251 -CURRENT_DATE=252 -CURRENT_TIME=253 -CURRENT_TIMESTAMP=254 -CURRENT_TIMEZONE=255 -CURTIME=256 -DATE=257 -DATEDIFF=258 -DATE_ADD=259 -DATE_FORMAT=260 -DATE_SUB=261 -DAYNAME=262 -DAYOFMONTH=263 -DAYOFWEEK=264 -DAYOFYEAR=265 -DAY_OF_MONTH=266 -DAY_OF_WEEK=267 -DURATION=268 -EXTRACT=269 -FROM_DAYS=270 -FROM_UNIXTIME=271 -GET_FORMAT=272 -LAST_DAY=273 -LOCALTIME=274 -LOCALTIMESTAMP=275 -MAKEDATE=276 -MAKE_DATE=277 -MAKETIME=278 -MONTHNAME=279 -NOW=280 -PERIOD_ADD=281 -PERIOD_DIFF=282 -SEC_TO_TIME=283 -STR_TO_DATE=284 -SUBDATE=285 -SUBTIME=286 -SYSDATE=287 -TIME=288 -TIMEDIFF=289 -TIMESTAMP=290 -TIMESTAMPADD=291 -TIMESTAMPDIFF=292 -TIME_FORMAT=293 -TIME_TO_SEC=294 -TO_DAYS=295 -TO_SECONDS=296 -UNIX_TIMESTAMP=297 -UTC_DATE=298 -UTC_TIME=299 -UTC_TIMESTAMP=300 -WEEKDAY=301 -YEARWEEK=302 -SUBSTR=303 -SUBSTRING=304 -LTRIM=305 -RTRIM=306 -TRIM=307 -TO=308 -LOWER=309 -UPPER=310 -CONCAT=311 -CONCAT_WS=312 -LENGTH=313 -STRCMP=314 -RIGHT=315 -LEFT=316 -ASCII=317 -LOCATE=318 -REPLACE=319 -REVERSE=320 -CAST=321 -ISEMPTY=322 -ISBLANK=323 -JSON=324 -JSON_OBJECT=325 -JSON_ARRAY=326 -JSON_ARRAY_LENGTH=327 -TO_JSON_STRING=328 -JSON_EXTRACT=329 -JSON_KEYS=330 -JSON_VALID=331 -ARRAY=332 -ARRAY_LENGTH=333 -FORALL=334 -FILTER=335 -TRANSFORM=336 -REDUCE=337 -LIKE=338 -ISNULL=339 -ISNOTNULL=340 -ISPRESENT=341 -BETWEEN=342 -CIDRMATCH=343 -GEOIP=344 -IFNULL=345 -NULLIF=346 -IF=347 -TYPEOF=348 -COALESCE=349 -MATCH=350 -MATCH_PHRASE=351 -MATCH_PHRASE_PREFIX=352 -MATCH_BOOL_PREFIX=353 -SIMPLE_QUERY_STRING=354 -MULTI_MATCH=355 -QUERY_STRING=356 -ALLOW_LEADING_WILDCARD=357 -ANALYZE_WILDCARD=358 -ANALYZER=359 -AUTO_GENERATE_SYNONYMS_PHRASE_QUERY=360 -BOOST=361 -CUTOFF_FREQUENCY=362 -DEFAULT_FIELD=363 -DEFAULT_OPERATOR=364 -ENABLE_POSITION_INCREMENTS=365 -ESCAPE=366 -FLAGS=367 -FUZZY_MAX_EXPANSIONS=368 -FUZZY_PREFIX_LENGTH=369 -FUZZY_TRANSPOSITIONS=370 -FUZZY_REWRITE=371 -FUZZINESS=372 -LENIENT=373 -LOW_FREQ_OPERATOR=374 -MAX_DETERMINIZED_STATES=375 -MAX_EXPANSIONS=376 -MINIMUM_SHOULD_MATCH=377 -OPERATOR=378 -PHRASE_SLOP=379 -PREFIX_LENGTH=380 -QUOTE_ANALYZER=381 -QUOTE_FIELD_SUFFIX=382 -REWRITE=383 
-SLOP=384 -TIE_BREAKER=385 -TYPE=386 -ZERO_TERMS_QUERY=387 -SPAN=388 -MS=389 -S=390 -M=391 -H=392 -W=393 -Q=394 -Y=395 -ID=396 -CLUSTER=397 -INTEGER_LITERAL=398 -DECIMAL_LITERAL=399 -ID_DATE_SUFFIX=400 -DQUOTA_STRING=401 -SQUOTA_STRING=402 -BQUOTA_STRING=403 -LINE_COMMENT=404 -BLOCK_COMMENT=405 -ERROR_RECOGNITION=406 -'SEARCH'=1 -'DESCRIBE'=2 -'SHOW'=3 -'FROM'=4 -'WHERE'=5 -'FIELDS'=6 -'RENAME'=7 -'STATS'=8 -'EVENTSTATS'=9 -'DEDUP'=10 -'SORT'=11 -'EVAL'=12 -'HEAD'=13 -'TOP_APPROX'=14 -'TOP'=15 -'RARE_APPROX'=16 -'RARE'=17 -'PARSE'=18 -'METHOD'=19 -'REGEX'=20 -'PUNCT'=21 -'GROK'=22 -'PATTERN'=23 -'PATTERNS'=24 -'NEW_FIELD'=25 -'KMEANS'=26 -'AD'=27 -'ML'=28 -'FILLNULL'=29 -'EXPAND'=30 -'FLATTEN'=31 -'TRENDLINE'=32 -'JOIN'=33 -'ON'=34 -'INNER'=35 -'OUTER'=36 -'FULL'=37 -'SEMI'=38 -'ANTI'=39 -'CROSS'=40 -'HINT.LEFT'=41 -'HINT.RIGHT'=42 -'CORRELATE'=43 -'SELF'=44 -'EXACT'=45 -'APPROXIMATE'=46 -'SCOPE'=47 -'MAPPING'=48 -'EXPLAIN'=49 -'FORMATTED'=50 -'COST'=51 -'CODEGEN'=52 -'EXTENDED'=53 -'SIMPLE'=54 -'AS'=55 -'BY'=56 -'SOURCE'=57 -'INDEX'=58 -'D'=59 -'DESC'=60 -'DATASOURCES'=61 -'USING'=62 -'WITH'=63 -'AUTO'=64 -'STR'=65 -'IP'=66 -'NUM'=67 -'FIELDSUMMARY'=68 -'INCLUDEFIELDS'=69 -'NULLS'=70 -'SMA'=71 -'WMA'=72 -'KEEPEMPTY'=73 -'CONSECUTIVE'=74 -'DEDUP_SPLITVALUES'=75 -'PARTITIONS'=76 -'ALLNUM'=77 -'DELIM'=78 -'CENTROIDS'=79 -'ITERATIONS'=80 -'DISTANCE_TYPE'=81 -'NUMBER_OF_TREES'=82 -'SHINGLE_SIZE'=83 -'SAMPLE_SIZE'=84 -'OUTPUT_AFTER'=85 -'TIME_DECAY'=86 -'ANOMALY_RATE'=87 -'CATEGORY_FIELD'=88 -'TIME_FIELD'=89 -'TIME_ZONE'=90 -'TRAINING_DATA_SIZE'=91 -'ANOMALY_SCORE_THRESHOLD'=92 -'APPEND'=93 -'CASE'=94 -'ELSE'=95 -'IN'=96 -'EXISTS'=97 -'NOT'=98 -'OR'=99 -'AND'=100 -'XOR'=101 -'TRUE'=102 -'FALSE'=103 -'REGEXP'=104 -'CONVERT_TZ'=105 -'DATETIME'=106 -'DAY'=107 -'DAY_HOUR'=108 -'DAY_MICROSECOND'=109 -'DAY_MINUTE'=110 -'DAY_OF_YEAR'=111 -'DAY_SECOND'=112 -'HOUR'=113 -'HOUR_MICROSECOND'=114 -'HOUR_MINUTE'=115 -'HOUR_OF_DAY'=116 -'HOUR_SECOND'=117 -'INTERVAL'=118 -'MICROSECOND'=119 -'MILLISECOND'=120 -'MINUTE'=121 -'MINUTE_MICROSECOND'=122 -'MINUTE_OF_DAY'=123 -'MINUTE_OF_HOUR'=124 -'MINUTE_SECOND'=125 -'MONTH'=126 -'MONTH_OF_YEAR'=127 -'QUARTER'=128 -'SECOND'=129 -'SECOND_MICROSECOND'=130 -'SECOND_OF_MINUTE'=131 -'WEEK'=132 -'WEEK_OF_YEAR'=133 -'YEAR'=134 -'YEAR_MONTH'=135 -'DATAMODEL'=136 -'LOOKUP'=137 -'SAVEDSEARCH'=138 -'INT'=139 -'INTEGER'=140 -'DOUBLE'=141 -'LONG'=142 -'FLOAT'=143 -'STRING'=144 -'BOOLEAN'=145 -'|'=146 -','=147 -'.'=148 -'='=149 -'>'=150 -'<'=151 -'+'=155 -'-'=156 -'*'=157 -'/'=158 -'%'=159 -'!'=160 -':'=161 -'('=162 -')'=163 -'['=164 -']'=165 -'\''=166 -'"'=167 -'`'=168 -'->'=169 -'~'=170 -'&'=171 -'^'=172 -'AVG'=173 -'COUNT'=174 -'DISTINCT_COUNT'=175 -'DISTINCT_COUNT_APPROX'=176 -'ESTDC'=177 -'ESTDC_ERROR'=178 -'MAX'=179 -'MEAN'=180 -'MEDIAN'=181 -'MIN'=182 -'MODE'=183 -'RANGE'=184 -'STDEV'=185 -'STDEVP'=186 -'SUM'=187 -'SUMSQ'=188 -'VAR_SAMP'=189 -'VAR_POP'=190 -'STDDEV_SAMP'=191 -'STDDEV_POP'=192 -'PERCENTILE'=193 -'PERCENTILE_APPROX'=194 -'TAKE'=195 -'FIRST'=196 -'LAST'=197 -'LIST'=198 -'VALUES'=199 -'EARLIEST'=200 -'EARLIEST_TIME'=201 -'LATEST'=202 -'LATEST_TIME'=203 -'PER_DAY'=204 -'PER_HOUR'=205 -'PER_MINUTE'=206 -'PER_SECOND'=207 -'RATE'=208 -'SPARKLINE'=209 -'C'=210 -'DC'=211 -'ABS'=212 -'CBRT'=213 -'CEIL'=214 -'CEILING'=215 -'CONV'=216 -'CRC32'=217 -'E'=218 -'EXP'=219 -'FLOOR'=220 -'LN'=221 -'LOG'=222 -'LOG10'=223 -'LOG2'=224 -'MOD'=225 -'PI'=226 -'POSITION'=227 -'POW'=228 -'POWER'=229 -'RAND'=230 -'ROUND'=231 -'SIGN'=232 -'SIGNUM'=233 -'SQRT'=234 -'TRUNCATE'=235 
-'ACOS'=236 -'ASIN'=237 -'ATAN'=238 -'ATAN2'=239 -'COS'=240 -'COT'=241 -'DEGREES'=242 -'RADIANS'=243 -'SIN'=244 -'TAN'=245 -'MD5'=246 -'SHA1'=247 -'SHA2'=248 -'ADDDATE'=249 -'ADDTIME'=250 -'CURDATE'=251 -'CURRENT_DATE'=252 -'CURRENT_TIME'=253 -'CURRENT_TIMESTAMP'=254 -'CURRENT_TIMEZONE'=255 -'CURTIME'=256 -'DATE'=257 -'DATEDIFF'=258 -'DATE_ADD'=259 -'DATE_FORMAT'=260 -'DATE_SUB'=261 -'DAYNAME'=262 -'DAYOFMONTH'=263 -'DAYOFWEEK'=264 -'DAYOFYEAR'=265 -'DAY_OF_MONTH'=266 -'DAY_OF_WEEK'=267 -'DURATION'=268 -'EXTRACT'=269 -'FROM_DAYS'=270 -'FROM_UNIXTIME'=271 -'GET_FORMAT'=272 -'LAST_DAY'=273 -'LOCALTIME'=274 -'LOCALTIMESTAMP'=275 -'MAKEDATE'=276 -'MAKE_DATE'=277 -'MAKETIME'=278 -'MONTHNAME'=279 -'NOW'=280 -'PERIOD_ADD'=281 -'PERIOD_DIFF'=282 -'SEC_TO_TIME'=283 -'STR_TO_DATE'=284 -'SUBDATE'=285 -'SUBTIME'=286 -'SYSDATE'=287 -'TIME'=288 -'TIMEDIFF'=289 -'TIMESTAMP'=290 -'TIMESTAMPADD'=291 -'TIMESTAMPDIFF'=292 -'TIME_FORMAT'=293 -'TIME_TO_SEC'=294 -'TO_DAYS'=295 -'TO_SECONDS'=296 -'UNIX_TIMESTAMP'=297 -'UTC_DATE'=298 -'UTC_TIME'=299 -'UTC_TIMESTAMP'=300 -'WEEKDAY'=301 -'YEARWEEK'=302 -'SUBSTR'=303 -'SUBSTRING'=304 -'LTRIM'=305 -'RTRIM'=306 -'TRIM'=307 -'TO'=308 -'LOWER'=309 -'UPPER'=310 -'CONCAT'=311 -'CONCAT_WS'=312 -'LENGTH'=313 -'STRCMP'=314 -'RIGHT'=315 -'LEFT'=316 -'ASCII'=317 -'LOCATE'=318 -'REPLACE'=319 -'REVERSE'=320 -'CAST'=321 -'ISEMPTY'=322 -'ISBLANK'=323 -'JSON'=324 -'JSON_OBJECT'=325 -'JSON_ARRAY'=326 -'JSON_ARRAY_LENGTH'=327 -'TO_JSON_STRING'=328 -'JSON_EXTRACT'=329 -'JSON_KEYS'=330 -'JSON_VALID'=331 -'ARRAY'=332 -'ARRAY_LENGTH'=333 -'FORALL'=334 -'FILTER'=335 -'TRANSFORM'=336 -'REDUCE'=337 -'LIKE'=338 -'ISNULL'=339 -'ISNOTNULL'=340 -'ISPRESENT'=341 -'BETWEEN'=342 -'CIDRMATCH'=343 -'GEOIP'=344 -'IFNULL'=345 -'NULLIF'=346 -'IF'=347 -'TYPEOF'=348 -'COALESCE'=349 -'MATCH'=350 -'MATCH_PHRASE'=351 -'MATCH_PHRASE_PREFIX'=352 -'MATCH_BOOL_PREFIX'=353 -'SIMPLE_QUERY_STRING'=354 -'MULTI_MATCH'=355 -'QUERY_STRING'=356 -'ALLOW_LEADING_WILDCARD'=357 -'ANALYZE_WILDCARD'=358 -'ANALYZER'=359 -'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'=360 -'BOOST'=361 -'CUTOFF_FREQUENCY'=362 -'DEFAULT_FIELD'=363 -'DEFAULT_OPERATOR'=364 -'ENABLE_POSITION_INCREMENTS'=365 -'ESCAPE'=366 -'FLAGS'=367 -'FUZZY_MAX_EXPANSIONS'=368 -'FUZZY_PREFIX_LENGTH'=369 -'FUZZY_TRANSPOSITIONS'=370 -'FUZZY_REWRITE'=371 -'FUZZINESS'=372 -'LENIENT'=373 -'LOW_FREQ_OPERATOR'=374 -'MAX_DETERMINIZED_STATES'=375 -'MAX_EXPANSIONS'=376 -'MINIMUM_SHOULD_MATCH'=377 -'OPERATOR'=378 -'PHRASE_SLOP'=379 -'PREFIX_LENGTH'=380 -'QUOTE_ANALYZER'=381 -'QUOTE_FIELD_SUFFIX'=382 -'REWRITE'=383 -'SLOP'=384 -'TIE_BREAKER'=385 -'TYPE'=386 -'ZERO_TERMS_QUERY'=387 -'SPAN'=388 -'MS'=389 -'S'=390 -'M'=391 -'H'=392 -'W'=393 -'Q'=394 -'Y'=395 From 2c8b839fa4eae8e572ee92ddc2be0d158ff5d29d Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 18 Nov 2024 13:07:05 +0800 Subject: [PATCH 3/5] minor updates Signed-off-by: Lantao Jin --- docs/ppl-lang/ppl-subquery-command.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md index b36eb1c80..09d5132ea 100644 --- a/docs/ppl-lang/ppl-subquery-command.md +++ b/docs/ppl-lang/ppl-subquery-command.md @@ -87,26 +87,28 @@ RelationSubquery: ### Examples 1: TPC-H q20 +InSubquery and ScalarSubquery + PPL query: os> source=supplier | join ON s_nationkey = n_nationkey nation | where n_name = 'CANADA' - and s_suppkey in [ // InSubquery + and s_suppkey in [ // InSubquery source = partsupp - | where ps_partkey in [ // InSubquery + | 
where ps_partkey in [ // InSubquery
                  source = part
                  | where like(p_name, 'forest%')
                  | fields p_partkey
                ]
-            and ps_availqty > [ // ScalarSubquery
+            and ps_availqty > [ // ScalarSubquery
              source = lineitem
              | where l_partkey = ps_partkey
                and l_suppkey = ps_suppkey
                and l_shipdate >= date('1994-01-01')
                and l_shipdate < date_add(date('1994-01-01'), interval 1 year)
              | stats sum(l_quantity) as sum_l_quantity
-             | eval half_sum_l_quantity = 0.5 * sum_l_quantity // Stats and Eval commands can combine when issues/819 resolved
+             | eval half_sum_l_quantity = 0.5 * sum_l_quantity
              | fields half_sum_l_quantity
            ]
          | fields ps_suppkey
        ]
        | sort s_name
@@ -131,18 +133,20 @@ PPL query:

 ### Examples 2: TPC-H q22

+RelationSubquery, ScalarSubquery and ExistsSubquery
+
 PPL query:

-    os> source = [
+    os> source = [ // RelationSubquery
        source = customer
        | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
-         and c_acctbal > [
+         and c_acctbal > [ // ScalarSubquery
            source = customer
            | where c_acctbal > 0.00
              and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
            | stats avg(c_acctbal)
          ]
-         and not exists [
+         and not exists [ // ExistsSubquery
            source = orders
            | where o_custkey = c_custkey
          ]

From 274678fd62d417b2aa683814c823c08a601548d4 Mon Sep 17 00:00:00 2001
From: Lantao Jin
Date: Mon, 18 Nov 2024 20:11:06 +0800
Subject: [PATCH 4/5] address comments

Signed-off-by: Lantao Jin
---
 docs/ppl-lang/ppl-correlation-command.md | 2 +-
 docs/ppl-lang/ppl-dedup-command.md       | 2 +-
 docs/ppl-lang/ppl-grok-command.md        | 2 +-
 docs/ppl-lang/ppl-head-command.md        | 2 +-
 docs/ppl-lang/ppl-join-command.md        | 2 +-
 docs/ppl-lang/ppl-lookup-command.md      | 2 +-
 docs/ppl-lang/ppl-parse-command.md       | 2 +-
 docs/ppl-lang/ppl-rare-command.md        | 4 ++--
 docs/ppl-lang/ppl-search-command.md      | 2 +-
 docs/ppl-lang/ppl-sort-command.md        | 4 ++--
 docs/ppl-lang/ppl-stats-command.md       | 2 +-
 docs/ppl-lang/ppl-subquery-command.md    | 4 ++--
 docs/ppl-lang/ppl-top-command.md         | 2 +-
 docs/ppl-lang/ppl-trendline-command.md   | 4 ++--
 docs/ppl-lang/ppl-where-command.md       | 2 +-
 15 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/docs/ppl-lang/ppl-correlation-command.md b/docs/ppl-lang/ppl-correlation-command.md
index 2e8507a14..74e04da86 100644
--- a/docs/ppl-lang/ppl-correlation-command.md
+++ b/docs/ppl-lang/ppl-correlation-command.md
@@ -1,4 +1,4 @@
-## PPL Correlation Command
+## PPL `correlation` command

 > This is an experimental command - it may be removed in future versions

diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md
index 831c4926f..4e06d275e 100644
--- a/docs/ppl-lang/ppl-dedup-command.md
+++ b/docs/ppl-lang/ppl-dedup-command.md
@@ -1,4 +1,4 @@
-## PPL dedup command
+## PPL `dedup` command

 ### Table of contents

diff --git a/docs/ppl-lang/ppl-grok-command.md b/docs/ppl-lang/ppl-grok-command.md
index a9b5645c5..8d5946563 100644
--- a/docs/ppl-lang/ppl-grok-command.md
+++ b/docs/ppl-lang/ppl-grok-command.md
@@ -1,4 +1,4 @@
-## PPL Grok Command
+## PPL `grok` command

 ### Description

diff --git a/docs/ppl-lang/ppl-head-command.md b/docs/ppl-lang/ppl-head-command.md
index e4172b1c6..51a87db3b 100644
--- a/docs/ppl-lang/ppl-head-command.md
+++ b/docs/ppl-lang/ppl-head-command.md
@@ -1,4 +1,4 @@
-## PPL `head` Command
+## PPL `head` command

 **Description**
 The ``head`` command returns the first N number of specified results after an optional offset in search order.
diff --git a/docs/ppl-lang/ppl-join-command.md b/docs/ppl-lang/ppl-join-command.md
index 95b375e0a..f04f1c5c1 100644
--- a/docs/ppl-lang/ppl-join-command.md
+++ b/docs/ppl-lang/ppl-join-command.md
@@ -1,4 +1,4 @@
-## PPL Join Command
+## PPL `join` command

 ### Description

diff --git a/docs/ppl-lang/ppl-lookup-command.md b/docs/ppl-lang/ppl-lookup-command.md
index 6768cdcaf..87cf34bac 100644
--- a/docs/ppl-lang/ppl-lookup-command.md
+++ b/docs/ppl-lang/ppl-lookup-command.md
@@ -1,4 +1,4 @@
-## PPL Lookup Command
+## PPL `lookup` command

 ### Description
 Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table).

diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md
index 10be21cc0..0e000756e 100644
--- a/docs/ppl-lang/ppl-parse-command.md
+++ b/docs/ppl-lang/ppl-parse-command.md
@@ -1,4 +1,4 @@
-## PPL Parse Command
+## PPL `parse` command

 ### Description

diff --git a/docs/ppl-lang/ppl-rare-command.md b/docs/ppl-lang/ppl-rare-command.md
index 8a2ca640f..93967e6fe 100644
--- a/docs/ppl-lang/ppl-rare-command.md
+++ b/docs/ppl-lang/ppl-rare-command.md
@@ -1,7 +1,7 @@
-## PPL rare Command
+## PPL `rare` command

 ### Description
-Using ``rare`` command to find the least common tuple of values of all fields in the field list.
+Using `rare` command to find the least common tuple of values of all fields in the field list.

 **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.

diff --git a/docs/ppl-lang/ppl-search-command.md b/docs/ppl-lang/ppl-search-command.md
index bccfd04f0..6e1cf0e50 100644
--- a/docs/ppl-lang/ppl-search-command.md
+++ b/docs/ppl-lang/ppl-search-command.md
@@ -1,7 +1,7 @@
 ## PPL `search` command

 ### Description
-Using ``search`` command to retrieve document from the index. ``search`` command could be only used as the first command in the PPL query.
+Using `search` command to retrieve document from the index. `search` command could be only used as the first command in the PPL query.

 ### Syntax

diff --git a/docs/ppl-lang/ppl-sort-command.md b/docs/ppl-lang/ppl-sort-command.md
index c3bf304d7..dd9b4b33d 100644
--- a/docs/ppl-lang/ppl-sort-command.md
+++ b/docs/ppl-lang/ppl-sort-command.md
@@ -1,7 +1,7 @@
-## PPL `sort`command
+## PPL `sort` command

 ### Description
-Using ``sort`` command to sorts all the search result by the specified fields.
+Using `sort` command to sorts all the search result by the specified fields.

 ### Syntax

diff --git a/docs/ppl-lang/ppl-stats-command.md b/docs/ppl-lang/ppl-stats-command.md
index 552f83e46..a73800b26 100644
--- a/docs/ppl-lang/ppl-stats-command.md
+++ b/docs/ppl-lang/ppl-stats-command.md
@@ -1,7 +1,7 @@
 ## PPL `stats` command

 ### Description
-Using ``stats`` command to calculate the aggregation from search result.
+Using `stats` command to calculate the aggregation from search result.

 ### NULL/MISSING values handling:

diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md
index 09d5132ea..766b37130 100644
--- a/docs/ppl-lang/ppl-subquery-command.md
+++ b/docs/ppl-lang/ppl-subquery-command.md
@@ -1,7 +1,7 @@
-## PPL SubQuery Commands
+## PPL `subquery` command

 ### Description
-The subquery command has 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`.
+The subquery commands contain 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`.
 `InSubquery`, `ExistsSubquery` and `ScalarSubquery` are subquery expressions, their common usage is in Where clause(`where `) and Search filter(`search source=* `).

 For example, a subquery expression could be used in boolean expression:

diff --git a/docs/ppl-lang/ppl-top-command.md b/docs/ppl-lang/ppl-top-command.md
index 012457fe2..2bacdba50 100644
--- a/docs/ppl-lang/ppl-top-command.md
+++ b/docs/ppl-lang/ppl-top-command.md
@@ -1,4 +1,4 @@
-## PPL top Command
+## PPL `top` command

 ### Description
 Using ``top`` command to find the most common tuple of values of all fields in the field list.

diff --git a/docs/ppl-lang/ppl-trendline-command.md b/docs/ppl-lang/ppl-trendline-command.md
index 44b8c999f..b2be172cd 100644
--- a/docs/ppl-lang/ppl-trendline-command.md
+++ b/docs/ppl-lang/ppl-trendline-command.md
@@ -1,7 +1,7 @@
-## PPL trendline Command
+## PPL `trendline` command

 ### Description
-Using ``trendline`` command to calculate moving averages of fields.
+Using `trendline` command to calculate moving averages of fields.

 ### Syntax
 - SMA (Simple Moving Average) `TRENDLINE [sort <[+|-] sort-field>] SMA(number-of-datapoints, field) [AS alias] [SMA(number-of-datapoints, field) [AS alias]]...`

diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md
index aa7d9299e..ec676ab62 100644
--- a/docs/ppl-lang/ppl-where-command.md
+++ b/docs/ppl-lang/ppl-where-command.md
@@ -1,4 +1,4 @@
-## PPL where Command
+## PPL `where` command

 ### Description
 The ``where`` command bool-expression to filter the search result. The ``where`` command only return the result when bool-expression evaluated to true.

From 5d143f8062d49cc019ded45735f240ba99e6e64a Mon Sep 17 00:00:00 2001
From: Lantao Jin
Date: Tue, 19 Nov 2024 08:55:02 +0800
Subject: [PATCH 5/5] fix hyper-link issue

Signed-off-by: Lantao Jin
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 12123b456..db3790e64 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Please refer to the [Flint Index Reference Manual](./docs/index.md) for more inf

 * For additional details on Spark PPL commands project, see [PPL Project](https://github.com/orgs/opensearch-project/projects/214/views/2)

-* Experiment ppl queries on local spark cluster[PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)
+* Experiment ppl queries on local spark cluster [PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)

 ## Prerequisites

@@ -88,7 +88,7 @@ bin/spark-shell --packages "org.opensearch:opensearch-spark-ppl_2.12:0.7.0-SNAPS
 ```

 ### PPL Run queries on a local spark cluster
-See ppl usage sample on local spark cluster[PPL on local spark ](local-spark-ppl-test-instruction.md)
+See ppl usage sample on local spark cluster [PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)

 ## Code of Conduct