From f0cc08b8de4f75cfa5c6ec64ca4483ad20aa511c Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 18 Nov 2024 12:56:01 +0800 Subject: [PATCH 1/5] refactor ppl docs to keep consistent look Signed-off-by: Lantao Jin --- docs/ppl-lang/README.md | 2 +- .../{ppl-lambda.md => ppl-collection.md} | 80 +- docs/ppl-lang/functions/ppl-json.md | 214 ++++- docs/ppl-lang/ppl-dedup-command.md | 8 +- docs/ppl-lang/ppl-eval-command.md | 6 +- docs/ppl-lang/ppl-fields-command.md | 6 +- docs/ppl-lang/ppl-fieldsummary-command.md | 4 +- docs/ppl-lang/ppl-grok-command.md | 2 +- docs/ppl-lang/ppl-join-command.md | 256 +++--- docs/ppl-lang/ppl-lookup-command.md | 83 +- docs/ppl-lang/ppl-rare-command.md | 4 +- docs/ppl-lang/ppl-subquery-command.md | 401 +++------ docs/ppl-lang/ppl-top-command.md | 2 +- docs/ppl-lang/ppl-trendline-command.md | 2 +- .../src/main/antlr4/OpenSearchPPLLexer.tokens | 798 ++++++++++++++++++ 15 files changed, 1364 insertions(+), 504 deletions(-) rename docs/ppl-lang/functions/{ppl-lambda.md => ppl-collection.md} (57%) create mode 100644 ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index 9df9f5986..19e1a6ee0 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -94,7 +94,7 @@ For additional examples see the next [documentation](PPL-Example-Commands.md). - [`IP Address Functions`](functions/ppl-ip.md) - - [`Lambda Functions`](functions/ppl-lambda.md) + - [`Collection Functions`](functions/ppl-collection) --- ### PPL On Spark diff --git a/docs/ppl-lang/functions/ppl-lambda.md b/docs/ppl-lang/functions/ppl-collection.md similarity index 57% rename from docs/ppl-lang/functions/ppl-lambda.md rename to docs/ppl-lang/functions/ppl-collection.md index cdb6f9e8f..b98f5f5ca 100644 --- a/docs/ppl-lang/functions/ppl-lambda.md +++ b/docs/ppl-lang/functions/ppl-collection.md @@ -1,4 +1,56 @@ -## Lambda Functions +## PPL Collection Functions + +### `ARRAY` + +**Description** + +`array(...)` Returns an array with the given elements. + +**Argument type:** +- A \ can be any kind of value such as string, number, or boolean. + +**Return type:** ARRAY + +Example: + + os> source=people | eval `array` = array(1, 2, 0, -1, 1.1, -0.11) + fetched rows / total rows = 1/1 + +------------------------------+ + | array | + +------------------------------+ + | [1.0,2.0,0.0,-1.0,1.1,-0.11] | + +------------------------------+ + os> source=people | eval `array` = array(true, false, true, true) + fetched rows / total rows = 1/1 + +------------------------------+ + | array | + +------------------------------+ + | [true, false, true, true] | + +------------------------------+ + + +### `ARRAY_LENGTH` + +**Description** + +`array_length(array)` Returns the number of elements in the outermost array. + +**Argument type:** ARRAY + +ARRAY or JSON_ARRAY object. 
+ +**Return type:** INTEGER + +Example: + + os> source=people | eval `array` = array_length(array(1,2,3,4)), `empty_array` = array_length(array()) + fetched rows / total rows = 1/1 + +---------+---------------+ + | array | empty_array | + +---------+---------------+ + | 4 | 0 | + +---------+---------------+ + ### `FORALL` @@ -14,7 +66,7 @@ Returns `TRUE` if all elements in the array satisfy the lambda predicate, otherw Example: - os> source=people | eval array = json_array(1, -1, 2), result = forall(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, -1, 2), result = forall(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -22,7 +74,7 @@ Example: | false | +-----------+ - os> source=people | eval array = json_array(1, 3, 2), result = forall(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, 3, 2), result = forall(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -41,7 +93,7 @@ Consider constructing the following array: and perform lambda functions against the nested fields `a` or `b`. See the examples: - os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result + os> source=people | eval array = array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -49,7 +101,7 @@ and perform lambda functions against the nested fields `a` or `b`. See the examp | false | +-----------+ - os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result + os> source=people | eval array = array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -71,7 +123,7 @@ Returns `TRUE` if at least one element in the array satisfies the lambda predica Example: - os> source=people | eval array = json_array(1, -1, 2), result = exists(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, -1, 2), result = exists(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -79,7 +131,7 @@ Example: | true | +-----------+ - os> source=people | eval array = json_array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result + os> source=people | eval array = array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -102,7 +154,7 @@ An ARRAY that contains all elements in the input array that satisfy the lambda p Example: - os> source=people | eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0) | fields result + os> source=people | eval array = array(1, -1, 2), result = filter(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -110,7 +162,7 @@ Example: | [1, 2] | +-----------+ - os> source=people | eval array = json_array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result + os> source=people | eval array = array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -132,7 +184,7 @@ An ARRAY that contains the result of applying the lambda transform function to e Example: - os> 
source=people | eval array = json_array(1, 2, 3), result = transform(array, x -> x + 1) | fields result + os> source=people | eval array = array(1, 2, 3), result = transform(array, x -> x + 1) | fields result fetched rows / total rows = 1/1 +--------------+ | result | @@ -140,7 +192,7 @@ Example: | [2, 3, 4] | +--------------+ - os> source=people | eval array = json_array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result + os> source=people | eval array = array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result fetched rows / total rows = 1/1 +--------------+ | result | @@ -162,7 +214,7 @@ The final result of applying the lambda functions to the start value and the inp Example: - os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result + os> source=people | eval array = array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -170,7 +222,7 @@ Example: | 6 | +-----------+ - os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result + os> source=people | eval array = array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result fetched rows / total rows = 1/1 +-----------+ | result | @@ -178,7 +230,7 @@ Example: | 16 | +-----------+ - os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result + os> source=people | eval array = array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result fetched rows / total rows = 1/1 +-----------+ | result | diff --git a/docs/ppl-lang/functions/ppl-json.md b/docs/ppl-lang/functions/ppl-json.md index 5b26ee427..2c0c0ca67 100644 --- a/docs/ppl-lang/functions/ppl-json.md +++ b/docs/ppl-lang/functions/ppl-json.md @@ -95,6 +95,11 @@ Example: | {"array":[1.0,2.0,0.0,-1.0,1.1,-0.11]} | +----------------------------------------+ +**Limitation** + +The list of parameters of `json_array` should all be the same type. +`json_array('this', 'is', 1.1, -0.11, true, false)` throws exception. + ### `TO_JSON_STRING` **Description** @@ -149,29 +154,6 @@ Example: +-----------+-----------+-------------+ -### `ARRAY_LENGTH` - -**Description** - -`array_length(jsonArray)` Returns the number of elements in the outermost array. - -**Argument type:** ARRAY - -ARRAY or JSON_ARRAY object. - -**Return type:** INTEGER - -Example: - - os> source=people | eval `json_array` = json_array_length(json_array(1,2,3,4)), `empty_array` = json_array_length(json_array()) - fetched rows / total rows = 1/1 - +--------------+---------------+ - | json_array | empty_array | - +--------------+---------------+ - | 4 | 0 | - +--------------+---------------+ - - ### `JSON_EXTRACT` **Description** @@ -280,3 +262,189 @@ Example: |------------------+---------| | 13 | null | +------------------+---------+ + +### `FORALL` + +**Description** + +`forall(json_array, lambda)` Evaluates whether a lambda predicate holds for all elements in the json_array. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** BOOLEAN + +Returns `TRUE` if all elements in the array satisfy the lambda predicate, otherwise `FALSE`. 
+ +Example: + + os> source=people | eval array = json_array(1, -1, 2), result = forall(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | false | + +-----------+ + + os> source=people | eval array = json_array(1, 3, 2), result = forall(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | true | + +-----------+ + +**Note:** The lambda expression can access the nested fields of the array elements. This applies to all lambda functions introduced in this document. + +Consider constructing the following array: + + array = [ + {"a":1, "b":1}, + {"a":-1, "b":2} + ] + +and perform lambda functions against the nested fields `a` or `b`. See the examples: + + os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.a > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | false | + +-----------+ + + os> source=people | eval array = json_array(json_object("a", 1, "b", 1), json_object("a" , -1, "b", 2)), result = forall(array, x -> x.b > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | true | + +-----------+ + +### `EXISTS` + +**Description** + +`exists(json_array, lambda)` Evaluates whether a lambda predicate holds for one or more elements in the json_array. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** BOOLEAN + +Returns `TRUE` if at least one element in the array satisfies the lambda predicate, otherwise `FALSE`. + +Example: + + os> source=people | eval array = json_array(1, -1, 2), result = exists(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | true | + +-----------+ + + os> source=people | eval array = json_array(-1, -3, -2), result = exists(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | false | + +-----------+ + + +### `FILTER` + +**Description** + +`filter(json_array, lambda)` Filters the input json_array using the given lambda function. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** ARRAY + +An ARRAY that contains all elements in the input json_array that satisfy the lambda predicate. + +Example: + + os> source=people | eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | [1, 2] | + +-----------+ + + os> source=people | eval array = json_array(-1, -3, -2), result = filter(array, x -> x > 0) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | [] | + +-----------+ + +### `TRANSFORM` + +**Description** + +`transform(json_array, lambda)` Transform elements in a json_array using the lambda transform function. The second argument implies the index of the element if using binary lambda function. This is similar to a `map` in functional programming. + +**Argument type:** ARRAY, LAMBDA + +**Return type:** ARRAY + +An ARRAY that contains the result of applying the lambda transform function to each element in the input array. 
+ +Example: + + os> source=people | eval array = json_array(1, 2, 3), result = transform(array, x -> x + 1) | fields result + fetched rows / total rows = 1/1 + +--------------+ + | result | + +--------------+ + | [2, 3, 4] | + +--------------+ + + os> source=people | eval array = json_array(1, 2, 3), result = transform(array, (x, i) -> x + i) | fields result + fetched rows / total rows = 1/1 + +--------------+ + | result | + +--------------+ + | [1, 3, 5] | + +--------------+ + +### `REDUCE` + +**Description** + +`reduce(json_array, start, merge_lambda, finish_lambda)` Applies a binary merge lambda function to a start value and all elements in the json_array, and reduces this to a single state. The final state is converted into the final result by applying a finish lambda function. + +**Argument type:** ARRAY, ANY, LAMBDA, LAMBDA + +**Return type:** ANY + +The final result of applying the lambda functions to the start value and the input json_array. + +Example: + + os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | 6 | + +-----------+ + + os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | 16 | + +-----------+ + + os> source=people | eval array = json_array(1, 2, 3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | fields result + fetched rows / total rows = 1/1 + +-----------+ + | result | + +-----------+ + | 60 | + +-----------+ diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md index 28fe7f4a4..831c4926f 100644 --- a/docs/ppl-lang/ppl-dedup-command.md +++ b/docs/ppl-lang/ppl-dedup-command.md @@ -1,6 +1,6 @@ -# PPL dedup command +## PPL dedup command -## Table of contents +### Table of contents - [Description](#description) - [Syntax](#syntax) @@ -11,11 +11,11 @@ - [Example 4: Dedup in consecutive document](#example-4-dedup-in-consecutive-document) - [Limitation](#limitation) -## Description +### Description Using `dedup` command to remove identical document defined by field from the search result. -## Syntax +### Syntax ```sql dedup [int] [keepempty=] [consecutive=] diff --git a/docs/ppl-lang/ppl-eval-command.md b/docs/ppl-lang/ppl-eval-command.md index 1908c087c..e98d4d4f2 100644 --- a/docs/ppl-lang/ppl-eval-command.md +++ b/docs/ppl-lang/ppl-eval-command.md @@ -1,10 +1,10 @@ -# PPL `eval` command +## PPL `eval` command -## Description +### Description The ``eval`` command evaluate the expression and append the result to the search result. -## Syntax +### Syntax ```sql eval = ["," = ]... ``` diff --git a/docs/ppl-lang/ppl-fields-command.md b/docs/ppl-lang/ppl-fields-command.md index e37fc644f..4ef041ee2 100644 --- a/docs/ppl-lang/ppl-fields-command.md +++ b/docs/ppl-lang/ppl-fields-command.md @@ -1,12 +1,12 @@ ## PPL `fields` command -**Description** +### Description Using ``field`` command to keep or remove fields from the search result. -**Syntax** +### Syntax -field [+|-] +`field [+|-] ` * index: optional. if the plus (+) is used, only the fields specified in the field list will be keep. if the minus (-) is used, all the fields specified in the field list will be removed. **Default** + * field list: mandatory. comma-delimited keep or remove fields. 
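+
+As an illustration, a minimal sketch assuming a hypothetical `accounts` index with `account_number`, `firstname`, `lastname` and `balance` fields (names not taken from this change):
+
+```
+source=accounts | fields account_number, firstname, lastname // keep only the listed fields
+source=accounts | fields - balance // remove `balance` from the result
+```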
diff --git a/docs/ppl-lang/ppl-fieldsummary-command.md b/docs/ppl-lang/ppl-fieldsummary-command.md index 468c2046b..2015cf815 100644 --- a/docs/ppl-lang/ppl-fieldsummary-command.md +++ b/docs/ppl-lang/ppl-fieldsummary-command.md @@ -1,11 +1,11 @@ ## PPL `fieldsummary` command -**Description** +### Description Using `fieldsummary` command to : - Calculate basic statistics for each field (count, distinct count, min, max, avg, stddev, mean ) - Determine the data type of each field -**Syntax** +### Syntax `... | fieldsummary (nulls=true/false)` diff --git a/docs/ppl-lang/ppl-grok-command.md b/docs/ppl-lang/ppl-grok-command.md index 06028109b..a9b5645c5 100644 --- a/docs/ppl-lang/ppl-grok-command.md +++ b/docs/ppl-lang/ppl-grok-command.md @@ -1,4 +1,4 @@ -## PPL Correlation Command +## PPL Grok Command ### Description diff --git a/docs/ppl-lang/ppl-join-command.md b/docs/ppl-lang/ppl-join-command.md index b374bce5f..95b375e0a 100644 --- a/docs/ppl-lang/ppl-join-command.md +++ b/docs/ppl-lang/ppl-join-command.md @@ -1,10 +1,115 @@ ## PPL Join Command -## Overview +### Description -[Trace analytics](https://opensearch.org/docs/latest/observability-plugin/trace/ta-dashboards/) considered using SQL/PPL for its queries, but some graphs rely on joining two indices (span index and service map index) together which is not supported by SQL/PPL. Trace analytics was implemented with DSL + javascript, would be good if `join` being added to SQL could support this use case. +`JOIN` command combines two datasets together. The left side could be an index or results from a piped commands, the right side could be either an index or a subquery. -### Schema +### Syntax + +`[joinType] join [leftAlias] [rightAlias] [joinHints] on ` + +**joinType** +- Syntax: `[INNER] | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | CROSS | [LEFT] SEMI | [LEFT] ANTI` +- Optional +- Description: The type of join to perform. The default is `INNER` if not specified. + +**leftAlias** +- Syntax: `left = ` +- Optional +- Description: The subquery alias to use with the left join side, to avoid ambiguous naming. + +**rightAlias** +- Syntax: `right = ` +- Optional +- Description: The subquery alias to use with the right join side, to avoid ambiguous naming. + +**joinHints** +- Syntax: `[hint.left.key1 = value1 hint.right.key2 = value2]` +- Optional +- Description: Zero or more space-separated join hints in the form of `Key` = `Value`. The key must start with `hint.left.` or `hint.right.` + +**joinCriteria** +- Syntax: `` +- Required +- Description: The syntax starts with `ON`. It could be any comparison expression. Generally, the join criteria looks like `.=.`. For example: `l.id = r.id`. If the join criteria contains multiple conditions, you can specify `AND` and `OR` operator between each comparison expression. For example, `l.id = r.id AND l.email = r.email AND (r.age > 65 OR r.age < 18)`. + +**right-dataset** +- Required +- Description: Right dataset could be either an index or a subquery with/without alias. 
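+
+As a quick illustration of the syntax above, a minimal sketch with aliases and a compound join criteria (the `employees`/`departments` indices and their fields are hypothetical, unlike the datasets used in the examples below):
+
+```
+source = employees // hypothetical left side
+| left outer join left = e right = d
+    ON e.dept_id = d.dept_id AND (d.region = 'EU' OR d.region = 'US')
+    departments // hypothetical right side
+| fields e.name, d.dept_name
+```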
+ +### Example 1: two indices join + +PPL query: + + os> source=customer | join ON c_custkey = o_custkey orders + | fields c_custkey, c_nationkey, c_mktsegment, o_orderkey, o_orderstatus, o_totalprice | head 10 + fetched rows / total rows = 10/10 + +----------+-------------+-------------+------------+---------------+-------------+ + | c_custkey| c_nationkey | c_mktsegment| o_orderkey | o_orderstatus | o_totalprice| + +----------+-------------+-------------+------------+---------------+-------------+ + | 36901 | 13 | AUTOMOBILE | 1 | O | 173665.47 | + | 78002 | 10 | AUTOMOBILE | 2 | O | 46929.18 | + | 123314 | 15 | MACHINERY | 3 | F | 193846.25 | + | 136777 | 10 | HOUSEHOLD | 4 | O | 32151.78 | + | 44485 | 20 | FURNITURE | 5 | F | 144659.2 | + | 55624 | 7 | AUTOMOBILE | 6 | F | 58749.59 | + | 39136 | 5 | FURNITURE | 7 | O | 252004.18 | + | 130057 | 9 | FURNITURE | 32 | O | 208660.75 | + | 66958 | 18 | MACHINERY | 33 | F | 163243.98 | + | 61001 | 3 | FURNITURE | 34 | O | 58949.67 | + +----------+-------------+-------------+------------+---------------+-------------+ + +### Example 2: three indices join + +PPL query: + + os> source=customer | join ON c_custkey = o_custkey orders | join ON c_nationkey = n_nationkey nation + | fields c_custkey, c_mktsegment, o_orderkey, o_orderstatus, o_totalprice, n_name | head 10 + fetched rows / total rows = 10/10 + +----------+-------------+------------+---------------+-------------+--------------+ + | c_custkey| c_mktsegment| o_orderkey | o_orderstatus | o_totalprice| n_name | + +----------+-------------+------------+---------------+-------------+--------------+ + | 36901 | AUTOMOBILE | 1 | O | 173665.47 | JORDAN | + | 78002 | AUTOMOBILE | 2 | O | 46929.18 | IRAN | + | 123314 | MACHINERY | 3 | F | 193846.25 | MOROCCO | + | 136777 | HOUSEHOLD | 4 | O | 32151.78 | IRAN | + | 44485 | FURNITURE | 5 | F | 144659.2 | SAUDI ARABIA | + | 55624 | AUTOMOBILE | 6 | F | 58749.59 | GERMANY | + | 39136 | FURNITURE | 7 | O | 252004.18 | ETHIOPIA | + | 130057 | FURNITURE | 32 | O | 208660.75 | INDONESIA | + | 66958 | MACHINERY | 33 | F | 163243.98 | CHINA | + | 61001 | FURNITURE | 34 | O | 58949.67 | CANADA | + +----------+-------------+------------+---------------+-------------+--------------+ + +### Example 3: join a subquery in right side + +PPL query: + + os>source=supplier| join right = revenue0 ON s_suppkey = supplier_no + [ + source=lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) + | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no + ] + | fields s_name, s_phone, total_revenue, supplier_no | head 10 + fetched rows / total rows = 10/10 + +---------------------+----------------+-------------------+-------------+ + | s_name | s_phone | total_revenue | supplier_no | + +---------------------+----------------+-------------------+-------------+ + | Supplier#000007747 | 24-911-546-3505| 636204.0279 | 7747 | + | Supplier#000007748 | 29-535-184-2277| 538311.8099 | 7748 | + | Supplier#000007749 | 18-225-478-7489| 743462.4473000001 | 7749 | + | Supplier#000007750 | 28-680-484-7044| 616828.2220999999 | 7750 | + | Supplier#000007751 | 20-990-606-7343| 1092975.1925 | 7751 | + | Supplier#000007752 | 12-936-258-6650| 1090399.9666 | 7752 | + | Supplier#000007753 | 22-394-329-1153| 777130.7457000001 | 7753 | + | Supplier#000007754 | 26-941-591-5320| 866600.0501 | 7754 | + | Supplier#000007755 | 32-138-467-4225| 702256.7030000001 | 7755 | + | Supplier#000007756 
| 29-860-205-8019| 1304979.0511999999| 7756 | + +---------------------+----------------+-------------------+-------------+ + +### Example 4: complex example in OTEL + +**Schema** There will be at least 2 indices, `otel-v1-apm-span-*` (large) and `otel-v1-apm-service-map` (small). @@ -30,154 +135,47 @@ Relevant fields from indices: Full schemas are defined in data-prepper repo: [`otel-v1-apm-span-*`](https://github.com/opensearch-project/data-prepper/blob/04dd7bd18977294800cf4b77d7f01914def75f23/docs/schemas/trace-analytics/otel-v1-apm-span-index-template.md), [`otel-v1-apm-service-map`](https://github.com/opensearch-project/data-prepper/blob/4e5f83814c4a0eed2a1ca9bab0693b9e32240c97/docs/schemas/trace-analytics/otel-v1-apm-service-map-index-template.md) -### Requirement - -Support `join` to calculate the following: +**Requirement** For each service, join span index on service map index to calculate metrics under different type of filters. ![image](https://user-images.githubusercontent.com/28062824/194170062-f0dd1d57-c5eb-44db-95e0-6b3b4e52f25a.png) -This sample query calculates latency when filtered by trace group `client_cancel_order` for the `order` service. I only have a subquery example, don't have the join version of the query.. - -```sql -SELECT avg(durationInNanos) -FROM `otel-v1-apm-span-000001` t1 -WHERE t1.serviceName = `order` - AND ((t1.name in - (SELECT target.resource - FROM `otel-v1-apm-service-map` - WHERE serviceName = `order` - AND traceGroupName = `client_cancel_order`) - AND t1.parentSpanId != NULL) - OR (t1.parentSpanId = NULL - AND t1.name = `client_cancel_order`)) - AND t1.traceId in - (SELECT traceId - FROM `otel-v1-apm-span-000001` - WHERE serviceName = `order`) -``` -## Migrate to PPL - -### Syntax of Join Command - -```sql -SEARCH source= -| -| [joinType] JOIN - [leftAlias] - [rightAlias] - [joinHints] - ON joinCriteria - -| -``` -**joinType** -- Syntax: `[INNER] | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | CROSS | [LEFT] SEMI | [LEFT] ANTI` -- Optional -- Description: The type of join to perform. The default is `INNER` if not specified. +This sample query calculates latency when filtered by trace group `client_cancel_order` for the `order` service. I only have a subquery example, don't have the join version of the query. -**leftAlias** -- Syntax: `left = ` -- Optional -- Description: The subquery alias to use with the left join side, to avoid ambiguous naming. - -**rightAlias** -- Syntax: `right = ` -- Optional -- Description: The subquery alias to use with the right join side, to avoid ambiguous naming. - -**joinHints** -- Syntax: `[hint.left.key1 = value1 hint.right.key2 = value2]` -- Optional -- Description: Zero or more space-separated join hints in the form of `Key` = `Value`. The key must start with `hint.left.` or `hint.right.` - -**joinCriteria** -- Syntax: `` -- Required -- Description: The syntax starts with `ON`. It could be any comparison expression. Generally, the join criteria looks like `.=.`. For example: `l.id = r.id`. If the join criteria contains multiple conditions, you can specify `AND` and `OR` operator between each comparison expression. For example, `l.id = r.id AND l.email = r.email AND (r.age > 65 OR r.age < 18)`. - -**right-table** -- Required -- Description: The index or table name of join right-side. Sub-search is unsupported in join right side for now. 
- -### Rewriting -```sql -SEARCH source=otel-v1-apm-span-000001 +PPL query: +``` +source=otel-v1-apm-span-000001 | WHERE serviceName = 'order' | JOIN left=t1 right=t2 ON t1.traceId = t2.traceId AND t2.serviceName = 'order' - otel-v1-apm-span-000001 -- self inner join -| EVAL s_name = t1.name -- rename to avoid ambiguous -| EVAL s_parentSpanId = t1.parentSpanId -- RENAME command would be better when it is supported -| EVAL s_durationInNanos = t1.durationInNanos -| FIELDS s_name, s_parentSpanId, s_durationInNanos -- reduce colunms in join + otel-v1-apm-span-000001 // self inner join +| RENAME s_name as t1.name +| RENAME s_parentSpanId as t1.parentSpanId +| RENAME s_durationInNanos as t1.durationInNanos +| FIELDS s_name, s_parentSpanId, s_durationInNanos // reduce colunms in join | LEFT JOIN left=s1 right=t3 ON s_name = t3.target.resource AND t3.serviceName = 'order' AND t3.traceGroupName = 'client_cancel_order' otel-v1-apm-service-map | WHERE (s_parentSpanId IS NOT NULL OR (s_parentSpanId IS NULL AND s_name = 'client_cancel_order')) -| STATS avg(s_durationInNanos) -- no need to add alias if there is no ambiguous -``` - - -### More examples - -Migration from SQL query (TPC-H Q13): -```sql -SELECT c_count, COUNT(*) AS custdist -FROM - ( SELECT c_custkey, COUNT(o_orderkey) c_count - FROM customer LEFT OUTER JOIN orders ON c_custkey = o_custkey - AND o_comment NOT LIKE '%unusual%packages%' - GROUP BY c_custkey - ) AS c_orders -GROUP BY c_count -ORDER BY custdist DESC, c_count DESC; -``` -Rewritten by PPL Join query: -```sql -SEARCH source=customer -| FIELDS c_custkey -| LEFT OUTER JOIN - ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%' - orders -| STATS count(o_orderkey) AS c_count BY c_custkey -| STATS count() AS custdist BY c_count -| SORT - custdist, - c_count -``` -_- **Limitation: sub-searches is unsupported in join right side**_ - -If sub-searches is supported, above ppl query could be rewritten as: -```sql -SEARCH source=customer -| FIELDS c_custkey -| LEFT OUTER JOIN - ON c_custkey = o_custkey - [ - SEARCH source=orders - | WHERE o_comment NOT LIKE '%unusual%packages%' - | FIELDS o_orderkey, o_custkey - ] -| STATS count(o_orderkey) AS c_count BY c_custkey -| STATS count() AS custdist BY c_count -| SORT - custdist, - c_count +| STATS avg(s_durationInNanos) ``` ### Comparison with [Correlation](ppl-correlation-command) A primary difference between `correlate` and `join` is that both sides of `correlate` are tables, but both sides of `join` are subqueries. For example: -```sql +``` source = testTable1 - | where country = 'Canada' OR country = 'England' - | eval cname = lower(name) - | fields cname, country, year, month - | inner join left=l, right=r - ON l.cname = r.name AND l.country = r.country AND l.year = 2023 AND r.month = 4 - testTable2s +| where country = 'Canada' OR country = 'England' +| eval cname = lower(name) +| fields cname, country, year, month +| inner join left=l right=r + ON l.cname = r.name AND l.country = r.country AND l.year = 2023 AND r.month = 4 + testTable2s ``` The subquery alias `l` does not represent the `testTable1` table itself. 
Instead, it represents the subquery: -```sql +``` source = testTable1 | where country = 'Canada' OR country = 'England' | eval cname = lower(name) diff --git a/docs/ppl-lang/ppl-lookup-command.md b/docs/ppl-lang/ppl-lookup-command.md index 1b8350533..6768cdcaf 100644 --- a/docs/ppl-lang/ppl-lookup-command.md +++ b/docs/ppl-lang/ppl-lookup-command.md @@ -1,20 +1,18 @@ ## PPL Lookup Command -## Overview +### Description Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table). You can extend fields of an index with values from a dimension table, append or replace values when lookup condition is matched. As an alternative of [Join command](ppl-join-command), lookup command is more suitable for enriching the source data with a static dataset. -### Syntax of Lookup Command +### Syntax -```sql -SEARCH source= -| -| LOOKUP ( [AS ])... - [(REPLACE | APPEND) ( [AS ])...] -| ``` +LOOKUP ( [AS ])... + [(REPLACE | APPEND) ( [AS ])...] +``` + **lookupIndex** - Required - Description: the name of lookup index (dimension table) @@ -44,26 +42,49 @@ SEARCH source= - Description: If you specify REPLACE, matched values in \ field overwrite the values in result. If you specify APPEND, matched values in \ field only append to the missing values in result. ### Usage -> LOOKUP id AS cid REPLACE mail AS email
-> LOOKUP name REPLACE mail AS email
-> LOOKUP id AS cid, name APPEND address, mail AS email
-> LOOKUP id
- -### Example -```sql -SEARCH source= -| WHERE orderType = 'Cancelled' -| LOOKUP account_list, mkt_id AS mkt_code REPLACE amount, account_name AS name -| STATS count(mkt_code), avg(amount) BY name -``` -```sql -SEARCH source= -| DEDUP market_id -| EVAL category=replace(category, "-", ".") -| EVAL category=ltrim(category, "dvp.") -| LOOKUP bounce_category category AS category APPEND classification -``` -```sql -SEARCH source= -| LOOKUP bounce_category category -``` +- `LOOKUP id AS cid REPLACE mail AS email` +- `LOOKUP name REPLACE mail AS email` +- `LOOKUP id AS cid, name APPEND address, mail AS email` +- `LOOKUP id` + +### Examples 1: replace + +PPL query: + + os>source=people | LOOKUP work_info uid AS id REPLACE department | head 10 + fetched rows / total rows = 10/10 + +------+-----------+-------------+-----------+--------+------------------+ + | id | name | occupation | country | salary | department | + +------+-----------+-------------+-----------+--------+------------------+ + | 1000 | Daniel | Teacher | Canada | 56486 | CUSTOMER_SERVICE | + | 1001 | Joseph | Lawyer | Denmark | 135943 | FINANCE | + | 1002 | David | Artist | Finland | 60391 | DATA | + | 1003 | Charlotte | Lawyer | Denmark | 42173 | LEGAL | + | 1004 | Isabella | Veterinarian| Australia | 117699 | MARKETING | + | 1005 | Lily | Engineer | Italy | 37526 | IT | + | 1006 | Emily | Dentist | Denmark | 125340 | MARKETING | + | 1007 | James | Lawyer | Germany | 56532 | LEGAL | + | 1008 | Lucas | Lawyer | Japan | 87782 | DATA | + | 1009 | Sophia | Architect | Sweden | 37597 | MARKETING | + +------+-----------+-------------+-----------+--------+------------------+ + +### Examples 2: append + +PPL query: + + os>source=people| LOOKUP work_info uid AS ID, name APPEND department | where isnotnull(department) | head 10 + fetched rows / total rows = 10/10 + +------+---------+-------------+-------------+--------+------------+ + | id | name | occupation | country | salary | department | + +------+---------+-------------+-------------+--------+------------+ + | 1018 | Emma | Architect | USA | 72400 | IT | + | 1032 | James | Pilot | Netherlands | 71698 | SALES | + | 1043 | Jane | Nurse | Brazil | 45016 | FINANCE | + | 1046 | Joseph | Pharmacist | Mexico | 109152 | OPERATIONS | + | 1064 | Joseph | Electrician | New Zealand | 50253 | LEGAL | + | 1090 | Matthew | Psychologist| Germany | 73396 | DATA | + | 1103 | Emily | Electrician | Switzerland | 98391 | DATA | + | 1114 | Jake | Nurse | Denmark | 53418 | SALES | + | 1115 | Sofia | Engineer | Mexico | 64829 | OPERATIONS | + | 1122 | Oliver | Scientist | Netherlands | 31146 | DATA | + +------+---------+-------------+-------------+--------+------------+ diff --git a/docs/ppl-lang/ppl-rare-command.md b/docs/ppl-lang/ppl-rare-command.md index e3ad21f4e..8a2ca640f 100644 --- a/docs/ppl-lang/ppl-rare-command.md +++ b/docs/ppl-lang/ppl-rare-command.md @@ -1,11 +1,11 @@ ## PPL rare Command -**Description** +### Description Using ``rare`` command to find the least common tuple of values of all fields in the field list. **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. 
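+
+For instance, using the syntax described below, a minimal sketch assuming a hypothetical `accounts` index with `age` and `gender` fields:
+
+```
+source=accounts | rare 5 age by gender // hypothetical index and fields
+```
+would return up to five of the least common `age` values for each distinct `gender` value.
+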
-**Syntax** +### Syntax `rare [N] [by-clause]` `rare_approx [N] [by-clause]` diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md index c4a0c337c..b36eb1c80 100644 --- a/docs/ppl-lang/ppl-subquery-command.md +++ b/docs/ppl-lang/ppl-subquery-command.md @@ -1,27 +1,27 @@ -## PPL SubQuery Commands: +## PPL SubQuery Commands -### Syntax -The subquery command should be implemented using a clean, logical syntax that integrates with existing PPL structure. +### Description +The subquery command has 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`. +`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are subquery expressions, their common usage is in Where clause(`where `) and Search filter(`search source=* `). -```sql -source=logs | where field in [ subquery source=events | where condition | fields field ] +For example, a subquery expression could be used in boolean expression: ``` - -In this example, the primary search (`source=logs`) is filtered by results from the subquery (`source=events`). - -The subquery command should allow nested queries to be as complex as necessary, supporting multiple levels of nesting. - -Example: - -```sql - source=logs | where id in [ subquery source=users | where user in [ subquery source=actions | where action="login" | fields user] | fields uid ] +| where orders.order_id in [ source=returns | where return_reason="damaged" | field order_id ] ``` +The `orders.order_id in [ source=... ]` is a ``. -For additional info See [Issue](https://github.com/opensearch-project/opensearch-spark/issues/661) - ---- +But `RelationSubquery` is not a subquery expression, it is a subquery plan. +[Recall the join command doc](ppl-join-command.md), the example is a subquery/subsearch **plan**, rather than a **expression**. -### InSubquery usage +### Syntax +- `where [not] in [ source=... | ... | ... ]` (InSubquery) +- `where [not] exists [ source=... | ... | ... ]` (ExistsSubquery) +- `where = [ source=... | ... | ... ]` (ScalarSubquery) +- `source=[ source= ...]` (RelationSubquery) +- `| join ON condition [ source= ]` (RelationSubquery in join right side) + +### Usage +InSubquery: - `source = outer | where a in [ source = inner | fields b ]` - `source = outer | where (a) in [ source = inner | fields b ]` - `source = outer | where (a,b,c) in [ source = inner | fields d,e,f ]` @@ -33,92 +33,9 @@ For additional info See [Issue](https://github.com/opensearch-project/opensearch - `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested) - `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter) -**_SQL Migration examples with IN-Subquery PPL:_** -1. 
tpch q4 (in-subquery with aggregation) -```sql -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and o_orderkey in ( - select - l_orderkey - from - lineitem - where l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority -``` -Rewritten by PPL InSubquery query: -```sql -source = orders -| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" and o_orderkey IN - [ source = lineitem - | where l_commitdate < l_receiptdate - | fields l_orderkey - ] -| stats count(1) as order_count by o_orderpriority -| sort o_orderpriority -| fields o_orderpriority, order_count -``` -2.tpch q20 (nested in-subquery) -```sql -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - ) - and s_nationkey = n_nationkey - and n_name = 'CANADA' -order by - s_name -``` -Rewritten by PPL InSubquery query: -```sql -source = supplier -| where s_suppkey IN [ - source = partsupp - | where ps_partkey IN [ - source = part - | where like(p_name, "forest%") - | fields p_partkey - ] - | fields ps_suppkey - ] -| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA' - nation -| sort s_name -``` ---- - -### ExistsSubquery usage - -Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table inner2 +ExistsSubquery: +(Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table inner2) - `source = outer | where exists [ source = inner | where a = c ]` - `source = outer | where not exists [ source = inner | where a = c ]` - `source = outer | where exists [ source = inner | where a = c and b = d ]` @@ -132,48 +49,9 @@ Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table in - `source = outer | where not exists [ source = inner | where c > 10 ]` (uncorrelated exists) - `source = outer | where exists [ source = inner ] | eval l = "nonEmpty" | fields l` (special uncorrelated exists) -**_SQL Migration examples with Exists-Subquery PPL:_** - -tpch q4 (exists subquery with aggregation) -```sql -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and exists ( - select - l_orderkey - from - lineitem - where l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority -``` -Rewritten by PPL ExistsSubquery query: -```sql -source = orders -| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" - and exists [ - source = lineitem - | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate - ] -| stats count(1) as order_count by o_orderpriority -| sort o_orderpriority -| fields o_orderpriority, order_count -``` ---- - -### ScalarSubquery usage +ScalarSubquery: -Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested +(Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested) **Uncorrelated scalar subquery in Select** - `source = outer | eval m = [ source = inner | stats max(c) ] | fields m, a` @@ -203,146 +81,98 @@ Assumptions: `a`, `b` are fields of table 
outer, `c`, `d` are fields of table in - `source = outer | where a = [ source = inner | stats max(c) | sort c ] OR b = [ source = inner | where c = 1 | stats min(d) | sort d ]` - `source = outer | where a = [ source = inner | where c = [ source = nested | stats max(e) by f | sort f ] | stats max(d) by c | sort c | head 1 ]` -_SQL Migration examples with Scalar-Subquery PPL:_ -Example 1 -```sql -SELECT * -FROM outer -WHERE a = (SELECT max(c) - FROM inner1 - WHERE c = (SELECT max(e) - FROM inner2 - GROUP BY f - ORDER BY f - ) - GROUP BY c - ORDER BY c - LIMIT 1) -``` -Rewritten by PPL ScalarSubquery query: -```sql -source = spark_catalog.default.outer -| where a = [ - source = spark_catalog.default.inner1 - | where c = [ - source = spark_catalog.default.inner2 - | stats max(e) by f - | sort f - ] - | stats max(d) by c - | sort c - | head 1 - ] -``` -Example 2 -```sql -SELECT * FROM outer -WHERE a = (SELECT max(c) - FROM inner - ORDER BY c) -OR b = (SELECT min(d) - FROM inner - WHERE c = 1 - ORDER BY d) -``` -Rewritten by PPL ScalarSubquery query: -```sql -source = spark_catalog.default.outer -| where a = [ - source = spark_catalog.default.inner | stats max(c) | sort c - ] OR b = [ - source = spark_catalog.default.inner | where c = 1 | stats min(d) | sort d - ] -``` ---- - -### (Relation) Subquery -`InSubquery`, `ExistsSubquery` and `ScalarSubquery` are all subquery expressions. But `RelationSubquery` is not a subquery expression, it is a subquery plan which is common used in Join or From clause. - -- `source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ]` (subquery in join right side) +RelationSubquery: +- `source = table1 | join left = l right = r on condition [ source = table2 | where d > 10 | head 5 ]` (subquery in join right side) - `source = [ source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] | stats count(a) by b ] as outer | head 1` -**_SQL Migration examples with Subquery PPL:_** - -tpch q13 -```sql -select - c_count, - count(*) as custdist -from - ( - select - c_custkey, - count(o_orderkey) as c_count - from - customer left outer join orders on - c_custkey = o_custkey - and o_comment not like '%special%requests%' - group by - c_custkey - ) as c_orders -group by - c_count -order by - custdist desc, - c_count desc -``` -Rewritten by PPL (Relation) Subquery: -```sql -SEARCH source = [ - SEARCH source = customer - | LEFT OUTER JOIN left = c right = o ON c_custkey = o_custkey - [ - SEARCH source = orders - | WHERE not like(o_comment, '%special%requests%') - ] - | STATS COUNT(o_orderkey) AS c_count BY c_custkey -] AS c_orders -| STATS COUNT(o_orderkey) AS c_count BY c_custkey -| STATS COUNT(1) AS custdist BY c_count -| SORT - custdist, - c_count -``` ---- +### Examples 1: TPC-H q20 + +PPL query: + + os> source=supplier + | join ON s_nationkey = n_nationkey nation + | where n_name = 'CANADA' + and s_suppkey in [ // InSubquery + source = partsupp + | where ps_partkey in [ // InSubquery + source = part + | where like(p_name, 'forest%') + | fields p_partkey + ] + and ps_availqty > [ // ScalarSubquery + source = lineitem + | where l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date('1994-01-01') + and l_shipdate < date_add(date('1994-01-01'), interval 1 year) + | stats sum(l_quantity) as sum_l_quantity + | eval half_sum_l_quantity = 0.5 * sum_l_quantity // Stats and Eval commands can combine when issues/819 resolved + | fields half_sum_l_quantity + ] + | fields ps_suppkey + ] + | fields s_suppkey, 
s_name, s_phone, s_acctbal, n_name | head 10 + fetched rows / total rows = 10/10 + +-----------+---------------------+----------------+----------+---------+ + | s_suppkey | s_name | s_phone | s_acctbal| n_name | + +-----------+---------------------+----------------+----------+---------+ + | 8243 | Supplier#000008243 | 13-707-547-1386| 9067.07 | CANADA | + | 736 | Supplier#000000736 | 13-681-806-8650| 5700.83 | CANADA | + | 9032 | Supplier#000009032 | 13-441-662-5539| 3982.32 | CANADA | + | 3201 | Supplier#000003201 | 13-600-413-7165| 3799.41 | CANADA | + | 3849 | Supplier#000003849 | 13-582-965-9117| 52.33 | CANADA | + | 5505 | Supplier#000005505 | 13-531-190-6523| 2023.4 | CANADA | + | 5195 | Supplier#000005195 | 13-622-661-2956| 3717.34 | CANADA | + | 9753 | Supplier#000009753 | 13-724-256-7877| 4406.93 | CANADA | + | 7135 | Supplier#000007135 | 13-367-994-6705| 4950.29 | CANADA | + | 5256 | Supplier#000005256 | 13-180-538-8836| 5624.79 | CANADA | + +-----------+---------------------+----------------+----------+---------+ + + +### Examples 2: TPC-H q22 + +PPL query: + + os> source = [ + source = customer + | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > [ + source = customer + | where c_acctbal > 0.00 + and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + | stats avg(c_acctbal) + ] + and not exists [ + source = orders + | where o_custkey = c_custkey + ] + | eval cntrycode = substring(c_phone, 1, 2) + | fields cntrycode, c_acctbal + ] as custsale + | stats count() as numcust, sum(c_acctbal) as totacctbal by cntrycode + | sort cntrycode + fetched rows / total rows = 10/10 + +---------+--------------------+------------+ + | numcust | totacctbal | cntrycode | + +---------+--------------------+------------+ + | 888 | 6737713.989999999 | 13 | + | 861 | 6460573.72 | 17 | + | 964 | 7236687.4 | 18 | + | 892 | 6701457.950000001 | 23 | + | 948 | 7158866.630000001 | 29 | + | 909 | 6808436.129999999 | 30 | + | 922 | 6806670.179999999 | 31 | + +---------+--------------------+------------+ ### Additional Context -`InSubquery`, `ExistsSubquery` and `ScalarSubquery` as subquery expressions, their common usage is in `where` clause and `search filter`. - -Where command: -``` -| where | ... -``` -Search filter: -``` -search source=* | ... -``` -A subquery expression could be used in boolean expression, for example - -```sql -| where orders.order_id in [ source=returns | where return_reason="damaged" | field order_id ] -``` - -The `orders.order_id in [ source=... ]` is a ``. - -In general, we name this kind of subquery clause the `InSubquery` expression, it is a ``. - -**Subquery with Different Join Types** +#### RelationSubquery -In issue description is a `ScalarSubquery`: - -```sql -source=employees -| join source=sales on employees.employee_id = sales.employee_id -| where sales.sale_amount > [ source=targets | where target_met="true" | fields target_value ] +RelationSubquery is plan instead of expression, for example ``` - -But `RelationSubquery` is not a subquery expression, it is a subquery plan. -[Recall the join command doc](ppl-join-command.md), the example is a subquery/subsearch **plan**, rather than a **expression**. 
- -```sql -SEARCH source=customer +source=customer | FIELDS c_custkey -| LEFT OUTER JOIN left = c, right = o ON c.c_custkey = o.o_custkey +| LEFT OUTER JOIN left = c right = o ON c.c_custkey = o.o_custkey [ SEARCH source=orders | WHERE o_comment NOT LIKE '%unusual%packages%' @@ -351,7 +181,7 @@ SEARCH source=customer | STATS ... ``` simply into -```sql +``` SEARCH | LEFT OUTER JOIN ON [ @@ -359,21 +189,14 @@ SEARCH ] | STATS ... ``` -Apply the syntax here and simply into - -```sql -search | left join on [ search ... ] -``` - -The `[ search ...]` is not a `expression`, it's `plan`, similar to the `relation` plan -**Uncorrelated Subquery** +#### Uncorrelated Subquery An uncorrelated subquery is independent of the outer query. It is executed once, and the result is used by the outer query. It's **less common** when using `ExistsSubquery` because `ExistsSubquery` typically checks for the presence of rows that are dependent on the outer query’s row. There is a very special exists subquery which highlight by `(special uncorrelated exists)`: -```sql +``` SELECT 'nonEmpty' FROM outer WHERE EXISTS ( @@ -382,7 +205,7 @@ FROM outer ); ``` Rewritten by PPL ExistsSubquery query: -```sql +``` source = outer | where exists [ source = inner @@ -392,11 +215,11 @@ source = outer ``` This query just print "nonEmpty" if the inner table is not empty. -**Table alias in subquery** +#### Table alias in subquery Table alias is useful in query which contains a subquery, for example -```sql +``` select a, ( select sum(b) from catalog.schema.table1 as t1 diff --git a/docs/ppl-lang/ppl-top-command.md b/docs/ppl-lang/ppl-top-command.md index 93d3a7148..012457fe2 100644 --- a/docs/ppl-lang/ppl-top-command.md +++ b/docs/ppl-lang/ppl-top-command.md @@ -1,6 +1,6 @@ ## PPL top Command -**Description** +### Description Using ``top`` command to find the most common tuple of values of all fields in the field list. diff --git a/docs/ppl-lang/ppl-trendline-command.md b/docs/ppl-lang/ppl-trendline-command.md index b466e2e8f..44b8c999f 100644 --- a/docs/ppl-lang/ppl-trendline-command.md +++ b/docs/ppl-lang/ppl-trendline-command.md @@ -1,6 +1,6 @@ ## PPL trendline Command -**Description** +### Description Using ``trendline`` command to calculate moving averages of fields. 
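+
+For example, a minimal sketch (assuming a hypothetical `accounts` index with a numeric `balance` field, and the SMA syntax described below):
+
+```
+source=accounts | trendline sma(2, balance) as balance_trend // hypothetical index and field
+```
+computes a two-point simple moving average of `balance` into a new `balance_trend` field.
+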
### Syntax - SMA (Simple Moving Average) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens new file mode 100644 index 000000000..5f976453e --- /dev/null +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens @@ -0,0 +1,798 @@ +SEARCH=1 +DESCRIBE=2 +SHOW=3 +FROM=4 +WHERE=5 +FIELDS=6 +RENAME=7 +STATS=8 +EVENTSTATS=9 +DEDUP=10 +SORT=11 +EVAL=12 +HEAD=13 +TOP_APPROX=14 +TOP=15 +RARE_APPROX=16 +RARE=17 +PARSE=18 +METHOD=19 +REGEX=20 +PUNCT=21 +GROK=22 +PATTERN=23 +PATTERNS=24 +NEW_FIELD=25 +KMEANS=26 +AD=27 +ML=28 +FILLNULL=29 +EXPAND=30 +FLATTEN=31 +TRENDLINE=32 +JOIN=33 +ON=34 +INNER=35 +OUTER=36 +FULL=37 +SEMI=38 +ANTI=39 +CROSS=40 +LEFT_HINT=41 +RIGHT_HINT=42 +CORRELATE=43 +SELF=44 +EXACT=45 +APPROXIMATE=46 +SCOPE=47 +MAPPING=48 +EXPLAIN=49 +FORMATTED=50 +COST=51 +CODEGEN=52 +EXTENDED=53 +SIMPLE=54 +AS=55 +BY=56 +SOURCE=57 +INDEX=58 +D=59 +DESC=60 +DATASOURCES=61 +USING=62 +WITH=63 +AUTO=64 +STR=65 +IP=66 +NUM=67 +FIELDSUMMARY=68 +INCLUDEFIELDS=69 +NULLS=70 +SMA=71 +WMA=72 +KEEPEMPTY=73 +CONSECUTIVE=74 +DEDUP_SPLITVALUES=75 +PARTITIONS=76 +ALLNUM=77 +DELIM=78 +CENTROIDS=79 +ITERATIONS=80 +DISTANCE_TYPE=81 +NUMBER_OF_TREES=82 +SHINGLE_SIZE=83 +SAMPLE_SIZE=84 +OUTPUT_AFTER=85 +TIME_DECAY=86 +ANOMALY_RATE=87 +CATEGORY_FIELD=88 +TIME_FIELD=89 +TIME_ZONE=90 +TRAINING_DATA_SIZE=91 +ANOMALY_SCORE_THRESHOLD=92 +APPEND=93 +CASE=94 +ELSE=95 +IN=96 +EXISTS=97 +NOT=98 +OR=99 +AND=100 +XOR=101 +TRUE=102 +FALSE=103 +REGEXP=104 +CONVERT_TZ=105 +DATETIME=106 +DAY=107 +DAY_HOUR=108 +DAY_MICROSECOND=109 +DAY_MINUTE=110 +DAY_OF_YEAR=111 +DAY_SECOND=112 +HOUR=113 +HOUR_MICROSECOND=114 +HOUR_MINUTE=115 +HOUR_OF_DAY=116 +HOUR_SECOND=117 +INTERVAL=118 +MICROSECOND=119 +MILLISECOND=120 +MINUTE=121 +MINUTE_MICROSECOND=122 +MINUTE_OF_DAY=123 +MINUTE_OF_HOUR=124 +MINUTE_SECOND=125 +MONTH=126 +MONTH_OF_YEAR=127 +QUARTER=128 +SECOND=129 +SECOND_MICROSECOND=130 +SECOND_OF_MINUTE=131 +WEEK=132 +WEEK_OF_YEAR=133 +YEAR=134 +YEAR_MONTH=135 +DATAMODEL=136 +LOOKUP=137 +SAVEDSEARCH=138 +INT=139 +INTEGER=140 +DOUBLE=141 +LONG=142 +FLOAT=143 +STRING=144 +BOOLEAN=145 +PIPE=146 +COMMA=147 +DOT=148 +EQUAL=149 +GREATER=150 +LESS=151 +NOT_GREATER=152 +NOT_LESS=153 +NOT_EQUAL=154 +PLUS=155 +MINUS=156 +STAR=157 +DIVIDE=158 +MODULE=159 +EXCLAMATION_SYMBOL=160 +COLON=161 +LT_PRTHS=162 +RT_PRTHS=163 +LT_SQR_PRTHS=164 +RT_SQR_PRTHS=165 +SINGLE_QUOTE=166 +DOUBLE_QUOTE=167 +BACKTICK=168 +ARROW=169 +BIT_NOT_OP=170 +BIT_AND_OP=171 +BIT_XOR_OP=172 +AVG=173 +COUNT=174 +DISTINCT_COUNT=175 +DISTINCT_COUNT_APPROX=176 +ESTDC=177 +ESTDC_ERROR=178 +MAX=179 +MEAN=180 +MEDIAN=181 +MIN=182 +MODE=183 +RANGE=184 +STDEV=185 +STDEVP=186 +SUM=187 +SUMSQ=188 +VAR_SAMP=189 +VAR_POP=190 +STDDEV_SAMP=191 +STDDEV_POP=192 +PERCENTILE=193 +PERCENTILE_APPROX=194 +TAKE=195 +FIRST=196 +LAST=197 +LIST=198 +VALUES=199 +EARLIEST=200 +EARLIEST_TIME=201 +LATEST=202 +LATEST_TIME=203 +PER_DAY=204 +PER_HOUR=205 +PER_MINUTE=206 +PER_SECOND=207 +RATE=208 +SPARKLINE=209 +C=210 +DC=211 +ABS=212 +CBRT=213 +CEIL=214 +CEILING=215 +CONV=216 +CRC32=217 +E=218 +EXP=219 +FLOOR=220 +LN=221 +LOG=222 +LOG10=223 +LOG2=224 +MOD=225 +PI=226 +POSITION=227 +POW=228 +POWER=229 +RAND=230 +ROUND=231 +SIGN=232 +SIGNUM=233 +SQRT=234 +TRUNCATE=235 +ACOS=236 +ASIN=237 +ATAN=238 +ATAN2=239 +COS=240 +COT=241 +DEGREES=242 +RADIANS=243 +SIN=244 +TAN=245 +MD5=246 +SHA1=247 +SHA2=248 +ADDDATE=249 +ADDTIME=250 +CURDATE=251 +CURRENT_DATE=252 +CURRENT_TIME=253 +CURRENT_TIMESTAMP=254 +CURRENT_TIMEZONE=255 
+CURTIME=256 +DATE=257 +DATEDIFF=258 +DATE_ADD=259 +DATE_FORMAT=260 +DATE_SUB=261 +DAYNAME=262 +DAYOFMONTH=263 +DAYOFWEEK=264 +DAYOFYEAR=265 +DAY_OF_MONTH=266 +DAY_OF_WEEK=267 +DURATION=268 +EXTRACT=269 +FROM_DAYS=270 +FROM_UNIXTIME=271 +GET_FORMAT=272 +LAST_DAY=273 +LOCALTIME=274 +LOCALTIMESTAMP=275 +MAKEDATE=276 +MAKE_DATE=277 +MAKETIME=278 +MONTHNAME=279 +NOW=280 +PERIOD_ADD=281 +PERIOD_DIFF=282 +SEC_TO_TIME=283 +STR_TO_DATE=284 +SUBDATE=285 +SUBTIME=286 +SYSDATE=287 +TIME=288 +TIMEDIFF=289 +TIMESTAMP=290 +TIMESTAMPADD=291 +TIMESTAMPDIFF=292 +TIME_FORMAT=293 +TIME_TO_SEC=294 +TO_DAYS=295 +TO_SECONDS=296 +UNIX_TIMESTAMP=297 +UTC_DATE=298 +UTC_TIME=299 +UTC_TIMESTAMP=300 +WEEKDAY=301 +YEARWEEK=302 +SUBSTR=303 +SUBSTRING=304 +LTRIM=305 +RTRIM=306 +TRIM=307 +TO=308 +LOWER=309 +UPPER=310 +CONCAT=311 +CONCAT_WS=312 +LENGTH=313 +STRCMP=314 +RIGHT=315 +LEFT=316 +ASCII=317 +LOCATE=318 +REPLACE=319 +REVERSE=320 +CAST=321 +ISEMPTY=322 +ISBLANK=323 +JSON=324 +JSON_OBJECT=325 +JSON_ARRAY=326 +JSON_ARRAY_LENGTH=327 +TO_JSON_STRING=328 +JSON_EXTRACT=329 +JSON_KEYS=330 +JSON_VALID=331 +ARRAY=332 +ARRAY_LENGTH=333 +FORALL=334 +FILTER=335 +TRANSFORM=336 +REDUCE=337 +LIKE=338 +ISNULL=339 +ISNOTNULL=340 +ISPRESENT=341 +BETWEEN=342 +CIDRMATCH=343 +GEOIP=344 +IFNULL=345 +NULLIF=346 +IF=347 +TYPEOF=348 +COALESCE=349 +MATCH=350 +MATCH_PHRASE=351 +MATCH_PHRASE_PREFIX=352 +MATCH_BOOL_PREFIX=353 +SIMPLE_QUERY_STRING=354 +MULTI_MATCH=355 +QUERY_STRING=356 +ALLOW_LEADING_WILDCARD=357 +ANALYZE_WILDCARD=358 +ANALYZER=359 +AUTO_GENERATE_SYNONYMS_PHRASE_QUERY=360 +BOOST=361 +CUTOFF_FREQUENCY=362 +DEFAULT_FIELD=363 +DEFAULT_OPERATOR=364 +ENABLE_POSITION_INCREMENTS=365 +ESCAPE=366 +FLAGS=367 +FUZZY_MAX_EXPANSIONS=368 +FUZZY_PREFIX_LENGTH=369 +FUZZY_TRANSPOSITIONS=370 +FUZZY_REWRITE=371 +FUZZINESS=372 +LENIENT=373 +LOW_FREQ_OPERATOR=374 +MAX_DETERMINIZED_STATES=375 +MAX_EXPANSIONS=376 +MINIMUM_SHOULD_MATCH=377 +OPERATOR=378 +PHRASE_SLOP=379 +PREFIX_LENGTH=380 +QUOTE_ANALYZER=381 +QUOTE_FIELD_SUFFIX=382 +REWRITE=383 +SLOP=384 +TIE_BREAKER=385 +TYPE=386 +ZERO_TERMS_QUERY=387 +SPAN=388 +MS=389 +S=390 +M=391 +H=392 +W=393 +Q=394 +Y=395 +ID=396 +CLUSTER=397 +INTEGER_LITERAL=398 +DECIMAL_LITERAL=399 +ID_DATE_SUFFIX=400 +DQUOTA_STRING=401 +SQUOTA_STRING=402 +BQUOTA_STRING=403 +LINE_COMMENT=404 +BLOCK_COMMENT=405 +ERROR_RECOGNITION=406 +'SEARCH'=1 +'DESCRIBE'=2 +'SHOW'=3 +'FROM'=4 +'WHERE'=5 +'FIELDS'=6 +'RENAME'=7 +'STATS'=8 +'EVENTSTATS'=9 +'DEDUP'=10 +'SORT'=11 +'EVAL'=12 +'HEAD'=13 +'TOP_APPROX'=14 +'TOP'=15 +'RARE_APPROX'=16 +'RARE'=17 +'PARSE'=18 +'METHOD'=19 +'REGEX'=20 +'PUNCT'=21 +'GROK'=22 +'PATTERN'=23 +'PATTERNS'=24 +'NEW_FIELD'=25 +'KMEANS'=26 +'AD'=27 +'ML'=28 +'FILLNULL'=29 +'EXPAND'=30 +'FLATTEN'=31 +'TRENDLINE'=32 +'JOIN'=33 +'ON'=34 +'INNER'=35 +'OUTER'=36 +'FULL'=37 +'SEMI'=38 +'ANTI'=39 +'CROSS'=40 +'HINT.LEFT'=41 +'HINT.RIGHT'=42 +'CORRELATE'=43 +'SELF'=44 +'EXACT'=45 +'APPROXIMATE'=46 +'SCOPE'=47 +'MAPPING'=48 +'EXPLAIN'=49 +'FORMATTED'=50 +'COST'=51 +'CODEGEN'=52 +'EXTENDED'=53 +'SIMPLE'=54 +'AS'=55 +'BY'=56 +'SOURCE'=57 +'INDEX'=58 +'D'=59 +'DESC'=60 +'DATASOURCES'=61 +'USING'=62 +'WITH'=63 +'AUTO'=64 +'STR'=65 +'IP'=66 +'NUM'=67 +'FIELDSUMMARY'=68 +'INCLUDEFIELDS'=69 +'NULLS'=70 +'SMA'=71 +'WMA'=72 +'KEEPEMPTY'=73 +'CONSECUTIVE'=74 +'DEDUP_SPLITVALUES'=75 +'PARTITIONS'=76 +'ALLNUM'=77 +'DELIM'=78 +'CENTROIDS'=79 +'ITERATIONS'=80 +'DISTANCE_TYPE'=81 +'NUMBER_OF_TREES'=82 +'SHINGLE_SIZE'=83 +'SAMPLE_SIZE'=84 +'OUTPUT_AFTER'=85 +'TIME_DECAY'=86 +'ANOMALY_RATE'=87 +'CATEGORY_FIELD'=88 +'TIME_FIELD'=89 
+'TIME_ZONE'=90 +'TRAINING_DATA_SIZE'=91 +'ANOMALY_SCORE_THRESHOLD'=92 +'APPEND'=93 +'CASE'=94 +'ELSE'=95 +'IN'=96 +'EXISTS'=97 +'NOT'=98 +'OR'=99 +'AND'=100 +'XOR'=101 +'TRUE'=102 +'FALSE'=103 +'REGEXP'=104 +'CONVERT_TZ'=105 +'DATETIME'=106 +'DAY'=107 +'DAY_HOUR'=108 +'DAY_MICROSECOND'=109 +'DAY_MINUTE'=110 +'DAY_OF_YEAR'=111 +'DAY_SECOND'=112 +'HOUR'=113 +'HOUR_MICROSECOND'=114 +'HOUR_MINUTE'=115 +'HOUR_OF_DAY'=116 +'HOUR_SECOND'=117 +'INTERVAL'=118 +'MICROSECOND'=119 +'MILLISECOND'=120 +'MINUTE'=121 +'MINUTE_MICROSECOND'=122 +'MINUTE_OF_DAY'=123 +'MINUTE_OF_HOUR'=124 +'MINUTE_SECOND'=125 +'MONTH'=126 +'MONTH_OF_YEAR'=127 +'QUARTER'=128 +'SECOND'=129 +'SECOND_MICROSECOND'=130 +'SECOND_OF_MINUTE'=131 +'WEEK'=132 +'WEEK_OF_YEAR'=133 +'YEAR'=134 +'YEAR_MONTH'=135 +'DATAMODEL'=136 +'LOOKUP'=137 +'SAVEDSEARCH'=138 +'INT'=139 +'INTEGER'=140 +'DOUBLE'=141 +'LONG'=142 +'FLOAT'=143 +'STRING'=144 +'BOOLEAN'=145 +'|'=146 +','=147 +'.'=148 +'='=149 +'>'=150 +'<'=151 +'+'=155 +'-'=156 +'*'=157 +'/'=158 +'%'=159 +'!'=160 +':'=161 +'('=162 +')'=163 +'['=164 +']'=165 +'\''=166 +'"'=167 +'`'=168 +'->'=169 +'~'=170 +'&'=171 +'^'=172 +'AVG'=173 +'COUNT'=174 +'DISTINCT_COUNT'=175 +'DISTINCT_COUNT_APPROX'=176 +'ESTDC'=177 +'ESTDC_ERROR'=178 +'MAX'=179 +'MEAN'=180 +'MEDIAN'=181 +'MIN'=182 +'MODE'=183 +'RANGE'=184 +'STDEV'=185 +'STDEVP'=186 +'SUM'=187 +'SUMSQ'=188 +'VAR_SAMP'=189 +'VAR_POP'=190 +'STDDEV_SAMP'=191 +'STDDEV_POP'=192 +'PERCENTILE'=193 +'PERCENTILE_APPROX'=194 +'TAKE'=195 +'FIRST'=196 +'LAST'=197 +'LIST'=198 +'VALUES'=199 +'EARLIEST'=200 +'EARLIEST_TIME'=201 +'LATEST'=202 +'LATEST_TIME'=203 +'PER_DAY'=204 +'PER_HOUR'=205 +'PER_MINUTE'=206 +'PER_SECOND'=207 +'RATE'=208 +'SPARKLINE'=209 +'C'=210 +'DC'=211 +'ABS'=212 +'CBRT'=213 +'CEIL'=214 +'CEILING'=215 +'CONV'=216 +'CRC32'=217 +'E'=218 +'EXP'=219 +'FLOOR'=220 +'LN'=221 +'LOG'=222 +'LOG10'=223 +'LOG2'=224 +'MOD'=225 +'PI'=226 +'POSITION'=227 +'POW'=228 +'POWER'=229 +'RAND'=230 +'ROUND'=231 +'SIGN'=232 +'SIGNUM'=233 +'SQRT'=234 +'TRUNCATE'=235 +'ACOS'=236 +'ASIN'=237 +'ATAN'=238 +'ATAN2'=239 +'COS'=240 +'COT'=241 +'DEGREES'=242 +'RADIANS'=243 +'SIN'=244 +'TAN'=245 +'MD5'=246 +'SHA1'=247 +'SHA2'=248 +'ADDDATE'=249 +'ADDTIME'=250 +'CURDATE'=251 +'CURRENT_DATE'=252 +'CURRENT_TIME'=253 +'CURRENT_TIMESTAMP'=254 +'CURRENT_TIMEZONE'=255 +'CURTIME'=256 +'DATE'=257 +'DATEDIFF'=258 +'DATE_ADD'=259 +'DATE_FORMAT'=260 +'DATE_SUB'=261 +'DAYNAME'=262 +'DAYOFMONTH'=263 +'DAYOFWEEK'=264 +'DAYOFYEAR'=265 +'DAY_OF_MONTH'=266 +'DAY_OF_WEEK'=267 +'DURATION'=268 +'EXTRACT'=269 +'FROM_DAYS'=270 +'FROM_UNIXTIME'=271 +'GET_FORMAT'=272 +'LAST_DAY'=273 +'LOCALTIME'=274 +'LOCALTIMESTAMP'=275 +'MAKEDATE'=276 +'MAKE_DATE'=277 +'MAKETIME'=278 +'MONTHNAME'=279 +'NOW'=280 +'PERIOD_ADD'=281 +'PERIOD_DIFF'=282 +'SEC_TO_TIME'=283 +'STR_TO_DATE'=284 +'SUBDATE'=285 +'SUBTIME'=286 +'SYSDATE'=287 +'TIME'=288 +'TIMEDIFF'=289 +'TIMESTAMP'=290 +'TIMESTAMPADD'=291 +'TIMESTAMPDIFF'=292 +'TIME_FORMAT'=293 +'TIME_TO_SEC'=294 +'TO_DAYS'=295 +'TO_SECONDS'=296 +'UNIX_TIMESTAMP'=297 +'UTC_DATE'=298 +'UTC_TIME'=299 +'UTC_TIMESTAMP'=300 +'WEEKDAY'=301 +'YEARWEEK'=302 +'SUBSTR'=303 +'SUBSTRING'=304 +'LTRIM'=305 +'RTRIM'=306 +'TRIM'=307 +'TO'=308 +'LOWER'=309 +'UPPER'=310 +'CONCAT'=311 +'CONCAT_WS'=312 +'LENGTH'=313 +'STRCMP'=314 +'RIGHT'=315 +'LEFT'=316 +'ASCII'=317 +'LOCATE'=318 +'REPLACE'=319 +'REVERSE'=320 +'CAST'=321 +'ISEMPTY'=322 +'ISBLANK'=323 +'JSON'=324 +'JSON_OBJECT'=325 +'JSON_ARRAY'=326 +'JSON_ARRAY_LENGTH'=327 +'TO_JSON_STRING'=328 +'JSON_EXTRACT'=329 +'JSON_KEYS'=330 +'JSON_VALID'=331 
+'ARRAY'=332 +'ARRAY_LENGTH'=333 +'FORALL'=334 +'FILTER'=335 +'TRANSFORM'=336 +'REDUCE'=337 +'LIKE'=338 +'ISNULL'=339 +'ISNOTNULL'=340 +'ISPRESENT'=341 +'BETWEEN'=342 +'CIDRMATCH'=343 +'GEOIP'=344 +'IFNULL'=345 +'NULLIF'=346 +'IF'=347 +'TYPEOF'=348 +'COALESCE'=349 +'MATCH'=350 +'MATCH_PHRASE'=351 +'MATCH_PHRASE_PREFIX'=352 +'MATCH_BOOL_PREFIX'=353 +'SIMPLE_QUERY_STRING'=354 +'MULTI_MATCH'=355 +'QUERY_STRING'=356 +'ALLOW_LEADING_WILDCARD'=357 +'ANALYZE_WILDCARD'=358 +'ANALYZER'=359 +'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'=360 +'BOOST'=361 +'CUTOFF_FREQUENCY'=362 +'DEFAULT_FIELD'=363 +'DEFAULT_OPERATOR'=364 +'ENABLE_POSITION_INCREMENTS'=365 +'ESCAPE'=366 +'FLAGS'=367 +'FUZZY_MAX_EXPANSIONS'=368 +'FUZZY_PREFIX_LENGTH'=369 +'FUZZY_TRANSPOSITIONS'=370 +'FUZZY_REWRITE'=371 +'FUZZINESS'=372 +'LENIENT'=373 +'LOW_FREQ_OPERATOR'=374 +'MAX_DETERMINIZED_STATES'=375 +'MAX_EXPANSIONS'=376 +'MINIMUM_SHOULD_MATCH'=377 +'OPERATOR'=378 +'PHRASE_SLOP'=379 +'PREFIX_LENGTH'=380 +'QUOTE_ANALYZER'=381 +'QUOTE_FIELD_SUFFIX'=382 +'REWRITE'=383 +'SLOP'=384 +'TIE_BREAKER'=385 +'TYPE'=386 +'ZERO_TERMS_QUERY'=387 +'SPAN'=388 +'MS'=389 +'S'=390 +'M'=391 +'H'=392 +'W'=393 +'Q'=394 +'Y'=395 From 08e1df99b8ec2e4672708cc5b0b6492ada0587d9 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 18 Nov 2024 13:00:20 +0800 Subject: [PATCH 2/5] remove auto generated file Signed-off-by: Lantao Jin --- .../src/main/antlr4/OpenSearchPPLLexer.tokens | 798 ------------------ 1 file changed, 798 deletions(-) delete mode 100644 ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens deleted file mode 100644 index 5f976453e..000000000 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.tokens +++ /dev/null @@ -1,798 +0,0 @@ -SEARCH=1 -DESCRIBE=2 -SHOW=3 -FROM=4 -WHERE=5 -FIELDS=6 -RENAME=7 -STATS=8 -EVENTSTATS=9 -DEDUP=10 -SORT=11 -EVAL=12 -HEAD=13 -TOP_APPROX=14 -TOP=15 -RARE_APPROX=16 -RARE=17 -PARSE=18 -METHOD=19 -REGEX=20 -PUNCT=21 -GROK=22 -PATTERN=23 -PATTERNS=24 -NEW_FIELD=25 -KMEANS=26 -AD=27 -ML=28 -FILLNULL=29 -EXPAND=30 -FLATTEN=31 -TRENDLINE=32 -JOIN=33 -ON=34 -INNER=35 -OUTER=36 -FULL=37 -SEMI=38 -ANTI=39 -CROSS=40 -LEFT_HINT=41 -RIGHT_HINT=42 -CORRELATE=43 -SELF=44 -EXACT=45 -APPROXIMATE=46 -SCOPE=47 -MAPPING=48 -EXPLAIN=49 -FORMATTED=50 -COST=51 -CODEGEN=52 -EXTENDED=53 -SIMPLE=54 -AS=55 -BY=56 -SOURCE=57 -INDEX=58 -D=59 -DESC=60 -DATASOURCES=61 -USING=62 -WITH=63 -AUTO=64 -STR=65 -IP=66 -NUM=67 -FIELDSUMMARY=68 -INCLUDEFIELDS=69 -NULLS=70 -SMA=71 -WMA=72 -KEEPEMPTY=73 -CONSECUTIVE=74 -DEDUP_SPLITVALUES=75 -PARTITIONS=76 -ALLNUM=77 -DELIM=78 -CENTROIDS=79 -ITERATIONS=80 -DISTANCE_TYPE=81 -NUMBER_OF_TREES=82 -SHINGLE_SIZE=83 -SAMPLE_SIZE=84 -OUTPUT_AFTER=85 -TIME_DECAY=86 -ANOMALY_RATE=87 -CATEGORY_FIELD=88 -TIME_FIELD=89 -TIME_ZONE=90 -TRAINING_DATA_SIZE=91 -ANOMALY_SCORE_THRESHOLD=92 -APPEND=93 -CASE=94 -ELSE=95 -IN=96 -EXISTS=97 -NOT=98 -OR=99 -AND=100 -XOR=101 -TRUE=102 -FALSE=103 -REGEXP=104 -CONVERT_TZ=105 -DATETIME=106 -DAY=107 -DAY_HOUR=108 -DAY_MICROSECOND=109 -DAY_MINUTE=110 -DAY_OF_YEAR=111 -DAY_SECOND=112 -HOUR=113 -HOUR_MICROSECOND=114 -HOUR_MINUTE=115 -HOUR_OF_DAY=116 -HOUR_SECOND=117 -INTERVAL=118 -MICROSECOND=119 -MILLISECOND=120 -MINUTE=121 -MINUTE_MICROSECOND=122 -MINUTE_OF_DAY=123 -MINUTE_OF_HOUR=124 -MINUTE_SECOND=125 -MONTH=126 -MONTH_OF_YEAR=127 -QUARTER=128 -SECOND=129 -SECOND_MICROSECOND=130 -SECOND_OF_MINUTE=131 -WEEK=132 -WEEK_OF_YEAR=133 
-YEAR=134 -YEAR_MONTH=135 -DATAMODEL=136 -LOOKUP=137 -SAVEDSEARCH=138 -INT=139 -INTEGER=140 -DOUBLE=141 -LONG=142 -FLOAT=143 -STRING=144 -BOOLEAN=145 -PIPE=146 -COMMA=147 -DOT=148 -EQUAL=149 -GREATER=150 -LESS=151 -NOT_GREATER=152 -NOT_LESS=153 -NOT_EQUAL=154 -PLUS=155 -MINUS=156 -STAR=157 -DIVIDE=158 -MODULE=159 -EXCLAMATION_SYMBOL=160 -COLON=161 -LT_PRTHS=162 -RT_PRTHS=163 -LT_SQR_PRTHS=164 -RT_SQR_PRTHS=165 -SINGLE_QUOTE=166 -DOUBLE_QUOTE=167 -BACKTICK=168 -ARROW=169 -BIT_NOT_OP=170 -BIT_AND_OP=171 -BIT_XOR_OP=172 -AVG=173 -COUNT=174 -DISTINCT_COUNT=175 -DISTINCT_COUNT_APPROX=176 -ESTDC=177 -ESTDC_ERROR=178 -MAX=179 -MEAN=180 -MEDIAN=181 -MIN=182 -MODE=183 -RANGE=184 -STDEV=185 -STDEVP=186 -SUM=187 -SUMSQ=188 -VAR_SAMP=189 -VAR_POP=190 -STDDEV_SAMP=191 -STDDEV_POP=192 -PERCENTILE=193 -PERCENTILE_APPROX=194 -TAKE=195 -FIRST=196 -LAST=197 -LIST=198 -VALUES=199 -EARLIEST=200 -EARLIEST_TIME=201 -LATEST=202 -LATEST_TIME=203 -PER_DAY=204 -PER_HOUR=205 -PER_MINUTE=206 -PER_SECOND=207 -RATE=208 -SPARKLINE=209 -C=210 -DC=211 -ABS=212 -CBRT=213 -CEIL=214 -CEILING=215 -CONV=216 -CRC32=217 -E=218 -EXP=219 -FLOOR=220 -LN=221 -LOG=222 -LOG10=223 -LOG2=224 -MOD=225 -PI=226 -POSITION=227 -POW=228 -POWER=229 -RAND=230 -ROUND=231 -SIGN=232 -SIGNUM=233 -SQRT=234 -TRUNCATE=235 -ACOS=236 -ASIN=237 -ATAN=238 -ATAN2=239 -COS=240 -COT=241 -DEGREES=242 -RADIANS=243 -SIN=244 -TAN=245 -MD5=246 -SHA1=247 -SHA2=248 -ADDDATE=249 -ADDTIME=250 -CURDATE=251 -CURRENT_DATE=252 -CURRENT_TIME=253 -CURRENT_TIMESTAMP=254 -CURRENT_TIMEZONE=255 -CURTIME=256 -DATE=257 -DATEDIFF=258 -DATE_ADD=259 -DATE_FORMAT=260 -DATE_SUB=261 -DAYNAME=262 -DAYOFMONTH=263 -DAYOFWEEK=264 -DAYOFYEAR=265 -DAY_OF_MONTH=266 -DAY_OF_WEEK=267 -DURATION=268 -EXTRACT=269 -FROM_DAYS=270 -FROM_UNIXTIME=271 -GET_FORMAT=272 -LAST_DAY=273 -LOCALTIME=274 -LOCALTIMESTAMP=275 -MAKEDATE=276 -MAKE_DATE=277 -MAKETIME=278 -MONTHNAME=279 -NOW=280 -PERIOD_ADD=281 -PERIOD_DIFF=282 -SEC_TO_TIME=283 -STR_TO_DATE=284 -SUBDATE=285 -SUBTIME=286 -SYSDATE=287 -TIME=288 -TIMEDIFF=289 -TIMESTAMP=290 -TIMESTAMPADD=291 -TIMESTAMPDIFF=292 -TIME_FORMAT=293 -TIME_TO_SEC=294 -TO_DAYS=295 -TO_SECONDS=296 -UNIX_TIMESTAMP=297 -UTC_DATE=298 -UTC_TIME=299 -UTC_TIMESTAMP=300 -WEEKDAY=301 -YEARWEEK=302 -SUBSTR=303 -SUBSTRING=304 -LTRIM=305 -RTRIM=306 -TRIM=307 -TO=308 -LOWER=309 -UPPER=310 -CONCAT=311 -CONCAT_WS=312 -LENGTH=313 -STRCMP=314 -RIGHT=315 -LEFT=316 -ASCII=317 -LOCATE=318 -REPLACE=319 -REVERSE=320 -CAST=321 -ISEMPTY=322 -ISBLANK=323 -JSON=324 -JSON_OBJECT=325 -JSON_ARRAY=326 -JSON_ARRAY_LENGTH=327 -TO_JSON_STRING=328 -JSON_EXTRACT=329 -JSON_KEYS=330 -JSON_VALID=331 -ARRAY=332 -ARRAY_LENGTH=333 -FORALL=334 -FILTER=335 -TRANSFORM=336 -REDUCE=337 -LIKE=338 -ISNULL=339 -ISNOTNULL=340 -ISPRESENT=341 -BETWEEN=342 -CIDRMATCH=343 -GEOIP=344 -IFNULL=345 -NULLIF=346 -IF=347 -TYPEOF=348 -COALESCE=349 -MATCH=350 -MATCH_PHRASE=351 -MATCH_PHRASE_PREFIX=352 -MATCH_BOOL_PREFIX=353 -SIMPLE_QUERY_STRING=354 -MULTI_MATCH=355 -QUERY_STRING=356 -ALLOW_LEADING_WILDCARD=357 -ANALYZE_WILDCARD=358 -ANALYZER=359 -AUTO_GENERATE_SYNONYMS_PHRASE_QUERY=360 -BOOST=361 -CUTOFF_FREQUENCY=362 -DEFAULT_FIELD=363 -DEFAULT_OPERATOR=364 -ENABLE_POSITION_INCREMENTS=365 -ESCAPE=366 -FLAGS=367 -FUZZY_MAX_EXPANSIONS=368 -FUZZY_PREFIX_LENGTH=369 -FUZZY_TRANSPOSITIONS=370 -FUZZY_REWRITE=371 -FUZZINESS=372 -LENIENT=373 -LOW_FREQ_OPERATOR=374 -MAX_DETERMINIZED_STATES=375 -MAX_EXPANSIONS=376 -MINIMUM_SHOULD_MATCH=377 -OPERATOR=378 -PHRASE_SLOP=379 -PREFIX_LENGTH=380 -QUOTE_ANALYZER=381 -QUOTE_FIELD_SUFFIX=382 -REWRITE=383 
-SLOP=384 -TIE_BREAKER=385 -TYPE=386 -ZERO_TERMS_QUERY=387 -SPAN=388 -MS=389 -S=390 -M=391 -H=392 -W=393 -Q=394 -Y=395 -ID=396 -CLUSTER=397 -INTEGER_LITERAL=398 -DECIMAL_LITERAL=399 -ID_DATE_SUFFIX=400 -DQUOTA_STRING=401 -SQUOTA_STRING=402 -BQUOTA_STRING=403 -LINE_COMMENT=404 -BLOCK_COMMENT=405 -ERROR_RECOGNITION=406 -'SEARCH'=1 -'DESCRIBE'=2 -'SHOW'=3 -'FROM'=4 -'WHERE'=5 -'FIELDS'=6 -'RENAME'=7 -'STATS'=8 -'EVENTSTATS'=9 -'DEDUP'=10 -'SORT'=11 -'EVAL'=12 -'HEAD'=13 -'TOP_APPROX'=14 -'TOP'=15 -'RARE_APPROX'=16 -'RARE'=17 -'PARSE'=18 -'METHOD'=19 -'REGEX'=20 -'PUNCT'=21 -'GROK'=22 -'PATTERN'=23 -'PATTERNS'=24 -'NEW_FIELD'=25 -'KMEANS'=26 -'AD'=27 -'ML'=28 -'FILLNULL'=29 -'EXPAND'=30 -'FLATTEN'=31 -'TRENDLINE'=32 -'JOIN'=33 -'ON'=34 -'INNER'=35 -'OUTER'=36 -'FULL'=37 -'SEMI'=38 -'ANTI'=39 -'CROSS'=40 -'HINT.LEFT'=41 -'HINT.RIGHT'=42 -'CORRELATE'=43 -'SELF'=44 -'EXACT'=45 -'APPROXIMATE'=46 -'SCOPE'=47 -'MAPPING'=48 -'EXPLAIN'=49 -'FORMATTED'=50 -'COST'=51 -'CODEGEN'=52 -'EXTENDED'=53 -'SIMPLE'=54 -'AS'=55 -'BY'=56 -'SOURCE'=57 -'INDEX'=58 -'D'=59 -'DESC'=60 -'DATASOURCES'=61 -'USING'=62 -'WITH'=63 -'AUTO'=64 -'STR'=65 -'IP'=66 -'NUM'=67 -'FIELDSUMMARY'=68 -'INCLUDEFIELDS'=69 -'NULLS'=70 -'SMA'=71 -'WMA'=72 -'KEEPEMPTY'=73 -'CONSECUTIVE'=74 -'DEDUP_SPLITVALUES'=75 -'PARTITIONS'=76 -'ALLNUM'=77 -'DELIM'=78 -'CENTROIDS'=79 -'ITERATIONS'=80 -'DISTANCE_TYPE'=81 -'NUMBER_OF_TREES'=82 -'SHINGLE_SIZE'=83 -'SAMPLE_SIZE'=84 -'OUTPUT_AFTER'=85 -'TIME_DECAY'=86 -'ANOMALY_RATE'=87 -'CATEGORY_FIELD'=88 -'TIME_FIELD'=89 -'TIME_ZONE'=90 -'TRAINING_DATA_SIZE'=91 -'ANOMALY_SCORE_THRESHOLD'=92 -'APPEND'=93 -'CASE'=94 -'ELSE'=95 -'IN'=96 -'EXISTS'=97 -'NOT'=98 -'OR'=99 -'AND'=100 -'XOR'=101 -'TRUE'=102 -'FALSE'=103 -'REGEXP'=104 -'CONVERT_TZ'=105 -'DATETIME'=106 -'DAY'=107 -'DAY_HOUR'=108 -'DAY_MICROSECOND'=109 -'DAY_MINUTE'=110 -'DAY_OF_YEAR'=111 -'DAY_SECOND'=112 -'HOUR'=113 -'HOUR_MICROSECOND'=114 -'HOUR_MINUTE'=115 -'HOUR_OF_DAY'=116 -'HOUR_SECOND'=117 -'INTERVAL'=118 -'MICROSECOND'=119 -'MILLISECOND'=120 -'MINUTE'=121 -'MINUTE_MICROSECOND'=122 -'MINUTE_OF_DAY'=123 -'MINUTE_OF_HOUR'=124 -'MINUTE_SECOND'=125 -'MONTH'=126 -'MONTH_OF_YEAR'=127 -'QUARTER'=128 -'SECOND'=129 -'SECOND_MICROSECOND'=130 -'SECOND_OF_MINUTE'=131 -'WEEK'=132 -'WEEK_OF_YEAR'=133 -'YEAR'=134 -'YEAR_MONTH'=135 -'DATAMODEL'=136 -'LOOKUP'=137 -'SAVEDSEARCH'=138 -'INT'=139 -'INTEGER'=140 -'DOUBLE'=141 -'LONG'=142 -'FLOAT'=143 -'STRING'=144 -'BOOLEAN'=145 -'|'=146 -','=147 -'.'=148 -'='=149 -'>'=150 -'<'=151 -'+'=155 -'-'=156 -'*'=157 -'/'=158 -'%'=159 -'!'=160 -':'=161 -'('=162 -')'=163 -'['=164 -']'=165 -'\''=166 -'"'=167 -'`'=168 -'->'=169 -'~'=170 -'&'=171 -'^'=172 -'AVG'=173 -'COUNT'=174 -'DISTINCT_COUNT'=175 -'DISTINCT_COUNT_APPROX'=176 -'ESTDC'=177 -'ESTDC_ERROR'=178 -'MAX'=179 -'MEAN'=180 -'MEDIAN'=181 -'MIN'=182 -'MODE'=183 -'RANGE'=184 -'STDEV'=185 -'STDEVP'=186 -'SUM'=187 -'SUMSQ'=188 -'VAR_SAMP'=189 -'VAR_POP'=190 -'STDDEV_SAMP'=191 -'STDDEV_POP'=192 -'PERCENTILE'=193 -'PERCENTILE_APPROX'=194 -'TAKE'=195 -'FIRST'=196 -'LAST'=197 -'LIST'=198 -'VALUES'=199 -'EARLIEST'=200 -'EARLIEST_TIME'=201 -'LATEST'=202 -'LATEST_TIME'=203 -'PER_DAY'=204 -'PER_HOUR'=205 -'PER_MINUTE'=206 -'PER_SECOND'=207 -'RATE'=208 -'SPARKLINE'=209 -'C'=210 -'DC'=211 -'ABS'=212 -'CBRT'=213 -'CEIL'=214 -'CEILING'=215 -'CONV'=216 -'CRC32'=217 -'E'=218 -'EXP'=219 -'FLOOR'=220 -'LN'=221 -'LOG'=222 -'LOG10'=223 -'LOG2'=224 -'MOD'=225 -'PI'=226 -'POSITION'=227 -'POW'=228 -'POWER'=229 -'RAND'=230 -'ROUND'=231 -'SIGN'=232 -'SIGNUM'=233 -'SQRT'=234 -'TRUNCATE'=235 
-'ACOS'=236 -'ASIN'=237 -'ATAN'=238 -'ATAN2'=239 -'COS'=240 -'COT'=241 -'DEGREES'=242 -'RADIANS'=243 -'SIN'=244 -'TAN'=245 -'MD5'=246 -'SHA1'=247 -'SHA2'=248 -'ADDDATE'=249 -'ADDTIME'=250 -'CURDATE'=251 -'CURRENT_DATE'=252 -'CURRENT_TIME'=253 -'CURRENT_TIMESTAMP'=254 -'CURRENT_TIMEZONE'=255 -'CURTIME'=256 -'DATE'=257 -'DATEDIFF'=258 -'DATE_ADD'=259 -'DATE_FORMAT'=260 -'DATE_SUB'=261 -'DAYNAME'=262 -'DAYOFMONTH'=263 -'DAYOFWEEK'=264 -'DAYOFYEAR'=265 -'DAY_OF_MONTH'=266 -'DAY_OF_WEEK'=267 -'DURATION'=268 -'EXTRACT'=269 -'FROM_DAYS'=270 -'FROM_UNIXTIME'=271 -'GET_FORMAT'=272 -'LAST_DAY'=273 -'LOCALTIME'=274 -'LOCALTIMESTAMP'=275 -'MAKEDATE'=276 -'MAKE_DATE'=277 -'MAKETIME'=278 -'MONTHNAME'=279 -'NOW'=280 -'PERIOD_ADD'=281 -'PERIOD_DIFF'=282 -'SEC_TO_TIME'=283 -'STR_TO_DATE'=284 -'SUBDATE'=285 -'SUBTIME'=286 -'SYSDATE'=287 -'TIME'=288 -'TIMEDIFF'=289 -'TIMESTAMP'=290 -'TIMESTAMPADD'=291 -'TIMESTAMPDIFF'=292 -'TIME_FORMAT'=293 -'TIME_TO_SEC'=294 -'TO_DAYS'=295 -'TO_SECONDS'=296 -'UNIX_TIMESTAMP'=297 -'UTC_DATE'=298 -'UTC_TIME'=299 -'UTC_TIMESTAMP'=300 -'WEEKDAY'=301 -'YEARWEEK'=302 -'SUBSTR'=303 -'SUBSTRING'=304 -'LTRIM'=305 -'RTRIM'=306 -'TRIM'=307 -'TO'=308 -'LOWER'=309 -'UPPER'=310 -'CONCAT'=311 -'CONCAT_WS'=312 -'LENGTH'=313 -'STRCMP'=314 -'RIGHT'=315 -'LEFT'=316 -'ASCII'=317 -'LOCATE'=318 -'REPLACE'=319 -'REVERSE'=320 -'CAST'=321 -'ISEMPTY'=322 -'ISBLANK'=323 -'JSON'=324 -'JSON_OBJECT'=325 -'JSON_ARRAY'=326 -'JSON_ARRAY_LENGTH'=327 -'TO_JSON_STRING'=328 -'JSON_EXTRACT'=329 -'JSON_KEYS'=330 -'JSON_VALID'=331 -'ARRAY'=332 -'ARRAY_LENGTH'=333 -'FORALL'=334 -'FILTER'=335 -'TRANSFORM'=336 -'REDUCE'=337 -'LIKE'=338 -'ISNULL'=339 -'ISNOTNULL'=340 -'ISPRESENT'=341 -'BETWEEN'=342 -'CIDRMATCH'=343 -'GEOIP'=344 -'IFNULL'=345 -'NULLIF'=346 -'IF'=347 -'TYPEOF'=348 -'COALESCE'=349 -'MATCH'=350 -'MATCH_PHRASE'=351 -'MATCH_PHRASE_PREFIX'=352 -'MATCH_BOOL_PREFIX'=353 -'SIMPLE_QUERY_STRING'=354 -'MULTI_MATCH'=355 -'QUERY_STRING'=356 -'ALLOW_LEADING_WILDCARD'=357 -'ANALYZE_WILDCARD'=358 -'ANALYZER'=359 -'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'=360 -'BOOST'=361 -'CUTOFF_FREQUENCY'=362 -'DEFAULT_FIELD'=363 -'DEFAULT_OPERATOR'=364 -'ENABLE_POSITION_INCREMENTS'=365 -'ESCAPE'=366 -'FLAGS'=367 -'FUZZY_MAX_EXPANSIONS'=368 -'FUZZY_PREFIX_LENGTH'=369 -'FUZZY_TRANSPOSITIONS'=370 -'FUZZY_REWRITE'=371 -'FUZZINESS'=372 -'LENIENT'=373 -'LOW_FREQ_OPERATOR'=374 -'MAX_DETERMINIZED_STATES'=375 -'MAX_EXPANSIONS'=376 -'MINIMUM_SHOULD_MATCH'=377 -'OPERATOR'=378 -'PHRASE_SLOP'=379 -'PREFIX_LENGTH'=380 -'QUOTE_ANALYZER'=381 -'QUOTE_FIELD_SUFFIX'=382 -'REWRITE'=383 -'SLOP'=384 -'TIE_BREAKER'=385 -'TYPE'=386 -'ZERO_TERMS_QUERY'=387 -'SPAN'=388 -'MS'=389 -'S'=390 -'M'=391 -'H'=392 -'W'=393 -'Q'=394 -'Y'=395 From 2c8b839fa4eae8e572ee92ddc2be0d158ff5d29d Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 18 Nov 2024 13:07:05 +0800 Subject: [PATCH 3/5] minor updates Signed-off-by: Lantao Jin --- docs/ppl-lang/ppl-subquery-command.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md index b36eb1c80..09d5132ea 100644 --- a/docs/ppl-lang/ppl-subquery-command.md +++ b/docs/ppl-lang/ppl-subquery-command.md @@ -87,26 +87,28 @@ RelationSubquery: ### Examples 1: TPC-H q20 +InSubquery and ScalarSubquery + PPL query: os> source=supplier | join ON s_nationkey = n_nationkey nation | where n_name = 'CANADA' - and s_suppkey in [ // InSubquery + and s_suppkey in [ // InSubquery source = partsupp - | where ps_partkey in [ // InSubquery + | 
where ps_partkey in [ // InSubquery
                  source = part
                  | where like(p_name, 'forest%')
                  | fields p_partkey
                ]
-            and ps_availqty > [ // ScalarSubquery
+            and ps_availqty > [ // ScalarSubquery
              source = lineitem
              | where l_partkey = ps_partkey
                and l_suppkey = ps_suppkey
                and l_shipdate >= date('1994-01-01')
                and l_shipdate < date_add(date('1994-01-01'), interval 1 year)
              | stats sum(l_quantity) as sum_l_quantity
-             | eval half_sum_l_quantity = 0.5 * sum_l_quantity // Stats and Eval commands can combine when issues/819 resolved
+             | eval half_sum_l_quantity = 0.5 * sum_l_quantity
              | fields half_sum_l_quantity
            ]
          | fields ps_suppkey
        ]
        | sort s_name
@@ -131,18 +133,20 @@ PPL query:

 ### Examples 2: TPC-H q22

+RelationSubquery, ScalarSubquery and ExistsSubquery
+
 PPL query:

-    os> source = [
+    os> source = [ // RelationSubquery
        source = customer
        | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
-         and c_acctbal > [
+         and c_acctbal > [ // ScalarSubquery
            source = customer
            | where c_acctbal > 0.00
              and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
            | stats avg(c_acctbal)
          ]
-         and not exists [
+         and not exists [ // ExistsSubquery
            source = orders
            | where o_custkey = c_custkey
          ]

From 274678fd62d417b2aa683814c823c08a601548d4 Mon Sep 17 00:00:00 2001
From: Lantao Jin
Date: Mon, 18 Nov 2024 20:11:06 +0800
Subject: [PATCH 4/5] address comments

Signed-off-by: Lantao Jin
---
 docs/ppl-lang/ppl-correlation-command.md | 2 +-
 docs/ppl-lang/ppl-dedup-command.md       | 2 +-
 docs/ppl-lang/ppl-grok-command.md        | 2 +-
 docs/ppl-lang/ppl-head-command.md        | 2 +-
 docs/ppl-lang/ppl-join-command.md        | 2 +-
 docs/ppl-lang/ppl-lookup-command.md      | 2 +-
 docs/ppl-lang/ppl-parse-command.md       | 2 +-
 docs/ppl-lang/ppl-rare-command.md        | 4 ++--
 docs/ppl-lang/ppl-search-command.md      | 2 +-
 docs/ppl-lang/ppl-sort-command.md        | 4 ++--
 docs/ppl-lang/ppl-stats-command.md       | 2 +-
 docs/ppl-lang/ppl-subquery-command.md    | 4 ++--
 docs/ppl-lang/ppl-top-command.md         | 2 +-
 docs/ppl-lang/ppl-trendline-command.md   | 4 ++--
 docs/ppl-lang/ppl-where-command.md       | 2 +-
 15 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/docs/ppl-lang/ppl-correlation-command.md b/docs/ppl-lang/ppl-correlation-command.md
index 2e8507a14..74e04da86 100644
--- a/docs/ppl-lang/ppl-correlation-command.md
+++ b/docs/ppl-lang/ppl-correlation-command.md
@@ -1,4 +1,4 @@
-## PPL Correlation Command
+## PPL `correlation` command

 > This is an experimental command - it may be removed in future versions

diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md
index 831c4926f..4e06d275e 100644
--- a/docs/ppl-lang/ppl-dedup-command.md
+++ b/docs/ppl-lang/ppl-dedup-command.md
@@ -1,4 +1,4 @@
-## PPL dedup command
+## PPL `dedup` command

 ### Table of contents

diff --git a/docs/ppl-lang/ppl-grok-command.md b/docs/ppl-lang/ppl-grok-command.md
index a9b5645c5..8d5946563 100644
--- a/docs/ppl-lang/ppl-grok-command.md
+++ b/docs/ppl-lang/ppl-grok-command.md
@@ -1,4 +1,4 @@
-## PPL Grok Command
+## PPL `grok` command

 ### Description

diff --git a/docs/ppl-lang/ppl-head-command.md b/docs/ppl-lang/ppl-head-command.md
index e4172b1c6..51a87db3b 100644
--- a/docs/ppl-lang/ppl-head-command.md
+++ b/docs/ppl-lang/ppl-head-command.md
@@ -1,4 +1,4 @@
-## PPL `head` Command
+## PPL `head` command

 **Description**
 The ``head`` command returns the first N number of specified results after an optional offset in search order.
diff --git a/docs/ppl-lang/ppl-join-command.md b/docs/ppl-lang/ppl-join-command.md
index 95b375e0a..f04f1c5c1 100644
--- a/docs/ppl-lang/ppl-join-command.md
+++ b/docs/ppl-lang/ppl-join-command.md
@@ -1,4 +1,4 @@
-## PPL Join Command
+## PPL `join` command

 ### Description

diff --git a/docs/ppl-lang/ppl-lookup-command.md b/docs/ppl-lang/ppl-lookup-command.md
index 6768cdcaf..87cf34bac 100644
--- a/docs/ppl-lang/ppl-lookup-command.md
+++ b/docs/ppl-lang/ppl-lookup-command.md
@@ -1,4 +1,4 @@
-## PPL Lookup Command
+## PPL `lookup` command

 ### Description
 Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table).

diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md
index 10be21cc0..0e000756e 100644
--- a/docs/ppl-lang/ppl-parse-command.md
+++ b/docs/ppl-lang/ppl-parse-command.md
@@ -1,4 +1,4 @@
-## PPL Parse Command
+## PPL `parse` command

 ### Description

diff --git a/docs/ppl-lang/ppl-rare-command.md b/docs/ppl-lang/ppl-rare-command.md
index 8a2ca640f..93967e6fe 100644
--- a/docs/ppl-lang/ppl-rare-command.md
+++ b/docs/ppl-lang/ppl-rare-command.md
@@ -1,7 +1,7 @@
-## PPL rare Command
+## PPL `rare` command

 ### Description
-Using ``rare`` command to find the least common tuple of values of all fields in the field list.
+Using `rare` command to find the least common tuple of values of all fields in the field list.

 **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.

diff --git a/docs/ppl-lang/ppl-search-command.md b/docs/ppl-lang/ppl-search-command.md
index bccfd04f0..6e1cf0e50 100644
--- a/docs/ppl-lang/ppl-search-command.md
+++ b/docs/ppl-lang/ppl-search-command.md
@@ -1,7 +1,7 @@
 ## PPL `search` command

 ### Description
-Using ``search`` command to retrieve document from the index. ``search`` command could be only used as the first command in the PPL query.
+Using `search` command to retrieve document from the index. `search` command could be only used as the first command in the PPL query.

 ### Syntax

diff --git a/docs/ppl-lang/ppl-sort-command.md b/docs/ppl-lang/ppl-sort-command.md
index c3bf304d7..dd9b4b33d 100644
--- a/docs/ppl-lang/ppl-sort-command.md
+++ b/docs/ppl-lang/ppl-sort-command.md
@@ -1,7 +1,7 @@
-## PPL `sort`command
+## PPL `sort` command

 ### Description
-Using ``sort`` command to sorts all the search result by the specified fields.
+Using `sort` command to sorts all the search result by the specified fields.

 ### Syntax

diff --git a/docs/ppl-lang/ppl-stats-command.md b/docs/ppl-lang/ppl-stats-command.md
index 552f83e46..a73800b26 100644
--- a/docs/ppl-lang/ppl-stats-command.md
+++ b/docs/ppl-lang/ppl-stats-command.md
@@ -1,7 +1,7 @@
 ## PPL `stats` command

 ### Description
-Using ``stats`` command to calculate the aggregation from search result.
+Using `stats` command to calculate the aggregation from search result.

 ### NULL/MISSING values handling:

diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md
index 09d5132ea..766b37130 100644
--- a/docs/ppl-lang/ppl-subquery-command.md
+++ b/docs/ppl-lang/ppl-subquery-command.md
@@ -1,7 +1,7 @@
-## PPL SubQuery Commands
+## PPL `subquery` command

 ### Description
-The subquery command has 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`.
+The subquery commands contain 4 types: `InSubquery`, `ExistsSubquery`, `ScalarSubquery` and `RelationSubquery`.
 `InSubquery`, `ExistsSubquery` and `ScalarSubquery` are subquery expressions, their common usage is in Where clause(`where `) and Search filter(`search source=* `).

 For example, a subquery expression could be used in boolean expression:

diff --git a/docs/ppl-lang/ppl-top-command.md b/docs/ppl-lang/ppl-top-command.md
index 012457fe2..2bacdba50 100644
--- a/docs/ppl-lang/ppl-top-command.md
+++ b/docs/ppl-lang/ppl-top-command.md
@@ -1,4 +1,4 @@
-## PPL top Command
+## PPL `top` command

 ### Description
 Using ``top`` command to find the most common tuple of values of all fields in the field list.

diff --git a/docs/ppl-lang/ppl-trendline-command.md b/docs/ppl-lang/ppl-trendline-command.md
index 44b8c999f..b2be172cd 100644
--- a/docs/ppl-lang/ppl-trendline-command.md
+++ b/docs/ppl-lang/ppl-trendline-command.md
@@ -1,7 +1,7 @@
-## PPL trendline Command
+## PPL `trendline` command

 ### Description
-Using ``trendline`` command to calculate moving averages of fields.
+Using `trendline` command to calculate moving averages of fields.

 ### Syntax
 - SMA (Simple Moving Average) `TRENDLINE [sort <[+|-] sort-field>] SMA(number-of-datapoints, field) [AS alias] [SMA(number-of-datapoints, field) [AS alias]]...`

diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md
index aa7d9299e..ec676ab62 100644
--- a/docs/ppl-lang/ppl-where-command.md
+++ b/docs/ppl-lang/ppl-where-command.md
@@ -1,4 +1,4 @@
-## PPL where Command
+## PPL `where` command

 ### Description
 The ``where`` command bool-expression to filter the search result. The ``where`` command only return the result when bool-expression evaluated to true.

From 5d143f8062d49cc019ded45735f240ba99e6e64a Mon Sep 17 00:00:00 2001
From: Lantao Jin
Date: Tue, 19 Nov 2024 08:55:02 +0800
Subject: [PATCH 5/5] fix hyper-link issue

Signed-off-by: Lantao Jin
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 12123b456..db3790e64 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Please refer to the [Flint Index Reference Manual](./docs/index.md) for more inf

 * For additional details on Spark PPL commands project, see [PPL Project](https://github.com/orgs/opensearch-project/projects/214/views/2)

-* Experiment ppl queries on local spark cluster[PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)
+* Experiment ppl queries on local spark cluster [PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)

 ## Prerequisites

@@ -88,7 +88,7 @@ bin/spark-shell --packages "org.opensearch:opensearch-spark-ppl_2.12:0.7.0-SNAPS
 ```

 ### PPL Run queries on a local spark cluster
-See ppl usage sample on local spark cluster[PPL on local spark ](local-spark-ppl-test-instruction.md)
+See ppl usage sample on local spark cluster [PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)

 ## Code of Conduct