From 1fd9a7e28e911f7080987979f3c6541d40068b48 Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Mon, 23 Sep 2024 16:22:36 +0100 Subject: [PATCH 01/14] finished tokenizers example Signed-off-by: leanne.laceybyrne@eliatra.com --- .../tokenizers/character-group-tokenizer.md | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 _analyzers/tokenizers/character-group-tokenizer.md diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md new file mode 100644 index 0000000000..e80f26fe59 --- /dev/null +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -0,0 +1,41 @@ +--- +layout: default +title: Character Group Tokenizer +parent: Tokenizers +nav_order: 60 +has_children: false +has_toc: false +--- + +# Character group tokenizer + +The Character Group Tokenizer is designed to segment text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a straightforward tokenization approach is required, avoiding the complexity and overhead associated with pattern-based tokenizers. + +The Character Group Tokenizer accepts the following parameters: +1. `tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. When any character from this set is encountered, a new token is created. For example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. +4. `max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. + +## Example of the character group tokenizer +``` +POST _analyze +{ + "tokenizer": { + "type": "char_group", + "tokenize_on_chars": [ + "whitespace", + "-", + ":" + ] + }, + "text": "Fast-cars: drive fast!" +} +``` +Summary of the outputted response text: +``` +Fast cars drive fast +``` + + + + + From 0a849c820287f8baf09b72c4c56add199eae4ab7 Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Mon, 23 Sep 2024 16:27:57 +0100 Subject: [PATCH 02/14] updating nav order Signed-off-by: leanne.laceybyrne@eliatra.com --- _analyzers/tokenizers/character-group-tokenizer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index e80f26fe59..8a5aff1647 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -2,7 +2,7 @@ layout: default title: Character Group Tokenizer parent: Tokenizers -nav_order: 60 +nav_order: 70 has_children: false has_toc: false --- From ecb5e5f0e042a6378b23746d9cd005067c73d1ac Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Wed, 9 Oct 2024 14:08:56 +0100 Subject: [PATCH 03/14] layout cleanup Signed-off-by: leanne.laceybyrne@eliatra.com --- _analyzers/tokenizers/character-group-tokenizer.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index 8a5aff1647..a72a767224 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -16,6 +16,9 @@ The Character Group Tokenizer accepts the following parameters: 4. 
`max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. ## Example of the character group tokenizer + +We can tokenize the on characters such as `whitespace`, `-` and `:`. + ``` POST _analyze { @@ -30,12 +33,9 @@ POST _analyze "text": "Fast-cars: drive fast!" } ``` -Summary of the outputted response text: + +By analyzing the text "Fast-cars: drive fast!", we can see the specified characters have been removed: + ``` Fast cars drive fast ``` - - - - - From 828e7fc604b9f3c69f9b8d7b494dbfbb95971f1d Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Wed, 9 Oct 2024 14:47:17 +0100 Subject: [PATCH 04/14] grammar fix Signed-off-by: leanne.laceybyrne@eliatra.com --- _analyzers/tokenizers/character-group-tokenizer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index a72a767224..a98a7bf86c 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -19,7 +19,7 @@ The Character Group Tokenizer accepts the following parameters: We can tokenize the on characters such as `whitespace`, `-` and `:`. -``` +```json POST _analyze { "tokenizer": { From def3c62fc6ed88b467c3ab9c0302d7cfd4c37d3f Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Thu, 10 Oct 2024 12:41:24 +0100 Subject: [PATCH 05/14] doc: small update for page numbers Signed-off-by: leanne.laceybyrne@eliatra.com --- _analyzers/tokenizers/character-group-tokenizer.md | 2 +- _analyzers/tokenizers/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index a98a7bf86c..c68823f6d7 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -2,7 +2,7 @@ layout: default title: Character Group Tokenizer parent: Tokenizers -nav_order: 70 +nav_order: 20 has_children: false has_toc: false --- diff --git a/_analyzers/tokenizers/index.md b/_analyzers/tokenizers/index.md index e5ac796c12..6bf6fc1aba 100644 --- a/_analyzers/tokenizers/index.md +++ b/_analyzers/tokenizers/index.md @@ -1,7 +1,7 @@ --- layout: default title: Tokenizers -nav_order: 60 +nav_order: 10 has_children: false has_toc: false redirect_from: From cef551a60dc2bd7aafabb342a37d943c01123567 Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Thu, 10 Oct 2024 12:56:54 +0100 Subject: [PATCH 06/14] layout fix: correct scentence case for all examples Signed-off-by: leanne.laceybyrne@eliatra.com --- _analyzers/tokenizers/character-group-tokenizer.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index c68823f6d7..ca3349c89a 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -9,9 +9,9 @@ has_toc: false # Character group tokenizer -The Character Group Tokenizer is designed to segment text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a straightforward tokenization approach is required, avoiding the complexity and overhead associated with pattern-based tokenizers. 
+The character group tokenizer is designed to segment text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a straightforward tokenization approach is required, avoiding the complexity and overhead associated with pattern-based tokenizers. -The Character Group Tokenizer accepts the following parameters: +The character group tokenizer accepts the following parameters: 1. `tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. When any character from this set is encountered, a new token is created. For example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. 4. `max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. From d4c1cc43cc1b97d35a5726857239ec7c2c3f2c37 Mon Sep 17 00:00:00 2001 From: "leanne.laceybyrne@eliatra.com" Date: Fri, 11 Oct 2024 11:25:30 +0100 Subject: [PATCH 07/14] small update: adding copy tag for json segment Signed-off-by: leanne.laceybyrne@eliatra.com --- _analyzers/tokenizers/character-group-tokenizer.md | 1 + 1 file changed, 1 insertion(+) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index ca3349c89a..1d1dcc7465 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -33,6 +33,7 @@ POST _analyze "text": "Fast-cars: drive fast!" } ``` +{% include copy-curl.html %} By analyzing the text "Fast-cars: drive fast!", we can see the specified characters have been removed: From b20028f2bc420c4ba2a606160d3b8b82aaadded7 Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 15 Oct 2024 17:59:35 -0600 Subject: [PATCH 08/14] Update _analyzers/tokenizers/character-group-tokenizer.md Signed-off-by: Melissa Vagi --- _analyzers/tokenizers/character-group-tokenizer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index 1d1dcc7465..d281df9d6b 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -1,6 +1,6 @@ --- layout: default -title: Character Group Tokenizer +title: Character group tokenizer parent: Tokenizers nav_order: 20 has_children: false From 9a8d21fc8032e5575f9b6e0037c6bdecfd7d83ff Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 15 Oct 2024 18:10:31 -0600 Subject: [PATCH 09/14] Update _analyzers/tokenizers/character-group-tokenizer.md Signed-off-by: Melissa Vagi --- _analyzers/tokenizers/character-group-tokenizer.md | 1 + 1 file changed, 1 insertion(+) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index d281df9d6b..8a713270e3 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -12,6 +12,7 @@ has_toc: false The character group tokenizer is designed to segment text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a straightforward tokenization approach is required, avoiding the complexity and overhead associated with pattern-based tokenizers. The character group tokenizer accepts the following parameters: + 1. 
`tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. When any character from this set is encountered, a new token is created. For example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. 4. `max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. From 563ae6b8d887841f790fd19502f1a4d977c5e706 Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 15 Oct 2024 18:10:55 -0600 Subject: [PATCH 10/14] Update _analyzers/tokenizers/character-group-tokenizer.md Signed-off-by: Melissa Vagi --- _analyzers/tokenizers/character-group-tokenizer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index 8a713270e3..1c0437a8ff 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -14,7 +14,7 @@ The character group tokenizer is designed to segment text into tokens based on t The character group tokenizer accepts the following parameters: 1. `tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. When any character from this set is encountered, a new token is created. For example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. -4. `max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. +2. `max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. ## Example of the character group tokenizer From 0eca713ef0cec394989b4302aab13402573ce5bd Mon Sep 17 00:00:00 2001 From: leanneeliatra <131779422+leanneeliatra@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:51:23 +0100 Subject: [PATCH 11/14] Apply suggestions from code review Co-authored-by: Melissa Vagi Signed-off-by: leanneeliatra <131779422+leanneeliatra@users.noreply.github.com> --- _analyzers/tokenizers/character-group-tokenizer.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md index 1c0437a8ff..f232af0742 100644 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ b/_analyzers/tokenizers/character-group-tokenizer.md @@ -9,16 +9,16 @@ has_toc: false # Character group tokenizer -The character group tokenizer is designed to segment text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a straightforward tokenization approach is required, avoiding the complexity and overhead associated with pattern-based tokenizers. +The character group tokenizer is a simple text segmentation tool that splits text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a simple tokenization method is required, avoiding the complexity and overhead associated with pattern-based tokenizers. The character group tokenizer accepts the following parameters: -1. 
`tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. When any character from this set is encountered, a new token is created. For example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. -2. `max_token_length`: This parameter defines the maximum length allowed for a token. If a token exceeds this specified length, it will be split at intervals defined by `max_token_length`. The default value is `255`. +1. `tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. The tokenizer creates a new token upon encountering any character from the specified set, for example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. +2. `max_token_length`: Defines the token's maximum length. If the token exceeds the specified length, then the tokenizer splits a token at intervals defined by the parameter. Default is `255`. -## Example of the character group tokenizer +## Example: Using the character group tokenizer -We can tokenize the on characters such as `whitespace`, `-` and `:`. +To tokenize the on characters such as `whitespace`, `-` and `:`, see the following example request: ```json POST _analyze @@ -36,7 +36,7 @@ POST _analyze ``` {% include copy-curl.html %} -By analyzing the text "Fast-cars: drive fast!", we can see the specified characters have been removed: +The following response shows that the specified characters have been removed: ``` Fast cars drive fast From 4af10ef04706679478701d58a20ebced26537ea5 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 2 Jan 2025 10:59:39 -0500 Subject: [PATCH 12/14] Doc review Signed-off-by: Fanit Kolchina --- .../tokenizers/character-group-tokenizer.md | 43 ------ _analyzers/tokenizers/character-group.md | 124 ++++++++++++++++++ 2 files changed, 124 insertions(+), 43 deletions(-) delete mode 100644 _analyzers/tokenizers/character-group-tokenizer.md create mode 100644 _analyzers/tokenizers/character-group.md diff --git a/_analyzers/tokenizers/character-group-tokenizer.md b/_analyzers/tokenizers/character-group-tokenizer.md deleted file mode 100644 index f232af0742..0000000000 --- a/_analyzers/tokenizers/character-group-tokenizer.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -layout: default -title: Character group tokenizer -parent: Tokenizers -nav_order: 20 -has_children: false -has_toc: false ---- - -# Character group tokenizer - -The character group tokenizer is a simple text segmentation tool that splits text into tokens based on the presence of specific characters. This tokenizer is ideal for scenarios where a simple tokenization method is required, avoiding the complexity and overhead associated with pattern-based tokenizers. - -The character group tokenizer accepts the following parameters: - -1. `tokenize_on_chars`: Specifies a set of characters on which the text should be tokenized. The tokenizer creates a new token upon encountering any character from the specified set, for example, single characters `(e.g., -, @)` and character classes such as `whitespace`, `letter`, `digit`, `punctuation`, and `symbol`. -2. `max_token_length`: Defines the token's maximum length. If the token exceeds the specified length, then the tokenizer splits a token at intervals defined by the parameter. Default is `255`. 
- -## Example: Using the character group tokenizer - -To tokenize the on characters such as `whitespace`, `-` and `:`, see the following example request: - -```json -POST _analyze -{ - "tokenizer": { - "type": "char_group", - "tokenize_on_chars": [ - "whitespace", - "-", - ":" - ] - }, - "text": "Fast-cars: drive fast!" -} -``` -{% include copy-curl.html %} - -The following response shows that the specified characters have been removed: - -``` -Fast cars drive fast -``` diff --git a/_analyzers/tokenizers/character-group.md b/_analyzers/tokenizers/character-group.md new file mode 100644 index 0000000000..850b450198 --- /dev/null +++ b/_analyzers/tokenizers/character-group.md @@ -0,0 +1,124 @@ +--- +layout: default +title: Character group +parent: Tokenizers +nav_order: 20 +has_children: false +has_toc: false +--- + +# Character group tokenizer + +The `char_group` tokenizer splits text into tokens using specific characters as delimiters. It is suitable for situations requiring straightforward tokenization, offering a simpler alternative to pattern-based tokenizers without the added complexity. + +## Example usage + +The following example request creates a new index named `my_index` and configures an analyzer with a `char_group` tokenizer. The tokenizer splits text on white space, `-`, and `:` characters: + +```json +PUT /my_index +{ + "settings": { + "analysis": { + "tokenizer": { + "my_char_group_tokenizer": { + "type": "char_group", + "tokenize_on_chars": [ + "whitespace", + "-", + ":" + ] + } + }, + "analyzer": { + "my_char_group_analyzer": { + "type": "custom", + "tokenizer": "my_char_group_tokenizer" + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "my_char_group_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /my_index/_analyze +{ + "analyzer": "my_char_group_analyzer", + "text": "Fast-driving cars: they drive fast!" +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "Fast", + "start_offset": 0, + "end_offset": 4, + "type": "word", + "position": 0 + }, + { + "token": "driving", + "start_offset": 5, + "end_offset": 12, + "type": "word", + "position": 1 + }, + { + "token": "cars", + "start_offset": 13, + "end_offset": 17, + "type": "word", + "position": 2 + }, + { + "token": "they", + "start_offset": 19, + "end_offset": 23, + "type": "word", + "position": 3 + }, + { + "token": "drive", + "start_offset": 24, + "end_offset": 29, + "type": "word", + "position": 4 + }, + { + "token": "fast!", + "start_offset": 30, + "end_offset": 35, + "type": "word", + "position": 5 + } + ] +} +``` + +## Parameters + +The `char_group` tokenizer can be configured with the following parameters. + +| **Parameter** | **Required/Optional** | **Data type** | **Description** | +| :--- | :--- | :--- | :--- | +| `tokenize_on_chars` | Required | Array | Specifies a set of characters on which the text should be tokenized. You can specify single characters (for example, `-`, `@`) or character classes such as `whitespace`, `letter`, `digit`, `punctuation`, or `symbol`. | +| `max_token_length` | Optional | Integer | Sets the maximum length of the produced token. If this length is exceeded, the token is split into multiple tokens at the length configured in `max_token_length`. Default is `255`. 
| \ No newline at end of file From f5f063906565e244e85c39a91c313f01cd64beac Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 2 Jan 2025 11:00:34 -0500 Subject: [PATCH 13/14] Reorder index Signed-off-by: Fanit Kolchina --- _analyzers/tokenizers/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/tokenizers/index.md b/_analyzers/tokenizers/index.md index 6bf6fc1aba..e5ac796c12 100644 --- a/_analyzers/tokenizers/index.md +++ b/_analyzers/tokenizers/index.md @@ -1,7 +1,7 @@ --- layout: default title: Tokenizers -nav_order: 10 +nav_order: 60 has_children: false has_toc: false redirect_from: From 52eaad871da322f0b501ea969c4e8db084d5ae8f Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 2 Jan 2025 11:45:48 -0500 Subject: [PATCH 14/14] Add escape characters Signed-off-by: Fanit Kolchina --- _analyzers/tokenizers/character-group.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/tokenizers/character-group.md b/_analyzers/tokenizers/character-group.md index 850b450198..56e52780fc 100644 --- a/_analyzers/tokenizers/character-group.md +++ b/_analyzers/tokenizers/character-group.md @@ -120,5 +120,5 @@ The `char_group` tokenizer can be configured with the following parameters. | **Parameter** | **Required/Optional** | **Data type** | **Description** | | :--- | :--- | :--- | :--- | -| `tokenize_on_chars` | Required | Array | Specifies a set of characters on which the text should be tokenized. You can specify single characters (for example, `-`, `@`) or character classes such as `whitespace`, `letter`, `digit`, `punctuation`, or `symbol`. | +| `tokenize_on_chars` | Required | Array | Specifies a set of characters on which the text should be tokenized. You can specify single characters (for example, `-` or `@`), including escape characters (for example, `\n`), or character classes such as `whitespace`, `letter`, `digit`, `punctuation`, or `symbol`. | | `max_token_length` | Optional | Integer | Sets the maximum length of the produced token. If this length is exceeded, the token is split into multiple tokens at the length configured in `max_token_length`. Default is `255`. | \ No newline at end of file