From fd5744cb2345bf7261193228e3748bcb88192dc2 Mon Sep 17 00:00:00 2001 From: fmassot Date: Wed, 17 Jan 2024 22:23:32 +0100 Subject: [PATCH 1/3] Allow $ and @ characters in field names. --- .../quickwit-doc-mapper/src/default_doc_mapper/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs index 01be87c17e3..c151928bb8c 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs @@ -46,17 +46,17 @@ pub(crate) use self::tokenizer_entry::{ use crate::QW_RESERVED_FIELD_NAMES; /// Regular expression validating a field mapping name. -pub const FIELD_MAPPING_NAME_PATTERN: &str = r"^[_\-a-zA-Z][_\.\-a-zA-Z0-9]{0,254}$"; +pub const FIELD_MAPPING_NAME_PATTERN: &str = r"^[@$_\-a-zA-Z][@$_\.\-a-zA-Z0-9]{0,254}$"; /// Validates a field mapping name. -/// Returns `Ok(())` if the name can be used for a field mapping. Does not check for reserved field -/// mapping names such as `_source`. +/// Returns `Ok(())` if the name can be used for a field mapping. /// /// A field mapping name: /// - may only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, hyphens -/// `-`, and underscores `_`; +/// `-`, `$` and underscores `_`; /// - must not start with a dot or a digit; -/// - must be different from Quickwit's reserved field mapping names `_source`, `_dynamic`; +/// - must be different from Quickwit's reserved field mapping names `_source`, `_dynamic`, +/// `_field_presence`; /// - must not be longer than 255 characters. pub fn validate_field_mapping_name(field_mapping_name: &str) -> anyhow::Result<()> { static FIELD_MAPPING_NAME_PTN: Lazy<Regex> = From 98673b7e5d823f13675dd38f9ef8d935ea87d51c Mon Sep 17 00:00:00 2001 From: fmassot Date: Thu, 18 Jan 2024 00:05:22 +0100 Subject: [PATCH 2/3] Add test and update docs. 
--- docs/configuration/index-config.md | 7 ++++--- quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/configuration/index-config.md b/docs/configuration/index-config.md index 6e792e8368f..38154aed253 100644 --- a/docs/configuration/index-config.md +++ b/docs/configuration/index-config.md @@ -491,12 +491,13 @@ src.port:53 AND query_params.ctk:e42bb897d ### Field name validation rules Currently Quickwit only accepts field name that matches the following regular expression: -`[a-zA-Z][_\.\-a-zA-Z0-9]*$` +`^[@$_\-a-zA-Z][@$_\.\-a-zA-Z0-9]{0,254}$` In plain language: - it needs to have at least one character. -- it should only contain latin letter `[a-zA-Z]` digits `[0-9]` or (`.`, `-`, `_`). -- the first character needs to be a letter. +- it can only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, hyphens `-`, `$`, `@` and underscores `_`; +- it must not start with a dot or a digit. +- it must be different from Quickwit's reserved field mapping names `_source`, `_dynamic`, `_field_presence`. :::caution For field names containing the `.` character, you will need to escape it when referencing them. Otherwise the `.` character will be interpreted as a JSON object property access. Because of this, it is recommended to avoid using field names containing the `.` character. 
diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs index c151928bb8c..277730097f2 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs @@ -53,7 +53,7 @@ pub const FIELD_MAPPING_NAME_PATTERN: &str = r"^[@$_\-a-zA-Z][@$_\.\-a-zA-Z0-9]{ /// /// A field mapping name: /// - may only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, hyphens -/// `-`, `$` and underscores `_`; +/// `-`, `$`, `@` and underscores `_`; /// - must not start with a dot or a digit; /// - must be different from Quickwit's reserved field mapping names `_source`, `_dynamic`, /// `_field_presence`; @@ -146,6 +146,7 @@ mod tests { assert!(validate_field_mapping_name("my-field").is_ok()); assert!(validate_field_mapping_name("my.field").is_ok()); assert!(validate_field_mapping_name("my_field").is_ok()); + assert!(validate_field_mapping_name("$my_field@").is_ok()); assert!(validate_field_mapping_name(&"a".repeat(255)).is_ok()); } } From be328d64cd94d85dfbf23236a386e31f0bebf253 Mon Sep 17 00:00:00 2001 From: fmassot Date: Thu, 18 Jan 2024 00:21:55 +0100 Subject: [PATCH 3/3] Fix typo. --- docs/configuration/index-config.md | 2 +- docs/configuration/storage-config.md | 2 ++ quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/configuration/index-config.md b/docs/configuration/index-config.md index 38154aed253..ad95384444a 100644 --- a/docs/configuration/index-config.md +++ b/docs/configuration/index-config.md @@ -495,7 +495,7 @@ Currently Quickwit only accepts field name that matches the following regular ex In plain language: - it needs to have at least one character. 
-- it can only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, hyphens `-`, `$`, `@` and underscores `_`; +- it can only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, `.`, hyphens `-`, underscores `_`, at `@` and dollar `$` signs. - it must not start with a dot or a digit. - it must be different from Quickwit's reserved field mapping names `_source`, `_dynamic`, `_field_presence`. diff --git a/docs/configuration/storage-config.md b/docs/configuration/storage-config.md index 6db66d08b0f..f26039520fd 100644 --- a/docs/configuration/storage-config.md +++ b/docs/configuration/storage-config.md @@ -146,3 +146,5 @@ storage: flavor: minio endpoint: http://127.0.0.1:9000 ``` + +Note: `default_index_root_uri` and index URIs do not include the endpoint; set them as regular S3 paths such as `s3://indexes`. diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs index 277730097f2..a096650d78e 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mod.rs @@ -52,8 +52,8 @@ pub const FIELD_MAPPING_NAME_PATTERN: &str = r"^[@$_\-a-zA-Z][@$_\.\-a-zA-Z0-9]{ /// Returns `Ok(())` if the name can be used for a field mapping. /// /// A field mapping name: -/// - may only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, hyphens -/// `-`, `$`, `@` and underscores `_`; +/// - can only contain uppercase and lowercase ASCII letters `[a-zA-Z]`, digits `[0-9]`, `.`, +/// hyphens `-`, underscores `_`, at `@` and dollar `$` signs; /// - must not start with a dot or a digit; /// - must be different from Quickwit's reserved field mapping names `_source`, `_dynamic`, /// `_field_presence`;