diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index 388c8e9130e..b2930fe3052 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -487,22 +487,113 @@ async fn es_compat_index_field_capabilities( Ok(search_response_rest) } +fn filter_source( + value: &mut serde_json::Value, + _source_excludes: &Option>, + _source_includes: &Option>, +) { + fn remove_path(value: &mut serde_json::Value, path: &str) { + for (prefix, suffix) in generate_path_variants_with_suffix(path) { + match value { + serde_json::Value::Object(ref mut map) => { + if let Some(suffix) = suffix { + if let Some(sub_value) = map.get_mut(prefix) { + remove_path(sub_value, suffix); + return; + } + } else { + map.remove(prefix); + } + } + _ => continue, + } + } + } + fn retain_includes( + value: &mut serde_json::Value, + current_path: &str, + include_paths: &Vec, + ) { + if let Some(ref mut map) = value.as_object_mut() { + map.retain(|key, sub_value| { + let path = if current_path.is_empty() { + key.to_string() + } else { + format!("{}.{}", current_path, key) + }; + + if include_paths.contains(&path) { + // Exact match keep whole node + return true; + } + // Check if the path is sub path of any allowed path + for allowed_path in include_paths { + if allowed_path.starts_with(path.as_str()) { + retain_includes(sub_value, &path, include_paths); + return true; + } + } + false + }); + } + } + + // Remove fields that are not included + if let Some(includes) = _source_includes { + retain_includes(value, "", includes); + } + + // Remove fields that are excluded + if let Some(excludes) = _source_excludes { + for exclude in excludes { + remove_path(value, exclude); + } + } +} + +/// "app.id.name" -> [("app", Some("id.name")), ("app.id", Some("name")), ("app.id.name", None)] +fn generate_path_variants_with_suffix(input: &str) -> Vec<(&str, Option<&str>)> { + let mut variants = Vec::new(); + + // Iterate over each character in the input. + for (idx, ch) in input.char_indices() { + if ch == '.' { + // If a dot is found, create a variant using the current slice and the remainder of the + // string. + let prefix = &input[0..idx]; + let suffix = if idx + 1 < input.len() { + Some(&input[idx + 1..]) + } else { + None + }; + variants.push((prefix, suffix)); + } + } + + variants.push((&input[0..], None)); + + variants +} + fn convert_hit( hit: quickwit_proto::search::Hit, append_shard_doc: bool, _source_excludes: &Option>, _source_includes: &Option>, ) -> ElasticHit { - let mut fields: BTreeMap = - serde_json::from_str(&hit.json).unwrap_or_default(); - if let Some(_source_includes) = _source_includes { - fields.retain(|key, _| _source_includes.contains(key)); - } - if let Some(_source_excludes) = _source_excludes { - for exclude in _source_excludes { - fields.remove(exclude); + let mut json: serde_json::Value = serde_json::from_str(&hit.json).unwrap_or(json!({})); + filter_source(&mut json, _source_excludes, _source_includes); + let source = + Source::from_string(serde_json::to_string(&json).unwrap_or_else(|_| "{}".to_string())) + .unwrap_or_else(|_| Source::from_string("{}".to_string()).unwrap()); + + let mut fields: BTreeMap = Default::default(); + if let serde_json::Value::Object(map) = json { + for (key, val) in map { + fields.insert(key, val); } } + let mut sort = Vec::new(); if let Some(partial_hit) = hit.partial_hit { if let Some(sort_value) = partial_hit.sort_value { @@ -517,9 +608,6 @@ fn convert_hit( )); } } - let source = - Source::from_string(serde_json::to_string(&fields).unwrap_or_else(|_| "{}".to_string())) - .unwrap_or_else(|_| Source::from_string("{}".to_string()).unwrap()); ElasticHit { fields, @@ -745,7 +833,7 @@ pub(crate) fn str_lines(body: &str) -> impl Iterator { mod tests { use hyper::StatusCode; - use super::partial_hit_from_search_after_param; + use super::{partial_hit_from_search_after_param, *}; #[test] fn test_partial_hit_from_search_after_param_invalid_length() { @@ -791,4 +879,136 @@ mod tests { u32}`" ); } + + #[test] + fn test_single_element() { + let input = "app"; + let expected = vec![("app", None)]; + assert_eq!(generate_path_variants_with_suffix(input), expected); + } + + #[test] + fn test_two_elements() { + let input = "app.id"; + let expected = vec![("app", Some("id")), ("app.id", None)]; + assert_eq!(generate_path_variants_with_suffix(input), expected); + } + + #[test] + fn test_multiple_elements() { + let input = "app.id.name"; + let expected = vec![ + ("app", Some("id.name")), + ("app.id", Some("name")), + ("app.id.name", None), + ]; + assert_eq!(generate_path_variants_with_suffix(input), expected); + } + + #[test] + fn test_include_fields1() { + let mut fields = json!({ + "app": { "id": 123, "name": "Blub" }, + "user": { "id": 456, "name": "Fred" } + }); + + let includes = Some(vec!["app.id".to_string()]); + filter_source(&mut fields, &None, &includes); + + let expected = json!({ + "app": { "id": 123 } + }); + + assert_eq!(fields, expected); + } + #[test] + fn test_include_fields2() { + let mut fields = json!({ + "app": { "id": 123, "name": "Blub" }, + "app.id": { "id": 123, "name": "Blub" }, + "user": { "id": 456, "name": "Fred" } + }); + + let includes = Some(vec!["app".to_string(), "app.id".to_string()]); + filter_source(&mut fields, &None, &includes); + + let expected = json!({ + "app": { "id": 123, "name": "Blub" }, + "app.id": { "id": 123, "name": "Blub" }, + }); + + assert_eq!(fields, expected); + } + + #[test] + fn test_exclude_fields() { + let mut fields = json!({ + "app": { + "id": 123, + "name": "Blub" + }, + "user": { + "id": 456, + "name": "Fred" + } + }); + + let excludes = Some(vec!["app.name".to_string(), "user.id".to_string()]); + filter_source(&mut fields, &excludes, &None); + + let expected = json!({ + "app": { + "id": 123 + }, + "user": { + "name": "Fred" + } + }); + + assert_eq!(fields, expected); + } + + #[test] + fn test_include_and_exclude_fields() { + let mut fields = json!({ + "app": { "id": 123, "name": "Blub", "version": "1.0" }, + "user": { "id": 456, "name": "Fred", "email": "john@example.com" } + }); + + let includes = Some(vec![ + "app".to_string(), + "user.name".to_string(), + "user.email".to_string(), + ]); + let excludes = Some(vec!["app.version".to_string(), "user.email".to_string()]); + filter_source(&mut fields, &excludes, &includes); + + let expected = json!({ + "app": { "id": 123, "name": "Blub" }, + "user": { "name": "Fred" } + }); + + assert_eq!(fields, expected); + } + + #[test] + fn test_no_includes_or_excludes() { + let mut fields = json!({ + "app": { + "id": 123, + "name": "Blub" + } + }); + + filter_source(&mut fields, &None, &None); + + let expected = json!({ + "app": { + "id": 123, + "name": "Blub" + } + }); + + assert_eq!(fields, expected); + } } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0022-source.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0022-source.yaml index 7b5a5fae95b..e5b041ad1aa 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0022-source.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0022-source.yaml @@ -47,3 +47,21 @@ expected: - _source: $expect: "len(val) == 1" # Contains only 'actor' id: 5688 +--- # _source_includes with path +params: + _source_includes: "actor.id" +json: + size: 1 + query: + match_all: {} +expected: + hits: + total: + value: 100 + relation: eq + hits: + - _source: + actor: + $expect: "len(val) == 1" # Contains only 'actor' + id: 5688 +