Skip to content

Commit

Permalink
handle nested paths
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Feb 14, 2024
1 parent 675b582 commit 689b16f
Show file tree
Hide file tree
Showing 2 changed files with 250 additions and 12 deletions.
244 changes: 232 additions & 12 deletions quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,22 +487,113 @@ async fn es_compat_index_field_capabilities(
Ok(search_response_rest)
}

fn filter_source(
value: &mut serde_json::Value,
_source_excludes: &Option<Vec<String>>,
_source_includes: &Option<Vec<String>>,
) {
fn remove_path(value: &mut serde_json::Value, path: &str) {
for (prefix, suffix) in generate_path_variants_with_suffix(path) {
match value {
serde_json::Value::Object(ref mut map) => {
if let Some(suffix) = suffix {
if let Some(sub_value) = map.get_mut(prefix) {
remove_path(sub_value, suffix);
return;
}
} else {
map.remove(prefix);
}
}
_ => continue,
}
}
}
fn retain_includes(
value: &mut serde_json::Value,
current_path: &str,
include_paths: &Vec<String>,
) {
if let Some(ref mut map) = value.as_object_mut() {
map.retain(|key, sub_value| {
let path = if current_path.is_empty() {
key.to_string()
} else {
format!("{}.{}", current_path, key)
};

if include_paths.contains(&path) {
// Exact match keep whole node
return true;
}
// Check if the path is sub path of any allowed path
for allowed_path in include_paths {
if allowed_path.starts_with(path.as_str()) {
retain_includes(sub_value, &path, include_paths);
return true;
}
}
false
});
}
}

// Remove fields that are not included
if let Some(includes) = _source_includes {
retain_includes(value, "", includes);
}

// Remove fields that are excluded
if let Some(excludes) = _source_excludes {
for exclude in excludes {
remove_path(value, exclude);
}
}
}

/// "app.id.name" -> [("app", Some("id.name")), ("app.id", Some("name")), ("app.id.name", None)]
fn generate_path_variants_with_suffix(input: &str) -> Vec<(&str, Option<&str>)> {
let mut variants = Vec::new();

// Iterate over each character in the input.
for (idx, ch) in input.char_indices() {
if ch == '.' {
// If a dot is found, create a variant using the current slice and the remainder of the
// string.
let prefix = &input[0..idx];
let suffix = if idx + 1 < input.len() {
Some(&input[idx + 1..])
} else {
None
};
variants.push((prefix, suffix));
}
}

variants.push((&input[0..], None));

variants
}

fn convert_hit(
hit: quickwit_proto::search::Hit,
append_shard_doc: bool,
_source_excludes: &Option<Vec<String>>,
_source_includes: &Option<Vec<String>>,
) -> ElasticHit {
let mut fields: BTreeMap<String, serde_json::Value> =
serde_json::from_str(&hit.json).unwrap_or_default();
if let Some(_source_includes) = _source_includes {
fields.retain(|key, _| _source_includes.contains(key));
}
if let Some(_source_excludes) = _source_excludes {
for exclude in _source_excludes {
fields.remove(exclude);
let mut json: serde_json::Value = serde_json::from_str(&hit.json).unwrap_or(json!({}));
filter_source(&mut json, _source_excludes, _source_includes);
let source =
Source::from_string(serde_json::to_string(&json).unwrap_or_else(|_| "{}".to_string()))
.unwrap_or_else(|_| Source::from_string("{}".to_string()).unwrap());

let mut fields: BTreeMap<String, serde_json::Value> = Default::default();
if let serde_json::Value::Object(map) = json {
for (key, val) in map {
fields.insert(key, val);
}
}

let mut sort = Vec::new();
if let Some(partial_hit) = hit.partial_hit {
if let Some(sort_value) = partial_hit.sort_value {
Expand All @@ -517,9 +608,6 @@ fn convert_hit(
));
}
}
let source =
Source::from_string(serde_json::to_string(&fields).unwrap_or_else(|_| "{}".to_string()))
.unwrap_or_else(|_| Source::from_string("{}".to_string()).unwrap());

ElasticHit {
fields,
Expand Down Expand Up @@ -745,7 +833,7 @@ pub(crate) fn str_lines(body: &str) -> impl Iterator<Item = &str> {
mod tests {
use hyper::StatusCode;

use super::partial_hit_from_search_after_param;
use super::{partial_hit_from_search_after_param, *};

#[test]
fn test_partial_hit_from_search_after_param_invalid_length() {
Expand Down Expand Up @@ -791,4 +879,136 @@ mod tests {
u32}`"
);
}

#[test]
fn test_single_element() {
let input = "app";
let expected = vec![("app", None)];
assert_eq!(generate_path_variants_with_suffix(input), expected);
}

#[test]
fn test_two_elements() {
let input = "app.id";
let expected = vec![("app", Some("id")), ("app.id", None)];
assert_eq!(generate_path_variants_with_suffix(input), expected);
}

#[test]
fn test_multiple_elements() {
let input = "app.id.name";
let expected = vec![
("app", Some("id.name")),
("app.id", Some("name")),
("app.id.name", None),
];
assert_eq!(generate_path_variants_with_suffix(input), expected);
}

#[test]
fn test_include_fields1() {
let mut fields = json!({
"app": { "id": 123, "name": "Blub" },
"user": { "id": 456, "name": "Fred" }
});

let includes = Some(vec!["app.id".to_string()]);
filter_source(&mut fields, &None, &includes);

let expected = json!({
"app": { "id": 123 }
});

assert_eq!(fields, expected);
}
#[test]
fn test_include_fields2() {
let mut fields = json!({
"app": { "id": 123, "name": "Blub" },
"app.id": { "id": 123, "name": "Blub" },
"user": { "id": 456, "name": "Fred" }
});

let includes = Some(vec!["app".to_string(), "app.id".to_string()]);
filter_source(&mut fields, &None, &includes);

let expected = json!({
"app": { "id": 123, "name": "Blub" },
"app.id": { "id": 123, "name": "Blub" },
});

assert_eq!(fields, expected);
}

#[test]
fn test_exclude_fields() {
let mut fields = json!({
"app": {
"id": 123,
"name": "Blub"
},
"user": {
"id": 456,
"name": "Fred"
}
});

let excludes = Some(vec!["app.name".to_string(), "user.id".to_string()]);
filter_source(&mut fields, &excludes, &None);

let expected = json!({
"app": {
"id": 123
},
"user": {
"name": "Fred"
}
});

assert_eq!(fields, expected);
}

#[test]
fn test_include_and_exclude_fields() {
let mut fields = json!({
"app": { "id": 123, "name": "Blub", "version": "1.0" },
"user": { "id": 456, "name": "Fred", "email": "[email protected]" }
});

let includes = Some(vec![
"app".to_string(),
"user.name".to_string(),
"user.email".to_string(),
]);
let excludes = Some(vec!["app.version".to_string(), "user.email".to_string()]);
filter_source(&mut fields, &excludes, &includes);

let expected = json!({
"app": { "id": 123, "name": "Blub" },
"user": { "name": "Fred" }
});

assert_eq!(fields, expected);
}

#[test]
fn test_no_includes_or_excludes() {
let mut fields = json!({
"app": {
"id": 123,
"name": "Blub"
}
});

filter_source(&mut fields, &None, &None);

let expected = json!({
"app": {
"id": 123,
"name": "Blub"
}
});

assert_eq!(fields, expected);
}
}
18 changes: 18 additions & 0 deletions quickwit/rest-api-tests/scenarii/es_compatibility/0022-source.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,21 @@ expected:
- _source:
$expect: "len(val) == 1" # Contains only 'actor'
id: 5688
--- # _source_includes with path
params:
_source_includes: "actor.id"
json:
size: 1
query:
match_all: {}
expected:
hits:
total:
value: 100
relation: eq
hits:
- _source:
actor:
$expect: "len(val) == 1" # Contains only 'actor'
id: 5688

0 comments on commit 689b16f

Please sign in to comment.