diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs index 9205f41e1e1..43f12afa773 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs @@ -17,7 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use serde::{Deserialize, Serialize}; +use serde::Deserialize; use serde_with::formats::PreferMany; use serde_with::{serde_as, OneOrMany}; @@ -29,7 +29,7 @@ use crate::query_ast::{self, QueryAst}; /// - minimum_should_match /// - named queries #[serde_as] -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Deserialize, Debug, PartialEq, Eq, Clone)] #[serde(deny_unknown_fields)] pub struct BoolQuery { #[serde_as(deserialize_as = "OneOrMany<_, PreferMany>")] @@ -48,6 +48,18 @@ pub struct BoolQuery { pub boost: Option, } +impl BoolQuery { + pub(crate) fn union(children: Vec) -> BoolQuery { + BoolQuery { + must: Vec::new(), + must_not: Vec::new(), + should: children, + filter: Vec::new(), + boost: None, + } + } +} + fn convert_vec(query_dsls: Vec) -> anyhow::Result> { query_dsls .into_iter() diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs new file mode 100644 index 00000000000..f67662a1484 --- /dev/null +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs @@ -0,0 +1,212 @@ +// Copyright (C) 2023 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. 
+// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +use std::fmt; + +use serde::de::{self, MapAccess, Visitor}; +use serde::{Deserialize, Deserializer, Serialize}; + +use crate::elastic_query_dsl::ConvertableToQueryAst; +use crate::query_ast::{FullTextMode, FullTextParams, FullTextQuery, QueryAst}; +use crate::{MatchAllOrNone, OneFieldMap}; + +/// `MatchPhraseQuery` as defined in +/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query-phrase.html> +/// +/// Serialized as a single-entry map: the key is the field name, the value holds the parameters. +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)] +#[serde( + from = "OneFieldMap<MatchPhraseQueryParamsForDeserialization>", + into = "OneFieldMap<MatchPhraseQueryParams>" +)] +pub struct MatchPhraseQuery { + field: String, + params: MatchPhraseQueryParams, +} + +/// Parameters of a match phrase query. Only `query` is mandatory; unknown keys are rejected. +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)] +#[serde(deny_unknown_fields)] +pub struct MatchPhraseQueryParams { + query: String, + #[serde(default)] + zero_terms_query: MatchAllOrNone, + // Forwarded as the tokenizer of the resulting full text query. + #[serde(default)] + analyzer: Option<String>, + // Defaults to 0 when absent. + #[serde(default)] + slop: u32, +} + +impl ConvertableToQueryAst for MatchPhraseQuery { + /// Converts the query into a `QueryAst::FullText` node in phrase mode, carrying over the slop, the analyzer and the zero-terms behavior. + fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> { + let full_text_params = FullTextParams { + tokenizer: self.params.analyzer, + mode: FullTextMode::Phrase { + slop: self.params.slop, + }, + zero_terms_query: self.params.zero_terms_query, + }; + Ok(QueryAst::FullText(FullTextQuery { + field: self.field, + text: self.params.query, + params: full_text_params, + })) + } +} + +// -------------- +// +// Below is the Serialization/Deserialization code +// The 
difficulty here is to support the two following formats: +// +// `{"field": {"query": "my query", "slop": 1}}` +// `{"field": "my query"}` +// +// We don't use untagged enum to support this, in order to keep good errors. +// +// The code below is adapted from solution described here: https://serde.rs/string-or-struct.html + +/// Deserialization-only wrapper: routes the parameters through `string_or_struct` so that both the string and the object forms are accepted. +#[derive(Serialize, Deserialize)] +#[serde(transparent)] +struct MatchPhraseQueryParamsForDeserialization { + #[serde(deserialize_with = "string_or_struct")] + inner: MatchPhraseQueryParams, +} + +impl From<MatchPhraseQuery> for OneFieldMap<MatchPhraseQueryParams> { + fn from(match_phrase_query: MatchPhraseQuery) -> OneFieldMap<MatchPhraseQueryParams> { + OneFieldMap { + field: match_phrase_query.field, + value: match_phrase_query.params, + } + } +} + +impl From<OneFieldMap<MatchPhraseQueryParamsForDeserialization>> for MatchPhraseQuery { + fn from(match_query_params: OneFieldMap<MatchPhraseQueryParamsForDeserialization>) -> Self { + let OneFieldMap { field, value } = match_query_params; + MatchPhraseQuery { + field, + params: value.inner, + } + } +} + +/// Visitor accepting either a bare string (the query text) or a map of parameters. +struct MatchQueryParamsStringOrStructVisitor; + +impl<'de> Visitor<'de> for MatchQueryParamsStringOrStructVisitor { + type Value = MatchPhraseQueryParams; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("string or map containing the parameters of a match query.") + } + + // String form: the string is the query text, every other parameter takes its default. + fn visit_str<E>(self, query: &str) -> Result<Self::Value, E> + where E: serde::de::Error { + Ok(MatchPhraseQueryParams { + query: query.to_string(), + zero_terms_query: Default::default(), + analyzer: None, + slop: 0, + }) + } + + // Object form: delegate to the derived `Deserialize` impl of `MatchPhraseQueryParams`. + fn visit_map<M>(self, map: M) -> Result<Self::Value, M::Error> + where M: MapAccess<'de> { + Deserialize::deserialize(de::value::MapAccessDeserializer::new(map)) + } +} + +/// Deserializes `MatchPhraseQueryParams` from either a string or a map. +fn string_or_struct<'de, D>(deserializer: D) -> Result<MatchPhraseQueryParams, D::Error> +where D: Deserializer<'de> { + deserializer.deserialize_any(MatchQueryParamsStringOrStructVisitor) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deserialize_match_query_string() { + // We accept a single string + let match_query: MatchPhraseQuery = + serde_json::from_str(r#"{"my_field": 
"my_query"}"#).unwrap(); + assert_eq!(match_query.field, "my_field"); + assert_eq!(&match_query.params.query, "my_query"); + assert_eq!(match_query.params.slop, 0u32); + assert!(match_query.params.analyzer.is_none()); + assert_eq!( + match_query.params.zero_terms_query, + MatchAllOrNone::MatchNone + ); + } + + #[test] + fn test_deserialize_match_query_struct() { + // We accept a struct too. + let match_query: MatchPhraseQuery = serde_json::from_str( + r#" + {"my_field": + { + "query": "my_query", + "slop": 1 + } + } + "#, + ) + .unwrap(); + assert_eq!(match_query.field, "my_field"); + assert_eq!(&match_query.params.query, "my_query"); + assert_eq!(match_query.params.slop, 1u32); + } + + #[test] + fn test_deserialize_match_query_nice_errors() { + let deser_error = serde_json::from_str::<MatchPhraseQuery>( + r#"{"my_field": {"query": "my_query", "wrong_param": 2}}"#, + ) + .unwrap_err(); + assert!(deser_error + .to_string() + .contains("unknown field `wrong_param`")); + } + + #[test] + fn test_match_query() { + let match_query = MatchPhraseQuery { + field: "body".to_string(), + params: MatchPhraseQueryParams { + analyzer: Some("whitespace".to_string()), + query: "hello".to_string(), + slop: 2u32, + zero_terms_query: crate::MatchAllOrNone::MatchAll, + }, + }; + let ast = match_query.convert_to_query_ast().unwrap(); + let QueryAst::FullText(FullTextQuery { + field, + text, + params, + }) = ast + else { + panic!() + }; + assert_eq!(field, "body"); + assert_eq!(text, "hello"); + assert_eq!(params.mode, FullTextMode::Phrase { slop: 2u32 }); + assert_eq!(params.zero_terms_query, MatchAllOrNone::MatchAll); + } +} diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs index 3c76ab110e8..7f06683844e 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs @@ -132,11 +132,8 @@ where D: Deserializer<'de> { #[cfg(test)] mod tests { - 
use super::MatchQueryParams; - use crate::elastic_query_dsl::match_query::MatchQuery; - use crate::elastic_query_dsl::ConvertableToQueryAst; - use crate::query_ast::{FullTextMode, FullTextQuery, QueryAst}; - use crate::{BooleanOperand, MatchAllOrNone}; + use super::*; + use crate::query_ast::FullTextMode; #[test] fn test_deserialize_match_query_string() { diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs index 537b8c83d2b..f8cc776d8c8 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs @@ -21,7 +21,9 @@ use serde::{Deserialize, Serialize}; mod bool_query; mod exists_query; +mod match_phrase_query; mod match_query; +mod multi_match; mod one_field_map; mod phrase_prefix_query; mod query_string_query; @@ -30,39 +32,44 @@ mod term_query; use bool_query::BoolQuery; pub use one_field_map::OneFieldMap; -use phrase_prefix_query::MatchPhrasePrefix; +use phrase_prefix_query::MatchPhrasePrefixQuery; pub(crate) use query_string_query::QueryStringQuery; use range_query::RangeQuery; use term_query::TermQuery; use crate::elastic_query_dsl::exists_query::ExistsQuery; +use crate::elastic_query_dsl::match_phrase_query::MatchPhraseQuery; use crate::elastic_query_dsl::match_query::MatchQuery; +use crate::elastic_query_dsl::multi_match::MultiMatchQuery; +// use crate::elastic_query_dsl::multi_match::MultiMatchQuery; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::QueryAst; #[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone, Copy, Default)] -struct MatchAllQuery { +pub(crate) struct MatchAllQuery { pub boost: Option, } #[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone, Copy)] -struct MatchNoneQuery; +pub(crate) struct MatchNoneQuery; -#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] +#[derive(Deserialize, Debug, Eq, PartialEq, Clone)] #[serde(rename_all = "snake_case", deny_unknown_fields)] -enum 
ElasticQueryDslInner { +pub(crate) enum ElasticQueryDslInner { QueryString(QueryStringQuery), Bool(BoolQuery), Term(TermQuery), MatchAll(MatchAllQuery), MatchNone(MatchNoneQuery), Match(MatchQuery), - MatchPhrasePrefix(MatchPhrasePrefix), + MatchPhrase(MatchPhraseQuery), + MatchPhrasePrefix(MatchPhrasePrefixQuery), + MultiMatch(MultiMatchQuery), Range(RangeQuery), Exists(ExistsQuery), } -#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] +#[derive(Deserialize, Debug, Eq, PartialEq, Clone)] #[serde(transparent)] pub struct ElasticQueryDsl(ElasticQueryDslInner); @@ -95,12 +102,14 @@ impl ConvertableToQueryAst for ElasticQueryDslInner { } } Self::MatchNone(_) => Ok(QueryAst::MatchNone), + Self::MatchPhrase(match_phrase_query) => match_phrase_query.convert_to_query_ast(), Self::MatchPhrasePrefix(match_phrase_prefix) => { match_phrase_prefix.convert_to_query_ast() } Self::Range(range_query) => range_query.convert_to_query_ast(), Self::Match(match_query) => match_query.convert_to_query_ast(), Self::Exists(exists_query) => exists_query.convert_to_query_ast(), + Self::MultiMatch(multi_match_query) => multi_match_query.convert_to_query_ast(), } } } diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs new file mode 100644 index 00000000000..16f7e89134b --- /dev/null +++ b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs @@ -0,0 +1,100 @@ +// Copyright (C) 2023 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +use serde::Deserialize; + +use crate::elastic_query_dsl::bool_query::BoolQuery; +use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; + +/// Multi match queries are a bit odd. They end up being expanded into another type of query. +/// In Quickwit, we operate this expansion in a generic way at the time of deserialization. +#[derive(Deserialize, Debug, Eq, PartialEq, Clone)] +#[serde(try_from = "MultiMatchQueryForDeserialization")] +pub struct MultiMatchQuery(Box<ElasticQueryDslInner>); + +#[derive(Deserialize, Debug, Eq, PartialEq, Clone)] +struct MultiMatchQueryForDeserialization { + #[serde(rename = "type")] + match_type: MatchType, + // `other_parameters` is used to dynamically collect all of the remaining parameters. + // We will then expand the query at the json level, and then deserialize the right object. 
+ #[serde(flatten)] + other_parameters: serde_json::Map<String, serde_json::Value>, + fields: Vec<String>, +} + +/// Builds the single-field query obtained by applying the shared multi match parameters to `field`. +/// +/// The single-field queries are deserialized from a `{"<field name>": {<parameters>}}` object, so the parameters are nested under the field name before delegating to their deserializers. +fn deserialize_one( + match_type: MatchType, + field: &str, + json_object: serde_json::Map<String, serde_json::Value>, +) -> serde_json::Result<ElasticQueryDslInner> { + let mut one_field_map = serde_json::Map::with_capacity(1); + one_field_map.insert(field.to_string(), serde_json::Value::Object(json_object)); + let json_val = serde_json::Value::Object(one_field_map); + match match_type { + MatchType::Phrase => { + let phrase: crate::elastic_query_dsl::MatchPhraseQuery = + serde_json::from_value(json_val)?; + Ok(ElasticQueryDslInner::MatchPhrase(phrase)) + } + MatchType::PhrasePrefix => { + let phrase_prefix: crate::elastic_query_dsl::MatchPhrasePrefixQuery = + serde_json::from_value(json_val)?; + Ok(ElasticQueryDslInner::MatchPhrasePrefix(phrase_prefix)) + } + MatchType::MostFields => { + let match_query: crate::elastic_query_dsl::MatchQuery = + serde_json::from_value(json_val)?; + Ok(ElasticQueryDslInner::Match(match_query)) + } + } +} + +impl TryFrom<MultiMatchQueryForDeserialization> for MultiMatchQuery { + type Error = serde_json::Error; + + /// Expands the multi match query into one query per field, combined as the `should` clauses of a bool query. + fn try_from(multi_match_query: MultiMatchQueryForDeserialization) -> Result<Self, Self::Error> { + let mut children = Vec::new(); + for field in multi_match_query.fields { + let child = deserialize_one( + multi_match_query.match_type, + &field, + multi_match_query.other_parameters.clone(), + )?; + children.push(child); + } + let bool_query = BoolQuery::union(children); + Ok(MultiMatchQuery(Box::new(ElasticQueryDslInner::Bool( + bool_query.into(), + )))) + } +} + +/// Value of the `type` parameter of a multi match query; selects which single-field query each field is expanded into. +#[derive(Deserialize, Debug, Eq, PartialEq, Clone, Copy)] +#[serde(rename_all = "snake_case")] +pub enum MatchType { + MostFields, + Phrase, + PhrasePrefix, +} + +impl ConvertableToQueryAst for MultiMatchQuery { + fn convert_to_query_ast(self) -> anyhow::Result<crate::query_ast::QueryAst> { + self.0.convert_to_query_ast() + } +} diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs index 875410fea09..a0eb135328e 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs +++ 
b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs @@ -24,7 +24,7 @@ use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; use crate::query_ast::{self, FullTextMode, FullTextParams, QueryAst}; use crate::MatchAllOrNone; -pub type MatchPhrasePrefix = OneFieldMap; +pub type MatchPhrasePrefixQuery = OneFieldMap; fn default_max_expansions() -> u32 { 50 @@ -32,7 +32,7 @@ fn default_max_expansions() -> u32 { #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] #[serde(deny_unknown_fields)] -pub struct PhrasePrefixValue { +pub struct PhrasePrefixParams { pub query: String, #[serde(default)] pub analyzer: Option, @@ -44,15 +44,15 @@ pub struct PhrasePrefixValue { pub zero_terms_query: MatchAllOrNone, } -impl From for ElasticQueryDslInner { - fn from(term_query: MatchPhrasePrefix) -> Self { +impl From for ElasticQueryDslInner { + fn from(term_query: MatchPhrasePrefixQuery) -> Self { Self::MatchPhrasePrefix(term_query) } } -impl ConvertableToQueryAst for MatchPhrasePrefix { +impl ConvertableToQueryAst for MatchPhrasePrefixQuery { fn convert_to_query_ast(self) -> anyhow::Result { - let PhrasePrefixValue { + let PhrasePrefixParams { query, analyzer, max_expansions, @@ -76,15 +76,16 @@ impl ConvertableToQueryAst for MatchPhrasePrefix { #[cfg(test)] mod tests { - use super::{MatchAllOrNone, MatchPhrasePrefix, PhrasePrefixValue}; + use super::{MatchAllOrNone, MatchPhrasePrefixQuery, PhrasePrefixParams}; #[test] fn test_term_query_simple() { let phrase_prefix_json = r#"{ "message": { "query": "quick brown f" } }"#; - let phrase_prefix: MatchPhrasePrefix = serde_json::from_str(phrase_prefix_json).unwrap(); - let expected = MatchPhrasePrefix { + let phrase_prefix: MatchPhrasePrefixQuery = + serde_json::from_str(phrase_prefix_json).unwrap(); + let expected = MatchPhrasePrefixQuery { field: "message".to_string(), - value: PhrasePrefixValue { + value: PhrasePrefixParams { query: "quick brown f".to_string(), analyzer: None, 
max_expansions: 50,