diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
index 9205f41e1e1..43f12afa773 100644
--- a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
@@ -17,7 +17,7 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
-use serde::{Deserialize, Serialize};
+use serde::Deserialize;
use serde_with::formats::PreferMany;
use serde_with::{serde_as, OneOrMany};
@@ -29,7 +29,7 @@ use crate::query_ast::{self, QueryAst};
/// - minimum_should_match
/// - named queries
#[serde_as]
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
+#[derive(Deserialize, Debug, PartialEq, Eq, Clone)]
#[serde(deny_unknown_fields)]
pub struct BoolQuery {
#[serde_as(deserialize_as = "OneOrMany<_, PreferMany>")]
@@ -48,6 +48,18 @@ pub struct BoolQuery {
pub boost: Option,
}
+impl BoolQuery {
+ pub(crate) fn union(children: Vec) -> BoolQuery {
+ BoolQuery {
+ must: Vec::new(),
+ must_not: Vec::new(),
+ should: children,
+ filter: Vec::new(),
+ boost: None,
+ }
+ }
+}
+
fn convert_vec(query_dsls: Vec) -> anyhow::Result> {
query_dsls
.into_iter()
diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs
new file mode 100644
index 00000000000..f67662a1484
--- /dev/null
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs
@@ -0,0 +1,212 @@
+// Copyright (C) 2023 Quickwit, Inc.
+//
+// Quickwit is offered under the AGPL v3.0 and as commercial software.
+// For commercial licensing, contact us at hello@quickwit.io.
+//
+// AGPL:
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use std::fmt;
+
+use serde::de::{self, MapAccess, Visitor};
+use serde::{Deserialize, Deserializer, Serialize};
+
+use crate::elastic_query_dsl::ConvertableToQueryAst;
+use crate::query_ast::{FullTextMode, FullTextParams, FullTextQuery, QueryAst};
+use crate::{MatchAllOrNone, OneFieldMap};
+
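+/// `MatchPhraseQuery` as defined in
+/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query-phrase.html>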
+#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)]
+#[serde(
+ from = "OneFieldMap",
+ into = "OneFieldMap"
+)]
+pub struct MatchPhraseQuery {
+ field: String,
+ params: MatchPhraseQueryParams,
+}
+
+#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
+#[serde(deny_unknown_fields)]
+pub struct MatchPhraseQueryParams {
+ query: String,
+ #[serde(default)]
+ zero_terms_query: MatchAllOrNone,
+ #[serde(default)]
+ analyzer: Option,
+ #[serde(default)]
+ slop: u32,
+}
+
+impl ConvertableToQueryAst for MatchPhraseQuery {
+ fn convert_to_query_ast(self) -> anyhow::Result {
+ let full_text_params = FullTextParams {
+ tokenizer: self.params.analyzer,
+ mode: FullTextMode::Phrase {
+ slop: self.params.slop,
+ },
+ zero_terms_query: self.params.zero_terms_query,
+ };
+ Ok(QueryAst::FullText(FullTextQuery {
+ field: self.field,
+ text: self.params.query,
+ params: full_text_params,
+ }))
+ }
+}
+
+// --------------
+//
+// Below is the Serialization/Deserialization code
+// The difficulty here is to support the two following formats:
+//
+// `{"field": {"query": "my query", "slop": 1}}`
+// `{"field": "my query"}`
+//
+// We don't use untagged enum to support this, in order to keep good errors.
+//
+// The code below is adapted from solution described here: https://serde.rs/string-or-struct.html
+
+#[derive(Serialize, Deserialize)]
+#[serde(transparent)]
+struct MatchPhraseQueryParamsForDeserialization {
+ #[serde(deserialize_with = "string_or_struct")]
+ inner: MatchPhraseQueryParams,
+}
+
+impl From for OneFieldMap {
+ fn from(match_phrase_query: MatchPhraseQuery) -> OneFieldMap {
+ OneFieldMap {
+ field: match_phrase_query.field,
+ value: match_phrase_query.params,
+ }
+ }
+}
+
+impl From> for MatchPhraseQuery {
+ fn from(match_query_params: OneFieldMap) -> Self {
+ let OneFieldMap { field, value } = match_query_params;
+ MatchPhraseQuery {
+ field,
+ params: value.inner,
+ }
+ }
+}
+
+struct MatchQueryParamsStringOrStructVisitor;
+
+impl<'de> Visitor<'de> for MatchQueryParamsStringOrStructVisitor {
+ type Value = MatchPhraseQueryParams;
+
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ formatter.write_str("string or map containing the parameters of a match query.")
+ }
+
+ fn visit_str(self, query: &str) -> Result
+ where E: serde::de::Error {
+ Ok(MatchPhraseQueryParams {
+ query: query.to_string(),
+ zero_terms_query: Default::default(),
+ analyzer: None,
+ slop: 0,
+ })
+ }
+
+ fn visit_map(self, map: M) -> Result
+ where M: MapAccess<'de> {
+ Deserialize::deserialize(de::value::MapAccessDeserializer::new(map))
+ }
+}
+
+fn string_or_struct<'de, D>(deserializer: D) -> Result
+where D: Deserializer<'de> {
+ deserializer.deserialize_any(MatchQueryParamsStringOrStructVisitor)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_deserialize_match_query_string() {
+ // We accept a single string
+ let match_query: MatchPhraseQuery =
+ serde_json::from_str(r#"{"my_field": "my_query"}"#).unwrap();
+ assert_eq!(match_query.field, "my_field");
+ assert_eq!(&match_query.params.query, "my_query");
+ assert_eq!(match_query.params.slop, 0u32);
+ assert!(match_query.params.analyzer.is_none());
+ assert_eq!(
+ match_query.params.zero_terms_query,
+ MatchAllOrNone::MatchNone
+ );
+ }
+
+ #[test]
+ fn test_deserialize_match_query_struct() {
+ // We accept a struct too.
+ let match_query: MatchPhraseQuery = serde_json::from_str(
+ r#"
+ {"my_field":
+ {
+ "query": "my_query",
+ "slop": 1
+ }
+ }
+ "#,
+ )
+ .unwrap();
+ assert_eq!(match_query.field, "my_field");
+ assert_eq!(&match_query.params.query, "my_query");
+ assert_eq!(match_query.params.slop, 1u32);
+ }
+
+ #[test]
+ fn test_deserialize_match_query_nice_errors() {
+ let deser_error = serde_json::from_str::(
+ r#"{"my_field": {"query": "my_query", "wrong_param": 2}}"#,
+ )
+ .unwrap_err();
+ assert!(deser_error
+ .to_string()
+ .contains("unknown field `wrong_param`"));
+ }
+
+ #[test]
+ fn test_match_query() {
+ let match_query = MatchPhraseQuery {
+ field: "body".to_string(),
+ params: MatchPhraseQueryParams {
+ analyzer: Some("whitespace".to_string()),
+ query: "hello".to_string(),
+ slop: 2u32,
+ zero_terms_query: crate::MatchAllOrNone::MatchAll,
+ },
+ };
+ let ast = match_query.convert_to_query_ast().unwrap();
+ let QueryAst::FullText(FullTextQuery {
+ field,
+ text,
+ params,
+ }) = ast
+ else {
+ panic!()
+ };
+ assert_eq!(field, "body");
+ assert_eq!(text, "hello");
+ assert_eq!(params.mode, FullTextMode::Phrase { slop: 2u32 });
+ assert_eq!(params.zero_terms_query, MatchAllOrNone::MatchAll);
+ }
+}
diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs
index 3c76ab110e8..7f06683844e 100644
--- a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs
@@ -132,11 +132,8 @@ where D: Deserializer<'de> {
#[cfg(test)]
mod tests {
- use super::MatchQueryParams;
- use crate::elastic_query_dsl::match_query::MatchQuery;
- use crate::elastic_query_dsl::ConvertableToQueryAst;
- use crate::query_ast::{FullTextMode, FullTextQuery, QueryAst};
- use crate::{BooleanOperand, MatchAllOrNone};
+ use super::*;
+ use crate::query_ast::FullTextMode;
#[test]
fn test_deserialize_match_query_string() {
diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs
index 537b8c83d2b..f8cc776d8c8 100644
--- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs
@@ -21,7 +21,9 @@ use serde::{Deserialize, Serialize};
mod bool_query;
mod exists_query;
+mod match_phrase_query;
mod match_query;
+mod multi_match;
mod one_field_map;
mod phrase_prefix_query;
mod query_string_query;
@@ -30,39 +32,44 @@ mod term_query;
use bool_query::BoolQuery;
pub use one_field_map::OneFieldMap;
-use phrase_prefix_query::MatchPhrasePrefix;
+use phrase_prefix_query::MatchPhrasePrefixQuery;
pub(crate) use query_string_query::QueryStringQuery;
use range_query::RangeQuery;
use term_query::TermQuery;
use crate::elastic_query_dsl::exists_query::ExistsQuery;
+use crate::elastic_query_dsl::match_phrase_query::MatchPhraseQuery;
use crate::elastic_query_dsl::match_query::MatchQuery;
+use crate::elastic_query_dsl::multi_match::MultiMatchQuery;
use crate::not_nan_f32::NotNaNf32;
use crate::query_ast::QueryAst;
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone, Copy, Default)]
-struct MatchAllQuery {
+pub(crate) struct MatchAllQuery {
pub boost: Option,
}
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone, Copy)]
-struct MatchNoneQuery;
+pub(crate) struct MatchNoneQuery;
-#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)]
+#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
-enum ElasticQueryDslInner {
+pub(crate) enum ElasticQueryDslInner {
QueryString(QueryStringQuery),
Bool(BoolQuery),
Term(TermQuery),
MatchAll(MatchAllQuery),
MatchNone(MatchNoneQuery),
Match(MatchQuery),
- MatchPhrasePrefix(MatchPhrasePrefix),
+ MatchPhrase(MatchPhraseQuery),
+ MatchPhrasePrefix(MatchPhrasePrefixQuery),
+ MultiMatch(MultiMatchQuery),
Range(RangeQuery),
Exists(ExistsQuery),
}
-#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)]
+#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
#[serde(transparent)]
pub struct ElasticQueryDsl(ElasticQueryDslInner);
@@ -95,12 +102,14 @@ impl ConvertableToQueryAst for ElasticQueryDslInner {
}
}
Self::MatchNone(_) => Ok(QueryAst::MatchNone),
+ Self::MatchPhrase(match_phrase_query) => match_phrase_query.convert_to_query_ast(),
Self::MatchPhrasePrefix(match_phrase_prefix) => {
match_phrase_prefix.convert_to_query_ast()
}
Self::Range(range_query) => range_query.convert_to_query_ast(),
Self::Match(match_query) => match_query.convert_to_query_ast(),
Self::Exists(exists_query) => exists_query.convert_to_query_ast(),
+ Self::MultiMatch(multi_match_query) => multi_match_query.convert_to_query_ast(),
}
}
}
diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs
new file mode 100644
index 00000000000..16f7e89134b
--- /dev/null
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs
@@ -0,0 +1,100 @@
+// Copyright (C) 2023 Quickwit, Inc.
+//
+// Quickwit is offered under the AGPL v3.0 and as commercial software.
+// For commercial licensing, contact us at hello@quickwit.io.
+//
+// AGPL:
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use serde::Deserialize;
+
+use crate::elastic_query_dsl::bool_query::BoolQuery;
+use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner};
+
+/// Multi match queries are a bit odd: they end up being expanded into another type of query.
+/// In Quickwit, we perform this expansion in a generic way at deserialization time.
+#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
+#[serde(try_from = "MultiMatchQueryForDeserialization")]
+pub struct MultiMatchQuery(Box);
+
+#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
+struct MultiMatchQueryForDeserialization {
+ #[serde(rename = "type")]
+ match_type: MatchType,
+ // `other_parameters` dynamically collects every parameter besides `type` and `fields`.
+ // We then expand the query at the JSON level and deserialize the right object.
+ #[serde(flatten)]
+ other_parameters: serde_json::Map,
+ fields: Vec,
+}
+
+/// Deserializes, for a single field of a `multi_match` query, the one-field query that
+/// `match_type` maps to.
+///
+/// `json_object` holds the parameters shared by every field, i.e. everything the caller
+/// collected besides `type` and `fields`. The per-field queries are deserialized from a
+/// one-entry map of the form `{"<field>": { ..params.. }}`, so the parameters are nested
+/// under the field name. Inserting a `"field"` key next to the parameters instead produces
+/// a multi-entry map (`{"query": .., "field": ..}`) that these types reject.
+///
+/// NOTE(review): `MatchQuery` is assumed to share the `OneFieldMap` wire format used by
+/// `MatchPhraseQuery` and `MatchPhrasePrefixQuery` — confirm in `match_query.rs`.
+///
+/// Returns the deserialization error of the underlying query type if the parameters are
+/// not valid for it (e.g. an unknown parameter).
+fn deserialize_one(
+    match_type: MatchType,
+    field: &str,
+    json_object: serde_json::Map<String, serde_json::Value>,
+) -> serde_json::Result<ElasticQueryDslInner> {
+    // Build `{"<field>": { ..params.. }}`, the shape expected by the per-field queries.
+    let mut one_field_map = serde_json::Map::new();
+    one_field_map.insert(field.to_string(), serde_json::Value::Object(json_object));
+    let json_val = serde_json::Value::Object(one_field_map);
+    match match_type {
+        MatchType::Phrase => {
+            let phrase: crate::elastic_query_dsl::MatchPhraseQuery =
+                serde_json::from_value(json_val)?;
+            Ok(ElasticQueryDslInner::MatchPhrase(phrase))
+        }
+        MatchType::PhrasePrefix => {
+            let phrase_prefix: crate::elastic_query_dsl::MatchPhrasePrefixQuery =
+                serde_json::from_value(json_val)?;
+            Ok(ElasticQueryDslInner::MatchPhrasePrefix(phrase_prefix))
+        }
+        MatchType::MostFields => {
+            let match_query: crate::elastic_query_dsl::MatchQuery =
+                serde_json::from_value(json_val)?;
+            Ok(ElasticQueryDslInner::Match(match_query))
+        }
+    }
+}
+
+impl TryFrom for MultiMatchQuery {
+ type Error = serde_json::Error;
+
+ fn try_from(multi_match_query: MultiMatchQueryForDeserialization) -> Result {
+ let mut children = Vec::new();
+ for field in multi_match_query.fields {
+ let child = deserialize_one(
+ multi_match_query.match_type,
+ &field,
+ multi_match_query.other_parameters.clone(),
+ )?;
+ children.push(child);
+ }
+ let bool_query = BoolQuery::union(children);
+ Ok(MultiMatchQuery(Box::new(ElasticQueryDslInner::Bool(
+ bool_query.into(),
+ ))))
+ }
+}
+
+#[derive(Deserialize, Debug, Eq, PartialEq, Clone, Copy)]
+#[serde(rename_all = "snake_case")]
+pub enum MatchType {
+ MostFields,
+ Phrase,
+ PhrasePrefix,
+}
+
+impl ConvertableToQueryAst for MultiMatchQuery {
+ fn convert_to_query_ast(self) -> anyhow::Result {
+ self.0.convert_to_query_ast()
+ }
+}
diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs
index 875410fea09..a0eb135328e 100644
--- a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs
@@ -24,7 +24,7 @@ use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner};
use crate::query_ast::{self, FullTextMode, FullTextParams, QueryAst};
use crate::MatchAllOrNone;
-pub type MatchPhrasePrefix = OneFieldMap;
+pub type MatchPhrasePrefixQuery = OneFieldMap;
fn default_max_expansions() -> u32 {
50
@@ -32,7 +32,7 @@ fn default_max_expansions() -> u32 {
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
-pub struct PhrasePrefixValue {
+pub struct PhrasePrefixParams {
pub query: String,
#[serde(default)]
pub analyzer: Option,
@@ -44,15 +44,15 @@ pub struct PhrasePrefixValue {
pub zero_terms_query: MatchAllOrNone,
}
-impl From for ElasticQueryDslInner {
- fn from(term_query: MatchPhrasePrefix) -> Self {
+impl From for ElasticQueryDslInner {
+ fn from(term_query: MatchPhrasePrefixQuery) -> Self {
Self::MatchPhrasePrefix(term_query)
}
}
-impl ConvertableToQueryAst for MatchPhrasePrefix {
+impl ConvertableToQueryAst for MatchPhrasePrefixQuery {
fn convert_to_query_ast(self) -> anyhow::Result {
- let PhrasePrefixValue {
+ let PhrasePrefixParams {
query,
analyzer,
max_expansions,
@@ -76,15 +76,16 @@ impl ConvertableToQueryAst for MatchPhrasePrefix {
#[cfg(test)]
mod tests {
- use super::{MatchAllOrNone, MatchPhrasePrefix, PhrasePrefixValue};
+ use super::{MatchAllOrNone, MatchPhrasePrefixQuery, PhrasePrefixParams};
#[test]
fn test_term_query_simple() {
let phrase_prefix_json = r#"{ "message": { "query": "quick brown f" } }"#;
- let phrase_prefix: MatchPhrasePrefix = serde_json::from_str(phrase_prefix_json).unwrap();
- let expected = MatchPhrasePrefix {
+ let phrase_prefix: MatchPhrasePrefixQuery =
+ serde_json::from_str(phrase_prefix_json).unwrap();
+ let expected = MatchPhrasePrefixQuery {
field: "message".to_string(),
- value: PhrasePrefixValue {
+ value: PhrasePrefixParams {
query: "quick brown f".to_string(),
analyzer: None,
max_expansions: 50,