Skip to content

Commit

Permalink
Add rest api test and clarify docs about leniency
Browse files Browse the repository at this point in the history
  • Loading branch information
rdettai committed Dec 9, 2024
1 parent 2951107 commit f04c1b7
Show file tree
Hide file tree
Showing 11 changed files with 54 additions and 27 deletions.
22 changes: 19 additions & 3 deletions docs/reference/es_compatible_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ The following query types are supported.
| `fields` | `String[]` (Optional) | Default search target fields. | - |
| `default_operator` | `"AND"` or `"OR"` | In the absence of boolean operator defines whether terms should be combined as a conjunction (`AND`) or disjunction (`OR`). | `OR` |
| `boost` | `Number` | Multiplier boost for score computation. | 1.0 |
| `lenient` | `Boolean` | [See note](#about-the-lenient-argument). | false |


### `bool`
Expand Down Expand Up @@ -494,7 +495,7 @@ The following query types are supported.
| `operator` | `"AND"` or `"OR"` | Defines whether all terms should be present (`AND`) or if at least one term is sufficient to match (`OR`). | OR |
| `zero_terms_query` | `all` or `none` | Defines if all (`all`) or no documents (`none`) should be returned if the query does not contain any terms after tokenization. | `none` |
| `boost` | `Number` | Multiplier boost for score computation | 1.0 |

| `lenient` | `Boolean` | [See note](#about-the-lenient-argument). | false |



Expand Down Expand Up @@ -637,8 +638,17 @@ Contrary to ES/Opensearch, in Quickwit, at most 50 terms will be considered when
}
```

#### Supported Multi-match Queries
| Type | Description |
#### Supported parameters

| Variable | Type | Description | Default value |
| ------------------ | --------------------- | ---------------------------------------------| ------------- |
| `type` | `String` | See supported types below | `most_fields` |
| `fields` | `String[]` (Optional) | Default search target fields. | - |
| `lenient` | `Boolean` | [See note](#about-the-lenient-argument). | false |

Supported types:

| `type` value | Description |
| --------------- | ------------------------------------------------------------------------------------------- |
| `most_fields` | Finds documents matching any field and combines the `_score` from each field (default). |
| `phrase` | Runs a `match_phrase` query on each field. |
Expand Down Expand Up @@ -721,6 +731,12 @@ Query matching only documents containing a non-null value for a given field.
| `field` | String | Only documents with a value for field will be returned. | - |


### About the `lenient` argument

Quickwit and Elasticsearch have different interpretations of the `lenient` setting:
- In Quickwit, lenient mode allows ignoring parts of the query that reference non-existing columns. This is a behavior that Elasticsearch supports by default.
- In Elasticsearch, lenient mode primarily addresses type errors (such as searching for text in an integer field). Quickwit always supports this behavior, regardless of the `lenient` setting.

## Search multiple indices

Search APIs that accept <index_id> requests path parameter also support multi-target syntax.
Expand Down
2 changes: 2 additions & 0 deletions quickwit/quickwit-doc-mapper/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -702,12 +702,14 @@ mod test {
phrase: "short".to_string(),
max_expansions: 50,
params: params.clone(),
lenient: false,
};
let long = PhrasePrefixQuery {
field: "title".to_string(),
phrase: "not so short".to_string(),
max_expansions: 50,
params: params.clone(),
lenient: false,
};
let mut extractor1 = ExtractPrefixTermRanges::with_schema(&schema, &tokenizer_manager);
extractor1.visit_phrase_prefix(&short).unwrap();
Expand Down
6 changes: 2 additions & 4 deletions quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

use serde::Deserialize;

use super::LeniencyBool;
use crate::elastic_query_dsl::{
ConvertibleToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization,
};
Expand All @@ -42,11 +43,8 @@ pub(crate) struct MatchQueryParams {
pub(crate) operator: BooleanOperand,
#[serde(default)]
pub(crate) zero_terms_query: MatchAllOrNone,
// Quickwit and Elastic have different notions of lenient. For us, it means it's okay to
// disregard the parts of the query that use a non-existing column (which Elastic does by
// default). For Elastic, it covers type errors (searching text in an integer field).
#[serde(default)]
pub(crate) lenient: bool,
pub(crate) lenient: LeniencyBool,
}

impl ConvertibleToQueryAst for MatchQuery {
Expand Down
8 changes: 8 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ use crate::elastic_query_dsl::terms_query::TermsQuery;
use crate::not_nan_f32::NotNaNf32;
use crate::query_ast::QueryAst;

/// Boolean flag carrying the `lenient` setting of an Elasticsearch-compatible query.
///
/// Quickwit and Elasticsearch have different interpretations of leniency:
/// - In Quickwit, lenient mode allows ignoring parts of the query that reference non-existing
/// columns. This is a behavior that Elasticsearch supports by default.
/// - In Elasticsearch, lenient mode primarily addresses type errors (such as searching for text in
/// an integer field). Quickwit always supports this behavior, regardless of the `lenient`
/// setting.
pub type LeniencyBool = bool;

/// Returns the default maximum number of expansions, which is 50.
///
/// NOTE(review): presumably used as a serde `default` provider for `max_expansions`
/// fields; the call sites are not visible here, so confirm before relying on it.
fn default_max_expansions() -> u32 {
    const DEFAULT_MAX_EXPANSIONS: u32 = 50;
    DEFAULT_MAX_EXPANSIONS
}
Expand Down
6 changes: 2 additions & 4 deletions quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use serde::Deserialize;
use serde_with::formats::PreferMany;
use serde_with::{serde_as, OneOrMany};

use super::LeniencyBool;
use crate::elastic_query_dsl::bool_query::BoolQuery;
use crate::elastic_query_dsl::match_bool_prefix::MatchBoolPrefixQuery;
use crate::elastic_query_dsl::match_phrase_query::{MatchPhraseQuery, MatchPhraseQueryParams};
Expand Down Expand Up @@ -48,11 +49,8 @@ struct MultiMatchQueryForDeserialization {
#[serde_as(deserialize_as = "OneOrMany<_, PreferMany>")]
#[serde(default)]
fields: Vec<String>,
// Quickwit and Elastic have different notions of lenient. For us, it means it's okay to
// disregard the parts of the query that use a non-existing column (which Elastic does by
// default). For Elastic, it covers type errors (searching text in an integer field).
#[serde(default)]
lenient: bool,
lenient: LeniencyBool,
}

fn deserialize_match_query_for_one_field(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

use serde::Deserialize;

use super::LeniencyBool;
use crate::elastic_query_dsl::ConvertibleToQueryAst;
use crate::not_nan_f32::NotNaNf32;
use crate::query_ast::UserInputQuery;
Expand All @@ -40,11 +41,8 @@ pub(crate) struct QueryStringQuery {
default_operator: BooleanOperand,
#[serde(default)]
boost: Option<NotNaNf32>,
// Regardless of this option, Quickwit behaves according to Elasticsearch's definition of
// lenient. We include this property here just to accept user queries containing
// this option.
#[serde(default)]
lenient: bool,
lenient: LeniencyBool,
}

impl ConvertibleToQueryAst for QueryStringQuery {
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-query/src/query_ast/full_text_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ pub struct FullTextQuery {
pub field: String,
pub text: String,
pub params: FullTextParams,
/// When `true`, the query tolerates a target field that is missing from the schema.
pub lenient: bool,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub struct PhrasePrefixQuery {
pub phrase: String,
pub max_expansions: u32,
pub params: FullTextParams,
/// When `true`, the query tolerates a target field that is missing from the schema.
pub lenient: bool,
}

Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-query/src/query_ast/user_input_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pub struct UserInputQuery {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub default_fields: Option<Vec<String>>,
pub default_operator: BooleanOperand,
/// When `true`, the query tolerates fields that are missing from the schema.
pub lenient: bool,
}

Expand Down
12 changes: 2 additions & 10 deletions quickwit/quickwit-query/src/query_ast/wildcard_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use crate::{find_field_or_hit_dynamic, InvalidQuery};
/// A wildcard query targeting a single field.
pub struct WildcardQuery {
/// Name of the field the query targets.
pub field: String,
/// The wildcard expression to match (for example a value ending in `*`).
pub value: String,
/// When `true`, the query tolerates a target field that is missing from the schema.
pub lenient: bool,
}

Expand All @@ -43,16 +44,6 @@ impl From<WildcardQuery> for QueryAst {
}
}

impl WildcardQuery {
    /// Test-only convenience constructor for a wildcard query.
    ///
    /// `field` and `value` are converted with `to_string()`. `lenient` is set to
    /// `false`, which keeps the helper in sync with the `lenient` field declared on
    /// the struct and with the struct literals used in the tests.
    #[cfg(test)]
    pub fn from_field_value(field: impl ToString, value: impl ToString) -> Self {
        Self {
            field: field.to_string(),
            value: value.to_string(),
            lenient: false,
        }
    }
}

fn extract_unique_token(mut tokens: Vec<Term>) -> anyhow::Result<Term> {
let term = tokens
.pop()
Expand Down Expand Up @@ -218,6 +209,7 @@ mod tests {
let query = WildcardQuery {
field: "my_field".to_string(),
value: "MyString Wh1ch a nOrMal Tokenizer would cut*".to_string(),
lenient: false,
};
let tokenizer_manager = create_default_quickwit_tokenizer_manager();
for tokenizer in ["raw", "whitespace"] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,19 +226,31 @@ json:
query:
query_string:
query: "true"
fields: ["public", "public.inner"]
fields: ["public", "public.notdefined", "notdefined"]
lenient: true
expected:
hits:
total:
value: 100
---
# trailing wildcard
json:
query:
query_string:
query: "jour*"
fields: ["payload.description", "payload.notdefined", "notdefined"]
lenient: true
expected:
hits:
total:
value: 3
---
# elasticsearch accepts this query
engines:
- quickwit
json:
query:
query_string:
query: "true"
fields: ["public", "public.inner"]
fields: ["public", "public.notdefined"]
status_code: 400

0 comments on commit f04c1b7

Please sign in to comment.