Skip to content

Commit

Permalink
updated tantivy version to get support for phrase query with slop
Browse files Browse the repository at this point in the history
  • Loading branch information
evanxg852000 committed Jul 5, 2022
1 parent 46473dc commit 98d8933
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Support for boolean field
- Support for slop in phrase queries

### Fixed

Expand Down
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions docs/reference/query-language.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,36 @@ Quickwit supports parenthesis to group multiple clauses:
(color:red OR color:green) AND size:large
```

### Slop Operator

Quickwit also supports phrase queries with a slop parameter using the slop operator `~` followed by the value of the slop. For instance, the query `body:"small bike"~2` will match documents containing the word `small`, followed by at most two other words (possibly none), and then the word `bike`.

:::caution
Slop queries can only be used on fields indexed with the [record option](./../configuration/index-config.md#text-type) set to `position`.
:::

#### Examples:

With the following corpus:
```json
[
{"id": 1, "body": "a red bike"},
{"id": 2, "body": "a small blue bike"},
{"id": 3, "body": "a small, rusty, and yellow bike"},
{"id": 4, "body": "fred's small bike"},
{"id": 5, "body": "a tiny shelter"}
]
```
The following queries will output:

- `body:"small bird"~2`: no match []
- `body:"red bike"~2`: matches [1]
- `body:"small blue bike"~3`: matches [2]
- `body:"small bike"`: matches [4]
- `body:"small bike"~1`: matches [2, 4]
- `body:"small bike"~2`: matches [2, 4]
- `body:"small bike"~3`: matches [2, 3, 4]

### Escaping Special Characters

Special reserved characters are: `+` , `^`, `` ` ``, `:`, `{`, `}`, `"`, `[`, `]`, `(`, `)`, `~`, `!`, `\\`, `*`, `SPACE`. Such characters can still appear in query terms, but they need to be escaped by an antislash `\` .
2 changes: 1 addition & 1 deletion quickwit-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ quickwit-storage = { version = "0.3.1", path = "../quickwit-storage" }
rand = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857", default-features = false, features = [
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
Expand Down
2 changes: 1 addition & 1 deletion quickwit-directories/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ quickwit-storage = { version = "0.3.1", path = "../quickwit-storage" }
serde = "1"
serde_cbor = "0.11"
serde_json = "1"
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857", default-features = false, features = [
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
Expand Down
4 changes: 2 additions & 2 deletions quickwit-doc-mapper/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ once_cell = "1.12"
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857", default-features = false, features = [
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
"quickwit"
] }
tantivy-query-grammar = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857" }
tantivy-query-grammar = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224" }
thiserror = "1.0"
tracing = "0.1.29"
typetag = "0.2"
Expand Down
2 changes: 1 addition & 1 deletion quickwit-indexing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ rusoto_kinesis = { version = "0.48", default-features = false, features = [
serde = "1"
serde_json = "1"
serde_yaml = "0.8"
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857", default-features = false, features = [
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
Expand Down
2 changes: 1 addition & 1 deletion quickwit-search/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ quickwit-storage = { version = "0.3.1", path = "../quickwit-storage" }
rayon = "1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857", default-features = false, features = [
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
Expand Down
62 changes: 62 additions & 0 deletions quickwit-search/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ async fn test_single_node_simple() -> anyhow::Result<()> {
let docs = vec![
json!({"title": "snoopy", "body": "Snoopy is an anthropomorphic beagle[5] in the comic strip...", "url": "http://snoopy"}),
json!({"title": "beagle", "body": "The beagle is a breed of small scent hound, similar in appearance to the much larger foxhound.", "url": "http://beagle"}),
json!({"title": "hamsters", "body": "A hamsters is a small rodent popularized these days as pets.", "url": "http://hamsters"}),
];
test_sandbox.add_documents(docs.clone()).await?;
let search_request = SearchRequest {
Expand Down Expand Up @@ -72,6 +73,67 @@ async fn test_single_node_simple() -> anyhow::Result<()> {
Ok(())
}

/// Runs `query` against the index `index_id` with `single_node_search`, using `body` as the
/// only default search field, and asserts that exactly `expected_num_match` documents match.
///
/// Both the reported hit count (`num_hits`) and the number of hits actually returned are
/// checked. Only 5 hits are requested (`max_hits`), so callers are expected to pass an
/// `expected_num_match` of at most 5 — presumably larger values would make the second
/// assertion fail even if the query itself behaved correctly.
///
/// Returns an error if the search itself fails; panics (test failure) when the counts differ.
async fn slop_search_and_check(
    test_sandbox: &TestSandbox,
    index_id: &str,
    query: &str,
    expected_num_match: u64,
) -> anyhow::Result<()> {
    let search_request = SearchRequest {
        index_id: index_id.to_string(),
        query: query.to_string(),
        search_fields: vec!["body".to_string()],
        start_timestamp: None,
        end_timestamp: None,
        max_hits: 5,
        start_offset: 0,
        ..Default::default()
    };
    let single_node_result = single_node_search(
        &search_request,
        &*test_sandbox.metastore(),
        test_sandbox.storage_uri_resolver(),
    )
    .await?;
    // This helper is called many times from a single test: include the query in the failure
    // message so that a failing case can be identified without a debugger.
    assert_eq!(
        single_node_result.num_hits, expected_num_match,
        "unexpected `num_hits` for query `{}`",
        query
    );
    assert_eq!(
        single_node_result.hits.len(),
        expected_num_match as usize,
        "unexpected number of returned hits for query `{}`",
        query
    );
    Ok(())
}

/// Checks phrase queries with a slop operator (`"..."~N`) against a small corpus.
///
/// The `body` field is indexed with `record: position`, which slop/phrase queries require.
/// Each case pairs a query with the number of documents it is expected to match.
#[tokio::test]
async fn test_slop_queries() -> anyhow::Result<()> {
    let index_id = "slop-query";
    // YAML is indentation-sensitive: `type`/`record` must be nested under their `- name` entry.
    let doc_mapping_yaml = r#"
        field_mappings:
          - name: title
            type: text
          - name: body
            type: text
            record: position
    "#;

    let test_sandbox = TestSandbox::create(index_id, doc_mapping_yaml, "{}", &["body"]).await?;
    let docs = vec![
        json!({"title": "one", "body": "a red bike"}),
        json!({"title": "two", "body": "a small blue bike"}),
        json!({"title": "three", "body": "a small, rusty, and yellow bike"}),
        json!({"title": "four", "body": "fred's small bike"}),
        json!({"title": "five", "body": "a tiny shelter"}),
    ];
    // `docs` is not used afterwards, so it is moved rather than cloned.
    test_sandbox.add_documents(docs).await?;

    // (query, expected number of matching documents)
    let cases: [(&str, u64); 8] = [
        ("\"small bird\"~2", 0),
        ("\"red bike\"~2", 1),
        ("\"small blue bike\"~3", 1),
        ("\"small bike\"", 1),
        ("\"small bike\"~1", 2),
        ("\"small bike\"~2", 2),
        ("\"small bike\"~3", 3),
        ("\"tiny shelter\"~3", 1),
    ];
    // Cases run sequentially, in order; the first search error aborts the test.
    for &(query, expected_num_match) in cases.iter() {
        slop_search_and_check(&test_sandbox, index_id, query, expected_num_match).await?;
    }

    Ok(())
}

// TODO remove me once `Iterator::is_sorted_by_key` is stabilized.
fn is_sorted<E, I: Iterator<Item = E>>(mut it: I) -> bool
where E: Ord {
Expand Down
2 changes: 1 addition & 1 deletion quickwit-storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ rusoto_s3 = { version = "0.48", default-features = false, features = [
] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "a568857", default-features = false, features = [
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5750224", default-features = false, features = [
"mmap",
"lz4-compression",
"zstd-compression",
Expand Down

0 comments on commit 98d8933

Please sign in to comment.