Skip to content

Commit

Permalink
fix slop (#2031)
Browse files Browse the repository at this point in the history
Fix slop by carrying slop so far for multiterms.
Define slop contract in the API
  • Loading branch information
PSeitz authored May 10, 2023
1 parent d3357a8 commit 0eafbaa
Show file tree
Hide file tree
Showing 3 changed files with 358 additions and 14 deletions.
36 changes: 36 additions & 0 deletions src/query/phrase_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,20 @@ pub mod tests {
Ok(())
}

#[test]
pub fn test_phrase_score_with_slop_bug_2() -> crate::Result<()> {
    // Regression test for slop handling with more than two query terms.
    // In both corpora only "a x b x c" contains all of "a", "b" and "c";
    // it has one extra term in each gap, which fits a slop budget of 2.
    // The second document of each corpus lacks one of the query terms and
    // therefore must not be reported as a match.
    let matches_for = |docs: &[&'static str]| -> crate::Result<usize> {
        let index = create_index(docs)?;
        Ok(test_query(2, &index, vec!["a", "b", "c"]).len())
    };

    assert_eq!(matches_for(&["a x b x c", "a a c"])?, 1);
    assert_eq!(matches_for(&["a x b x c", "b c c"])?, 1);

    Ok(())
}

fn test_query(slop: u32, index: &Index, texts: Vec<&str>) -> Vec<f32> {
let text_field = index.schema().get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher();
Expand Down Expand Up @@ -212,11 +226,33 @@ pub mod tests {
pub fn test_phrase_score_with_slop_size() -> crate::Result<()> {
    // "a" and "c" are separated by 2, 3 and 4 other terms respectively.
    // With a slop of 3, two of the three documents are expected to match
    // (presumably the first two, whose gaps fit the budget).
    let corpus = ["a b e c", "a e e e c", "a e e e e c"];
    let index = create_index(&corpus)?;
    let hit_scores = test_query(3, &index, vec!["a", "c"]);

    assert_eq!(hit_scores.len(), 2);
    assert_nearly_equals!(hit_scores[0], 0.29086056);
    assert_nearly_equals!(hit_scores[1], 0.26706287);
    Ok(())
}

#[test]
pub fn test_phrase_slop() -> crate::Result<()> {
    // Indexes a single document and returns how many hits the phrase query
    // made of `terms` produces with the given `slop`.
    let hits = |doc: &'static str, slop: u32, terms: Vec<&str>| -> crate::Result<usize> {
        let index = create_index(&[doc])?;
        Ok(test_query(slop, &index, terms).len())
    };

    // One extra term in a single gap fits a slop budget of 1.
    assert_eq!(hits("a x b c", 1, vec!["a", "b", "c"])?, 1);

    // One extra term in each of the two gaps needs a budget of 2, so slop 1 is not enough.
    assert_eq!(hits("a x b x c", 1, vec!["a", "b", "c"])?, 0);

    // A transposition moves both terms by one position, i.e. costs 2.
    assert_eq!(hits("a b", 1, vec!["b", "a"])?, 0);
    assert_eq!(hits("a b", 2, vec!["b", "a"])?, 1);

    Ok(())
}

#[test]
pub fn test_phrase_score_with_slop_ordering() -> crate::Result<()> {
let index = create_index(&[
Expand Down
10 changes: 10 additions & 0 deletions src/query/phrase_query/phrase_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ impl PhraseQuery {
/// Slop allowed for the phrase.
///
/// The query will match if its terms are separated by `slop` terms at most.
/// The slop can be considered a budget between all terms.
/// E.g. "A B C" with slop 1 allows "A X B C", "A B X C", but not "A X B X C".
///
/// Transposition costs 2, e.g. "A B" with slop 1 will not match "B A", but it would with slop 2.
/// Transposition is not a special case, in the example above A is moved 1 position and B is
/// moved 1 position, so the slop is 2.
///
/// As a result, slop works in both directions, so the order of the terms may change as long as
/// they respect the slop.
///
/// By default the slop is 0 meaning query terms need to be adjacent.
pub fn set_slop(&mut self, value: u32) {
self.slop = value;
Expand Down
Loading

0 comments on commit 0eafbaa

Please sign in to comment.