From a4e2e3aa0a12af2b325e60e1cecc05781cf3fa2e Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 4 Jan 2023 08:02:21 +0100 Subject: [PATCH 1/2] Make nightly Clippy mostly happy. --- src/alphabet.rs | 18 +++++++++--------- src/dfa.rs | 2 +- src/index.rs | 2 +- src/levenshtein_nfa.rs | 6 +++--- src/lib.rs | 4 +--- src/parametric_dfa.rs | 6 +++--- src/tests.rs | 20 ++++++++++---------- 7 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/alphabet.rs b/src/alphabet.rs index c2e1fdc..c75322b 100644 --- a/src/alphabet.rs +++ b/src/alphabet.rs @@ -10,8 +10,8 @@ impl FullCharacteristicVector { if align == 0 { self.0[bucket_id] & mask } else { - let left = (self.0[bucket_id] >> align) as u32; - let right = self.0[bucket_id + 1] << (32 - align) as u32; + let left = self.0[bucket_id] >> align; + let right = self.0[bucket_id + 1] << (32 - align); (left | right) & mask } } @@ -51,7 +51,7 @@ impl Alphabet { (c, FullCharacteristicVector(bits)) }) .collect(); - Alphabet { charset: charset } + Alphabet { charset } } } @@ -66,22 +66,22 @@ mod tests { let mut it = alphabet.iter(); { - let &(ref c, ref chi) = it.next().unwrap(); + let (c, chi) = it.next().unwrap(); assert_eq!(*c, 'a'); assert_eq!(chi.0[0], 2u32); } { - let &(ref c, ref chi) = it.next().unwrap(); + let (c, chi) = it.next().unwrap(); assert_eq!(*c, 'h'); assert_eq!(chi.0[0], 1u32); } { - let &(ref c, ref chi) = it.next().unwrap(); + let (c, chi) = it.next().unwrap(); assert_eq!(*c, 'p'); assert_eq!(chi.0[0], 4u32 + 8u32); } { - let &(ref c, ref chi) = it.next().unwrap(); + let (c, chi) = it.next().unwrap(); assert_eq!(*c, 'y'); assert_eq!(chi.0[0], 16u32); } @@ -106,7 +106,7 @@ mod tests { let alphabet = Alphabet::for_query_chars(&query_chars[..]); let mut alphabet_it = alphabet.iter(); { - let &(ref c, ref chi) = alphabet_it.next().unwrap(); + let (c, chi) = alphabet_it.next().unwrap(); assert_eq!(*c, 'a'); assert_eq!(chi.shift_and_mask(0, 7), 7); assert_eq!(chi.shift_and_mask(28, 7), 3); @@ -114,7 +114,7 @@ mod tests { assert_eq!(chi.shift_and_mask(28, 4095), 1 + 2 + 16 + 256); } { - let &(ref c, ref chi) = alphabet_it.next().unwrap(); + let (c, chi) = alphabet_it.next().unwrap(); assert_eq!(*c, 'b'); assert_eq!(chi.shift_and_mask(0, 7), 0); assert_eq!(chi.shift_and_mask(28, 15), 4); diff --git a/src/dfa.rs b/src/dfa.rs index 682c021..c88abc5 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -133,7 +133,7 @@ impl<'a> Utf8DFAStateBuilder<'a> { let bytes: &[u8] = chr.encode_utf8(&mut buffer).as_bytes(); let mut from_state_id_decoded = self.state_id; for (i, b) in bytes[..bytes.len() - 1].iter().cloned().enumerate() { - let remaining_num_bytes = bytes.len() - i as usize - 1 as usize; + let remaining_num_bytes = bytes.len() - i - 1; let default_successor = self.default_successor[remaining_num_bytes]; let mut intermediary_state_id: u32 = self.dfa_builder.transitions[from_state_id_decoded as usize][b as usize]; diff --git a/src/index.rs b/src/index.rs index 89ee5b6..a686740 100644 --- a/src/index.rs +++ b/src/index.rs @@ -21,7 +21,7 @@ impl Index { if item_index == index_len { self.items.push(item.clone()); } - item_index as u32 + item_index } pub fn len(&self) -> u32 { diff --git a/src/levenshtein_nfa.rs b/src/levenshtein_nfa.rs index 051631f..5e936f8 100644 --- a/src/levenshtein_nfa.rs +++ b/src/levenshtein_nfa.rs @@ -3,8 +3,8 @@ use std::cmp::Ordering; #[cfg(test)] pub fn compute_characteristic_vector(query: &[char], c: char) -> u64 { let mut chi = 0u64; - for i in 0..query.len() { - if query[i] == c { + for (i, q) in query.iter().enumerate() { + if *q == c { chi |= 1u64 << i; } } @@ -145,7 +145,7 @@ fn dist(left: u32, right: u32) -> u32 { impl LevenshteinNFA { pub fn levenshtein(max_distance: u8, transposition: bool) -> LevenshteinNFA { LevenshteinNFA { - max_distance: max_distance, + max_distance, damerau: transposition, } } diff --git a/src/lib.rs b/src/lib.rs index 9220461..80f7fc6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,9 +75,7 @@ impl LevenshteinAutomatonBuilder { pub fn new(max_distance: u8, transposition_cost_one: bool) -> LevenshteinAutomatonBuilder { let levenshtein_nfa = LevenshteinNFA::levenshtein(max_distance, transposition_cost_one); let parametric_dfa = ParametricDFA::from_nfa(&levenshtein_nfa); - LevenshteinAutomatonBuilder { - parametric_dfa: parametric_dfa, - } + LevenshteinAutomatonBuilder { parametric_dfa } } /// Builds a Finite Determinstic Automaton to compute diff --git a/src/parametric_dfa.rs b/src/parametric_dfa.rs index 923b42a..773a065 100644 --- a/src/parametric_dfa.rs +++ b/src/parametric_dfa.rs @@ -55,7 +55,7 @@ impl ParametricStateIndex { ParametricStateIndex { state_index: vec![None; max_num_states], state_queue: Vec::with_capacity(100), - num_offsets: num_offsets, + num_offsets, } } @@ -170,7 +170,7 @@ impl ParametricDFA { parametric_state_index.get_or_allocate(default_successor); let mut state_builder = dfa_builder.add_state(state_id, distance, default_successor_id); - for &(ref chr, ref characteristic_vec) in alphabet.iter() { + for (chr, characteristic_vec) in alphabet.iter() { let chi = characteristic_vec.shift_and_mask(state.offset as usize, mask); let dest_state: ParametricState = self.transition(state, chi).apply(state); let dest_state_id = parametric_state_index.get_or_allocate(dest_state); @@ -284,7 +284,7 @@ impl ParametricDFA { distance, max_distance, transitions, - diameter: multistate_diameter as usize, + diameter: multistate_diameter, } } } diff --git a/src/tests.rs b/src/tests.rs index 3096ee8..5cb26a0 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -64,11 +64,11 @@ fn test_levenshtein_dfa_slow() { for left in test_sample.lefts() { for m in 0..4u8 { - let dfa = parametric_dfas[m as usize].build_dfa(&left, false); + let dfa = parametric_dfas[m as usize].build_dfa(left, false); for right in test_sample.rights() { - let expected = levenshtein::levenshtein(&left, &right) as u8; + let expected = levenshtein::levenshtein(left, right) as u8; let expected_distance = make_distance(expected, m); - let result_distance = dfa.eval(&right); + let result_distance = dfa.eval(right); assert_eq!(expected_distance, result_distance); } } @@ -276,8 +276,8 @@ fn test_prefix() { let parametric_dfa = ParametricDFA::from_nfa(&nfa); let dfa = parametric_dfa.build_dfa(q, true); assert_eq!(dfa.eval(q), Distance::Exact(0u8)); - assert_eq!(dfa.eval(&"a"), Distance::AtLeast(1u8)); - assert_eq!(dfa.eval(&"ab"), Distance::AtLeast(1u8)); + assert_eq!(dfa.eval("a"), Distance::AtLeast(1u8)); + assert_eq!(dfa.eval("ab"), Distance::AtLeast(1u8)); for d in 3..10 { assert_eq!(dfa.eval(&"abcdefghij"[..d]), Distance::Exact(0u8)); } @@ -289,11 +289,11 @@ fn test_applied_distance() { let nfa = LevenshteinNFA::levenshtein(1, true); let parametric_dfa = ParametricDFA::from_nfa(&nfa); let dfa = parametric_dfa.build_custom_dfa(q, true, true); - assert_eq!(dfa.eval(&"abcde"), Distance::Exact(0u8)); - assert_eq!(dfa.eval(&"abcd"), Distance::Exact(0u8)); - assert_eq!(dfa.eval(&"abde"), Distance::Exact(1u8)); - assert_eq!(dfa.eval(&"abdce"), Distance::Exact(1u8)); - assert_eq!(dfa.eval(&"abbbb"), Distance::AtLeast(2u8)); + assert_eq!(dfa.eval("abcde"), Distance::Exact(0u8)); + assert_eq!(dfa.eval("abcd"), Distance::Exact(0u8)); + assert_eq!(dfa.eval("abde"), Distance::Exact(1u8)); + assert_eq!(dfa.eval("abdce"), Distance::Exact(1u8)); + assert_eq!(dfa.eval("abbbb"), Distance::AtLeast(2u8)); } fn test_prefix_aux( From 5c6a8d62a85d7d20fe6b81f3fe4b196f3e1253b0 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 4 Jan 2023 08:03:09 +0100 Subject: [PATCH 2/2] Make automatons and builder cloneable to enable more involved concurrency schemes to be used in Tantivy. --- src/dfa.rs | 1 + src/lib.rs | 1 + src/parametric_dfa.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/src/dfa.rs b/src/dfa.rs index c88abc5..2c7d3f6 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -42,6 +42,7 @@ pub const SINK_STATE: u32 = 0u32; /// let distance = dfa.distance(state); /// # } //``` +#[derive(Clone)] pub struct DFA { transitions: Vec<[u32; 256]>, distances: Vec, diff --git a/src/lib.rs b/src/lib.rs index 80f7fc6..6fa769b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,7 @@ use self::parametric_dfa::ParametricDFA; /// /// It wraps a precomputed datastructure that allows to /// produce small (but not minimal) DFA. +#[derive(Clone)] pub struct LevenshteinAutomatonBuilder { parametric_dfa: ParametricDFA, } diff --git a/src/parametric_dfa.rs b/src/parametric_dfa.rs index 773a065..2b1b875 100644 --- a/src/parametric_dfa.rs +++ b/src/parametric_dfa.rs @@ -84,6 +84,7 @@ impl ParametricStateIndex { } } +#[derive(Clone)] pub struct ParametricDFA { distance: Vec, transitions: Vec,