Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make automatons and builder cloneable #13

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/alphabet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ impl FullCharacteristicVector {
if align == 0 {
self.0[bucket_id] & mask
} else {
let left = (self.0[bucket_id] >> align) as u32;
let right = self.0[bucket_id + 1] << (32 - align) as u32;
let left = self.0[bucket_id] >> align;
let right = self.0[bucket_id + 1] << (32 - align);
(left | right) & mask
}
}
Expand Down Expand Up @@ -51,7 +51,7 @@ impl Alphabet {
(c, FullCharacteristicVector(bits))
})
.collect();
Alphabet { charset: charset }
Alphabet { charset }
}
}

Expand All @@ -66,22 +66,22 @@ mod tests {
let mut it = alphabet.iter();

{
let &(ref c, ref chi) = it.next().unwrap();
let (c, chi) = it.next().unwrap();
assert_eq!(*c, 'a');
assert_eq!(chi.0[0], 2u32);
}
{
let &(ref c, ref chi) = it.next().unwrap();
let (c, chi) = it.next().unwrap();
assert_eq!(*c, 'h');
assert_eq!(chi.0[0], 1u32);
}
{
let &(ref c, ref chi) = it.next().unwrap();
let (c, chi) = it.next().unwrap();
assert_eq!(*c, 'p');
assert_eq!(chi.0[0], 4u32 + 8u32);
}
{
let &(ref c, ref chi) = it.next().unwrap();
let (c, chi) = it.next().unwrap();
assert_eq!(*c, 'y');
assert_eq!(chi.0[0], 16u32);
}
Expand All @@ -106,15 +106,15 @@ mod tests {
let alphabet = Alphabet::for_query_chars(&query_chars[..]);
let mut alphabet_it = alphabet.iter();
{
let &(ref c, ref chi) = alphabet_it.next().unwrap();
let (c, chi) = alphabet_it.next().unwrap();
assert_eq!(*c, 'a');
assert_eq!(chi.shift_and_mask(0, 7), 7);
assert_eq!(chi.shift_and_mask(28, 7), 3);
assert_eq!(chi.shift_and_mask(28, 127), 1 + 2 + 16);
assert_eq!(chi.shift_and_mask(28, 4095), 1 + 2 + 16 + 256);
}
{
let &(ref c, ref chi) = alphabet_it.next().unwrap();
let (c, chi) = alphabet_it.next().unwrap();
assert_eq!(*c, 'b');
assert_eq!(chi.shift_and_mask(0, 7), 0);
assert_eq!(chi.shift_and_mask(28, 15), 4);
Expand Down
3 changes: 2 additions & 1 deletion src/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub const SINK_STATE: u32 = 0u32;
/// let distance = dfa.distance(state);
/// # }
/// ```
#[derive(Clone)]
pub struct DFA {
transitions: Vec<[u32; 256]>,
distances: Vec<Distance>,
Expand Down Expand Up @@ -133,7 +134,7 @@ impl<'a> Utf8DFAStateBuilder<'a> {
let bytes: &[u8] = chr.encode_utf8(&mut buffer).as_bytes();
let mut from_state_id_decoded = self.state_id;
for (i, b) in bytes[..bytes.len() - 1].iter().cloned().enumerate() {
let remaining_num_bytes = bytes.len() - i as usize - 1 as usize;
let remaining_num_bytes = bytes.len() - i - 1;
let default_successor = self.default_successor[remaining_num_bytes];
let mut intermediary_state_id: u32 =
self.dfa_builder.transitions[from_state_id_decoded as usize][b as usize];
Expand Down
2 changes: 1 addition & 1 deletion src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl<I: Eq + Hash + Clone + Debug> Index<I> {
if item_index == index_len {
self.items.push(item.clone());
}
item_index as u32
item_index
}

pub fn len(&self) -> u32 {
Expand Down
6 changes: 3 additions & 3 deletions src/levenshtein_nfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::cmp::Ordering;
#[cfg(test)]
pub fn compute_characteristic_vector(query: &[char], c: char) -> u64 {
let mut chi = 0u64;
for i in 0..query.len() {
if query[i] == c {
for (i, q) in query.iter().enumerate() {
if *q == c {
chi |= 1u64 << i;
}
}
Expand Down Expand Up @@ -145,7 +145,7 @@ fn dist(left: u32, right: u32) -> u32 {
impl LevenshteinNFA {
pub fn levenshtein(max_distance: u8, transposition: bool) -> LevenshteinNFA {
LevenshteinNFA {
max_distance: max_distance,
max_distance,
damerau: transposition,
}
}
Expand Down
5 changes: 2 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ use self::parametric_dfa::ParametricDFA;
///
/// It wraps a precomputed data structure that makes it possible to
/// produce small (but not minimal) DFAs.
#[derive(Clone)]
pub struct LevenshteinAutomatonBuilder {
parametric_dfa: ParametricDFA,
}
Expand All @@ -75,9 +76,7 @@ impl LevenshteinAutomatonBuilder {
pub fn new(max_distance: u8, transposition_cost_one: bool) -> LevenshteinAutomatonBuilder {
let levenshtein_nfa = LevenshteinNFA::levenshtein(max_distance, transposition_cost_one);
let parametric_dfa = ParametricDFA::from_nfa(&levenshtein_nfa);
LevenshteinAutomatonBuilder {
parametric_dfa: parametric_dfa,
}
LevenshteinAutomatonBuilder { parametric_dfa }
}

/// Builds a Finite Deterministic Automaton to compute
Expand Down
7 changes: 4 additions & 3 deletions src/parametric_dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl ParametricStateIndex {
ParametricStateIndex {
state_index: vec![None; max_num_states],
state_queue: Vec::with_capacity(100),
num_offsets: num_offsets,
num_offsets,
}
}

Expand Down Expand Up @@ -84,6 +84,7 @@ impl ParametricStateIndex {
}
}

#[derive(Clone)]
pub struct ParametricDFA {
distance: Vec<u8>,
transitions: Vec<Transition>,
Expand Down Expand Up @@ -170,7 +171,7 @@ impl ParametricDFA {
parametric_state_index.get_or_allocate(default_successor);
let mut state_builder =
dfa_builder.add_state(state_id, distance, default_successor_id);
for &(ref chr, ref characteristic_vec) in alphabet.iter() {
for (chr, characteristic_vec) in alphabet.iter() {
let chi = characteristic_vec.shift_and_mask(state.offset as usize, mask);
let dest_state: ParametricState = self.transition(state, chi).apply(state);
let dest_state_id = parametric_state_index.get_or_allocate(dest_state);
Expand Down Expand Up @@ -284,7 +285,7 @@ impl ParametricDFA {
distance,
max_distance,
transitions,
diameter: multistate_diameter as usize,
diameter: multistate_diameter,
}
}
}
20 changes: 10 additions & 10 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ fn test_levenshtein_dfa_slow() {

for left in test_sample.lefts() {
for m in 0..4u8 {
let dfa = parametric_dfas[m as usize].build_dfa(&left, false);
let dfa = parametric_dfas[m as usize].build_dfa(left, false);
for right in test_sample.rights() {
let expected = levenshtein::levenshtein(&left, &right) as u8;
let expected = levenshtein::levenshtein(left, right) as u8;
let expected_distance = make_distance(expected, m);
let result_distance = dfa.eval(&right);
let result_distance = dfa.eval(right);
assert_eq!(expected_distance, result_distance);
}
}
Expand Down Expand Up @@ -276,8 +276,8 @@ fn test_prefix() {
let parametric_dfa = ParametricDFA::from_nfa(&nfa);
let dfa = parametric_dfa.build_dfa(q, true);
assert_eq!(dfa.eval(q), Distance::Exact(0u8));
assert_eq!(dfa.eval(&"a"), Distance::AtLeast(1u8));
assert_eq!(dfa.eval(&"ab"), Distance::AtLeast(1u8));
assert_eq!(dfa.eval("a"), Distance::AtLeast(1u8));
assert_eq!(dfa.eval("ab"), Distance::AtLeast(1u8));
for d in 3..10 {
assert_eq!(dfa.eval(&"abcdefghij"[..d]), Distance::Exact(0u8));
}
Expand All @@ -289,11 +289,11 @@ fn test_applied_distance() {
let nfa = LevenshteinNFA::levenshtein(1, true);
let parametric_dfa = ParametricDFA::from_nfa(&nfa);
let dfa = parametric_dfa.build_custom_dfa(q, true, true);
assert_eq!(dfa.eval(&"abcde"), Distance::Exact(0u8));
assert_eq!(dfa.eval(&"abcd"), Distance::Exact(0u8));
assert_eq!(dfa.eval(&"abde"), Distance::Exact(1u8));
assert_eq!(dfa.eval(&"abdce"), Distance::Exact(1u8));
assert_eq!(dfa.eval(&"abbbb"), Distance::AtLeast(2u8));
assert_eq!(dfa.eval("abcde"), Distance::Exact(0u8));
assert_eq!(dfa.eval("abcd"), Distance::Exact(0u8));
assert_eq!(dfa.eval("abde"), Distance::Exact(1u8));
assert_eq!(dfa.eval("abdce"), Distance::Exact(1u8));
assert_eq!(dfa.eval("abbbb"), Distance::AtLeast(2u8));
}

fn test_prefix_aux(
Expand Down