Skip to content

Commit

Permalink
new method findSimilarWords
Browse files Browse the repository at this point in the history
  • Loading branch information
jaumeortola committed Jul 29, 2020
1 parent f1b7679 commit 660c98a
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
29 changes: 27 additions & 2 deletions morfologik-speller/src/main/java/morfologik/speller/Speller.java
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,26 @@ private void addReplacement(List<CandidateData> candidates, String replacement)
}
}

/**
 * Finds words similar to the given one, also when the given word is itself
 * a correct word that exists in the dictionary.
 *
 * <p>Delegates to {@link #findReplacementCandidates(String, boolean)} with the
 * second argument set to {@code true}.
 *
 * @param word The original word.
 * @return A list of suggested candidate replacements.
 */
public ArrayList<CandidateData> findSimilarWordCandidates(String word) {
  // Ask for candidates regardless of whether the word is found in the dictionary.
  final boolean evenIfWordInDictionary = true;
  return findReplacementCandidates(word, evenIfWordInDictionary);
}

/**
 * Finds words similar to the given one and returns them as plain strings.
 *
 * <p>Takes the candidates produced by {@link #findSimilarWordCandidates(String)}
 * and keeps only each candidate's word, in the same order.
 *
 * @param word The original word.
 * @return A list of similar words.
 */
public ArrayList<String> findSimilarWords(String word) {
  final List<CandidateData> candidates = findSimilarWordCandidates(word);
  final int count = candidates.size();
  // Pre-size the output: exactly one string is produced per candidate.
  final ArrayList<String> similarWords = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    similarWords.add(candidates.get(i).getWord());
  }
  return similarWords;
}


/**
* Find suggestions by using K. Oflazer's algorithm. See Jan Daciuk's s_fsa
* package, spell.cc for further explanation.
Expand All @@ -391,6 +411,7 @@ public ArrayList<String> findReplacements(String word) {
return resultSuggestions;
}


/**
* Find and return suggestions by using K. Oflazer's algorithm. See Jan Daciuk's s_fsa
* package, spell.cc for further explanation. This method is identical to
Expand All @@ -400,14 +421,18 @@ public ArrayList<String> findReplacements(String word) {
* @return A list of suggested candidate replacements.
*/
public ArrayList<CandidateData> findReplacementCandidates(String word) {
  // Single-argument form: delegate to the two-argument overload with the
  // "even if the word is in the dictionary" flag switched off.
  final boolean evenIfWordInDictionary = false;
  return findReplacementCandidates(word, evenIfWordInDictionary);
}

public ArrayList<CandidateData> findReplacementCandidates(String word, boolean evenIfWordInDictionary) {
if (!dictionaryMetadata.getInputConversionPairs().isEmpty()) {
word = DictionaryLookup.applyReplacements(word, dictionaryMetadata.getInputConversionPairs());
}

// candidate strings, including some additional data such as edit distance from the original word.
List<CandidateData> candidates = new ArrayList<>();

if (word.length() > 0 && word.length() < MAX_WORD_LENGTH && !isInDictionary(word)) {
if (word.length() > 0 && word.length() < MAX_WORD_LENGTH && (!isInDictionary(word) || evenIfWordInDictionary)) {
List<String> wordsToCheck = new ArrayList<>();
if (replacementsTheRest != null && word.length() > 1) {
for (final String wordChecked : getAllReplacements(word, 0, 0)) {
Expand Down Expand Up @@ -467,7 +492,7 @@ public ArrayList<CandidateData> findReplacementCandidates(String word) {
for (final CandidateData cd : candidates) {
String replaced = DictionaryLookup.applyReplacements(cd.getWord(), dictionaryMetadata.getOutputConversionPairs());
// Add only the first occurrence of a given word.
if (words.add(replaced)) {
if (words.add(replaced) && !replaced.equals(word)) {
result.add(new CandidateData(replaced, cd.origDistance));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,21 @@ public void testFindReplacementsUsingFrequency() throws IOException {
assertTrue(reps.get(5).equals("dist"));
assertTrue(reps.get(6).equals("gist"));
}

/**
 * Checks the suggestions returned by {@code Speller.findSimilarWords} for several
 * query words, using the {@code dict-with-freq.dict} test dictionary. The expected
 * lists are compared through their {@code toString()} form, so order matters.
 */
@Test
public void testFindSimilarWords() throws IOException {
  final URL dictionaryUrl = getClass().getResource("dict-with-freq.dict");
  final Speller speller = new Speller(Dictionary.read(dictionaryUrl));

  // The queried word itself is not expected among its own suggestions.
  final List<String> forFist = speller.findSimilarWords("fist");
  assertTrue("[list, mist, dist, gist, wist, hist]".equals(forFist.toString()));

  final List<String> forMist = speller.findSimilarWords("mist");
  assertTrue("[list, fist, dist, gist, wist, hist]".equals(forMist.toString()));

  // Capitalized query: the lower-case "fist" is expected as the first suggestion.
  final List<String> forCapitalizedFist = speller.findSimilarWords("Fist");
  assertTrue("[fist, list, mist, dist, gist, wist, hist]".equals(forCapitalizedFist.toString()));

  final List<String> forLicit = speller.findSimilarWords("licit");
  assertTrue("[list, fist, mist, dist, gist, wist, hist]".equals(forLicit.toString()));
}

@Test
public void testConcurrentReplacements() throws IOException {
Expand Down

0 comments on commit 660c98a

Please sign in to comment.