Skip to content

Commit

Permalink
Add cmu_dict... test and fix write error
Browse files Browse the repository at this point in the history
This commit adds some testing for the cmudictionary.py module.

In testing cmudictionary.CMU_Dictionary.add_dictionary_entries()
a bug was uncovered in .check_transcription() which caused the
output file to have more spaces than necessary. The cause was that the
function did not split the phone string when it was supposed to.
This commit fixes the error and includes the test to prevent regression.
  • Loading branch information
Christian Brickhouse committed Aug 18, 2022
1 parent ce2c5cf commit 40503a6
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
4 changes: 2 additions & 2 deletions fave/cmudictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def check_transcription(self, transcription):
# checked for correct format)

# convert to upper case and split into phones
phones = transcription.upper().split()
phones = transcription.upper().split(' ')

# check that phones are separated by spaces
# (len(w) > 3: transcription could just consist of a single phone!)
Expand All @@ -266,7 +266,7 @@ def check_transcription(self, transcription):
self.check_phone(phone, transcription, index)
except ValueError as err:
raise err
return transcription
return transcription.split(' ')

def check_phone(self, phone, transcription, index):
"""checks that a phone entered by the user is part of the Arpabet"""
Expand Down
49 changes: 49 additions & 0 deletions tests/fave/test_cmudictionary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import pytest
from fave import cmudictionary

# Keyword arguments unpacked into every CMU_Dictionary(...) call made by the
# tests in this module.
KWARGS = {
'verbose': 1
}

# Small sample of dictionary entries (one "WORD PHONES..." entry per line) that
# the tests write to a temporary file and hand to CMU_Dictionary as its
# dictionary file. The word TESTERS is listed twice, with two different
# transcriptions.
# NOTE(review): the exact whitespace between the word and its phones matters
# to whatever parses this text -- confirm it matches the real dictionary format.
CMU_EXCERPT = """
TEST T EH1 S T
TEST'S T EH1 S T S
TESTA T EH1 S T AH0
TESTAMENT T EH1 S T AH0 M AH0 N T
TESTAMENTARY T EH2 S T AH0 M EH1 N T ER0 IY0
TESTED T EH1 S T AH0 D
TESTER T EH1 S T ER0
TESTERMAN T EH1 S T ER0 M AH0 N
TESTERS T EH1 S T ER0 Z
TESTERS T EH1 S T AH0 Z
"""

def test_dictionary_init(tmp_path):
    """Constructing a CMU_Dictionary must leave its source file unchanged.

    The excerpt is written to a temporary dictionary file, a CMU_Dictionary
    is built from that file, and the file's contents are then compared with
    what was originally written.
    """
    workdir = tmp_path / "sub"
    workdir.mkdir()
    dictionary_file = workdir / "cmu_dictionary.txt"
    dictionary_file.write_text(CMU_EXCERPT)

    # The instance itself is not inspected; only the effect of constructing
    # it on the file is checked below.
    _dictionary = cmudictionary.CMU_Dictionary(dictionary_file, **KWARGS)

    contents_after_init = dictionary_file.read_text()
    assert contents_after_init == CMU_EXCERPT

def test_add_dictionary_entries(tmp_path):
    """Check that add_dictionary_entries() records a new entry correctly.

    A single tab-separated entry is written to a file and passed to
    add_dictionary_entries(). The additions file (named by the instance's
    DICT_ADDITIONS attribute, inside the same directory) must then contain
    that entry with the tab replaced by exactly one space.
    """
    workdir = tmp_path / "sub"
    workdir.mkdir()

    dictionary_file = workdir / "cmu_dictionary.txt"
    dictionary_file.write_text(CMU_EXCERPT)

    dictionary = cmudictionary.CMU_Dictionary(dictionary_file, **KWARGS)
    # TODO the above code is duplicated from the init test
    # so it might be better to have it commonly available

    # Word and transcription are separated by a tab; note the trailing space.
    entry = "LINGUISTICS\tL IH0 NG G W IH1 S T IH0 K S "
    entry_file = workdir / "new_word_file.dict"
    entry_file.write_text(entry)

    dictionary.add_dictionary_entries(entry_file, path=workdir)

    additions_file = workdir / dictionary.DICT_ADDITIONS
    expected_line = entry.replace("\t", " ")
    assert expected_line in additions_file.read_text()

0 comments on commit 40503a6

Please sign in to comment.