Add cmu_dict... test and fix write error

This commit adds tests for the cmudictionary.py module. Testing cmudictionary.CMU_Dictionary.add_dictionary_entries() uncovered a bug in .check_transcription() that caused the output file to contain more spaces than necessary. The cause was that the function did not split the phone string when it was supposed to. This commit fixes the error and includes a test to prevent regression.
chrisbrickhouse · Aug 18, 2022 · 40503a6 · 40503a6
1 parent ce2c5cf
commit 40503a6
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 2 deletions.
diff --git a/fave/cmudictionary.py b/fave/cmudictionary.py
@@ -245,7 +245,7 @@ def check_transcription(self, transcription):
         # checked for correct format)
 
         # convert to upper case and split into phones
-        phones = transcription.upper().split()
+        phones = transcription.upper().split(' ')
 
         # check that phones are separated by spaces
         # (len(w) > 3:  transcription could just consist of a single phone!)
@@ -266,7 +266,7 @@ def check_transcription(self, transcription):
                     self.check_phone(phone, transcription, index)
                 except ValueError as err:
                     raise err
-        return transcription
+        return transcription.split(' ')
 
     def check_phone(self, phone, transcription, index):
         """checks that a phone entered by the user is part of the Arpabet"""

diff --git a/tests/fave/test_cmudictionary.py b/tests/fave/test_cmudictionary.py
@@ -0,0 +1,49 @@
+import pytest
+from fave import cmudictionary
+
+KWARGS = {
+        'verbose': 1
+    }
+
+CMU_EXCERPT = """
+TEST  T EH1 S T 
+TEST'S  T EH1 S T S 
+TESTA  T EH1 S T AH0 
+TESTAMENT  T EH1 S T AH0 M AH0 N T 
+TESTAMENTARY  T EH2 S T AH0 M EH1 N T ER0 IY0 
+TESTED  T EH1 S T AH0 D 
+TESTER  T EH1 S T ER0 
+TESTERMAN  T EH1 S T ER0 M AH0 N 
+TESTERS  T EH1 S T ER0 Z 
+TESTERS  T EH1 S T AH0 Z 
+"""
+
+def test_dictionary_init(tmp_path):
+    d = tmp_path / "sub"
+    d.mkdir()
+    p = d / "cmu_dictionary.txt"
+    p.write_text(CMU_EXCERPT)
+
+    dict_obj = cmudictionary.CMU_Dictionary(p, **KWARGS)
+
+    assert p.read_text() == CMU_EXCERPT
+
+def test_add_dictionary_entries(tmp_path):
+    d = tmp_path / "sub"
+    d.mkdir()
+
+    p = d / "cmu_dictionary.txt"
+    p.write_text(CMU_EXCERPT)
+
+    dict_obj = cmudictionary.CMU_Dictionary(p, **KWARGS)
+    # TODO the above code is duplicated from the init test
+    #      so it might be better to have it commonly available
+
+    new_word = "LINGUISTICS\tL IH0 NG G W IH1 S T IH0 K S "
+    new_word_file = d / "new_word_file.dict"
+    new_word_file.write_text(new_word)
+
+    dict_obj.add_dictionary_entries(new_word_file, path=d)
+
+    added_entries_file = d / dict_obj.DICT_ADDITIONS
+    assert new_word.replace("\t", "  ") in added_entries_file.read_text()