Merge cmu.py and cmudictionary.py

chrisbrickhouse · May 16, 2020 · 25d6e2f · 25d6e2f
1 parent d785972
commit 25d6e2f
Show file tree

Hide file tree

Showing 7 changed files with 295 additions and 146 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,5 @@
 *\~
 .Rhistory
 .DS_Store
-*.sublime-project
+*.sublime-project
+*egg-info
diff --git a/fave/align/aligner.py b/fave/align/aligner.py
@@ -38,16 +38,17 @@
 import time
 import logging
 import wave
+import pkg_resources
 from . import transcriptprocessor
-from . import cmudictionary
+from fave import cmudictionary
 from . import praat
 
 
 class Aligner():
     """
-	The Aligner class is the main user entry point to the FAVE library. It
-	handles the interface between all the different modules and automates
-	the process in a way that allows easy use in scripts or larger programs.
+    The Aligner class is the main user entry point to the FAVE library. It
+    handles the interface between all the different modules and automates
+    the process in a way that allows easy use in scripts or larger programs.
     """
     # pylint: disable=too-many-instance-attributes
     # Code debt: most of the instance attributes should be passed to functions
@@ -59,50 +60,49 @@ def __init__(
             self,
             wavfile,
             trsfile,
-            inputfile=None,
-            tgfile=None,
-            dictionary_file=None,
-            no_prompt=False,
-            verbose=False,
-            check=False,
-            htktoolspath=''
+            tgfile,
+            **kwargs
     ):
-        dictionary_file = dictionary_file or ['align', 'model', 'dict']
         self.logger = logging.getLogger(__name__)
         logging.basicConfig(
-            format='%(levelname)s:%(message)s',
-            level=logging.DEBUG)
+            format='%(name)s - %(levelname)s:%(message)s',
+            level=kwargs['verbose'])
 
         self.count_unclear = 0
         self.count_uncertain = 0
         self.count_words = 0
-
-        dictionary_file = os.path.join(*dictionary_file)
-
         self.audio = wavfile
-        self.transcript = trsfile
+        default_dict = pkg_resources.resource_filename('align', 'model/dict')
+        if trsfile:
+            self.transcript = trsfile
+        else:
+            self.transcript = os.path.splitext(wavfile)[0] + '.txt'
         if tgfile:
             self.textgrid = tgfile
         else:
             self.textgrid = os.path.splitext(trsfile)[0] + '.TextGrid'
-        self.verbose = verbose
-        self.prompt = not no_prompt
-        self.check = check
-        if not htktoolspath and 'HTKTOOLSPATH' in os.environ:
-            self.htktoolspath = '$HTKTOOLSPATH'
-        else:
-            self.htktoolspath = htktoolspath
-        kwargs = {
-            'verbose': verbose,
-            'prompt': not no_prompt,
-            'check': check
-        }
+
+        self.__config(**kwargs)
+
+        dictionary_file = kwargs['dict'] or default_dict
+
+        kwargs['prompt'] = False
         args = []
+
         self.cmu_dict = cmudictionary.CMU_Dictionary(dictionary_file, *args, **kwargs)
-        if inputfile:
-            self.cmu_dict.add_dictionary_entries(inputfile)
+
+        if kwargs['import']:
+            self.cmu_dict.add_dictionary_entries(kwargs['import'])
+
         self.transcript = transcriptprocessor.TranscriptProcesor(
-            trsfile, self.cmu_dict)
+            self.transcript,
+            self.cmu_dict,
+            *args,
+            **kwargs)
+
+    def __config(self, **kwargs):
+        """Copy the ``htktoolspath`` and ``check`` options onto the instance.
+
+        Both keys must be present in ``kwargs``; a missing key raises
+        KeyError.  Any other keyword arguments are ignored here.
+        """
+        for option in ('htktoolspath', 'check'):
+            setattr(self, option, kwargs[option])
 
     def read_transcript(self):
         """Interface with TranscriptProcesor to read a file"""
@@ -129,8 +129,8 @@ def get_duration(self, FADIR='', PRAATPATH=''):
             f.close()
             duration = round((nx / sr), 3)
         except wave.Error:  # wave.py does not seem to support 32-bit .wav files???
-			self.logger.debug('Script path is %s',os.path.join(
-				FADIR, "praatScripts", "get_duration.praat"))
+            self.logger.debug('Script path is %s',os.path.join(
+                FADIR, "praatScripts", "get_duration.praat"))
             if PRAATPATH:
                 dur_command = "%s %s %s" % (PRAATPATH, os.path.join(
                     FADIR, "praatScripts", "get_duration.praat"), self.audio)
@@ -360,16 +360,16 @@ def __align(self, chunk, trs_input, outfile,
         # outfile = output TextGrid
 
         self.logger.info(f"Aligning chunk {chunk}")
-        self.logger.info(
-            f"input transcript: {trs_input}\noutput file: {outfile}")
+        self.logger.debug(f"input transcript: {trs_input}")
+        self.logger.debug(f"output file: {outfile}")
 
         # change to Forced Alignment Toolkit directory for all the temp and
         # preparation files
         if FADIR:
             self.logger.debug(f"Changing working directory to {FADIR}")
             os.chdir(FADIR)
 
-        self.logger.info("Current working directory is: %s", os.getcwd())
+        self.logger.debug("Current working directory is: %s", os.getcwd())
         # derive unique identifier for tmp directory and all its file (from
         # name of the sound "chunk")
         identifier = re.sub(

diff --git a/fave/align/transcriptprocessor.py b/fave/align/transcriptprocessor.py
@@ -34,7 +34,7 @@
 import sys
 import os
 import logging
-from . import cmudictionary
+from fave import cmudictionary
 
 
 class TranscriptProcesor():
@@ -66,7 +66,7 @@ def __init__(
         self.logger = logging.getLogger(__name__)
         logging.basicConfig(
             format='%(levelname)s:%(message)s',
-            level=logging.DEBUG)
+            level=kwargs['verbose'])
 
         self.file = transript_file
         self.__config_flags(**kwargs)
@@ -86,21 +86,10 @@ def __init__(
         self.dictionary = pronunciation_dictionary
 
     def __config_flags(self, **kwargs):
-        self.verbose = False
-        self.prompt = False
-        self.check = False
-        try:
-            self.verbose = kwargs['verbose']
-        except KeyError:
-            pass
-        try:
-            self.prompt = kwargs['prompt']
-        except KeyError:
-            pass
-        try:
-            self.check = kwargs['check']
-        except KeyError:
-            pass
+        """Set the prompt and check flags from the keyword arguments.
+
+        ``kwargs['prompt']`` is stored unchanged.  A truthy
+        ``kwargs['check']`` is kept in ``self.unknownFile``, the file name
+        later handed to ``write_unknown_words``; ``self.unknownFile`` is
+        left unset otherwise.  ``self.check`` records whether a check
+        value was given.  Both keys are required: a missing one raises
+        KeyError.
+        """
+        self.prompt = kwargs['prompt']
+        if kwargs['check']:
+            self.unknownFile = kwargs['check']
+        self.check = bool(kwargs['check'])
 
     def check_dictionary_entries(self, wavfile):
         """checks that all words in lines have an entry in the CMU dictionary;
@@ -155,8 +144,7 @@ def check_dictionary_entries(self, wavfile):
                         :2]) + "_" + "dict")
             self.logger.debug(f"temp_dict is {temp_dict}")
             self.dictionary.write_dict(temp_dict)
-            if self.verbose:
-                self.logger.debug(
+            self.logger.debug(
                     "Written updated temporary version of CMU dictionary.")
             # forced alignment must use updated cmudict, not original one
             self.temp_dict_dir = temp_dict
@@ -165,10 +153,10 @@ def check_dictionary_entries(self, wavfile):
         # write list of unknown words and suggested transcriptions for
         # truncated words to file
         if self.check:
-            self.dictionary.write_unknown_words(unknown)
+            self.dictionary.write_unknown_words(unknown,self.unknownFile)
             self.logger.info(
                 "Written list of unknown words in transcription to file %s.",
-                self.check)
+                self.unknownFile)
             if __name__ == "__main__":
                 sys.exit()  # It shouldn't just die, but return and clean up after itself
 
@@ -182,8 +170,8 @@ def preprocess_transcription(self, line):
         # INPUT:  string line = line of orthographic transcription
         # OUTPUT:  list words = list of individual words in transcription
 
-        self.logger.info("Preprocessing transcript line")
-        self.logger.debug(line)
+        self.logger.debug("Preprocessing transcript line:")
+        self.logger.debug(f"    {line}")
         flag_uncertain = self.flag_uncertain
         last_beg_uncertain = self.last_beg_uncertain
         last_end_uncertain = self.last_end_uncertain

diff --git a/fave/align/cmudictionary.py → fave/cmudictionary.py b/fave/align/cmudictionary.py → fave/cmudictionary.py
@@ -105,21 +105,21 @@ def __init__(self, dictionary_file, **kwargs):
         """
         Initializes object by reading in CMU dictionary (or similar)
 
-	Parameters
-	----------
+        Parameters
+        ----------
         dictionary_file : string
-			The full path to the location of a CMU-style dictionary.
+            The full path to the location of a CMU-style dictionary.
         verbose : bool
-			Whether to print debug information.
+            Whether to print debug information.
         prompt : bool
-			Whether to prompt the user to fix errors.
-		check : bool
-			Whether this is an alignment or transcript check.
+            Whether to prompt the user to fix errors.
+        check : bool
+            Whether this is an alignment or transcript check.
         """
         self.logger = logging.getLogger(__name__)
         logging.basicConfig(
             format='%(levelname)s:%(message)s',
-            level=logging.DEBUG)
+            level=kwargs['verbose'])
 
         self.__config_flags(**kwargs)
 
@@ -305,11 +305,11 @@ def __check_word(self, word, next_word):
 
         if word.upper() in self.cmu_dict:
             return True
-        self.logger.info(f'Cannot find {word} in dictionary')
+        self.logger.debug(f'Cannot find {word} in dictionary')
         if self.intended.search(next_word):
             self.logger.debug(f'Hint given: {next_word}')
             if next_word in self.cmu_dict:
-                self.logger.info('Clue is in dictionary')
+                self.logger.debug('Clue is in dictionary')
                 # pylint: disable=no-else-return
                 if self.check:
                     self.logger.debug(
@@ -332,7 +332,7 @@ def check_word(self, word, next_word='', unknown=None, line=''):
         if not isinstance(unknown, dict):
             unknown = {}
 
-        self.logger.info(f'Checking if \'{word}\' in dictionary')
+        self.logger.debug(f'Checking if \'{word}\' in dictionary')
         inDict = bool(self.__check_word(word, next_word))
 
         cmudict = self.cmu_dict
@@ -382,7 +382,10 @@ def check_word(self, word, next_word='', unknown=None, line=''):
             # if "check transcription" option is selected, add word to list of
             # unknown words
             if not inDict:
-                self.logger.warning(f"Unknown word '{word}' in line '{line}'")
+                if self.check:
+                    self.logger.info(f"Unknown word '{word}'")
+                else:
+                    self.logger.warning(f"Unknown word '{word}'")
                 unknown[word] = ("", clue.lstrip('+'), line)
                 return unknown
         if word in self.STYLE_ENTRIES:
@@ -459,3 +462,75 @@ def write_unknown_words(self, unknown, fname="unknown.txt"):
         """writes the list of unknown words to file"""
         with open(fname, 'w') as f:
             f.write(self._write_words(unknown))
+
+#
+# !!! This is NOT the original cmu.py file !!!             ##
+#
+# Last modified by Ingrid Rosenfelder:  April 6, 2010                ##
+# - all comments beginning with double pound sign ("##")             ##
+# - (comment before read_dict(f) deleted)                            ##
+# - docstrings for all classes and functions                         ##
+#
+
+
+import re
+
+
+class Phone:
+    """Hold one CMU dictionary phoneme: its label and distinctive features.
+
+    Not to be confused with the ``Phone`` class of ``extractFormants``.
+
+    Every attribute is a string and defaults to the empty string:
+
+    label   -- phoneme label
+    vc      -- vocalic (+ = vocalic, - = consonantal)
+    vlng    -- vowel length (l = long, s = short, d = diphthong,
+               a = meaning not documented, 0 = n/a)
+    vheight -- vowel height (1 = high, 2 = mid, 3 = low)
+    vfront  -- vowel frontness (1 = front, 2 = central, 3 = back)
+    vrnd    -- vowel roundness (+ = rounded, - = unrounded, 0 = n/a)
+    ctype   -- manner of articulation (s = stop, a = affricate,
+               f = fricative, n = nasal, l = lateral, r = glide, 0 = n/a)
+    cplace  -- place of articulation (l = labial, b = labiodental,
+               d = dental, a = apical, p = palatal, v = velar, 0 = n/a)
+    cvox    -- consonant voicing (+ = voiced, - = unvoiced, 0 = n/a)
+    """
+
+    # identity
+    label = ''
+    # vowel features
+    vc = vlng = vheight = vfront = vrnd = ''
+    # consonant features
+    ctype = cplace = cvox = ''
+
+
+def read_dict(f):
+    """Read a CMU-style pronunciation dictionary into a dictionary object.
+
+    Parameters
+    ----------
+    f : string
+        Path to the dictionary file.  Each entry line holds a word followed
+        by its phones, separated by one or more spaces.
+
+    Returns
+    -------
+    dict
+        Maps each word to a list of pronunciations, every pronunciation
+        being a list of phone strings.  A word listed several times keeps
+        all of its pronunciations, in file order.  Blank lines are ignored.
+    """
+    pronunciations = {}  # named so the builtin ``dict`` is not shadowed
+    pat = re.compile('  *')  # a space plus any further spaces: one or more
+    # the context manager closes the file even if reading fails part-way
+    with open(f, 'r') as dictfile:
+        for line in dictfile:
+            line = pat.sub(' ', line.rstrip())  # reduce all spaces to one
+            if not line:
+                # a blank line is not an entry; do not record '' as a word
+                continue
+            # first field is the orthographic form, the rest are the phones
+            word, *phones = line.split(' ')
+            pronunciations.setdefault(word, []).append(phones)
+    return pronunciations
+
+
+def read_phoneset(f):
+    """Read the CMU phoneset, which assigns distinctive features to phonemes.
+
+    Parameters
+    ----------
+    f : string
+        Path to the phoneset file.  The first line is a header and is
+        skipped.  Every other non-blank line holds nine
+        whitespace-separated fields: label, vocalic, vowel length, vowel
+        height, vowel frontness, vowel roundness, manner of articulation,
+        place of articulation and voicing.
+
+    Returns
+    -------
+    dict
+        Maps each phoneme label to a ``Phone`` carrying its features.
+
+    Raises
+    ------
+    IndexError
+        If a non-blank entry line has fewer than nine fields.
+    """
+    phoneset = {}
+    # the context manager guarantees the file handle is closed
+    with open(f, 'r') as phonefile:
+        next(phonefile, None)  # leave out header line; empty file is fine
+        for line in phonefile:
+            fields = line.split()  # split once, not once per feature
+            if not fields:
+                # blank lines (e.g. at the end of the file) are not entries
+                continue
+            p = Phone()
+            p.label = fields[0]  # phoneme label
+            p.vc = fields[1]  # vocalic
+            p.vlng = fields[2]  # vowel length
+            p.vheight = fields[3]  # vowel height
+            p.vfront = fields[4]  # vowel frontness
+            p.vrnd = fields[5]  # vowel roundness
+            p.ctype = fields[6]  # consonants:  manner of articulation
+            p.cplace = fields[7]  # consonants:  place of articulation
+            p.cvox = fields[8]  # consonants:  voicing
+            phoneset[p.label] = p
+    return phoneset