Skip to content

Commit

Permalink
Merge cmu.py and cmudictionary.py
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisbrickhouse committed May 16, 2020
1 parent d785972 commit 25d6e2f
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 146 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
*\~
.Rhistory
.DS_Store
*.sublime-project
*.sublime-project
*egg-info
76 changes: 38 additions & 38 deletions fave/align/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,17 @@
import time
import logging
import wave
import pkg_resources
from . import transcriptprocessor
from . import cmudictionary
from fave import cmudictionary
from . import praat


class Aligner():
"""
The Aligner class is the main user entry point to the FAVE library. It
handles the interface between all the different modules and automates
the process in a way that allows easy use in scripts or larger programs.
The Aligner class is the main user entry point to the FAVE library. It
handles the interface between all the different modules and automates
the process in a way that allows easy use in scripts or larger programs.
"""
# pylint: disable=too-many-instance-attributes
# Code debt: most of the instance attributes should be passed to functions
Expand All @@ -59,50 +60,49 @@ def __init__(
self,
wavfile,
trsfile,
inputfile=None,
tgfile=None,
dictionary_file=None,
no_prompt=False,
verbose=False,
check=False,
htktoolspath=''
tgfile,
**kwargs
):
dictionary_file = dictionary_file or ['align', 'model', 'dict']
self.logger = logging.getLogger(__name__)
logging.basicConfig(
format='%(levelname)s:%(message)s',
level=logging.DEBUG)
format='%(name)s - %(levelname)s:%(message)s',
level=kwargs['verbose'])

self.count_unclear = 0
self.count_uncertain = 0
self.count_words = 0

dictionary_file = os.path.join(*dictionary_file)

self.audio = wavfile
self.transcript = trsfile
default_dict = pkg_resources.resource_filename('align', 'model/dict')
if trsfile:
self.transcript = trsfile
else:
self.transcript = os.path.splitext(wavfile)[0] + '.txt'
if tgfile:
self.textgrid = tgfile
else:
self.textgrid = os.path.splitext(trsfile)[0] + '.TextGrid'
self.verbose = verbose
self.prompt = not no_prompt
self.check = check
if not htktoolspath and 'HTKTOOLSPATH' in os.environ:
self.htktoolspath = '$HTKTOOLSPATH'
else:
self.htktoolspath = htktoolspath
kwargs = {
'verbose': verbose,
'prompt': not no_prompt,
'check': check
}

self.__config(**kwargs)

dictionary_file = kwargs['dict'] or default_dict

kwargs['prompt'] = False
args = []

self.cmu_dict = cmudictionary.CMU_Dictionary(dictionary_file, *args, **kwargs)
if inputfile:
self.cmu_dict.add_dictionary_entries(inputfile)

if kwargs['import']:
self.cmu_dict.add_dictionary_entries(kwargs['import'])

self.transcript = transcriptprocessor.TranscriptProcesor(
trsfile, self.cmu_dict)
self.transcript,
self.cmu_dict,
*args,
**kwargs)

def __config(self,**kwargs):
self.htktoolspath = kwargs['htktoolspath']
self.check = kwargs['check']

def read_transcript(self):
"""Interface with TranscriptProcesor to read a file"""
Expand All @@ -129,8 +129,8 @@ def get_duration(self, FADIR='', PRAATPATH=''):
f.close()
duration = round((nx / sr), 3)
except wave.Error: # wave.py does not seem to support 32-bit .wav files???
self.logger.debug('Script path is %s',os.path.join(
FADIR, "praatScripts", "get_duration.praat"))
self.logger.debug('Script path is %s',os.path.join(
FADIR, "praatScripts", "get_duration.praat"))
if PRAATPATH:
dur_command = "%s %s %s" % (PRAATPATH, os.path.join(
FADIR, "praatScripts", "get_duration.praat"), self.audio)
Expand Down Expand Up @@ -360,16 +360,16 @@ def __align(self, chunk, trs_input, outfile,
# outfile = output TextGrid

self.logger.info(f"Aligning chunk {chunk}")
self.logger.info(
f"input transcript: {trs_input}\noutput file: {outfile}")
self.logger.debug(f"input transcript: {trs_input}")
self.logger.debug(f"output file: {outfile}")

# change to Forced Alignment Toolkit directory for all the temp and
# preparation files
if FADIR:
self.logger.debug(f"Changing working directory to {FADIR}")
os.chdir(FADIR)

self.logger.info("Current working directory is: %s", os.getcwd())
self.logger.debug("Current working directory is: %s", os.getcwd())
# derive unique identifier for tmp directory and all its file (from
# name of the sound "chunk")
identifier = re.sub(
Expand Down
34 changes: 11 additions & 23 deletions fave/align/transcriptprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import sys
import os
import logging
from . import cmudictionary
from fave import cmudictionary


class TranscriptProcesor():
Expand Down Expand Up @@ -66,7 +66,7 @@ def __init__(
self.logger = logging.getLogger(__name__)
logging.basicConfig(
format='%(levelname)s:%(message)s',
level=logging.DEBUG)
level=kwargs['verbose'])

self.file = transript_file
self.__config_flags(**kwargs)
Expand All @@ -86,21 +86,10 @@ def __init__(
self.dictionary = pronunciation_dictionary

def __config_flags(self, **kwargs):
self.verbose = False
self.prompt = False
self.check = False
try:
self.verbose = kwargs['verbose']
except KeyError:
pass
try:
self.prompt = kwargs['prompt']
except KeyError:
pass
try:
self.check = kwargs['check']
except KeyError:
pass
self.prompt = kwargs['prompt']
if kwargs['check']:
self.unknownFile = kwargs['check']
self.check = bool(kwargs['check'])

def check_dictionary_entries(self, wavfile):
"""checks that all words in lines have an entry in the CMU dictionary;
Expand Down Expand Up @@ -155,8 +144,7 @@ def check_dictionary_entries(self, wavfile):
:2]) + "_" + "dict")
self.logger.debug(f"temp_dict is {temp_dict}")
self.dictionary.write_dict(temp_dict)
if self.verbose:
self.logger.debug(
self.logger.debug(
"Written updated temporary version of CMU dictionary.")
# forced alignment must use updated cmudict, not original one
self.temp_dict_dir = temp_dict
Expand All @@ -165,10 +153,10 @@ def check_dictionary_entries(self, wavfile):
# write list of unknown words and suggested transcriptions for
# truncated words to file
if self.check:
self.dictionary.write_unknown_words(unknown)
self.dictionary.write_unknown_words(unknown,self.unknownFile)
self.logger.info(
"Written list of unknown words in transcription to file %s.",
self.check)
self.unknownFile)
if __name__ == "__main__":
sys.exit() # It shouldn't just die, but return and clean up after itself

Expand All @@ -182,8 +170,8 @@ def preprocess_transcription(self, line):
# INPUT: string line = line of orthographic transcription
# OUTPUT: list words = list of individual words in transcription

self.logger.info("Preprocessing transcript line")
self.logger.debug(line)
self.logger.debug("Preprocessing transcript line:")
self.logger.debug(f" {line}")
flag_uncertain = self.flag_uncertain
last_beg_uncertain = self.last_beg_uncertain
last_end_uncertain = self.last_end_uncertain
Expand Down
99 changes: 87 additions & 12 deletions fave/align/cmudictionary.py → fave/cmudictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,21 +105,21 @@ def __init__(self, dictionary_file, **kwargs):
"""
Initializes object by reading in CMU dictionary (or similar)
Parameters
----------
Parameters
----------
dictionary_file : string
The full path to the location of a CMU-style dictionary.
The full path to the location of a CMU-style dictionary.
verbose : bool
Whether to print debug information.
Whether to print debug information.
prompt : bool
Whether to prompt the user to fix errors.
check : bool
Whether this is an alignment or transcript check.
Whether to prompt the user to fix errors.
check : bool
Whether this is an alignment or transcript check.
"""
self.logger = logging.getLogger(__name__)
logging.basicConfig(
format='%(levelname)s:%(message)s',
level=logging.DEBUG)
level=kwargs['verbose'])

self.__config_flags(**kwargs)

Expand Down Expand Up @@ -305,11 +305,11 @@ def __check_word(self, word, next_word):

if word.upper() in self.cmu_dict:
return True
self.logger.info(f'Cannot find {word} in dictionary')
self.logger.debug(f'Cannot find {word} in dictionary')
if self.intended.search(next_word):
self.logger.debug(f'Hint given: {next_word}')
if next_word in self.cmu_dict:
self.logger.info('Clue is in dictionary')
self.logger.debug('Clue is in dictionary')
# pylint: disable=no-else-return
if self.check:
self.logger.debug(
Expand All @@ -332,7 +332,7 @@ def check_word(self, word, next_word='', unknown=None, line=''):
if not isinstance(unknown, dict):
unknown = {}

self.logger.info(f'Checking if \'{word}\' in dictionary')
self.logger.debug(f'Checking if \'{word}\' in dictionary')
inDict = bool(self.__check_word(word, next_word))

cmudict = self.cmu_dict
Expand Down Expand Up @@ -382,7 +382,10 @@ def check_word(self, word, next_word='', unknown=None, line=''):
# if "check transcription" option is selected, add word to list of
# unknown words
if not inDict:
self.logger.warning(f"Unknown word '{word}' in line '{line}'")
if self.check:
self.logger.info(f"Unknown word '{word}'")
else:
self.logger.warning(f"Unknown word '{word}'")
unknown[word] = ("", clue.lstrip('+'), line)
return unknown
if word in self.STYLE_ENTRIES:
Expand Down Expand Up @@ -459,3 +462,75 @@ def write_unknown_words(self, unknown, fname="unknown.txt"):
"""writes the list of unknown words to file"""
with open(fname, 'w') as f:
f.write(self._write_words(unknown))

#
# !!! This is NOT the original cmu.py file !!! ##
#
# Last modified by Ingrid Rosenfelder: April 6, 2010 ##
# - all comments beginning with double pound sign ("##") ##
# - (comment before read_dict(f) deleted) ##
# - docstrings for all classes and functions ##
#


import re


class Phone:
    """Represent a CMU dict phoneme (label and distinctive features).

    Not to be confused with class extractFormants.Phone.

    Every feature is a short string code. All of them default to the empty
    string at class level and are meant to be overwritten per instance.
    """

    # phoneme label
    label = ''
    # vocalic (+ = vocalic, - = consonantal)
    vc = ''
    # vowel length (l = long, s = short, d = diphthong, 0 = n/a;
    # the meaning of the code "a" is not documented)
    vlng = ''
    # vowel height (1 = high, 2 = mid, 3 = low)
    vheight = ''
    # vowel frontness (1 = front, 2 = central, 3 = back)
    vfront = ''
    # vowel roundness (+ = rounded, - = unrounded, 0 = n/a)
    vrnd = ''
    # manner of articulation (s = stop, a = affricate, f = fricative,
    # n = nasal, l = lateral, r = glide, 0 = n/a)
    ctype = ''
    # place of articulation (l = labial, b = labiodental, d = dental,
    # a = apical, p = palatal, v = velar, 0 = n/a)
    cplace = ''
    # consonant voicing (+ = voiced, - = unvoiced, 0 = n/a)
    cvox = ''

    def __repr__(self):
        """Return a debugging representation listing the label and features."""
        feature_names = ('vc', 'vlng', 'vheight', 'vfront', 'vrnd',
                         'ctype', 'cplace', 'cvox')
        features = ', '.join(
            f"{name}={getattr(self, name)!r}" for name in feature_names)
        return f"{type(self).__name__}(label={self.label!r}, {features})"


def read_dict(f):
    """Read a CMU-style pronunciation dictionary from a file.

    Each non-blank line is expected to hold an orthographic word followed by
    its phones, separated by one or more spaces.  Multiple lines for the same
    word are kept as alternative pronunciations.

    Parameters
    ----------
    f : string
        Path to the dictionary file.

    Returns
    -------
    dict
        Maps each word to a list of pronunciations, where every
        pronunciation is itself a list of phone strings.
    """
    # Match runs of one or more spaces.  A pattern that can match the empty
    # string (such as ' *') would make re.sub insert a space between every
    # character of the line.
    space_run = re.compile(r' +')
    pronunciations = {}
    # The context manager closes the file even if a line fails to parse.
    with open(f, 'r') as dictfile:
        for line in dictfile:
            line = line.rstrip()
            if not line:
                # Blank lines would otherwise create a bogus '' entry.
                continue
            # First field is the orthographic transcription, the remaining
            # fields are the phonemic transcription.
            word, *phones = space_run.sub(' ', line).split(' ')
            # Alternative pronunciations accumulate in a list of lists.
            pronunciations.setdefault(word, []).append(phones)
    return pronunciations


def read_phoneset(f):
    """Read the CMU phoneset, assigning distinctive features to each phoneme.

    The first line of the file is treated as a header and skipped.  Every
    other non-blank line must contain at least nine whitespace-separated
    fields: the phoneme label followed by its eight feature codes.

    Parameters
    ----------
    f : string
        Path to the phoneset file.

    Returns
    -------
    dict
        Maps each phoneme label to a ``Phone`` object carrying its features.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than nine fields.
    """
    # Attribute names in the column order of the phoneset file: label,
    # vocalic, vowel length, vowel height, vowel frontness, vowel roundness,
    # manner of articulation, place of articulation, voicing.
    columns = ('label', 'vc', 'vlng', 'vheight', 'vfront', 'vrnd',
               'ctype', 'cplace', 'cvox')
    phoneset = {}
    # The context manager guarantees the file handle is closed.
    with open(f, 'r') as phonefile:
        next(phonefile, None)  # leave out header line (if there is one)
        for line in phonefile:
            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. a trailing newline at the end.
                continue
            p = Phone()
            for index, attribute in enumerate(columns):
                setattr(p, attribute, fields[index])
            phoneset[fields[0]] = p
    return phoneset
Loading

0 comments on commit 25d6e2f

Please sign in to comment.