diff --git a/.gitignore b/.gitignore index 16161f6..5338873 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ *\~ .Rhistory .DS_Store -*.sublime-project \ No newline at end of file +*.sublime-project +*egg-info diff --git a/fave/align/aligner.py b/fave/align/aligner.py index e322d6a..43b12e5 100644 --- a/fave/align/aligner.py +++ b/fave/align/aligner.py @@ -38,16 +38,17 @@ import time import logging import wave +import pkg_resources from . import transcriptprocessor -from . import cmudictionary +from fave import cmudictionary from . import praat class Aligner(): """ - The Aligner class is the main user entry point to the FAVE library. It - handles the interface between all the different modules and automates - the process in a way that allows easy use in scripts or larger programs. + The Aligner class is the main user entry point to the FAVE library. It + handles the interface between all the different modules and automates + the process in a way that allows easy use in scripts or larger programs. """ # pylint: disable=too-many-instance-attributes # Code debt: most of the instance attributes should be passed to functions @@ -59,50 +60,49 @@ def __init__( self, wavfile, trsfile, - inputfile=None, - tgfile=None, - dictionary_file=None, - no_prompt=False, - verbose=False, - check=False, - htktoolspath='' + tgfile, + **kwargs ): - dictionary_file = dictionary_file or ['align', 'model', 'dict'] self.logger = logging.getLogger(__name__) logging.basicConfig( - format='%(levelname)s:%(message)s', - level=logging.DEBUG) + format='%(name)s - %(levelname)s:%(message)s', + level=kwargs['verbose']) self.count_unclear = 0 self.count_uncertain = 0 self.count_words = 0 - - dictionary_file = os.path.join(*dictionary_file) - self.audio = wavfile - self.transcript = trsfile + default_dict = pkg_resources.resource_filename('align', 'model/dict') + if trsfile: + self.transcript = trsfile + else: + self.transcript = os.path.splitext(wavfile)[0] + '.txt' if tgfile: self.textgrid = tgfile else: self.textgrid = os.path.splitext(trsfile)[0] + '.TextGrid' - self.verbose = verbose - self.prompt = not no_prompt - self.check = check - if not htktoolspath and 'HTKTOOLSPATH' in os.environ: - self.htktoolspath = '$HTKTOOLSPATH' - else: - self.htktoolspath = htktoolspath - kwargs = { - 'verbose': verbose, - 'prompt': not no_prompt, - 'check': check - } + + self.__config(**kwargs) + + dictionary_file = kwargs['dict'] or default_dict + + kwargs['prompt'] = False args = [] + self.cmu_dict = cmudictionary.CMU_Dictionary(dictionary_file, *args, **kwargs) - if inputfile: - self.cmu_dict.add_dictionary_entries(inputfile) + + if kwargs['import']: + self.cmu_dict.add_dictionary_entries(kwargs['import']) + self.transcript = transcriptprocessor.TranscriptProcesor( - trsfile, self.cmu_dict) + self.transcript, + self.cmu_dict, + *args, + **kwargs) + + def __config(self,**kwargs): + self.htktoolspath = kwargs['htktoolspath'] + self.check = kwargs['check'] def read_transcript(self): """Interface with TranscriptProcesor to read a file""" @@ -129,8 +129,8 @@ def get_duration(self, FADIR='', PRAATPATH=''): f.close() duration = round((nx / sr), 3) except wave.Error: # wave.py does not seem to support 32-bit .wav files??? - self.logger.debug('Script path is %s',os.path.join( - FADIR, "praatScripts", "get_duration.praat")) + self.logger.debug('Script path is %s',os.path.join( + FADIR, "praatScripts", "get_duration.praat")) if PRAATPATH: dur_command = "%s %s %s" % (PRAATPATH, os.path.join( FADIR, "praatScripts", "get_duration.praat"), self.audio) @@ -360,8 +360,8 @@ def __align(self, chunk, trs_input, outfile, # outfile = output TextGrid self.logger.info(f"Aligning chunk {chunk}") - self.logger.info( - f"input transcript: {trs_input}\noutput file: {outfile}") + self.logger.debug(f"input transcript: {trs_input}") + self.logger.debug(f"output file: {outfile}") # change to Forced Alignment Toolkit directory for all the temp and # preparation files @@ -369,7 +369,7 @@ def __align(self, chunk, trs_input, outfile, self.logger.debug(f"Changing working directory to {FADIR}") os.chdir(FADIR) - self.logger.info("Current working directory is: %s", os.getcwd()) + self.logger.debug("Current working directory is: %s", os.getcwd()) # derive unique identifier for tmp directory and all its file (from # name of the sound "chunk") identifier = re.sub( diff --git a/fave/align/transcriptprocessor.py b/fave/align/transcriptprocessor.py index 84c6f65..bb32a09 100644 --- a/fave/align/transcriptprocessor.py +++ b/fave/align/transcriptprocessor.py @@ -34,7 +34,7 @@ import sys import os import logging -from . import cmudictionary +from fave import cmudictionary class TranscriptProcesor(): @@ -66,7 +66,7 @@ def __init__( self.logger = logging.getLogger(__name__) logging.basicConfig( format='%(levelname)s:%(message)s', - level=logging.DEBUG) + level=kwargs['verbose']) self.file = transript_file self.__config_flags(**kwargs) @@ -86,21 +86,10 @@ def __init__( self.dictionary = pronunciation_dictionary def __config_flags(self, **kwargs): - self.verbose = False - self.prompt = False - self.check = False - try: - self.verbose = kwargs['verbose'] - except KeyError: - pass - try: - self.prompt = kwargs['prompt'] - except KeyError: - pass - try: - self.check = kwargs['check'] - except KeyError: - pass + self.prompt = kwargs['prompt'] + if kwargs['check']: + self.unknownFile = kwargs['check'] + self.check = bool(kwargs['check']) def check_dictionary_entries(self, wavfile): """checks that all words in lines have an entry in the CMU dictionary; @@ -155,8 +144,7 @@ def check_dictionary_entries(self, wavfile): :2]) + "_" + "dict") self.logger.debug(f"temp_dict is {temp_dict}") self.dictionary.write_dict(temp_dict) - if self.verbose: - self.logger.debug( + self.logger.debug( "Written updated temporary version of CMU dictionary.") # forced alignment must use updated cmudict, not original one self.temp_dict_dir = temp_dict @@ -165,10 +153,10 @@ def check_dictionary_entries(self, wavfile): # write list of unknown words and suggested transcriptions for # truncated words to file if self.check: - self.dictionary.write_unknown_words(unknown) + self.dictionary.write_unknown_words(unknown,self.unknownFile) self.logger.info( "Written list of unknown words in transcription to file %s.", - self.check) + self.unknownFile) if __name__ == "__main__": sys.exit() # It shouldn't just die, but return and clean up after itself @@ -182,8 +170,8 @@ def preprocess_transcription(self, line): # INPUT: string line = line of orthographic transcription # OUTPUT: list words = list of individual words in transcription - self.logger.info("Preprocessing transcript line") - self.logger.debug(line) + self.logger.debug("Preprocessing transcript line:") + self.logger.debug(f" {line}") flag_uncertain = self.flag_uncertain last_beg_uncertain = self.last_beg_uncertain last_end_uncertain = self.last_end_uncertain diff --git a/fave/align/cmudictionary.py b/fave/cmudictionary.py similarity index 82% rename from fave/align/cmudictionary.py rename to fave/cmudictionary.py index 7ae1da3..7014cf9 100644 --- a/fave/align/cmudictionary.py +++ b/fave/cmudictionary.py @@ -105,21 +105,21 @@ def __init__(self, dictionary_file, **kwargs): """ Initializes object by reading in CMU dictionary (or similar) - Parameters - ---------- + Parameters + ---------- dictionary_file : string - The full path to the location of a CMU-style dictionary. + The full path to the location of a CMU-style dictionary. verbose : bool - Whether to print debug information. + Whether to print debug information. prompt : bool - Whether to prompt the user to fix errors. - check : bool - Whether this is an alignment or transcript check. + Whether to prompt the user to fix errors. + check : bool + Whether this is an alignment or transcript check. """ self.logger = logging.getLogger(__name__) logging.basicConfig( format='%(levelname)s:%(message)s', - level=logging.DEBUG) + level=kwargs['verbose']) self.__config_flags(**kwargs) @@ -305,11 +305,11 @@ def __check_word(self, word, next_word): if word.upper() in self.cmu_dict: return True - self.logger.info(f'Cannot find {word} in dictionary') + self.logger.debug(f'Cannot find {word} in dictionary') if self.intended.search(next_word): self.logger.debug(f'Hint given: {next_word}') if next_word in self.cmu_dict: - self.logger.info('Clue is in dictionary') + self.logger.debug('Clue is in dictionary') # pylint: disable=no-else-return if self.check: self.logger.debug( @@ -332,7 +332,7 @@ def check_word(self, word, next_word='', unknown=None, line=''): if not isinstance(unknown, dict): unknown = {} - self.logger.info(f'Checking if \'{word}\' in dictionary') + self.logger.debug(f'Checking if \'{word}\' in dictionary') inDict = bool(self.__check_word(word, next_word)) cmudict = self.cmu_dict @@ -382,7 +382,10 @@ def check_word(self, word, next_word='', unknown=None, line=''): # if "check transcription" option is selected, add word to list of # unknown words if not inDict: - self.logger.warning(f"Unknown word '{word}' in line '{line}'") + if self.check: + self.logger.info(f"Unknown word '{word}'") + else: + self.logger.warning(f"Unknown word '{word}'") unknown[word] = ("", clue.lstrip('+'), line) return unknown if word in self.STYLE_ENTRIES: @@ -459,3 +462,75 @@ def write_unknown_words(self, unknown, fname="unknown.txt"): """writes the list of unknown words to file""" with open(fname, 'w') as f: f.write(self._write_words(unknown)) + +# +# !!! This is NOT the original cmu.py file !!! ## +# +# Last modified by Ingrid Rosenfelder: April 6, 2010 ## +# - all comments beginning with double pound sign ("##") ## +# - (comment before read_dict(f) deleted) ## +# - docstrings for all classes and functions ## +# + + +import re + + +class Phone: + + """represents a CMU dict phoneme (label and distinctive features)""" + # !!! not to be confused with class extractFormants.Phone !!! + label = '' # label + vc = '' # vocalic (+ = vocalic, - = consonantal) + vlng = '' # vowel length (l = long, s = short, d = diphthong, a = ???, 0 = n/a) + vheight = '' # vowel height (1 = high, 2 = mid, 3 = low) + vfront = '' # vowel frontness (1 = front, 2 = central, 3 = back) + vrnd = '' # vowel roundness (+ = rounded, - = unrounded, 0 = n/a) + ctype = '' # manner of articulation (s = stop, a = affricate, f = fricative, n = nasal, l = lateral, r = glide, 0 = n/a) + cplace = '' # place of articulation (l = labial, b = labiodental, d = dental, a = apical, p = palatal, v = velar, 0 = n/a) + cvox = '' # consonant voicing (+ = voiced, - = unvoiced, 0 = n/a) + + +def read_dict(f): + """reads the CMU dictionary and returns it as dictionary object, + allowing multiple pronunciations for the same word""" + dictfile = open(f, 'r') + lines = dictfile.readlines() + dict = {} + pat = re.compile(' *') # two spaces separating CMU dict entries + for line in lines: + line = line.rstrip() + line = re.sub(pat, ' ', line) # reduce all spaces to one + word = line.split(' ')[0] # orthographic transcription + phones = line.split(' ')[1:] # phonemic transcription + if word not in dict: + dict[word] = [phones] + # phonemic transcriptions represented as list of lists of + # phones + else: + dict[word].append( + phones) # add alternative pronunciation to list of pronunciations + dictfile.close() + return dict + + +def read_phoneset(f): + """reads the CMU phoneset (assigns distinctive features to each phoneme); + returns it as dictionary object""" + lines = open(f, 'r').readlines() + phoneset = {} + for line in lines[1:]: # leave out header line + p = Phone() + line = line.rstrip('\n') + label = line.split()[0] # phoneme label + p.label = label + p.vc = line.split()[1] # vocalic + p.vlng = line.split()[2] # vowel length + p.vheight = line.split()[3] # vowel height + p.vfront = line.split()[4] # vowel frontness + p.vrnd = line.split()[5] # vowel roundness + p.ctype = line.split()[6] # consonants: manner of articulation + p.cplace = line.split()[7] # consonants: place of articulation + p.cvox = line.split()[8] # consonants: voicing + phoneset[label] = p + return phoneset diff --git a/fave/extract/cmu.py b/fave/extract/cmu.py deleted file mode 100644 index 48d6992..0000000 --- a/fave/extract/cmu.py +++ /dev/null @@ -1,71 +0,0 @@ -# -# !!! This is NOT the original cmu.py file !!! ## -# -# Last modified by Ingrid Rosenfelder: April 6, 2010 ## -# - all comments beginning with double pound sign ("##") ## -# - (comment before read_dict(f) deleted) ## -# - docstrings for all classes and functions ## -# - - -import re - - -class Phone: - - """represents a CMU dict phoneme (label and distinctive features)""" - # !!! not to be confused with class extractFormants.Phone !!! - label = '' # label - vc = '' # vocalic (+ = vocalic, - = consonantal) - vlng = '' # vowel length (l = long, s = short, d = diphthong, a = ???, 0 = n/a) - vheight = '' # vowel height (1 = high, 2 = mid, 3 = low) - vfront = '' # vowel frontness (1 = front, 2 = central, 3 = back) - vrnd = '' # vowel roundness (+ = rounded, - = unrounded, 0 = n/a) - ctype = '' # manner of articulation (s = stop, a = affricate, f = fricative, n = nasal, l = lateral, r = glide, 0 = n/a) - cplace = '' # place of articulation (l = labial, b = labiodental, d = dental, a = apical, p = palatal, v = velar, 0 = n/a) - cvox = '' # consonant voicing (+ = voiced, - = unvoiced, 0 = n/a) - - -def read_dict(f): - """reads the CMU dictionary and returns it as dictionary object, - allowing multiple pronunciations for the same word""" - dictfile = open(f, 'r') - lines = dictfile.readlines() - dict = {} - pat = re.compile(' *') # two spaces separating CMU dict entries - for line in lines: - line = line.rstrip() - line = re.sub(pat, ' ', line) # reduce all spaces to one - word = line.split(' ')[0] # orthographic transcription - phones = line.split(' ')[1:] # phonemic transcription - if word not in dict: - dict[word] = [phones] - # phonemic transcriptions represented as list of lists of - # phones - else: - dict[word].append( - phones) # add alternative pronunciation to list of pronunciations - dictfile.close() - return dict - - -def read_phoneset(f): - """reads the CMU phoneset (assigns distinctive features to each phoneme); - returns it as dictionary object""" - lines = open(f, 'r').readlines() - phoneset = {} - for line in lines[1:]: # leave out header line - p = Phone() - line = line.rstrip('\n') - label = line.split()[0] # phoneme label - p.label = label - p.vc = line.split()[1] # vocalic - p.vlng = line.split()[2] # vowel length - p.vheight = line.split()[3] # vowel height - p.vfront = line.split()[4] # vowel frontness - p.vrnd = line.split()[5] # vowel roundness - p.ctype = line.split()[6] # consonants: manner of articulation - p.cplace = line.split()[7] # consonants: place of articulation - p.cvox = line.split()[8] # consonants: voicing - phoneset[label] = p - return phoneset diff --git a/fave/extract/extractFormants.py b/fave/extract/extractFormants.py index 5894600..9f42ed2 100755 --- a/fave/extract/extractFormants.py +++ b/fave/extract/extractFormants.py @@ -71,7 +71,7 @@ import praat import esps import plotnik -import cmu +from fave import cmudictionary as cmu import vowel import subprocess diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..67f7175 --- /dev/null +++ b/setup.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +from setuptools import setup + +packages = \ +['fave', 'fave.align', 'fave.align.model', 'fave.extract'] + +package_data = \ +{'': ['*'], + 'fave': ['praatScripts/*'], + 'fave.align': ['examples/*', 'examples/test/*', 'old_docs/*', 'readme_img/*'], + 'fave.align.model': ['11025/*', + '16000 (old model)/*', + '16000/*', + '8000/*', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict05172012', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_05232011', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_06222010', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_08222011', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_10192010', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11022011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_11092011', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_BACKUP', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_CELESTE_05232011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/dict_from_alignment_computer_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'backups dicts/merged_dict_11022011', + 'g-dropping Jiahong/*', + 'g-dropping Jiahong/16000/*'], + 'fave.extract': ['config/*', 'old_docs/*']} + +setup_kwargs = { + 'name': 'fave', + 'version': '2.0.0.dev0', + 'description': 'Forced alignment and vowel extraction', + 'long_description': None, + 'author': 'FAVE contributors', + 'author_email': None, + 'maintainer': None, + 'maintainer_email': None, + 'url': None, + 'packages': packages, + 'package_data': package_data, + 'python_requires': '>=3.7,<4.0', +} + + +setup(**setup_kwargs)