-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathextractFormants.py
executable file
·2437 lines (2094 loc) · 104 KB
/
extractFormants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
#
# !!! This is NOT the original extractFormants.py file !!! ##
#
# - all comments beginning with a double pound sign ("##") ##
# - docstrings for all classes and functions ##
# - alphabetic ordering outside of main program: ##
# 1. classes ##
# 2. functions ##
# - allow multiple speakers in input TextGrids ##
# - user prompted for speaker info ##
# - excluded from analysis: ##
# - uncertain and unclear transcriptions ##
# - overlaps ##
# - last syllables of truncated words ##
# - entries on style tier added to vowel measurements ##
# - boolean options (instead of 'T', 'F') ##
# - poles and bandwidths as returned by LPC analysis added to output ##
# - Mahalanobis distance takes formant settings from options/defaults ##
# - speakers' last names optional ##
# - fixed rounding problem with phone duration (< 50 ms) ##
# - changed Praat Formant method to Burg for Mahalanobis measurement method ##
# - adapted Mahalanobis method to vary number of formants from 3 to 6 (Burg), ##
# then choose winning pair from all F1/F2 combinations of these ##
# - changed Praat object from LPC to Formant ##
# - no restriction on # of formants per frame for Formant objects ##
# - smoothing of formant tracks ( -> parameter nSmoothing in options) ##
# - FAAV measurement procedure: ##
# - AY has 50 ms left padding and is measured at maximum F1 ##
# - Tuw measured at beginning of segment ##
# - OW, AW measured halfway between beginning of segment and F1 maximum ##
# - EY is measured at maximum F1, but without extra padding ##
# - returns F3 and corresponding bandwidth, if possible ##
# - outputs and summarizes chosen nFormants (in separate file) ##
# - integrated remeasurement.py ##
# - new options: remeasurement and candidates ##
# - fixed checkTextGrid so that compatible with FA online interface output ##
# - added ethnicity and location to speaker object & changed output file ##
# - added "both" as output option (writes Plotnik file AND text file) ##
# - added "--speaker=speakerfile" option ##
# - added normalization and calculation of means for each vowel class ##
# - corrected anae() method index error ##
# - added phila_system as separate option (no longer dependent on file name) ##
# - changed "phila_system" option to "vowelSystem" to allow multiple values: ##
# - "Phila" ##
# - "NorthAmerican" (default) ##
# - "simplifiedARPABET" ##
# - fixed interference between minimum vowel length and smoothing window ##
# - added output of formant "tracks" (formant measurements at 20%, 35%, 50%, ##
# 65% and 80% of the vowel duration) in angular brackets in Plotnik files ##
# - fixed floating point problem of minimum duration in getTransitionLength ##
# - fixed errors caused by gaps in the vowel wave forms (at measurement point)##
#
#
"""
Takes as input a sound file and a Praat .TextGrid file (with word and phone tiers)
and outputs automatically extracted F1 and F2 measurements for each vowel
(either as a tab-delimited text file or as a Plotnik file).
"""
import sys
import os
import shutil
import argparse
import math
import re
import time
import pkg_resources
import csv
import pickle
import subprocess
from itertools import tee, islice
from bisect import bisect_left
import numpy as np
from tqdm import tqdm
import fave
from fave.extract import esps
from fave.extract import plotnik
from fave.extract import vowel
from fave import praat
from fave import cmudictionary as cmu
from fave.extract.remeasure import remeasure
from fave.extract.mahalanobis import mahalanobis
# Location of the 'praatScripts' resource inside the installed 'fave' package.
SCRIPTS_HOME = pkg_resources.resource_filename('fave','praatScripts')
# NOTE(review): this changes into the directory that already is the current
# working directory, so it looks like a no-op -- confirm before removing.
os.chdir(os.getcwd())
# Matches one token wrapped in double parentheses, e.g. "((word))".  The token
# may start with "*" or "+", consists of word characters and apostrophes, and
# may end in "-"; group 1 captures it without the parentheses.
uncertain = re.compile(r"\(\(([\*\+]?['\w]+\-?)\)\)")
# Phone labels (no stress digits), grouped by type.
CONSONANTS = ['B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH', 'K', 'L', 'M',
'N', 'NG', 'P', 'R', 'S', 'SH', 'T', 'TH', 'V', 'W', 'Y', 'Z', 'ZH']
VOWELS = ['AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH',
'ER', 'EY', 'IH', 'IY', 'OW', 'OY', 'UH', 'UW']
# NOTE(review): presumably labels for non-speech events; their meaning is not
# visible in this part of the file -- confirm.
SPECIAL = ['BR', 'CG', 'LS', 'LG', 'NS']
#
class Phone:
    """A single phone: label, time boundaries and (for vowels) Plotnik coding.

    NOTE: this is NOT the same class as ``cmu.Phone``.

    Attributes (all set to empty defaults by the constructor):
        label:   phone label (Arpabet coding)
        code:    Plotnik vowel code ("xx.xxxxx")
        xmin:    beginning of the phone
        xmax:    end of the phone
        cd:      Plotnik code: vowel class
        fm:      Plotnik code: following segment - manner
        fp:      Plotnik code: following segment - place
        fv:      Plotnik code: following segment - voice
        ps:      Plotnik code: preceding segment
        fs:      Plotnik code: following sequences
        overlap: overlap flag, False by default
        pp:      preceding phone (Arpabet label)
        arpa:    Arpabet coding WITHOUT the stress digit
        stress:  stress digit
    """

    def __init__(self):
        # Assignment order is kept so that vars(self) lists the attributes in
        # the same order as before.
        self.label = self.code = ''
        self.xmin = self.xmax = None
        self.cd = self.fm = self.fp = self.fv = self.ps = self.fs = ''
        self.overlap = False
        self.pp = None
        self.arpa = ''
        self.stress = None
class Speaker:
    """Background information about one speaker.

    Every field starts out as an empty string, except ``tiernum`` which starts
    as None.

    Attributes:
        name, first_name, last_name: speaker name and its parts
        age, sex, ethnicity, years_of_schooling: demographic information
        location: location of the speaker
        city:     e.g. 'Philadelphia'
        state:    e.g. 'PA'
        year:     year of recording
        tiernum:  index of the phone tier, i.e. the first tier belonging to
                  this speaker
    """

    def __init__(self):
        # Assignment order is kept so that vars(self) lists the attributes in
        # the same order as before.
        self.name = self.first_name = self.last_name = ''
        self.age = self.sex = self.ethnicity = ''
        self.years_of_schooling = ''
        self.location = self.city = self.state = ''
        self.year = ''
        self.tiernum = None
class VowelMeasurement:
    """One vowel measurement together with its coding and context.

    NOTE: this is NOT the same class as ``plotnik.VowelMeasurement``.

    Attributes:
        phone:  Arpabet coding
        stress: stress level ("1", "2", "0")
        style:  style label (if present)
        word:   corresponding word
        f1, f2, f3: first, second and third formant
        b1, b2, b3: bandwidths of the first, second and third formant
        t:      time of measurement
        code:   Plotnik vowel code ("xx.xxxxx")
        cd:     Plotnik code for vowel class
        fm:     Plotnik code for manner of following segment
        fp:     Plotnik code for place of following segment
        fv:     Plotnik code for voicing of following segment
        ps:     Plotnik code for preceding segment
        fs:     Plotnik code for following sequences
        text:   purpose not documented in the original source
        beg, end, dur: beginning, end and duration of the vowel
        poles:      original list of poles returned by LPC analysis
        bandwidths: original list of bandwidths returned by LPC analysis
        times, winner_poles, winner_bandwidths, all_poles, all_bandwidths:
                lists, empty by default (not documented in the original)
        nFormants:  actual formant settings used in the measurement (for the
                    Mahalanobis distance method)
        glide:  Plotnik glide coding
        norm_f1, norm_f2, norm_f3: normalized F1, F2 and F3
        tracks: formant "tracks" (five sample points at 20%, 35%, 50%, 65%
                and 80% of the vowel)
        all_tracks: formant "tracks" for all possible formant settings
                    (needed for remeasurement)
        norm_tracks: normalized formant "tracks"
        pre_seg, fol_seg, context, p_index, word_trans, pre_word_trans,
        fol_word_trans, pre_word, fol_word:
                strings, empty by default (not documented in the original)
    """

    def __init__(self):
        # Assignment order is kept so that vars(self) lists the attributes in
        # the same order as before.

        # identity of the token
        self.phone = self.stress = self.style = self.word = ''

        # formants and bandwidths
        self.f1 = self.f2 = self.f3 = None
        self.b1 = self.b2 = self.b3 = None
        self.t = ''

        # Plotnik coding
        self.code = self.cd = self.fm = self.fp = ''
        self.fv = self.ps = self.fs = ''
        self.text = ''

        # vowel boundaries
        self.beg = self.end = self.dur = None

        # analysis output; every attribute gets its own list object
        self.poles = []
        self.bandwidths = []
        self.times = []
        self.winner_poles = []
        self.winner_bandwidths = []
        self.all_poles = []
        self.all_bandwidths = []
        self.nFormants = None
        self.glide = ''

        # normalized values
        self.norm_f1 = self.norm_f2 = self.norm_f3 = None

        # formant "tracks"
        self.tracks = []
        self.all_tracks = []
        self.norm_tracks = []

        # segmental and lexical context
        self.pre_seg = self.fol_seg = self.context = self.p_index = ''
        self.word_trans = self.pre_word_trans = self.fol_word_trans = ''
        self.pre_word = self.fol_word = ''
class VowelMean:
    """Mean and standard deviation for one vowel class.

    Attributes:
        pc:         Plotnik vowel class
        means:      means for F1, F2, F3
        stdvs:      standard deviations for F1, F2, F3
        n:          number of tokens used to calculate the means and standard
                    deviations
        values:     formant values from the individual tokens
        norm_means: normalized means
        norm_stdvs: normalized standard deviations
        trackvalues: formant "tracks" (5 measurement points) from the
                     individual tokens
        trackmeans: mean values for the formant "tracks"
        trackmeans_norm: normalized mean formant "tracks"
    """

    def __init__(self):
        self.pc = ''
        # per-formant slots, in the order F1, F2, F3
        self.means = ['', '', '']
        self.stdvs = ['', '', '']
        self.n = [0, 0, 0]
        self.values = [[], [], []]
        self.norm_means = ['', '', '']
        self.norm_stdvs = ['', '', '']
        # formant "tracks"
        self.trackvalues = []
        self.trackmeans = []
        self.trackmeans_norm = []

    def __str__(self):
        """Return a multi-line summary of the means, deviations and values."""
        template = ('<Means for vowel class %s: means=%s, stdvs=%s, tokens=%s,\n'
                    'normalized: means=%s, stdvs=%s, values:\n'
                    '\tF1: %s,\n\tF2: %s,\n\tF3: %s>')
        fields = (
            self.pc, self.means, self.stdvs, self.n,
            self.norm_means, self.norm_stdvs,
            self.values[0], self.values[1], self.values[2],
        )
        return template % fields
class Word:
    """A word: its transcription, time boundaries and list of phones.

    Attributes:
        transcription: transcription of the word
        phones:        list of phones
        xmin:          beginning of the word
        xmax:          end of the word
        style:         style label (if present)
    """

    def __init__(self):
        self.transcription = ''
        self.phones = []
        self.xmin = self.xmax = None
        self.style = ''
#
def addOverlaps(words, tg, speaker):
    """Flag the speaker's vowels that overlap with material on other tiers.

    For every vowel phone in ``words``, the word tier of every OTHER speaker
    (tier index ``2 * sn + 1``) is scanned; if a non-empty, non-pause interval
    overlaps the phone, ``phone.overlap`` is set to True.

    Args:
        words: the speaker's words; each has a ``phones`` list whose items
            provide ``label``, ``xmin``, ``xmax`` and ``overlap``.
        tg: the TextGrid; ``len(tg) // 2`` is taken as the number of speakers,
            and each tier is indexable and yields intervals with ``xmin()``,
            ``xmax()`` and ``mark()``.
        speaker: the speaker being processed; ``speaker.tiernum`` identifies
            the tier pair to skip.

    Returns:
        The same ``words`` list, with ``overlap`` updated in place.
    """
    # NOTE: checking ~20,000 phone intervals naively is very slow, so one
    # cursor per speaker remembers where the previous phone stopped scanning.
    n_speakers = len(tg) // 2
    pointer = [0] * n_speakers

    for w in words:
        for p in w.phones:
            # consonants are never measured, so skip them (saves time)
            if not isVowel(p.label):
                continue
            for sn in range(n_speakers):
                # do not compare the speaker against their own tiers
                if (sn * 2) == speaker.tiernum:
                    continue
                # scanning word tiers instead of phone tiers is 2-2.5x faster
                tier = tg[sn * 2 + 1]
                # advance up to the last interval that starts before p ends
                while pointer[sn] < len(tier) and tier[pointer[sn]].xmin() < p.xmax:
                    i = tier[pointer[sn]]
                    boundaries_overlap = (
                        ((i.xmin() <= p.xmin) or (p.xmin <= i.xmin() <= p.xmax))
                        and ((i.xmax() >= p.xmax) or (p.xmin <= i.xmax() <= p.xmax)))
                    # FIX: the mark is upper-cased, so it has to be compared
                    # with "SIL"; the previous lowercase "sil" never matched
                    # and silence counted as overlapping speech.
                    if boundaries_overlap and i.mark().upper() not in ("SP", "SIL", ""):
                        p.overlap = True
                    pointer[sn] += 1
                # Step back one interval, since the last one has to be checked
                # again for the next phone.  FIX: never step below 0 -- a
                # negative cursor would index the tier from its end.
                pointer[sn] = max(pointer[sn] - 1, 0)
    return words
def addPlotnikCodes(words, phoneset, speaker, vowelSystem):
    """Annotate every vowel in ``words`` with its Plotnik code.

    Words without vowels are skipped.  For each vowel the code returned by
    ``plotnik.cmu2plotnik_code`` is stored on the phone, both whole (``code``)
    and split into its fields (``cd``, ``fm``, ``fp``, ``fv``, ``ps``, ``fs``).
    The preceding phone is stored in ``pp`` unless it is None.

    Returns the same ``words`` list, modified in place.
    """
    for w in words:
        if getNumVowels(w) == 0:
            continue
        for idx in range(len(w.phones)):
            if not isVowel(w.phones[idx].label):
                continue
            code, prec_p = plotnik.cmu2plotnik_code(
                idx, w.phones, w.transcription, phoneset, speaker, vowelSystem)
            target = w.phones[idx]
            if code:  # nothing to store when no code was returned
                fields = code.split('.')
                target.code = code       # whole code
                target.cd = fields[0]    # vowel class code
                environment = fields[1]
                target.fm = environment[0]  # following segment - manner
                target.fp = environment[1]  # following segment - place
                target.fv = environment[2]  # following segment - voice
                target.ps = environment[3]  # preceding segment
                target.fs = environment[4]  # following sequences
            # store the preceding segment whether it is a label or the empty
            # string (i.e. no preceding segment)
            if prec_p or prec_p == '':
                target.pp = prec_p
    return words
def addStyleCodes(words, tg):
    """Copy the coding on the style tier (the last tier of ``tg``) to each word.

    A word receives the upper-cased mark of the first non-"SP" style interval
    that overlaps it in any way.  Returns the same ``words`` list, modified in
    place.
    """
    i_start = 0  # interval on the style tier where the next scan begins
    for w in words:
        # resume roughly where the style code of the previous word was found;
        # note that ``offset`` counts from the current ``i_start``
        for offset, interval in enumerate(tg[-1][i_start:]):
            if interval.xmin() >= w.xmax:
                # past the end of the word: stop, and let the next scan begin
                # two intervals earlier, just in case (never below 0)
                i_start = max(i_start + offset - 2, 0)
                break

            if interval.mark().upper() == "SP":
                continue

            s_beg, s_end = interval.xmin(), interval.xmax()
            # "perfect" case: entire word contained in the style interval
            word_inside = s_beg <= w.xmin <= s_end and s_beg <= w.xmax <= s_end
            # word shifted to the left relative to the style interval
            word_left = w.xmin <= s_beg and s_beg <= w.xmax <= s_end
            # word shifted to the right relative to the style interval
            word_right = s_beg <= w.xmin <= s_end and s_end <= w.xmax
            # "worst" case: word interval contains the style interval
            word_around = w.xmin <= s_beg and s_end <= w.xmax

            if word_inside or word_left or word_right or word_around:
                w.style = interval.mark().upper()
                # let the next scan begin one interval earlier, just in case
                # (never below 0)
                i_start = max(i_start + offset - 1, 0)
                break
    return words
def anae(v, formants, times):
    """Return the time of measurement according to the ANAE (2006) guidelines.

    The vowel is measured at its F1 maximum, except for "AE" (measured at the
    F2 maximum) and "AO" (measured at the F2 minimum).

    Args:
        v: vowel label; a trailing stress digit ("AE1") is ignored.
        formants: one list of formant values per frame, F1 first; frames may
            be shorter than two values or empty.
        times: time stamps aligned with ``formants``.

    Returns:
        The element of ``times`` at the selected frame.  If no frame has the
        required formant, the first frame is used.

    Raises:
        ValueError: if ``formants`` is empty.
    """
    # NOTE: the None placeholders keep both lists aligned with ``times``.
    F1 = [f[0] if len(f) >= 1 else None for f in formants]
    F2 = [f[1] if len(f) >= 2 else None for f in formants]

    # FIX: labels carry a stress digit ("AE1"), so the bare comparison with
    # 'AE' / 'AO' never matched; compare the label without trailing digits.
    vowel_class = v.rstrip('0123456789')
    if vowel_class == 'AE':
        track, pick = F2, max
    elif vowel_class == 'AO':
        track, pick = F2, min
    else:
        track, pick = F1, max

    if not track:
        raise ValueError("anae() needs at least one formant frame")

    # FIX: None cannot be ordered against numbers on Python 3, so search only
    # the frames that have a value.  Ties resolve to the earliest frame, as
    # list.index() did before.
    usable = [k for k, value in enumerate(track) if value is not None]
    if usable:
        i = pick(usable, key=lambda k: track[k])
    else:
        # no frame has the formant: fall back to the first frame
        i = 0
    return times[i]
def calculateMeans(measurements):
    """Calculate per-vowel-class means from a list of vowel measurements.

    A token is only used if it has primary stress, an F1 of at least 200 Hz,
    is not a glide measurement, is not a function word and does not occur in
    one of the excluded phonetic environments (see the comments below).

    Args:
        measurements: vowel measurements providing ``stress``, ``f1``, ``f2``,
            ``f3``, ``glide``, ``word``, ``cd``, ``fm``, ``ps`` and ``tracks``.

    Returns:
        A dict mapping every code in ``plotnik.PLOTNIKCODES`` to a
        ``VowelMean`` holding token counts, rounded means and standard
        deviations for F1-F3, and ten ``(mean, stdv)`` pairs for the formant
        tracks (``('', '')`` where no value could be computed).
    """
    # built once per call rather than once per token
    function_words = frozenset([
        'A', 'AH', 'AM', "AN'", 'AN', 'AND', 'ARE', "AREN'T", 'AS', 'AT', 'AW', 'BECAUSE', 'BUT', 'COULD',
        'EH', 'FOR', 'FROM', 'GET', 'GONNA', 'GOT', 'GOTTA', 'GOTTEN',
        'HAD', 'HAS', 'HAVE', 'HE', "HE'S", 'HIGH', 'HUH',
        'I', "I'LL", "I'M", "I'VE", "I'D", 'IN', 'IS', 'IT', "IT'S", 'ITS', 'JUST', 'MEAN', 'MY',
        'NAH', 'NOT', 'OF', 'OH', 'ON', 'OR', 'OUR', 'SAYS', 'SHE', "SHE'S", 'SHOULD', 'SO',
        'THAN', 'THAT', "THAT'S", 'THE', 'THEM', 'THERE', "THERE'S", 'THEY', 'TO', 'UH', 'UM', 'UP',
        'WAS', "WASN'T", 'WE', 'WERE', 'WHAT', 'WHEN', 'WHICH', 'WHO', 'WITH', 'WOULD',
        'YEAH', 'YOU', "YOU'VE"])

    # initialize vowel means
    means = {}
    for p in plotnik.PLOTNIKCODES:
        newmean = VowelMean()
        newmean.pc = p
        means[p] = newmean

    # process measurements
    for m in measurements:
        # only include tokens with primary stress
        if m.stress != '1':
            continue
        # exclude tokens with F1 < 200 Hz
        # FIX: a missing F1 (None) cannot be compared with a number on
        # Python 3; such tokens are excluded as well.
        if m.f1 is None or m.f1 < 200:
            continue
        # exclude glide measurements
        if m.glide == 'g':
            continue
        # exclude function words
        if m.word.upper() in function_words:
            continue
        # exclude /ae, e, i, aw/ before nasals
        if m.cd in ['3', '2', '1', '42'] and m.fm == '4':
            continue
        # exclude vowels before /l/
        if m.fm == '5' and not m.cd == '39':
            continue
        # exclude vowels after /w, y/
        if m.ps == '9':
            continue
        # exclude vowels after obstruent + liquid clusters
        if m.ps == '8':
            continue

        # add measurements to means object
        if m.f1:
            means[m.cd].values[0].append(m.f1)
        if m.f2:
            means[m.cd].values[1].append(m.f2)
        if m.f3:
            means[m.cd].values[2].append(m.f3)
        # collect formant tracks
        means[m.cd].trackvalues.append(m.tracks)

    # calculate means and standard deviations
    for p in plotnik.PLOTNIKCODES:
        for i in range(3):
            # number of tokens for formant i
            means[p].n[i] = len(means[p].values[i])
            # mean and standard deviation for formant i
            mean, stdv = mean_stdv(means[p].values[i])
            if mean:
                means[p].means[i] = round(mean, 0)
            if stdv:
                means[p].stdvs[i] = round(stdv, 0)
        # formant tracks (5 measurement points x F1/F2 = 10 values per token)
        for j in range(10):
            # FIX: tokens whose track list is shorter than 10 values (e.g. the
            # empty default) are skipped instead of raising IndexError.
            column = [t[j] for t in means[p].trackvalues if len(t) > j and t[j]]
            t_mean, t_stdv = mean_stdv(column)
            if t_mean and t_stdv is not None:
                means[p].trackmeans.append((t_mean, t_stdv))
            else:  # can't leave empty values in the tracks
                means[p].trackmeans.append(('', ''))
    return means
def changeCase(word, case):
    """Return ``word`` in lower case if ``case`` is 'lower', else in upper case.

    Any value of ``case`` other than 'lower' is treated as 'upper'.
    """
    return word.lower() if case == 'lower' else word.upper()
def checkLocation(file):
    """Exit the program with an error message if ``file`` does not exist.

    Returns None when the path exists.
    """
    if os.path.exists(file):
        return
    print("ERROR: Could not locate %s" % file)
    sys.exit()
def checkSpeechSoftware(speechSoftware):
    """Check that the requested speech analysis program (Praat or ESPS) is available.

    Returns 'esps' for ESPS, or ``speechSoftware`` unchanged for Praat.  In
    every other case (program not found, ESPS on Windows, unsupported name)
    an error is printed and the program exits.
    """
    if speechSoftware in ('ESPS', 'esps'):
        if os.name == 'nt':
            print("ERROR: ESPS was specified as the speech analysis program, but this option is not yet compatible with Windows")
            sys.exit()
        if programExists('formant'):
            return 'esps'
        print("ERROR: ESPS was specified as the speech analysis program, but the command 'formant' is not in your path")
        sys.exit()

    if speechSoftware in ('praat', 'Praat'):
        # look in the configured Praat path first, then on the system path
        praat_found = ((PRAATPATH and programExists(speechSoftware, PRAATPATH))
                       or (os.name == 'posix' and programExists(speechSoftware))
                       or (os.name == 'nt' and programExists('praatcon.exe')))
        if praat_found:
            return speechSoftware
        print("ERROR: Praat was specified as the speech analysis program, but the command 'praat' ('praatcon' for Windows) is not in your path")
        sys.exit()

    print("ERROR: unsupported speech analysis software %s" % speechSoftware)
    sys.exit()
def checkTextGridFile(tgFile):
    """Check that a TextGrid file exists and looks like a Praat TextGrid.

    The file passes when its first line contains the string 'File type = "'.
    Otherwise (including an empty file) an error is printed and the program
    exits.  A missing file is reported by ``checkLocation``.
    """
    checkLocation(tgFile)
    # Only the first line is needed, so read just that line, and close the
    # file afterwards (the handle used to be left open).  An empty file gives
    # '' here and is reported as an error instead of raising IndexError.
    with open(tgFile, 'r') as tg_handle:
        first_line = tg_handle.readline()
    if 'File type = "' not in first_line:
        print("ERROR: %s does not appear to be a Praat TextGrid file (the string 'File type=' does not appear in the first line.)" % tgFile)
        sys.exit()
def checkTiers(tg, mfa):
    """Check the tier structure of a TextGrid and return the speaker names.

    Tiers come in pairs, one phone tier and one word tier per speaker, named
    "<speaker> - <tier type>".  Without ``mfa`` the phone tier has the even
    index and the word tier the odd index; with ``mfa`` it is the other way
    round.  A single trailing tier is accepted if it is named "style" or
    "focus".

    Returns:
        The list of speaker names.  For a TextGrid with exactly one tier pair
        (output of the FA online interface) the list is empty and the tier
        names are not checked.

    Exits the program if the structure is wrong or no speaker was found.
    """
    # position of the phone / word tier within each pair
    phone_offset, word_offset = (1, 0) if mfa else (0, 1)

    speakers = []
    # "ns": number of speakers (well, "noise" is not a speaker...)
    ns, style = divmod(len(tg), 2)

    # an odd tier count is only allowed when the last tier is the style tier
    if style and tg[-1].name().strip().upper() not in ["STYLE", "FOCUS"]:
        sys.exit("ERROR! Odd number of tiers in TextGrid, but last tier is not style tier.")

    # compatibility with output from the FA online interface (just two tiers)
    if ns == 1:
        return speakers

    for i in range(ns):
        phone_idx = 2 * i + phone_offset
        word_idx = 2 * i + word_offset

        phone_parts = tg[phone_idx].name().split(' - ')
        if "PHONE" not in phone_parts[1].strip().upper():
            print("ERROR! Tier %i should be phone tier but isn't." % phone_idx)
            sys.exit()

        word_parts = tg[word_idx].name().split(' - ')
        if "WORD" not in word_parts[1].strip().upper():
            print("ERROR! Tier %i should be word tier but isn't." % word_idx)
            sys.exit()

        # speaker name must be the same for phone and word tier
        if phone_parts[0].strip().upper() != word_parts[0].strip().upper():
            print("ERROR! Speaker name does not match for tiers %i and %i." % (phone_idx, word_idx))
            sys.exit()

        speakers.append(phone_parts[0].strip())

    if len(speakers) == 0:
        sys.exit("ERROR! No speakers in TextGrid?!")
    return speakers
def checkWavFile(wavFile):
    """checks whether a given sound file exists at a given location"""
    # existence is the only check; checkLocation() prints an error and exits
    # the program if the file is missing
    checkLocation(wavFile)
def convertTimes(times, offset):
    """Return a new list in which ``offset`` has been added to every time stamp."""
    return [stamp + offset for stamp in times]
def detectMonophthong(formants, measurementPoint, index):
    """Classify a vowel by how far F2 rises after the point of measurement.

    The rise is the highest F2 found from frame ``index`` onwards minus F2 at
    frame ``index``, rounded to three decimals:

        rise <= 100 Hz        -> 'm' (monophthong)
        100 < rise <= 300 Hz  -> 's' (weak/shortened glide)
        rise > 300 Hz         -> ''  (diphthong)

    ``measurementPoint`` is accepted but not used.
    """
    f2_at_pom = formants[index][1]
    # frames without an F2 value are ignored
    highest_f2_after_pom = max(
        formants[j][1] for j in range(index, len(formants)) if len(formants[j]) > 1)
    f2_rise = round(highest_f2_after_pom - f2_at_pom, 3)

    if f2_rise <= 100:
        return 'm'
    if f2_rise <= 300:
        return 's'
    return ''
def extractPortion(wavFile, vowelWavFile, beg, end, soundEditor):
    """Extract the portion from ``beg`` to ``end`` of the main sound file.

    With 'sox' the portion is written to ``vowelWavFile`` inside SCRIPTS_HOME;
    with 'praat' the extractSegment.praat script is run.  Any other value of
    ``soundEditor`` does nothing.  The command is run through ``os.system``.
    """
    if soundEditor == 'sox':  # the default setting, since it is faster
        # the output format is forced because Praat could not read the
        # extracted portion of some sound files otherwise
        command = ' '.join([
            os.path.join(SOXPATH, 'sox'),
            wavFile,
            '-t', 'wavpcm',
            os.path.join(SCRIPTS_HOME, vowelWavFile),
            'trim', str(beg), str(end - beg),
        ])
        os.system(command)
    elif soundEditor == 'praat':
        command = ' '.join([
            os.path.join(PRAATPATH, PRAATNAME),
            SCRIPTS_HOME + '/extractSegment.praat',
            os.path.join(os.path.pardir, wavFile),
            vowelWavFile,
            str(beg), str(end),
        ])
        os.system(command)
def faav(phone, formants, times, intensity):
    """Return the time of measurement according to the FAAV guidelines.

    - "AY" and "EY": at the F1 maximum
    - "UW" with Plotnik class "73" (Tuw): at the beginning of the segment
    - "OW" and "AW": halfway between the beginning of the segment and the F1
      maximum
    - all other vowels: one third of the way into the vowel

    For the first three groups the search is limited by intensity cutoffs.
    """
    vowel = phone.label[:-1]  # label without its last character
    is_tuw = vowel == "UW" and phone.cd == "73"

    # intensity cutoffs are needed for every vowel not measured at one third
    if vowel in ("AY", "EY", "OW", "AW") or is_tuw:
        # cutoff at 10% below maximum intensity
        beg_cutoff, end_cutoff = getIntensityCutoff(intensity.intensities(), intensity.times())
        # With a single measurement point in the intensity object both
        # cutoffs are identical; in that case use the whole vowel.
        if beg_cutoff == end_cutoff:
            beg_cutoff, end_cutoff = times[0], times[-1]
        # make sure we are measuring in the first half of the vowel
        beg_cutoff, end_cutoff = modifyIntensityCutoff(
            beg_cutoff, end_cutoff, phone, intensity.intensities(), intensity.times())

    # NOTE: "AY" receives extra padding at the beginning, so the search for
    # its F1 maximum may start before the segment boundary; "EY" does not.
    if vowel in ("AY", "EY"):
        return getTimeOfF1Maximum(formants, times, beg_cutoff, end_cutoff)

    if is_tuw:
        return max([phone.xmin, beg_cutoff])

    if vowel in ("OW", "AW"):
        maxF1time = getTimeOfF1Maximum(formants, times, beg_cutoff, end_cutoff)
        if maxF1time > phone.xmin:
            halfway = phone.xmin + (maxF1time - phone.xmin) / 2
            return max([beg_cutoff, halfway])
        return max([beg_cutoff, phone.xmin])

    return phone.xmin + (phone.xmax - phone.xmin) / 3
def getFormantTracks(poles, times, xmin, xmax):
    """Return formant tracks sampled at 20%, 35%, 50%, 65% and 80% of the vowel.

    The result is a flat list of ten values: F1 and F2 for each of the five
    sample points, in order.  Where F1 or F2 is missing at a sample point, two
    empty strings are stored instead.
    """
    tracks = []
    dur = xmax - xmin  # total duration of the vowel
    for step in range(5):
        sample_time = xmin + (0.2 * dur) + (0.15 * dur * step)
        index = getTimeIndex(sample_time, times)
        try:
            pair = [poles[index][0], poles[index][1]]
        except IndexError:
            # an F1 without a matching F2 is probably not reliable enough,
            # so store nothing for either of them
            pair = ['', '']
        tracks.extend(pair)
    return tracks
def getIntensityCutoff(intensities, times):
    """Return the begin and end times of the 10%-below-maximum-intensity interval.

    Starting at the (first) intensity maximum, the search walks left and right
    until a value drops below 0.9 times the maximum; the interval ends one
    step before that value.  If no such value is found, the first or last
    time stamp is used.
    """
    peak = max(intensities)
    peak_index = intensities.index(peak)
    threshold = 0.9 * peak

    # left boundary: one step right of the first sub-threshold value found
    # while walking left from the peak
    leftward = range(peak_index, -1, -1)
    z_left = next((z + 1 for z in leftward if intensities[z] < threshold), 0)

    # right boundary: one step left of the first sub-threshold value found
    # while walking right from the peak
    rightward = range(peak_index, len(intensities))
    z_right = next((z - 1 for z in rightward if intensities[z] < threshold),
                   len(intensities) - 1)

    return times[z_left], times[z_right]
def getMeasurementPoint(phone, formants, times, intensity, measurementPointMethod):
    """Return the point of formant measurement for the selected method.

    Supported values of ``measurementPointMethod``:
        'third', 'fourth', 'mid': 1/3, 1/4 or 1/2 of the way into the vowel
        'lennig': according to Lennig (1978), after trimming the transitions
        'anae':   according to the ANAE (2006) guidelines, after trimming the
                  transitions
        'faav':   according to the FAAV guidelines
        'maxint': at the intensity maximum

    Any other value prints an error message and exits the program with
    status 1.
    """
    duration = phone.xmax - phone.xmin

    if measurementPointMethod == 'third':
        return phone.xmin + duration / 3
    if measurementPointMethod == 'fourth':
        return phone.xmin + duration / 4
    if measurementPointMethod == 'mid':
        return phone.xmin + duration / 2

    if measurementPointMethod in ('lennig', 'anae'):
        # remove the vowel transitions before searching for the point
        transition = getTransitionLength(phone.xmin, phone.xmax)
        trimmedFormants, trimmedTimes = trimFormants(
            formants, times, phone.xmin + transition, phone.xmax - transition)
        if measurementPointMethod == 'lennig':
            return lennig(trimmedFormants, trimmedTimes)
        return anae(phone.label, trimmedFormants, trimmedTimes)

    if measurementPointMethod == 'faav':
        return faav(phone, formants, times, intensity)
    if measurementPointMethod == 'maxint':
        return maximumIntensity(intensity.intensities(), intensity.times())

    print("ERROR: Unsupported measurement point selection method %s" % measurementPointMethod)
    print(__doc__)
    # FIX: execution used to fall through to "return measurementPoint" and die
    # with an UnboundLocalError; stop explicitly after the message instead.
    sys.exit(1)
def getNumVowels(word):
    """Return the number of phones in ``word.phones`` that are vowels."""
    return sum(1 for phone in word.phones if isVowel(phone.label))
def getPadding(phone, windowSize, maxTime):
    """Return ``(padBeg, padEnd)``, the padding to add around the phone.

    The padding is normally ``windowSize`` on both sides (twice that on the
    left for "AY").  Where it would reach beyond the beginning (time 0) or the
    end (``maxTime``) of the sound file, it is cut down to the space that is
    available, since overshooting would mess up extractPortion().
    """
    # left side: "AY" asks for a double window, but only if a single window
    # fits in the first place
    wanted = windowSize
    if not (phone.xmin - windowSize < 0) and phone.label[:-1] == "AY":
        wanted = 2 * windowSize
    padBeg = phone.xmin if phone.xmin - wanted < 0 else wanted

    # right side
    overshoots_end = phone.xmax + windowSize > maxTime
    padEnd = maxTime - phone.xmax if overshoots_end else windowSize

    return (padBeg, padEnd)
def getSoundEditor():
    """Return 'sox' or 'praat', whichever is available for editing sound files.

    SoX is preferred because it is faster.  If neither program can be found,
    an error is printed and the program exits.
    """
    if ((SOXPATH and programExists('sox', SOXPATH))
            or (os.name == 'posix' and programExists('sox'))
            or (os.name == 'nt' and programExists('sox.exe'))):
        return 'sox'

    if ((PRAATPATH and programExists('praat', PRAATPATH))
            or (os.name == 'posix' and programExists('praat'))
            or (os.name == 'nt' and programExists('praatcon.exe'))):
        return 'praat'

    print("ERROR: neither 'praat' ('praatcon' for Windows) nor 'sox' can be found in your path")
    print("One of these two programs must be available for processing the audio file")
    sys.exit()
def getSpeakerBackground(speakername, speakernum):
    """Prompt the user for background information about one speaker.

    Args:
        speakername: name of the speaker, used as the default for the name
            prompts.
        speakernum: number of the speaker; ``speakernum * 2`` is stored as the
            speaker's ``tiernum``.

    Returns:
        A ``Speaker`` filled in with the answers.

    The program exits if no sex is entered while the module-level
    ``formantPredictionMethod`` is "mahalanobis".
    """
    speaker = Speaker()
    print("Please enter background information for speaker %s:" % speakername)
    print("(Press [return] if correct; if not, simply enter new data (do not use [delete]).)")

    speaker.name = input("Name:\t\t\t%s\t" % speakername.strip())
    if not speaker.name:
        speaker.name = speakername.strip()

    try:
        speaker.first_name = input("First name:\t\t%s\t" % speaker.name.strip().split()[0])
        if not speaker.first_name:
            speaker.first_name = speaker.name.strip().split()[0]
        # some speakers' last names are not known!
        try:
            # NOTE: only the initial letter of the speaker's last name is
            # automatically taken over from the tier name
            speaker.last_name = input("Last name:\t\t%s\t" % speaker.name.strip().split()[1][0])
            if not speaker.last_name:
                speaker.last_name = speaker.name.strip().split()[1][0]
        except IndexError:
            speaker.last_name = input("Last name:\t\t")
    except IndexError:
        # The name is empty, so there is nothing to split into first and last
        # name.  FIX: this used to be a bare "except:", which also swallowed
        # KeyboardInterrupt, so Ctrl-C at these prompts did not abort.
        speaker.first_name = ''
        speaker.last_name = ''

    speaker.sex = input("Sex:\t\t\t")
    # speaker sex is required for the Mahalanobis method!
    if formantPredictionMethod == "mahalanobis":
        if not speaker.sex:
            print("ERROR! Speaker sex must be defined for the 'mahalanobis' formantPredictionMethod!")
            sys.exit()

    speaker.age = input("Age:\t\t\t")
    # city and state are not prompted for here
    speaker.ethnicity = input("Ethnicity:\t\t")
    speaker.location = input("Location:\t\t")
    speaker.year = input("Year of recording:\t")
    speaker.years_of_schooling = input("Years of schooling:\t")
    # tiernum points to the first tier for the given speaker
    speaker.tiernum = speakernum * 2
    return speaker
def getTimeIndex(t, times):
    """Return the index of the value in ``times`` that is nearest to ``t``.

    Parameters:
        t: the time point to look up.
        times: non-empty list of time stamps in ascending order.

    Returns:
        An index into ``times``.  Values of ``t`` outside the covered range
        are clamped to the first/last index.  When ``t`` is exactly halfway
        between two neighbouring time stamps, the earlier index is returned.

    Raises:
        IndexError: if ``times`` is empty.
    """
    from bisect import bisect_left

    # the two following cases can happen if a short vowel is at the beginning
    # or end of a file: fall back to the first / last measurement
    if t < times[0]:
        return 0
    if t > times[-1]:
        return len(times) - 1

    # first position whose time stamp is >= t (guaranteed to exist here)
    i = bisect_left(times, t)
    if i == 0:
        # t coincides with the very first time stamp; there is no earlier
        # neighbour to compare against (comparing against a dummy 0.0 used
        # to yield -1 when t == times[0] == 0.0)
        return 0

    # determine nearest index among the two neighbours
    if abs(t - times[i - 1]) > abs(t - times[i]):
        return i
    return i - 1
def getTimeOfF1Maximum(formants, times, beg_cutoff, end_cutoff):
    """Return the time stamp at which F1 reaches its maximum (within the cutoff limits).

    The formant track and its time stamps are first narrowed with
    ``trimFormants(formants, times, beg_cutoff, end_cutoff)``; the search is
    carried out on the trimmed data only.  If the maximum occurs more than
    once, the earliest occurrence wins.
    """
    # restrict the search to the interval between the two cutoffs
    window_formants, window_times = trimFormants(formants, times, beg_cutoff, end_cutoff)
    # F1 is the first entry of each frame; frames that are empty (holes in
    # the formant track) count as 0
    f1_values = [frame[0] if frame else 0 for frame in window_formants]
    peak_position = f1_values.index(max(f1_values))
    return window_times[peak_position]
def getTransitionLength(minimum, maximum):
    """Return the transition time to the surrounding consonants.

    The result is 0.02 (20 msec), or 0 when the vowel duration
    ``maximum - minimum``, rounded to three decimals, is 0.04 (40 msec) or
    less.
    """
    # needed to remove transitions for Lennig and ANAE measurement methods
    duration = round(maximum - minimum, 3)
    return 0 if duration <= 0.04 else 0.02
def getVowelMeasurement(vowelFileStem, p, w, speechSoftware, formantPredictionMethod, measurementPointMethod, nFormants, maxFormant, windowSize, preEmphasis, padBeg, padEnd, speaker):
    """Make a vowel measurement for one vowel token.

    Runs the selected speech software on ``vowelFileStem + '.wav'``, reads
    the resulting formant data, hands it to ``measureVowel()`` and returns
    whatever ``measureVowel()`` returns.  Temporary analysis files and, at
    the end, the vowel's wav file are deleted.

    Parameters (as used in this function):
        vowelFileStem: file name stem; '.wav', '.Formant', '.Intensity',
            '.pole' and '.fb' are appended to it.
        p: the vowel; ``p.label``, ``p.cd`` and ``p.xmin`` are read here.
        w: passed through to ``measureVowel()``.
        speechSoftware: 'esps' selects ESPS; anything else is treated as Praat.
        formantPredictionMethod: 'mahalanobis' or anything else ("default").
        measurementPointMethod: passed through to ``measureVowel()``.
        nFormants, maxFormant, windowSize, preEmphasis: passed as arguments
            to the Praat script (``nFormants`` is overridden in the
            Mahalanobis case, see below).
        padBeg, padEnd: passed to ``measureVowel()``; ``padBeg`` is also
            subtracted from ``p.xmin`` to obtain the time offset.
        speaker: not used in this function body.
    """
    vowelWavFile = vowelFileStem + '.wav'
    # get necessary files (LPC or formant)
    # via ESPS:  ## NOTE:  I haven't checked the path issues for the ESPS
    # option yet...
    # NOTE(review): as laid out here, the ESPS path assigns only `lpc` / `fmt`;
    # it never assigns `LPCs` or `intensity`, both of which are read further
    # down - confirm whether the ESPS option is still expected to work.
    if speechSoftware == 'esps':
        esps.runFormant(vowelWavFile)
        if formantPredictionMethod == 'mahalanobis':
            lpc = esps.LPC()
            lpc.read(vowelFileStem + '.pole')
        else:
            fmt = esps.Formant()
            fmt.read(vowelFileStem + '.pole', vowelFileStem + '.fb')
        # clean up the temporary files we created for this vowel
        esps.rmFormantFiles(vowelFileStem)
    # via Praat:  ## NOTE:  all temp files are in the "/bin" directory!
    else:   # assume praat here
        if formantPredictionMethod == 'mahalanobis':
            # get measurements for nFormants = 3, 4, 5, 6
            # (the nFormants argument is ignored and overwritten here; one
            # Formant object per setting is collected in LPCs)
            LPCs = []
            nFormants = 3
            while nFormants <= 6:
                # command line: <praat> extractFormants.praat <wav> <nFormants>
                # <maxFormant> <windowSize> <preEmphasis> burg
                # (note the two adjacent ' ' literals after maxFormant, which
                # concatenate to a double space)
                os.system(os.path.join(PRAATPATH, PRAATNAME) + ' ' + os.path.join(SCRIPTS_HOME, 'extractFormants.praat') + ' ' +
                          vowelWavFile + ' ' + str(nFormants) + ' ' + str(maxFormant) + ' ' ' ' + str(windowSize) + ' ' + str(preEmphasis) + ' burg')
                # the script's output is read back from SCRIPTS_HOME; each
                # iteration reads the same file name again
                lpc = praat.Formant()
                lpc.read(os.path.join(SCRIPTS_HOME, vowelFileStem + '.Formant'))
                LPCs.append(lpc)
                nFormants += 1
        else:
            # single Praat run with the caller-supplied nFormants
            os.system(os.path.join(PRAATPATH, PRAATNAME) + ' ' + os.path.join(SCRIPTS_HOME, 'extractFormants.praat') + ' ' +
                      vowelWavFile + ' ' + str(nFormants) + ' ' + str(maxFormant) + ' ' + str(windowSize) + ' ' + str(preEmphasis) + ' burg')
            fmt = praat.Formant()
            fmt.read(os.path.join(SCRIPTS_HOME, vowelFileStem + '.Formant'))
        # remove the temporary Formant file once it has been read
        os.remove(os.path.join(SCRIPTS_HOME, vowelFileStem + '.Formant'))
        # get Intensity object for intensity cutoff
        # (only for those vowels where we need it)
        # p.label[:-1] is the label without its last character
        # (presumably the stress digit - TODO confirm)
        if (p.label[:-1] in ["AY", "EY", "OW", "AW"]) or (p.label[:-1] == "UW" and p.cd == "73"):
            os.system(os.path.join(PRAATPATH, PRAATNAME) + ' ' + os.path.join(SCRIPTS_HOME, 'getIntensity.praat') + ' ' + vowelWavFile)
            intensity = praat.Intensity()
            intensity.read(os.path.join(SCRIPTS_HOME, vowelFileStem + '.Intensity'))
            os.remove(os.path.join(SCRIPTS_HOME, vowelFileStem + '.Intensity'))
            # same offset as the one applied to the formant time stamps below
            intensity.change_offset(p.xmin - padBeg)
        else:
            # freshly constructed Intensity object with nothing read into it
            intensity = praat.Intensity()
    # get measurement according to formant prediction method
    # Mahalanobis:
    if formantPredictionMethod == 'mahalanobis':
        # one entry per nFormants setting in each of these lists
        convertedTimes = []
        poles = []
        bandwidths = []
        for lpc in LPCs:
            convertedTimes.append(convertTimes(lpc.times(), p.xmin - padBeg))
                                  # add offset to all time stamps from Formant
                                  # file
            poles.append(lpc.formants())
            bandwidths.append(lpc.bandwidths())
        # `means` and `covs` are neither parameters nor assigned in this
        # function - presumably module-level globals; verify where they are set
        vm = measureVowel(p, w, poles, bandwidths, convertedTimes, intensity, measurementPointMethod,
                          formantPredictionMethod, padBeg, padEnd, means, covs)
    # default:
    else:   # assume 'default' here
        # wrapped in one-element lists so that measureVowel() receives the
        # same list-of-tracks layout as in the Mahalanobis case
        convertedTimes = [convertTimes(fmt.times(), p.xmin - padBeg)]
        formants = [fmt.formants()]
        bandwidths = [fmt.bandwidths()]
        # empty strings take the place of means / covs
        vm = measureVowel(p, w, formants, bandwidths, convertedTimes, intensity, measurementPointMethod,
                          formantPredictionMethod, padBeg, padEnd, '', '')
    # the vowel's wav file is no longer needed
    os.remove(os.path.join(SCRIPTS_HOME, vowelWavFile))
    return vm
def getWordsAndPhones(tg, phoneset, speaker, vowelSystem, mfa):
    """Build the list of words (with their phones) for one speaker from a TextGrid.

    Every interval on the speaker's word tier becomes a ``Word``; the phones
    whose temporal midpoint lies within that word become its ``Phone``
    objects (labels upper-cased).  The resulting list is then passed through
    ``addPlotnikCodes()``, ``addStyleCodes()`` (only when the TextGrid has an
    odd number of tiers) and ``addOverlaps()`` before being returned.

    Side effect: the global ``count_vowels`` is incremented once for every
    phone with a non-empty label for which ``isVowel()`` is true.
    """
    global count_vowels

    # A speaker occupies two adjacent tiers starting at 2 * (tiernum / 2);
    # with mfa the word tier comes first, otherwise the phone tier does.
    pair_start = 2 * int(speaker.tiernum / 2)
    if mfa:
        word_index = pair_start
        phone_index = pair_start + 1
    else:
        phone_index = pair_start
        word_index = pair_start + 1

    midpoints = [seg.xmin() + 0.5 * (seg.xmax() - seg.xmin())
                 for seg in tg[phone_index]]

    words = []
    # iterate along word tier for given speaker
    for interval in tg[word_index]:
        word = Word()
        word.transcription = interval.mark()
        word.xmin = interval.xmin()
        word.xmax = interval.xmax()
        word.phones = []

        # phones whose midpoint is >= word.xmin and < word.xmax, i.e. phones
        # that are at least halfway contained in this word at each margin
        first = bisect_left(midpoints, word.xmin)
        stop = bisect_left(midpoints, word.xmax)
        for seg in tg[phone_index][first:stop]:
            phone = Phone()
            phone.label = seg.mark().upper()
            phone.xmin = seg.xmin()
            phone.xmax = seg.xmax()
            word.phones.append(phone)
            # count initial number of vowels here!  (because uncertain
            # transcriptions are discarded on a by-word basis)
            if phone.label and isVowel(phone.label):
                count_vowels += 1

        words.append(word)

    # add Plotnik-style codes for the preceding and following segments for all
    # vowels
    words = addPlotnikCodes(words, phoneset, speaker, vowelSystem)

    # add style codes, if applicable (odd number of tiers in the TextGrid)
    if len(tg) % 2:
        words = addStyleCodes(words, tg)

    # add overlap coding for phones
    words = addOverlaps(words, tg, speaker)
    return words
def hasPrimaryStress(label):
    """Check whether a vowel label carries primary stress.

    Returns True if ``label`` ends in the character '1', False otherwise.
    An empty label (e.g. from an empty interval on the phone tier) yields
    False instead of raising IndexError.
    """
    return label.endswith('1')
def isVowel(label):