Skip to content

Commit

Permalink
refactored extractFormants.py and remeasure.py to use scipy and numpy…
Browse files Browse the repository at this point in the history
… instead of rpy2. Produces identical results for PH00-1-1-JStevens
  • Loading branch information
JoFrhwld committed Oct 31, 2013
1 parent 7c2aba1 commit 0183805
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
4 changes: 2 additions & 2 deletions FAVE-extract/bin/extractFormants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ def loadCovs(inFile):
for line in open(inFile, 'rU').readlines():
vowel = line.strip().split('\t')[0]
values = np.array([float(x) for x in line.strip().split('\t')[1:]])
covs[vowel] = np.reshape(values, (4,-1))
covs[vowel] = linalg.inv(np.reshape(values, (4,-1)))

return covs

Expand Down Expand Up @@ -1540,7 +1540,7 @@ def predictF1F2(phone, selectedpoles, selectedbandwidths, means, covs):
## vector with current pole combination and associated bandwidths
x = np.array([poles[i], poles[j], math.log(bandwidths[i]), math.log(bandwidths[j])])
## calculate Mahalanobis distance between x and ANAE mean
dist = mahalanobis(x, means[vowel], linalg.inv(covs[vowel]))
dist = mahalanobis(x, means[vowel], covs[vowel])
## append poles and bandwidths to list of values
## (if F3 and bandwidth measurements exist, add to list of appended values)
if len(poles) > 2:
Expand Down
27 changes: 14 additions & 13 deletions FAVE-extract/bin/remeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,16 @@ def excludeOutliers(vowels, vowelMeans, vowelCovs):
"""
Finds outliers and excludes them.
"""
#sys.stderr.write("Excluding outlying vowels...")
sys.stderr.write("Excluding outlying vowels...")
outvowels = {}
for vowel in vowels:
ntokens = len(vowels[vowel])
if ntokens >= 10:
outlie = 4.75
outvowels[vowel] = pruneVowels(vowels, vowel, vowelMeans, vowelCovs, outlie)
if vowel in vowelCovs:
ntokens = len(vowels[vowel])
if ntokens >= 10:
outlie = 4.75
outvowels[vowel] = pruneVowels(vowels, vowel, vowelMeans, vowelCovs, outlie)
else:
outvowels[vowel] = vowels[vowel]
else:
outvowels[vowel] = vowels[vowel]
#sys.stderr.write("excluded.\n")
Expand All @@ -121,7 +124,7 @@ def pruneVowels(vowels, vowel, vowelMeans, vowelCovs, outlie):
outtokens = [ ]
for token in vowels[vowel]:
x = np.array(token)
dist = mahalanobis(x, vowelMeans[vowel], linalg.inv(vowelCovs[vowel]))
dist = mahalanobis(x, vowelMeans[vowel], vowelCovs[vowel])
if dist**2 <= outlie:
outtokens.append(token)
if len(outtokens) >= 10:
Expand All @@ -141,7 +144,7 @@ def calculateVowelMeans(vowels):
calculates [means] and [covariance matrices] for each vowel class.
It returns these as R objects in dictionaries indexed by the vowel class.
"""
#sys.stderr.write("Calculating vowel means...")
sys.stderr.write("Calculating vowel means...")
vowelMeans = {}
vowelCovs = {}
for vowel in vowels:
Expand All @@ -153,7 +156,9 @@ def calculateVowelMeans(vowels):


vowelMeans[vowel] = np.array([vF1.mean(), vF2.mean(), vB1.mean(), vB2.mean(), vDur.mean()])
vowelCovs[vowel] = np.cov(np.vstack((vF1, vF2, vB1, vB2, vDur)))
vowel_cov = np.cov(np.vstack((vF1, vF2, vB1, vB2, vDur)))
if linalg.det(vowel_cov) != 0:
vowelCovs[vowel] = linalg.inv(vowel_cov)
#sys.stderr.write("Vowel means calculated\n")
return vowelMeans, vowelCovs

Expand Down Expand Up @@ -217,13 +222,9 @@ def repredictF1F2(measurements, vowelMeans, vowelCovs, vowels):
valuesList.append([float(vm.f1), float(vm.f2), vm.f3, math.log(float(vm.b1)), math.log(float(vm.b2)), vm.b3, lDur])
distanceList.append(0)
nFormantsList.append(vm.nFormants)
elif linalg.det(vowelCovs[vowel]) == 0: ## determinant of the covariance matrix is zero
valuesList.append([float(vm.f1), float(vm.f2), vm.f3, math.log(float(vm.b1)), math.log(float(vm.b2)), vm.b3, lDur])
distanceList.append(0)
nFormantsList.append(vm.nFormants)
## "real" re-measurement
else:
dist = mahalanobis(x, vowelMeans[vowel], linalg.inv(vowelCovs[vowel]))
dist = mahalanobis(x, vowelMeans[vowel], vowelCovs[vowel])
valuesList.append(outvalues)
distanceList.append(dist)
nFormantsList.append(i + 3) ## these are the formant setting used, not the actual number of formants returned
Expand Down

1 comment on commit 0183805

@scjs
Copy link
Contributor

@scjs scjs commented on 0183805 Nov 2, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If only one token was measured for a vowel type, calculateVowelMeans() in remeasure.py emits a RuntimeWarning on line 159 and creates a vowel_cov matrix filled with NaN, which then causes a crash on line 160 when linalg.det() is called on it.

Please sign in to comment.