Skip to content

Commit

Permalink
Merge pull request #19 from Forced-Alignment-and-Vowel-Extraction/use…
Browse files Browse the repository at this point in the history
…-bandwidths

Use bandwidths for vowel optimization
  • Loading branch information
JoFrhwld authored Jun 26, 2024
2 parents 2cdae37 + 1987739 commit 9d87100
Show file tree
Hide file tree
Showing 17 changed files with 1,148 additions and 616 deletions.
65 changes: 42 additions & 23 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ website:
style: floating
align: left
contents:
- index.qmd
- section: Home
contents:
- index.qmd
- auto: usage
- section: Dev Notes
contents:
- auto: dev
# - usage/getting_started.qmd
# - usage/all_arguments.qmd
# - usage/pythonic_use.ipynb
Expand Down Expand Up @@ -82,34 +87,48 @@ quartodoc:
- fave_corpus
- fave_subcorpora
- title: Vowel Measurements
desc: Vowel Measurements
#kind: page
options:
dynamic: true
contents:
- name: VowelMeasurement
members:
- to_tracks_df
- to_param_df
- to_point_df
contents:
- name: measurements.vowel_measurement
children: linked
- VowelMeasurement
- VowelClass
- VowelClassCollection
- SpeakerCollection

- name: VowelClass
members:
- to_tracks_df
- to_param_df
- to_point_df
# - title: Vowel Measurements
# desc: Vowel Measurements
# options:
# dynamic: true
# contents:
# - name: VowelMeasurement
# members:
# - to_tracks_df
# - to_param_df
# - to_point_df

- name: VowelClassCollection
members:
- to_tracks_df
- to_param_df
- to_point_df
# - name: VowelClass
# members:
# - to_tracks_df
# - to_param_df
# - to_point_df

- name: SpeakerCollection
members:
- to_tracks_df
- to_param_df
- to_point_df
# - name: VowelClassCollection
# members:
# - to_tracks_df
# - to_param_df
# - to_point_df

# - name: SpeakerCollection
# members:
# - to_tracks_df
# - to_param_df
# - to_point_df
- title: Calculations
contents:
- measurements.calcs.mahalanobis
- title: Optimization
desc: Functions for optimizing formant measurements
contents:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
53 changes: 53 additions & 0 deletions docs/dev/variable_names.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
title: Variable Naming Conventions
---

## Property naming descriptors

### Source Descriptors

- `cand`: candidate tracks.
- Returns
- list of [](`fasttrackpy.OneTrack`)s
- a [](`numpy.array`) of concatenated results from [](`fasttrackpy.OneTrack`)s
- `winner`: The winner track
- Returns
- A single [](`fasttrackpy.OneTrack`)
- a [](`numpy.array`) of concatenated results from winner [](`fasttrackpy.OneTrack`)s

### Value Descriptors

- `param`: The DCT parameters
- `maxformant`: The maximum formant
- `error`: The smoothing error term
- `bparam`: The formant bandwidth parameters

### Summary Descriptors

- `mean`: A mean
- `cov`: A covariance matrix
- `icov`: An inverse covariance matrix


### Derived Values Descriptors

- `mahal`: Mahalanobis distance
- `logprob`: The log probability

### Scope Descriptors

- `vm`: Vowel Measurement
- `vclass`: Vowel Class
- `speaker`: Speaker
- `corpus`: Corpus

### Scope Subdivision Descriptors

- `global`: Global
- `byvclass`: By VowelClass

## Property Naming Structure

- `source`_`value`_`derived`_`scope`_`subdivision`
- `source`_`value`_`summary`
- `source`_`value`
3 changes: 3 additions & 0 deletions docs/usage/index.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
title: Usage
---
469 changes: 238 additions & 231 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repository = "https://github.com/Forced-Alignment-and-Vowel-Extraction/new-fave"
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
aligned-textgrid = "^0.6.7"
fasttrackpy = "^0.4.7"
fasttrackpy = "^0.4.8"
numpy = "^1.26.4"
tqdm = "^4.66.2"
fave-recode = "^0.3.0"
Expand All @@ -26,6 +26,7 @@ python-magic = {version = "^0.4.27", markers = "sys_platform != 'win32'"}
python-magic-bin = {version = "^0.4.14", markers = "sys_platform == 'win32'"}
scipy = "^1.13.1"
cloudpickle = "^3.0.0"
nptyping = "^2.5.0"


[tool.poetry.group.docs.dependencies]
Expand Down
2 changes: 2 additions & 0 deletions src/new_fave/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from new_fave.patterns.fave_subcorpora import fave_subcorpora
from new_fave.patterns.writers import write_data, pickle_speakers, unpickle_speakers

from importlib.metadata import version

__all__ = [
"VowelMeasurement",
"VowelClass",
Expand Down
129 changes: 129 additions & 0 deletions src/new_fave/measurements/calcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import numpy as np
import nptyping as npt
from nptyping import NDArray, Shape, Float
from typing import Any
import scipy.stats as stats
import warnings
import functools

def mahalanobis(
        params: NDArray[Shape['Dim, Cand'], Float],
        param_means: NDArray[Shape['Dim, 1'], Float],
        inv_cov: NDArray[Shape['Dim, Dim'], Float]
    ) -> NDArray[Shape["Cand"], Float]:
    """
    Calculates the squared Mahalanobis distance of each candidate.

    For every column `x` of `params` this evaluates
    `(x - mu)^T @ inv_cov @ (x - mu)`. No square root is taken, so the
    values returned are squared distances.

    Args:
        params (NDArray[Shape['Dim, Cand'], Float]):
            The parameters for which the Mahalanobis distance is to be
            calculated, one candidate per column.
        param_means (NDArray[Shape['Dim, 1'], Float]):
            The mean of the distribution.
        inv_cov (NDArray[Shape['Dim, Dim'], Float]):
            The inverse of the covariance matrix of the distribution.

    Returns:
        (NDArray[Shape["Cand"], Float]):
            The squared Mahalanobis distance of each candidate from
            the distribution.
    """
    x_mu = params - param_means
    left = np.dot(x_mu.T, inv_cov)
    # Only the diagonal of `left @ x_mu` is needed. Contracting row c of
    # `left` with column c of `x_mu` yields exactly that diagonal without
    # materialising the full Cand x Cand product.
    return np.einsum("cd,dc->c", left, x_mu)

def mahal_log_prob(
        mahals: NDArray[Shape["Cand"], Float],
        params: NDArray[Shape["*, *, ..."], Float]
    ) -> NDArray[Shape["Cand"], Float]:
    """
    Converts Mahalanobis distances to log probabilities.

    The log survival function of a chi-squared distribution is evaluated
    at each distance. The degrees of freedom are the product of every
    dimension of `params` except the last.

    Args:
        mahals (NDArray[Shape["Cand"], Float]):
            The Mahalanobis distances.
        params (NDArray[Shape["*, *, ..."], Float]):
            The parameters across which the Mahalanobis
            distance was calculated.

    Returns:
        (NDArray[Shape["Cand"], Float]):
            The log probability. If fewer than half of the values are
            finite, an array of zeros of the same shape is returned
            instead.
    """
    dof = np.prod(params.shape[:-1])
    log_sf = stats.chi2.logsf(mahals, df=dof)
    finite_share = np.isfinite(log_sf).mean()
    if finite_share < 0.5:
        # Mostly non-finite results: fall back to a flat log probability.
        return np.zeros(shape=log_sf.shape)
    return log_sf


def param_to_cov(
        params: NDArray[Shape["*, *, ..."], Float]
    ) -> NDArray[Shape["X, X"], Float]:
    """
    Calculates the covariance matrix of the given parameters.

    All leading dimensions of `params` are flattened into rows, keeping
    the last dimension as columns, and the result is passed to `np.cov`.

    Args:
        params (NDArray[Shape["*, *, ..."], Float]):
            The parameters for which the covariance matrix is to be
            calculated.

    Returns:
        (NDArray[Shape["X, X"], Float]):
            The covariance matrix of the parameters.
    """
    n_last = params.shape[-1]
    flattened = np.reshape(params, (-1, n_last))
    with warnings.catch_warnings():
        # Any warnings raised while estimating the covariance are
        # silenced.
        warnings.simplefilter("ignore")
        return np.cov(flattened)

def cov_to_icov(
        cov_mat: NDArray[Shape["X, X"], Float]
    ) -> NDArray[Shape["X, X"], Float]:
    """
    Calculates the inverse covariance matrix of the given covariance matrix.

    Args:
        cov_mat (NDArray[Shape["X, X"], Float]):
            The covariance matrix for which the inverse is to be
            calculated.

    Returns:
        (NDArray[Shape["X, X"], Float]):
            The inverse covariance matrix of the given covariance
            matrix. If the matrix cannot be inverted, an array of the
            same shape filled with NaN is returned.
    """
    with warnings.catch_warnings():
        # Warnings raised during the inversion are silenced.
        warnings.simplefilter("ignore")
        try:
            params_icov = np.linalg.inv(cov_mat)
        except np.linalg.LinAlgError:
            # Only inversion failures are handled here; anything else
            # (including KeyboardInterrupt) propagates to the caller.
            params_icov = np.full(np.shape(cov_mat), np.nan)

    return params_icov

def clear_cached_properties(obj: object) -> None:
    """Clear the cache of any cached property in an object

    Every class in the object's method resolution order is scanned for
    `functools.cached_property` attributes, and any value stored under
    the same name in the instance `__dict__` is removed.

    Args:
        obj (object): Any object.
    """
    for klass in obj.__class__.mro():
        for attr_name, attr in vars(klass).items():
            if isinstance(attr, functools.cached_property):
                # A missing entry means the property has not been
                # computed yet (or was already cleared).
                obj.__dict__.pop(attr_name, None)
Loading

0 comments on commit 9d87100

Please sign in to comment.