Merge pull request #19 from Forced-Alignment-and-Vowel-Extraction/use…

…-bandwidths Use bandwidths for vowel optimization
Forced-Alignment-and-Vowel-Extraction · Jun 26, 2024 · 9d87100 · 9d87100
2 parents 2cdae37 + 1987739
commit 9d87100
Show file tree

Hide file tree

Showing 17 changed files with 1,148 additions and 616 deletions.
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -25,8 +25,13 @@ website:
         style: floating
         align: left
         contents:
-          - index.qmd
+          - section: Home
+            contents:
+              - index.qmd
           - auto: usage
+          - section: Dev Notes
+            contents:
+              - auto: dev
           # - usage/getting_started.qmd
           # - usage/all_arguments.qmd
           # - usage/pythonic_use.ipynb
@@ -82,34 +87,48 @@ quartodoc:
         - fave_corpus
         - fave_subcorpora
     - title: Vowel Measurements
-      desc: Vowel Measurements
+      #kind: page
       options:
         dynamic: true
-      contents: 
-        - name: VowelMeasurement
-          members:
-            - to_tracks_df
-            - to_param_df
-            - to_point_df
+      contents:
+        - name: measurements.vowel_measurement
+          children: linked
+        - VowelMeasurement
+        - VowelClass
+        - VowelClassCollection
+        - SpeakerCollection
 
-        - name: VowelClass
-          members:
-            - to_tracks_df
-            - to_param_df
-            - to_point_df
+    # - title: Vowel Measurements
+    #   desc: Vowel Measurements
+    #   options:
+    #     dynamic: true
+    #   contents: 
+    #     - name: VowelMeasurement
+    #       members:
+    #         - to_tracks_df
+    #         - to_param_df
+    #         - to_point_df
 
-        - name: VowelClassCollection
-          members:
-            - to_tracks_df
-            - to_param_df
-            - to_point_df
+    #     - name: VowelClass
+    #       members:
+    #         - to_tracks_df
+    #         - to_param_df
+    #         - to_point_df
 
-        - name: SpeakerCollection
-          members:
-            - to_tracks_df
-            - to_param_df
-            - to_point_df
+    #     - name: VowelClassCollection
+    #       members:
+    #         - to_tracks_df
+    #         - to_param_df
+    #         - to_point_df
 
+    #     - name: SpeakerCollection
+    #       members:
+    #         - to_tracks_df
+    #         - to_param_df
+    #         - to_point_df
+    - title: Calculations
+      contents:
+        - measurements.calcs.mahalanobis
     - title: Optimization
       desc: Functions for optimizing formant measurements
       contents:

diff --git a/docs/dev_plan/assets/ay.wav → docs/dev/assets/ay.wav b/docs/dev_plan/assets/ay.wav → docs/dev/assets/ay.wav
diff --git a/...s/corpus/josef-fruehwald_speaker.TextGrid → ...s/corpus/josef-fruehwald_speaker.TextGrid b/...s/corpus/josef-fruehwald_speaker.TextGrid → ...s/corpus/josef-fruehwald_speaker.TextGrid
diff --git a/...assets/corpus/josef-fruehwald_speaker.wav → ...assets/corpus/josef-fruehwald_speaker.wav b/...assets/corpus/josef-fruehwald_speaker.wav → ...assets/corpus/josef-fruehwald_speaker.wav
diff --git a/docs/dev_plan/index.qmd → docs/dev/index.qmd b/docs/dev_plan/index.qmd → docs/dev/index.qmd
diff --git a/docs/dev_plan/new-fave-approach.qmd → docs/dev/new-fave-approach.qmd b/docs/dev_plan/new-fave-approach.qmd → docs/dev/new-fave-approach.qmd
diff --git a/docs/dev/variable_names.qmd b/docs/dev/variable_names.qmd
@@ -0,0 +1,53 @@
+---
+title: Variable Naming Conventions
+---
+
+## Property naming descriptors
+
+### From Descriptors
+
+- `cand`: candidate tracks. 
+    - Returns 
+        - list of [](`fasttrackpy.OneTrack`)s
+        - a [](`numpy.array`) of concatenated results from [](`fasttrackpy.OneTrack`)s
+- `winner`: The winner track
+    - Returns
+        - A single [](`fasttrackpy.OneTrack`)
+        - a [](`numpy.array`) of concatenated results from winner [](`fasttrackpy.OneTrack`)s
+
+### Value Descriptors
+
+- `param`: The DCT parameters
+- `maxformant`: The maximum formant
+- `error`: The smoothing error term
+- `bparam`: The formant bandwidth parameters
+
+### Summary Descriptors
+
+- `mean`: A mean
+- `cov`: A covariance matrix
+- `icov`: An inverse covariance matrix
+
+
+### Derived Values Descriptors
+
+- `mahal`: Mahalanobis distance
+- `logprob`: The log probability
+
+### Scope Descriptors
+
+- `vm`: Vowel Measurement
+- `vclass`: Vowel Class
+- `speaker`: Speaker
+- `corpus`: Corpus
+
+### Scope Subdivision Descriptors
+
+- `global`: Global
+- `byvclass`: By VowelClass
+
+## Property Naming Structure
+
+- `source`\_`value`\_`derived`\_`scope`\_`subdivision`
+- `source`\_`value`\_`summary`
+- `source`\_`value`
diff --git a/docs/usage/index.qmd b/docs/usage/index.qmd
@@ -0,0 +1,3 @@
+---
+title: Usage
+---
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -13,7 +13,7 @@ repository = "https://github.com/Forced-Alignment-and-Vowel-Extraction/new-fave"
 [tool.poetry.dependencies]
 python = ">=3.11,<3.12"
 aligned-textgrid = "^0.6.7"
-fasttrackpy = "^0.4.7"
+fasttrackpy = "^0.4.8"
 numpy = "^1.26.4"
 tqdm = "^4.66.2"
 fave-recode = "^0.3.0"
@@ -26,6 +26,7 @@ python-magic = {version = "^0.4.27", markers = "sys_platform != 'win32'"}
 python-magic-bin = {version = "^0.4.14", markers = "sys_platform == 'win32'"}
 scipy = "^1.13.1"
 cloudpickle = "^3.0.0"
+nptyping = "^2.5.0"
 
 
 [tool.poetry.group.docs.dependencies]

diff --git a/src/new_fave/__init__.py b/src/new_fave/__init__.py
@@ -8,6 +8,8 @@
 from new_fave.patterns.fave_subcorpora import fave_subcorpora
 from new_fave.patterns.writers import write_data, pickle_speakers, unpickle_speakers
 
+from importlib.metadata import version
+
 __all__ = [
     "VowelMeasurement", 
     "VowelClass", 

diff --git a/src/new_fave/measurements/calcs.py b/src/new_fave/measurements/calcs.py
@@ -0,0 +1,129 @@
+import numpy as np
+import nptyping as npt
+from nptyping import NDArray, Shape, Float
+from typing import Any
+import scipy.stats as stats
+import warnings
+import functools
+
+def mahalanobis(
+        params:NDArray[Shape['Dim, Cand'], Float], 
+        param_means:NDArray[Shape['Dim, 1'], Float], 
+        inv_cov:NDArray[Shape['Dim, Dim'], Float]
+    )->NDArray[Shape["Cand"], Float]:
+    """
+    Calculates the Mahalanobis distance.
+
+    Each column of `params` is treated as one candidate. For every
+    candidate the quadratic form `(x - mu).T @ inv_cov @ (x - mu)` is
+    evaluated. No square root is taken, so the values returned are
+    squared distances.
+
+    Args:
+        params (NDArray[Shape['Dim, Cand'], Float]): 
+            The parameters for which the Mahalanobis distance is to be calculated.
+        param_means (NDArray[Shape['Dim, 1'], Float]): 
+            The mean of the distribution.
+        inv_cov (NDArray[Shape['Dim, Dim'], Float]): 
+            The inverse of the covariance matrix of the distribution.
+
+    Returns:
+        (NDArray[Shape["Cand"], Float]): 
+            The Mahalanobis distance of each parameter from the distribution.
+    """    
+
+    # Center every candidate column on the distribution mean.
+    x_mu = params - param_means
+    left = np.dot(x_mu.T, inv_cov)
+    # Only the per-candidate terms are needed, so contract row i of
+    # `left` with column i of `x_mu` directly rather than building the
+    # full Cand x Cand product and discarding everything off the diagonal.
+    return np.einsum("ij,ji->i", left, x_mu)
+
+def mahal_log_prob(
+        mahals: NDArray[Shape["Cand"], Float], 
+        params: NDArray[Shape["*, *, ..."], Float]
+    ) -> NDArray[Shape["Cand"], Float]:
+    """
+    Converts Mahalanobis distances to log probabilities.
+
+    The distances are passed to the chi-squared log survival function.
+    The degrees of freedom are the product of every dimension of
+    `params` except the last one. When fewer than half of the resulting
+    values are finite, an all-zero array of the same shape is returned
+    instead.
+
+    Args:
+        mahals (NDArray[Shape["Cand"], Float]): 
+            The Mahalanobis distances.
+        params (NDArray[Shape["*, *, ..."], Float]): 
+            The parameters across which the mahalanobis
+            distance was calculated
+
+    Returns:
+        (NDArray[Shape["Cand"], Float]): 
+            The log probability
+    """
+    n_dims = np.prod(params.shape[:-1])
+    log_sf = stats.chi2.logsf(mahals, df = n_dims)
+
+    finite_share = np.isfinite(log_sf).mean()
+    if finite_share < 0.5:
+        # Mostly non-finite results: fall back to zeros.
+        return np.zeros(shape = log_sf.shape)
+    return log_sf
+
+
+def param_to_cov(
+    params:NDArray[Shape["*, *, ..."], Float]
+) -> NDArray[Shape["X, X"], Float]:
+    """
+    Calculates the covariance matrix of the given parameters.
+
+    All leading dimensions of `params` are flattened into rows, and the
+    last dimension is kept as the columns passed to `np.cov`. Warnings
+    raised during the computation are suppressed.
+
+    Args:
+        params (NDArray[Shape["*, *, ..."], Float]): 
+            The parameters for which the covariance matrix is to be calculated.
+
+    Returns:
+        (NDArray[Shape["X, X"], Float]): 
+            The covariance matrix of the parameters.
+    """    
+    n_cols = params.shape[-1]
+    flattened = params.reshape(-1, n_cols)
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        return np.cov(flattened)
+
+def cov_to_icov(
+    cov_mat: NDArray[Shape["X, X"], Float]
+) -> NDArray[Shape["X, X"], Float]:
+    """
+    Calculates the inverse covariance matrix of the given covariance matrix.
+
+    If the matrix cannot be inverted, an array of NaN with the same
+    shape as `cov_mat` is returned instead of raising. Warnings raised
+    during the inversion are suppressed.
+
+    Args:
+        cov_mat (NDArray[Shape["X, X"], Float]): 
+            The covariance matrix for which the inverse is to be calculated.
+
+    Returns:
+        (NDArray[Shape["X, X"], Float]): 
+            The inverse covariance matrix of the given covariance matrix.
+    """    
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        try:
+            params_icov = np.linalg.inv(cov_mat)
+        except (np.linalg.LinAlgError, TypeError, ValueError):
+            # Catch only failures of the inversion itself, so that
+            # KeyboardInterrupt / SystemExit still propagate.
+            # np.shape + np.full also handle the 0-d covariance that
+            # np.cov returns for a single variable, where indexing
+            # shape[0] / shape[1] would raise IndexError.
+            params_icov = np.full(np.shape(cov_mat), np.nan)
+
+    return params_icov
+
+def clear_cached_properties(obj:object) -> None:
+    """Clear the cache of any property in an object
+
+    Every class in the object's MRO is scanned for attributes defined
+    with `functools.cached_property`. Any value stored under one of
+    those names in the instance `__dict__` is deleted.
+
+    Args:
+        obj (object): Any object.
+    """
+    cached_names = [
+        name
+        for klass in obj.__class__.mro()
+        for name, member in vars(klass).items()
+        if isinstance(member, functools.cached_property)
+    ]
+
+    for name in cached_names:
+        # Drop only the values that have actually been computed and stored.
+        if name in obj.__dict__:
+            del obj.__dict__[name]