Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…into dev
  • Loading branch information
JoFrhwld committed Mar 22, 2024
2 parents 7c00cb5 + 121f927 commit ee56c51
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 155 deletions.
65 changes: 40 additions & 25 deletions docs/reference/CandidateTracks.qmd
Original file line number Diff line number Diff line change
@@ -1,38 +1,53 @@
# CandidateTracks { #fasttrackpy.CandidateTracks }

`CandidateTracks(self, sound, min_max_formant=4000, max_max_formant=7000, nstep=20, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`
`CandidateTracks(self, sound=None, samples=None, sampling_frequency=None, xmin=0.0, min_max_formant=4000, max_max_formant=7000, nstep=20, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`

A class for candidate tracks for a single formant

You can provide *either*

- A parselmouth `Sound` object to the `sound` argument

xor

- An array of audio samples to the `samples` argument
- The sampling frequency to the `sampling_frequency` argument
- An optional time offset to the `xmin` argument.

If a `Sound` object is passed to `sound`, any values passed to `samples`,
`sampling_frequency` and `xmin` are ignored.

## Parameters

| Name | Type | Description | Default |
|---------------------|----------|--------------------------------------------------------------------------|--------------|
| `sound` | pm.Sound | A `parselmouth.Sound` object. | _required_ |
| `min_max_formant` | float | The lowest max-formant value to try. Defaults to 4000. | `4000` |
| `max_max_formant` | float | The highest max formant to try. Defaults to 7000. | `7000` |
| `nstep` | int | The number of steps from the min to the max max formant. Defaults to 20. | `20` |
| `n_formants` | int | The number of formants to track. Defaults to 4. | `4` |
| `window_length` | float | Window length of the formant analysis. Defaults to 0.025. | `0.025` |
| `time_step` | float | Time step of the formant analysis window. Defaults to 0.002. | `0.002` |
| `pre_emphasis_from` | float | Pre-emphasis threshold. Defaults to 50. | `50` |
| `smoother` | Smoother | The smoother method to use. Defaults to `Smoother()`. | `Smoother()` |
| `loss_fun` | Loss | The loss function to use. Defaults to Loss(). | `Loss()` |
| `agg_fun` | Agg | The loss aggregation function to use. Defaults to Agg(). | `Agg()` |
| Name | Type | Description | Default |
|----------------------|------------|--------------------------------------------------------------------------|--------------|
| `sound` | pm.Sound | A `parselmouth.Sound` object. | `None` |
| `samples` | np.ndarray | A numpy array of audio samples. | `None` |
| `sampling_frequency` | float | The audio sampling frequency. | `None` |
| `xmin` | float | The time offset for the audio. Defaults to 0.0. | `0.0` |
| `min_max_formant` | float | The lowest max-formant value to try. Defaults to 4000. | `4000` |
| `max_max_formant` | float | The highest max formant to try. Defaults to 7000. | `7000` |
| `nstep` | int | The number of steps from the min to the max max formant. Defaults to 20. | `20` |
| `n_formants` | int | The number of formants to track. Defaults to 4. | `4` |
| `window_length` | float | Window length of the formant analysis. Defaults to 0.025. | `0.025` |
| `time_step` | float | Time step of the formant analysis window. Defaults to 0.002. | `0.002` |
| `pre_emphasis_from` | float | Pre-emphasis threshold. Defaults to 50. | `50` |
| `smoother` | Smoother | The smoother method to use. Defaults to `Smoother()`. | `Smoother()` |
| `loss_fun` | Loss | The loss function to use. Defaults to Loss(). | `Loss()` |
| `agg_fun` | Agg | The loss aggregation function to use. Defaults to Agg(). | `Agg()` |

## Attributes

| Name | Type | Description |
|----------------|-----------------------------------|-------------------------------------------------------------------------------|
| candidates | list\[OneTrack, ...\] | A list of `OneTrack` tracks. |
| min_n_measured | int | The smallest number of successfully measured formants across all `candidates` |
| smooth_errors | np.array | The error terms for each track in `candidates` |
| winner_idx | int | The candidate track with the smallest error term |
| winner | OneTrack | The winning `OneTrack` track. |
| file_name | str | The filename of the audio file, if set. |
| interval | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set. |
| id | str | The interval id of the sound, if set. |
| group | str | The tier group name of the sound, if set. |
| Name | Type | Description |
|---------------|-----------------------------------|--------------------------------------------------|
| candidates | list\[OneTrack, ...\] | A list of `OneTrack` tracks. |
| smooth_errors | np.array | The error terms for each track in `candidates` |
| winner_idx | int | The candidate track with the smallest error term |
| winner | OneTrack | The winning `OneTrack` track. |
| file_name | str | The filename of the audio file, if set. |
| interval | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set. |
| id | str | The interval id of the sound, if set. |
| group | str | The tier group name of the sound, if set. |

## Methods

Expand Down
65 changes: 40 additions & 25 deletions docs/reference/OneTrack.qmd
Original file line number Diff line number Diff line change
@@ -1,38 +1,53 @@
# OneTrack { #fasttrackpy.OneTrack }

`OneTrack(self, maximum_formant, sound, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`
`OneTrack(self, maximum_formant, sound=None, samples=None, sampling_frequency=None, xmin=0.0, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`

A single formant track.

You can provide *either*

- A parselmouth `Sound` object to the `sound` argument

xor

- An array of audio samples to the `samples` argument
- The sampling frequency to the `sampling_frequency` argument
- An optional time offset to the `xmin` argument.

If a `Sound` object is passed to `sound`, any values passed to `samples`,
`sampling_frequency` and `xmin` are ignored.

## Parameters

| Name | Type | Description | Default |
|---------------------|----------|---------------------------------------------------------------|--------------|
| `sound` | pm.Sound | A `parselmouth.Sound` object. | _required_ |
| `maximum_formant` | float | max formant | _required_ |
| `n_formants` | int | The number of formants to track. Defaults to 4. | `4` |
| `window_length` | float | Window length of the formant analysis. Defaults to 0.025. | `0.025` |
| `time_step` | float | Time step of the formant analysis window. Defaults to 0.002. | `0.002` |
| `pre_emphasis_from` | float | Pre-emphasis threshold. Defaults to 50. | `50` |
| `smoother` | Smoother | The smoother method to use. Defaults to `Smoother()`. | `Smoother()` |
| `loss_fun` | Loss | The loss function to use. Defaults to Loss(). | `Loss()` |
| `agg_fun` | Agg | The loss aggregation function to use. Defaults to Agg(). | `Agg()` |
| Name | Type | Description | Default |
|----------------------|------------|---------------------------------------------------------------|--------------|
| `sound` | pm.Sound | A `parselmouth.Sound` object. | `None` |
| `samples` | np.ndarray | A numpy array of audio samples. | `None` |
| `sampling_frequency` | float | The audio sampling frequency. | `None` |
| `xmin` | float | The time offset for the audio. Defaults to 0.0. | `0.0` |
| `maximum_formant` | float | max formant | _required_ |
| `n_formants` | int | The number of formants to track. Defaults to 4. | `4` |
| `window_length` | float | Window length of the formant analysis. Defaults to 0.025. | `0.025` |
| `time_step` | float | Time step of the formant analysis window. Defaults to 0.002. | `0.002` |
| `pre_emphasis_from` | float | Pre-emphasis threshold. Defaults to 50. | `50` |
| `smoother` | Smoother | The smoother method to use. Defaults to `Smoother()`. | `Smoother()` |
| `loss_fun` | Loss | The loss function to use. Defaults to Loss(). | `Loss()` |
| `agg_fun` | Agg | The loss aggregation function to use. Defaults to Agg(). | `Agg()` |

## Attributes

| Name | Type | Description |
|---------------------|-----------------------------------|----------------------------------------------------------------------------------------------|
| maximum_formant | float | The max formant |
| time_domain | np.array | The time domain of the formant estimates |
| formants | np.ndarray | A (formants, time) array of values. The formants as initially estimated by praat-parselmouth |
| n_measured_formants | int | The total number of formants for which formant tracks were estimatable |
| smoothed_formants | np.ndarray | The smoothed formant values, using the method passed to `smoother`. |
| parameters | np.ndarray | The smoothing parameters. |
| smooth_error | float | The error term between formants and smoothed formants. |
| file_name | str | The filename of the audio file, if set. |
| interval | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set. |
| id | str | The interval id of the sound, if set. |
| group | str | The tier group name of the sound, if set. |
| Name | Type | Description |
|-------------------|-----------------------------------|----------------------------------------------------------------------------------------------|
| maximum_formant | float | The max formant |
| time_domain | np.array | The time domain of the formant estimates |
| formants | np.ndarray | A (formants, time) array of values. The formants as initially estimated by praat-parselmouth |
| smoothed_formants | np.ndarray | The smoothed formant values, using the method passed to `smoother`. |
| parameters | np.ndarray | The smoothing parameters. |
| smooth_error | float | The error term between formants and smoothed formants. |
| file_name | str | The filename of the audio file, if set. |
| interval | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set. |
| id | str | The interval id of the sound, if set. |
| group | str | The tier group name of the sound, if set. |

## Methods

Expand Down
66 changes: 30 additions & 36 deletions docs/usage/pythonic_use.ipynb

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions src/fasttrackpy/patterns/audio_textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
import re

from pathlib import Path
import multiprocessing
from tqdm import tqdm
from joblib import Parallel, delayed, wrap_non_picklable_objects
from joblib import Parallel, cpu_count, delayed
import warnings

try:
Expand Down Expand Up @@ -66,7 +65,6 @@ def get_target_intervals(
return intervals

@delayed
@wrap_non_picklable_objects
def get_candidates(args_dict):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand Down Expand Up @@ -151,7 +149,9 @@ def process_audio_textgrid(

arg_list = [
{
"sound": x,
"samples": x.values,
"sampling_frequency": x.sampling_frequency,
"xmin": x.xmin,
#"interval": interval,
"min_max_formant": min_max_formant,
"max_max_formant": max_max_formant,
Expand All @@ -166,8 +166,8 @@ def process_audio_textgrid(
} for x, interval in zip(sound_parts, target_intervals)
]

n_jobs = multiprocessing.cpu_count()
candidate_list = Parallel(n_jobs=n_jobs, prefer="threads")(
n_jobs = cpu_count()
candidate_list = Parallel(n_jobs=n_jobs)(
get_candidates(args_dict=arg) for arg in tqdm(arg_list)
)
for cand, interval in zip(candidate_list, target_intervals):
Expand Down
18 changes: 8 additions & 10 deletions src/fasttrackpy/patterns/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
import re
from collections import namedtuple
from pathlib import Path
import multiprocessing
from tqdm import tqdm
from functools import reduce
from operator import add
from joblib import Parallel, delayed, wrap_non_picklable_objects
from joblib import Parallel, cpu_count, delayed
import warnings


Expand Down Expand Up @@ -100,12 +99,10 @@ def get_target_intervals(
return intervals

@delayed
@wrap_non_picklable_objects
def get_candidates(args_dict, progress_bar:tqdm):
def get_candidates(args_dict):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
candidates = CandidateTracks(**args_dict)
progress_bar.update()
if candidates.winner.formants.shape[1] == 1:
warnings.warn("formant tracking error")
return candidates
Expand Down Expand Up @@ -178,7 +175,6 @@ def process_corpus(
for tiers in all_tiers
]
all_candidates = []
progress_bar = tqdm(total=reduce(add, [len(x) for x in all_intervals]))
for intervals in all_intervals:
sound = pm.Sound(str(intervals[0].wav))
sound_parts = [
Expand All @@ -189,7 +185,9 @@ def process_corpus(

arg_list = [
{
"sound": x,
"samples": x.values,
"sampling_frequency": x.sampling_frequency,
"xmin": x.xmin,
#"interval": interval,
"min_max_formant": min_max_formant,
"max_max_formant": max_max_formant,
Expand All @@ -204,9 +202,9 @@ def process_corpus(
} for x, interval in zip(sound_parts, intervals)
]

n_jobs = multiprocessing.cpu_count()
candidate_list = Parallel(n_jobs=n_jobs, prefer="threads")(
get_candidates(args_dict=arg, progress_bar=progress_bar) for arg in arg_list
n_jobs = cpu_count()
candidate_list = Parallel(n_jobs=n_jobs)(
get_candidates(args_dict=arg) for arg in tqdm(arg_list)
)

for cand, interval in zip(candidate_list, intervals):
Expand Down
13 changes: 6 additions & 7 deletions src/fasttrackpy/patterns/just_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
Loss,\
Agg

import multiprocessing
from tqdm import tqdm
from joblib import Parallel, delayed, wrap_non_picklable_objects

from joblib import Parallel, cpu_count, delayed

try:
import magic
Expand Down Expand Up @@ -120,7 +118,9 @@ def process_audio_file(

sound_to_process = sound.extract_part(from_time = xmin, to_time = xmax)
candidates = CandidateTracks(
sound=sound_to_process,
samples=sound_to_process.values,
sampling_frequency=sound_to_process.sampling_frequency,
xmin = sound_to_process.xmin,
min_max_formant=min_max_formant,
max_max_formant=max_max_formant,
nstep=nstep,
Expand All @@ -136,7 +136,6 @@ def process_audio_file(
return candidates

@delayed
@wrap_non_picklable_objects
def wrapped_audio(args_dict):
return process_audio_file(**args_dict)

Expand Down Expand Up @@ -201,9 +200,9 @@ def process_directory(
}
for x in all_audio
]
n_jobs = multiprocessing.cpu_count()
n_jobs = cpu_count()

all_candidates = Parallel(n_jobs=n_jobs, prefer="threads")(
all_candidates = Parallel(n_jobs=n_jobs)(
wrapped_audio(args_dict=arg) for arg in tqdm(arg_list)
)
for x, path in zip(all_candidates, all_audio):
Expand Down
Loading

0 comments on commit ee56c51

Please sign in to comment.