Merge branch 'dev' of https://github.com/FastTrackiverse/fasttrackpy …

…into dev
FastTrackiverse · Mar 22, 2024 · ee56c51 · ee56c51
2 parents 7c00cb5 + 121f927
commit ee56c51
Show file tree

Hide file tree

Showing 8 changed files with 226 additions and 155 deletions.
diff --git a/docs/reference/CandidateTracks.qmd b/docs/reference/CandidateTracks.qmd
@@ -1,38 +1,53 @@
 # CandidateTracks { #fasttrackpy.CandidateTracks }
 
-`CandidateTracks(self, sound, min_max_formant=4000, max_max_formant=7000, nstep=20, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`
+`CandidateTracks(self, sound=None, samples=None, sampling_frequency=None, xmin=0.0, min_max_formant=4000, max_max_formant=7000, nstep=20, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`
 
 A class for candidate tracks for a single formant
 
+You can provide *either*
+
+- A parselmouth `Sound` object to the `sound` argument
+
+xor
+
+- An array of audio samples to the `samples` argument
+- The sampling frequency to the `sampling_frequency` argument
+- An optional time offset to the `xmin` argument.
+
+If a `Sound` object is passed to `sound`, any values passed to `samples`,
+`sampling_frequency` and `xmin` are ignored.
+
 ## Parameters
 
-| Name                | Type     | Description                                                              | Default      |
-|---------------------|----------|--------------------------------------------------------------------------|--------------|
-| `sound`             | pm.Sound | A `parselmouth.Sound` object.                                            | _required_   |
-| `min_max_formant`   | float    | The lowest max-formant value to try. Defaults to 4000.                   | `4000`       |
-| `max_max_formant`   | float    | The highest max formant to try. Defaults to 7000.                        | `7000`       |
-| `nstep`             | int      | The number of steps from the min to the max max formant. Defaults to 20. | `20`         |
-| `n_formants`        | int      | The number of formants to track. Defaults to 4.                          | `4`          |
-| `window_length`     | float    | Window length of the formant analysis. Defaults to 0.025.                | `0.025`      |
-| `time_step`         | float    | Time step of the formant analyusis window. Defaults to 0.002.            | `0.002`      |
-| `pre_emphasis_from` | float    | Pre-emphasis threshold. Defaults to 50.                                  | `50`         |
-| `smoother`          | Smoother | The smoother method to use. Defaults to `Smoother()`.                    | `Smoother()` |
-| `loss_fun`          | Loss     | The loss function to use. Defaults to Loss().                            | `Loss()`     |
-| `agg_fun`           | Agg      | The loss aggregation function to use. Defaults to Agg().                 | `Agg()`      |
+| Name                 | Type       | Description                                                              | Default      |
+|----------------------|------------|--------------------------------------------------------------------------|--------------|
+| `sound`              | pm.Sound   | A `parselmouth.Sound` object.                                            | `None`       |
+| `samples`            | np.ndarray | A numpy array of audio samples.                                          | `None`       |
+| `sampling_frequency` | float      | The audio sampling frequency.                                            | `None`       |
+| `xmin`               | float      | The time offset for the audio. Defaults to 0.0.                          | `0.0`        |
+| `min_max_formant`    | float      | The lowest max-formant value to try. Defaults to 4000.                   | `4000`       |
+| `max_max_formant`    | float      | The highest max formant to try. Defaults to 7000.                        | `7000`       |
+| `nstep`              | int        | The number of steps from the min to the max max formant. Defaults to 20. | `20`         |
+| `n_formants`         | int        | The number of formants to track. Defaults to 4.                          | `4`          |
+| `window_length`      | float      | Window length of the formant analysis. Defaults to 0.025.                | `0.025`      |
+| `time_step`          | float      | Time step of the formant analysis window. Defaults to 0.002.             | `0.002`      |
+| `pre_emphasis_from`  | float      | Pre-emphasis threshold. Defaults to 50.                                  | `50`         |
+| `smoother`           | Smoother   | The smoother method to use. Defaults to `Smoother()`.                    | `Smoother()` |
+| `loss_fun`           | Loss       | The loss function to use. Defaults to Loss().                            | `Loss()`     |
+| `agg_fun`            | Agg        | The loss aggregation function to use. Defaults to Agg().                 | `Agg()`      |
 
 ## Attributes
 
-| Name           | Type                              | Description                                                                   |
-|----------------|-----------------------------------|-------------------------------------------------------------------------------|
-| candidates     | list\[OneTrack, ...\]             | A list of `OneTrack` tracks.                                                  |
-| min_n_measured | int                               | The smallest number of successfully measured formants across all `candidates` |
-| smooth_errors  | np.array                          | The error terms for each treack in `candidates`                               |
-| winner_idx     | int                               | The candidate track with the smallest error term                              |
-| winner         | OneTrack                          | The winning `OneTrack` track.                                                 |
-| file_name      | str                               | The filename of the audio file, if set.                                       |
-| interval       | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set.                                   |
-| id             | str                               | The interval id of the sound, if set.                                         |
-| group          | str                               | The tier group name of the sound, if set.                                     |
+| Name          | Type                              | Description                                      |
+|---------------|-----------------------------------|--------------------------------------------------|
+| candidates    | list\[OneTrack, ...\]             | A list of `OneTrack` tracks.                     |
+| smooth_errors | np.array                          | The error terms for each track in `candidates`   |
+| winner_idx    | int                               | The candidate track with the smallest error term |
+| winner        | OneTrack                          | The winning `OneTrack` track.                    |
+| file_name     | str                               | The filename of the audio file, if set.          |
+| interval      | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set.      |
+| id            | str                               | The interval id of the sound, if set.            |
+| group         | str                               | The tier group name of the sound, if set.        |
 
 ## Methods
 

diff --git a/docs/reference/OneTrack.qmd b/docs/reference/OneTrack.qmd
@@ -1,38 +1,53 @@
 # OneTrack { #fasttrackpy.OneTrack }
 
-`OneTrack(self, maximum_formant, sound, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`
+`OneTrack(self, maximum_formant, sound=None, samples=None, sampling_frequency=None, xmin=0.0, n_formants=4, window_length=0.025, time_step=0.002, pre_emphasis_from=50, smoother=Smoother(), loss_fun=Loss(), agg_fun=Agg())`
 
 A single formant track.
 
+You can provide *either*
+
+- A parselmouth `Sound` object to the `sound` argument
+
+xor
+
+- An array of audio samples to the `samples` argument
+- The sampling frequency to the `sampling_frequency` argument
+- An optional time offset to the `xmin` argument.
+
+If a `Sound` object is passed to `sound`, any values passed to `samples`,
+`sampling_frequency` and `xmin` are ignored.
+
 ## Parameters
 
-| Name                | Type     | Description                                                   | Default      |
-|---------------------|----------|---------------------------------------------------------------|--------------|
-| `sound`             | pm.Sound | A `parselmouth.Sound` object.                                 | _required_   |
-| `maximum_formant`   | float    | max formant                                                   | _required_   |
-| `n_formants`        | int      | The number of formants to track. Defaults to 4.               | `4`          |
-| `window_length`     | float    | Window length of the formant analysis. Defaults to 0.025.     | `0.025`      |
-| `time_step`         | float    | Time step of the formant analyusis window. Defaults to 0.002. | `0.002`      |
-| `pre_emphasis_from` | float    | Pre-emphasis threshold. Defaults to 50.                       | `50`         |
-| `smoother`          | Smoother | The smoother method to use. Defaults to `Smoother()`.         | `Smoother()` |
-| `loss_fun`          | Loss     | The loss function to use. Defaults to Loss().                 | `Loss()`     |
-| `agg_fun`           | Agg      | The loss aggregation function to use. Defaults to Agg().      | `Agg()`      |
+| Name                 | Type       | Description                                                   | Default      |
+|----------------------|------------|---------------------------------------------------------------|--------------|
+| `sound`              | pm.Sound   | A `parselmouth.Sound` object.                                 | `None`       |
+| `samples`            | np.ndarray | A numpy array of audio samples.                               | `None`       |
+| `sampling_frequency` | float      | The audio sampling frequency.                                 | `None`       |
+| `xmin`               | float      | The time offset for the audio. Defaults to 0.0.               | `0.0`        |
+| `maximum_formant`    | float      | max formant                                                   | _required_   |
+| `n_formants`         | int        | The number of formants to track. Defaults to 4.               | `4`          |
+| `window_length`      | float      | Window length of the formant analysis. Defaults to 0.025.     | `0.025`      |
+| `time_step`          | float      | Time step of the formant analysis window. Defaults to 0.002.  | `0.002`      |
+| `pre_emphasis_from`  | float      | Pre-emphasis threshold. Defaults to 50.                       | `50`         |
+| `smoother`           | Smoother   | The smoother method to use. Defaults to `Smoother()`.         | `Smoother()` |
+| `loss_fun`           | Loss       | The loss function to use. Defaults to Loss().                 | `Loss()`     |
+| `agg_fun`            | Agg        | The loss aggregation function to use. Defaults to Agg().      | `Agg()`      |
 
 ## Attributes
 
-| Name                | Type                              | Description                                                                                  |
-|---------------------|-----------------------------------|----------------------------------------------------------------------------------------------|
-| maximum_formant     | float                             | The max formant                                                                              |
-| time_domain         | np.array                          | The time domain of the formant estimates                                                     |
-| formants            | np.ndarray                        | A (formants, time) array of values. The formants as initially estimated by praat-parselmouth |
-| n_measured_formants | int                               | The total number of formants for which formant tracks were estimatable                       |
-| smoothed_formants   | np.ndarray                        | The smoothed formant values, using the method passed to `smoother`.                          |
-| parameters          | np.ndarray                        | The smoothing parameters.                                                                    |
-| smooth_error        | float                             | The error term between formants and smoothed formants.                                       |
-| file_name           | str                               | The filename of the audio file, if set.                                                      |
-| interval            | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set.                                                  |
-| id                  | str                               | The interval id of the sound, if set.                                                        |
-| group               | str                               | The tier group name of the sound, if set.                                                    |
+| Name              | Type                              | Description                                                                                  |
+|-------------------|-----------------------------------|----------------------------------------------------------------------------------------------|
+| maximum_formant   | float                             | The max formant                                                                              |
+| time_domain       | np.array                          | The time domain of the formant estimates                                                     |
+| formants          | np.ndarray                        | A (formants, time) array of values. The formants as initially estimated by praat-parselmouth |
+| smoothed_formants | np.ndarray                        | The smoothed formant values, using the method passed to `smoother`.                          |
+| parameters        | np.ndarray                        | The smoothing parameters.                                                                    |
+| smooth_error      | float                             | The error term between formants and smoothed formants.                                       |
+| file_name         | str                               | The filename of the audio file, if set.                                                      |
+| interval          | aligned_textgrid.SequenceInterval | The textgrid interval of the sound, if set.                                                  |
+| id                | str                               | The interval id of the sound, if set.                                                        |
+| group             | str                               | The tier group name of the sound, if set.                                                    |
 
 ## Methods
 

diff --git a/docs/usage/pythonic_use.ipynb b/docs/usage/pythonic_use.ipynb
diff --git a/src/fasttrackpy/patterns/audio_textgrid.py b/src/fasttrackpy/patterns/audio_textgrid.py
@@ -6,9 +6,8 @@
 import re
 
 from pathlib import Path
-import multiprocessing
 from tqdm import tqdm
-from joblib import Parallel, delayed, wrap_non_picklable_objects
+from joblib import Parallel, cpu_count, delayed
 import warnings
 
 try:
@@ -66,7 +65,6 @@ def get_target_intervals(
     return intervals
 
 @delayed
-@wrap_non_picklable_objects
 def get_candidates(args_dict):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
@@ -151,7 +149,9 @@ def process_audio_textgrid(
 
     arg_list = [
         {
-            "sound": x,
+            "samples": x.values,
+            "sampling_frequency": x.sampling_frequency,
+            "xmin": x.xmin,
             #"interval": interval,
             "min_max_formant": min_max_formant,
             "max_max_formant": max_max_formant,
@@ -166,8 +166,8 @@ def process_audio_textgrid(
         } for x, interval in zip(sound_parts, target_intervals)
     ]
 
-    n_jobs = multiprocessing.cpu_count()
-    candidate_list = Parallel(n_jobs=n_jobs, prefer="threads")(
+    n_jobs = cpu_count()
+    candidate_list = Parallel(n_jobs=n_jobs)(
         get_candidates(args_dict=arg) for arg in tqdm(arg_list)
         )
     for cand, interval in zip(candidate_list, target_intervals):

diff --git a/src/fasttrackpy/patterns/corpus.py b/src/fasttrackpy/patterns/corpus.py
@@ -7,11 +7,10 @@
 import re
 from collections import namedtuple
 from pathlib import Path
-import multiprocessing
 from tqdm import tqdm
 from functools import reduce
 from operator import add
-from joblib import Parallel, delayed, wrap_non_picklable_objects
+from joblib import Parallel, cpu_count, delayed
 import warnings
 
 
@@ -100,12 +99,10 @@ def get_target_intervals(
     return intervals
 
 @delayed
-@wrap_non_picklable_objects
-def get_candidates(args_dict, progress_bar:tqdm):
+def get_candidates(args_dict):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         candidates =  CandidateTracks(**args_dict)
-        progress_bar.update()
     if candidates.winner.formants.shape[1] == 1:
         warnings.warn("formant tracking error")
     return candidates
@@ -178,7 +175,6 @@ def process_corpus(
             for tiers in all_tiers
         ]
     all_candidates = []
-    progress_bar = tqdm(total=reduce(add, [len(x) for x in all_intervals]))
     for intervals in all_intervals:
         sound = pm.Sound(str(intervals[0].wav))
         sound_parts = [
@@ -189,7 +185,9 @@ def process_corpus(
 
         arg_list = [
             {
-                "sound": x,
+                "samples": x.values,
+                "sampling_frequency": x.sampling_frequency,
+                "xmin": x.xmin,
                 #"interval": interval,
                 "min_max_formant": min_max_formant,
                 "max_max_formant": max_max_formant,
@@ -204,9 +202,9 @@ def process_corpus(
             } for x, interval in zip(sound_parts, intervals)
         ]
 
-        n_jobs = multiprocessing.cpu_count()
-        candidate_list = Parallel(n_jobs=n_jobs, prefer="threads")(
-            get_candidates(args_dict=arg, progress_bar=progress_bar) for arg in arg_list
+        n_jobs = cpu_count()
+        candidate_list = Parallel(n_jobs=n_jobs)(
+            get_candidates(args_dict=arg) for arg in tqdm(arg_list)
             )
 
         for cand, interval in zip(candidate_list, intervals):

diff --git a/src/fasttrackpy/patterns/just_audio.py b/src/fasttrackpy/patterns/just_audio.py
@@ -8,10 +8,8 @@
                         Loss,\
                         Agg
 
-import multiprocessing
 from tqdm import tqdm
-from joblib import Parallel, delayed, wrap_non_picklable_objects
-
+from joblib import Parallel, cpu_count, delayed
 
 try:
     import magic
@@ -120,7 +118,9 @@ def process_audio_file(
 
     sound_to_process = sound.extract_part(from_time = xmin, to_time = xmax)
     candidates = CandidateTracks(
-        sound=sound_to_process,
+        samples=sound_to_process.values,
+        sampling_frequency=sound_to_process.sampling_frequency,
+        xmin = sound_to_process.xmin,
         min_max_formant=min_max_formant,
         max_max_formant=max_max_formant,
         nstep=nstep,
@@ -136,7 +136,6 @@ def process_audio_file(
     return candidates
 
 @delayed
-@wrap_non_picklable_objects
 def wrapped_audio(args_dict):
     return process_audio_file(**args_dict)
 
@@ -201,9 +200,9 @@ def process_directory(
             }
             for x in all_audio
     ]
-    n_jobs = multiprocessing.cpu_count()
+    n_jobs = cpu_count()
 
-    all_candidates = Parallel(n_jobs=n_jobs, prefer="threads")(
+    all_candidates = Parallel(n_jobs=n_jobs)(
         wrapped_audio(args_dict=arg) for arg in tqdm(arg_list)
         )
     for x, path in zip(all_candidates, all_audio):