FastTrackiverse · JoFrhwld · Nov 14, 2023 · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,8 @@ scikit-learn = "^1.3.2"
 polars = "^0.19.13"
 pytest-cov = "^4.1.0"
 pytest = "^7.4.3"
+python-magic = {version = "^0.4.27", markers = "sys_platform != 'win32'"}
+python-magic-bin = {version = "^0.4.14", markers = "sys_platform == 'win32'"}
 
 [tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"
@@ -42,6 +44,7 @@ build-backend = "poetry.core.masonry.api"
 addopts = [
     "--import-mode=importlib",
     "--cov-config=tests/.coveragerc",
+    "--cov-report=xml",
     "--cov"
 ]
 filterwarnings =[

diff --git a/src/fasttrackpy/patterns/just_audio.py b/src/fasttrackpy/patterns/just_audio.py
@@ -0,0 +1,140 @@
+import warnings
+from pathlib import Path
+from typing import Union
+from collections.abc import Callable
+import parselmouth as pm
+from fasttrackpy import CandidateTracks,\
+                        Smoother,\
+                        Loss,\
+                        Agg
+
+try:
+    import magic
+    no_magic = False
+except ImportError:  # python-magic raises ImportError when libmagic is absent
+    warnings.warn("libmagic not found. "
+                  "Some audio file types won't be discovered by fasttrack. "
+                  "(mp3, ogg, ...)")
+    import sndhdr  # stdlib fallback used by the audio checker below
+    from sndhdr import SndHeaders
+    no_magic = True
+
+def create_audio_checker(no_magic:bool = no_magic) -> Callable:
+    """Build the predicate used to decide whether a file is audio.
+
+    Two predicates are defined here, one backed by libmagic and one
+    backed by the standard-library `sndhdr` module; the flag selects
+    which of them is handed back.
+
+    Args:
+        no_magic (bool): True when libmagic is unavailable, which
+            selects the `sndhdr` predicate
+
+    Returns:
+        (Callable): A function taking a path string and returning a bool
+    """
+
+    def sndhdr_checker(path: str)->bool:
+        """Decide from the file header, via `sndhdr`, if a file is audio
+
+        Args:
+            path (str): Path to the file
+
+        Returns:
+            (bool): True when `sndhdr.what` returns a `SndHeaders` record
+        """
+        return isinstance(sndhdr.what(path), SndHeaders)
+
+    def magic_checker(path: str)->bool:
+        """Decide from the libmagic MIME type if a file is audio
+
+        Args:
+            path (str): Path to the file in question
+
+        Returns:
+            (bool): True when the MIME type contains "audio"
+        """
+        mime_type = magic.from_file(path, mime=True)
+        return "audio" in mime_type
+
+    return sndhdr_checker if no_magic else magic_checker
+
+is_audio = create_audio_checker(no_magic=no_magic)  # bound once at import: sndhdr-based if no_magic, else libmagic-based
+
+def process_audio_file(
+        path: Union[str, Path], xmin:float = 0, xmax: float = None,
+        min_max_formant:float = 4000, max_max_formant:float = 7000,
+        nstep:int = 20, n_formants: int = 4,
+        window_length: float = 0.05, time_step: float = 0.002,
+        pre_emphasis_from: float = 50,
+        smoother: Smoother = Smoother(),
+        loss_fun: Loss = Loss(),
+        agg_fun: Agg = Agg()
+)->CandidateTracks:
+    """Build the candidate tracks for the [xmin, xmax] part of one file.
+
+    Args:
+        path (Union[str, Path]): Path to the audio file.
+        xmin (float), xmax (float): Time range handed to `extract_part`;
+            a falsy `xmax` (None or 0) means the end of the sound.
+        min_max_formant ... agg_fun: Forwarded to `CandidateTracks`.
+
+    Returns:
+        (CandidateTracks): Candidates with `file_name` set from `path`.
+
+    Raises:
+        TypeError: If `is_audio` does not accept the file.
+    """
+    audio_path = str(path)
+    if not is_audio(audio_path):
+        raise TypeError(f"The file at {audio_path} is not an audio file")
+    sound = pm.Sound(audio_path)
+    end_time = xmax if xmax else sound.xmax
+    tracks = CandidateTracks(
+        sound=sound.extract_part(from_time = xmin, to_time = end_time),
+        min_max_formant=min_max_formant, max_max_formant=max_max_formant,
+        nstep=nstep, n_formants=n_formants, window_length=window_length,
+        time_step=time_step, pre_emphasis_from=pre_emphasis_from,
+        smoother=smoother, loss_fun=loss_fun, agg_fun=agg_fun
+    )
+    tracks.file_name = Path(audio_path).name
+    return tracks
+
+def process_directory(
+        path: Union[str, Path],
+        min_max_formant:float = 4000, max_max_formant:float = 7000,
+        nstep:int = 20, n_formants: int = 4,
+        window_length: float = 0.05, time_step: float = 0.002,
+        pre_emphasis_from: float = 50,
+        smoother: Smoother = Smoother(),
+        loss_fun: Loss = Loss(),
+        agg_fun: Agg = Agg()
+)->list[CandidateTracks]:
+    """Process every audio file found directly inside a directory.
+
+    Args:
+        path (Union[str, Path]): Directory to scan (not recursive).
+        min_max_formant ... agg_fun: Forwarded unchanged to
+            `process_audio_file` for every file.
+
+    Returns:
+        (list[CandidateTracks]): One entry per file accepted by `is_audio`.
+    """
+    directory = Path(path)
+    # Sniff regular files only: the sndhdr fallback opens the path and
+    # would raise on a sub-directory.
+    audio_files = [
+        entry for entry in directory.glob("*")
+        if entry.is_file() and is_audio(str(entry))
+    ]
+    # `process_audio_file` already stores each file's name on its result.
+    return [
+        process_audio_file(
+            path=entry,
+            min_max_formant=min_max_formant, max_max_formant=max_max_formant,
+            nstep=nstep, n_formants=n_formants, window_length=window_length,
+            time_step=time_step, pre_emphasis_from=pre_emphasis_from,
+            smoother=smoother, loss_fun=loss_fun, agg_fun=agg_fun
+        ) for entry in audio_files
+    ]
+
diff --git a/src/fasttrackpy/processors/outputs.py b/src/fasttrackpy/processors/outputs.py
@@ -1,7 +1,20 @@
 import numpy as np
 import polars as pl
+from pathlib import Path
 
-def to_dataframe(self):
+def add_metadata(self, out_df):
+    """Append `file_name` and `id` columns for whichever of them is set.
+
+    Each truthy attribute of `self` becomes a constant (literal) column
+    of the same name; `out_df` is returned unchanged when neither is set.
+    """
+    for field in ("file_name", "id"):
+        value = getattr(self, field)
+        if value:
+            out_df = out_df.with_columns(pl.lit(value).alias(field))
+    return out_df
+
+def formant_to_dataframe(self):
     """Return data as a data frame
 
     Returns:
@@ -15,12 +28,12 @@ def to_dataframe(self):
     ]
 
     orig_df = pl.DataFrame(
-        data = self.formants.T,
+        data = self.formants[0:self.n_measured_formants].T,
         schema=orig_names
     )
 
     smooth_df = pl.DataFrame(
-        data = self.smoothed_formants.T,
+        data = self.smoothed_formants[0:self.n_measured_formants].T,
         schema=smooth_names
     )
 
@@ -33,4 +46,63 @@ def to_dataframe(self):
         smooth_method = pl.lit(self.smoother.smooth_fun.__name__)
     )
 
-    return out_df
+    out_df = add_metadata(self, out_df)       
+
+    return out_df
+
+def param_to_dataframe(self):
+    """Return `self.parameters` as a data frame
+
+    Columns are named `F1`, `F2`, ..., one per row of `self.parameters`;
+    metadata columns are then appended by `add_metadata`.
+
+    Returns:
+        (pl.DataFrame): A data frame
+    """
+
+    n_rows = self.parameters.shape[0]
+    column_names = [f"F{number}" for number in range(1, n_rows + 1)]
+    transposed = self.parameters.T
+
+    return add_metadata(
+        self,
+        pl.DataFrame(data = transposed, schema=column_names)
+    )
+
+def get_big_df(self, output):
+    """Stack the data frames of all candidates into one.
+
+    Every candidate's `to_df(output = output)` result gets a 1-based
+    `candidate` column before the frames are concatenated diagonally.
+    """
+    numbered = [
+        cand.to_df(output = output).with_columns(candidate = number)
+        for number, cand in enumerate(self.candidates, start = 1)
+    ]
+    return pl.concat(numbered, how = "diagonal")
+
+def write_data(
+        candidates,
+        file: Path = None,
+        destination: Path = None,
+        which: str = "winner", output: str = "formants"
+):
+    """Write the data frame of `candidates` to a csv file.
+
+    Args:
+        candidates: Provides `to_df(which=, output=)` and `file_name`.
+        file (Path): Exact output file; wins over `destination`.
+        destination (Path): Directory receiving `file_name` with a `.csv`
+            suffix, or `output.csv` when `file_name` is not set.
+        which (str), output (str): Passed on to `candidates.to_df`.
+
+    Raises:
+        ValueError: If neither `file` nor `destination` is set.
+    """
+    if not (file or destination):
+        raise ValueError("Either 'file' or 'destination' needs to be set")
+    if not file:
+        # Named after `candidates.file_name`; `output.csv` is the fallback.
+        base_name = candidates.file_name or "output.csv"
+        file = Path(destination).joinpath(base_name).with_suffix(".csv")
+    candidates.to_df(which = which, output = output).write_csv(file = file)