Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

6 processing patterns #14

Merged
merged 30 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
368f9d4
Merge pull request #13 from JoFrhwld/main
JoFrhwld Nov 13, 2023
e04febe
unsure if libmagic is always available
JoFrhwld Nov 13, 2023
7c0f879
need to include pyproject
JoFrhwld Nov 13, 2023
915c629
conditional magic import
JoFrhwld Nov 13, 2023
a13de5f
fixing conditionals
JoFrhwld Nov 13, 2023
327f613
conditional checker
JoFrhwld Nov 13, 2023
5e6059e
trying to add platform constraints
JoFrhwld Nov 13, 2023
9aaf571
we're getting there
JoFrhwld Nov 13, 2023
6cc0514
more tests
JoFrhwld Nov 13, 2023
90d40b4
filter imputation warnings
JoFrhwld Nov 13, 2023
79bb373
process audio function
JoFrhwld Nov 13, 2023
27abf1e
fix dataframe output
JoFrhwld Nov 13, 2023
f4fd72b
adjustments to output dataframes
JoFrhwld Nov 13, 2023
cb66430
write output
JoFrhwld Nov 13, 2023
8458dfc
file name and id attributes and setters
JoFrhwld Nov 13, 2023
f9aedb2
directory processing
JoFrhwld Nov 13, 2023
148b73b
tests
JoFrhwld Nov 13, 2023
1078999
warning in the wrong place
JoFrhwld Nov 13, 2023
65c6323
light refactor to save on test writing
JoFrhwld Nov 13, 2023
4ef0565
typofix
JoFrhwld Nov 13, 2023
478b096
test draft
JoFrhwld Nov 14, 2023
3d852fa
caching and methods for dfs
JoFrhwld Nov 14, 2023
c61e3d3
tests for candidates.to_df
JoFrhwld Nov 14, 2023
a927eb9
tests for df tracks
JoFrhwld Nov 14, 2023
dbeae27
write data tests
JoFrhwld Nov 14, 2023
4c28bae
hinting
JoFrhwld Nov 14, 2023
4e282ca
outputs test
JoFrhwld Nov 14, 2023
d77d2bd
output testing
JoFrhwld Nov 14, 2023
0963eae
audio tests
JoFrhwld Nov 14, 2023
9dc1971
typo left out caching
JoFrhwld Nov 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ scikit-learn = "^1.3.2"
polars = "^0.19.13"
pytest-cov = "^4.1.0"
pytest = "^7.4.3"
python-magic = {version = "^0.4.27", markers = "sys_platform != 'win32'"}
python-magic-bin = {version = "^0.4.14", markers = "sys_platform == 'win32'"}

[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
Expand All @@ -42,6 +44,7 @@ build-backend = "poetry.core.masonry.api"
addopts = [
"--import-mode=importlib",
"--cov-config=tests/.coveragerc",
"--cov-report=xml",
"--cov"
]
filterwarnings =[
Expand Down
140 changes: 140 additions & 0 deletions src/fasttrackpy/patterns/just_audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import warnings
from pathlib import Path
from typing import Union
from collections.abc import Callable
import parselmouth as pm
from fasttrackpy import CandidateTracks,\
Smoother,\
Loss,\
Agg

# Prefer libmagic (via python-magic) for audio detection; fall back to the
# standard-library `sndhdr` module when it cannot be loaded.
try:
    import magic
    no_magic = False
except (ImportError, OSError):
    # Only import/library-loading failures should trigger the fallback; a bare
    # `except:` would also swallow KeyboardInterrupt/SystemExit and hide
    # unrelated bugs. OSError is kept in case the shared library fails to load.
    warnings.warn("libmagic not found. "\
                  "Some audio file types won't be discovered by fasttrack. "\
                  "(mp3, ogg, ...)")
    # NOTE(review): `sndhdr` is deprecated since Python 3.11 and removed in
    # 3.13, so this fallback needs a replacement on newer interpreters.
    import sndhdr
    from sndhdr import SndHeaders
    no_magic = True

def create_audio_checker(no_magic:bool = no_magic) -> Callable:
    """Build the function used to decide whether a file is an audio file.

    Args:
        no_magic (bool): `True` when libmagic is *not* available, in which
            case the `sndhdr` based checker is returned. Otherwise the
            libmagic based checker is returned.

    Returns:
        (Callable): A checker taking a path string and returning a bool
    """

    def magic_checker(path: str)->bool:
        """Report whether libmagic gives the file an audio mime type

        Args:
            path (str): Path to the file in question

        Returns:
            (bool): `True` if "audio" appears in the detected mime type
        """
        return "audio" in magic.from_file(path, mime=True)

    def sndhdr_checker(path: str)->bool:
        """Report whether `sndhdr` recognises the file as a sound file

        Args:
            path (str): Path to the file

        Returns:
            (bool): `True` if `sndhdr.what` returned a `SndHeaders` value
        """
        return isinstance(sndhdr.what(path), SndHeaders)

    return sndhdr_checker if no_magic else magic_checker

# Module-level audio checker, chosen once at import time: the libmagic based
# checker when `no_magic` is False, otherwise the `sndhdr` based one.
is_audio = create_audio_checker(no_magic=no_magic)

def process_audio_file(
        path: Union[str, Path],
        xmin:float = 0,
        xmax: float = None,
        min_max_formant:float = 4000,
        max_max_formant:float = 7000,
        nstep:int = 20,
        n_formants: int = 4,
        window_length: float = 0.05,
        time_step: float = 0.002,
        pre_emphasis_from: float = 50,
        smoother: Smoother = Smoother(),
        loss_fun: Loss = Loss(),
        agg_fun: Agg = Agg()
    )->CandidateTracks:
    """Build candidate tracks for a single audio file.

    Args:
        path (Union[str, Path]): Path to the audio file.
        xmin (float): Start time of the part of the sound to process.
        xmax (float): End time of the part of the sound to process. Any
            falsy value (`None` or 0) means the sound's own `xmax`.
        min_max_formant, max_max_formant, nstep, n_formants, window_length,
        time_step, pre_emphasis_from, smoother, loss_fun, agg_fun:
            Passed unchanged to `CandidateTracks`.

    Returns:
        (CandidateTracks): The candidates, with `file_name` set to the
            final component of `path`.

    Raises:
        TypeError: If `is_audio` does not recognise the file as audio.
    """
    audio_path = str(path)
    if not is_audio(audio_path):
        raise TypeError(f"The file at {audio_path} is not an audio file")

    sound = pm.Sound(audio_path)
    # No usable end time was given: process up to the end of the sound.
    end_time = xmax if xmax else sound.xmax

    tracking_options = dict(
        min_max_formant=min_max_formant,
        max_max_formant=max_max_formant,
        nstep=nstep,
        n_formants=n_formants,
        window_length=window_length,
        time_step=time_step,
        pre_emphasis_from=pre_emphasis_from,
        smoother=smoother,
        loss_fun=loss_fun,
        agg_fun=agg_fun,
    )
    candidates = CandidateTracks(
        sound=sound.extract_part(from_time=xmin, to_time=end_time),
        **tracking_options
    )
    candidates.file_name = Path(audio_path).name
    return candidates

def process_directory(
        path: Union[str, Path],
        min_max_formant:float = 4000,
        max_max_formant:float = 7000,
        nstep:int = 20,
        n_formants: int = 4,
        window_length: float = 0.05,
        time_step: float = 0.002,
        pre_emphasis_from: float = 50,
        smoother: Smoother = Smoother(),
        loss_fun: Loss = Loss(),
        agg_fun: Agg = Agg()
    )->list[CandidateTracks]:
    """Build candidate tracks for every audio file directly inside a directory.

    Only the top level of the directory is scanned (`glob("*")`).

    Args:
        path (Union[str, Path]): Directory to scan for audio files.
        min_max_formant, max_max_formant, nstep, n_formants, window_length,
        time_step, pre_emphasis_from, smoother, loss_fun, agg_fun:
            Passed unchanged to `process_audio_file` for every file.

    Returns:
        (list[CandidateTracks]): One entry per audio file, in sorted
            path order.
    """
    directory = Path(path)

    # Skip non-files: the `sndhdr` based checker opens the path it is given,
    # which fails on sub-directories. Sorting makes the result order
    # independent of the file system's listing order.
    audio_files = sorted(
        entry for entry in directory.glob("*")
        if entry.is_file() and is_audio(str(entry))
    )

    # `process_audio_file` already sets `file_name` on each result, so no
    # further bookkeeping is needed here.
    return [
        process_audio_file(
            path=audio_file,
            min_max_formant=min_max_formant,
            max_max_formant=max_max_formant,
            nstep=nstep,
            n_formants=n_formants,
            window_length=window_length,
            time_step=time_step,
            pre_emphasis_from=pre_emphasis_from,
            smoother=smoother,
            loss_fun=loss_fun,
            agg_fun=agg_fun
        )
        for audio_file in audio_files
    ]

80 changes: 76 additions & 4 deletions src/fasttrackpy/processors/outputs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
import numpy as np
import polars as pl
from pathlib import Path

def to_dataframe(self):
def add_metadata(self, out_df):
    """Add `file_name` and `id` columns to a data frame when they are set.

    Args:
        self: Object providing `file_name` and `id` attributes.
        out_df: Data frame to extend.

    Returns:
        The data frame, with a literal column added for each of the two
        attributes that has a truthy value.
    """
    for column in ("file_name", "id"):
        value = getattr(self, column)
        if value:
            out_df = out_df.with_columns(**{column: pl.lit(value)})
    return out_df

def formant_to_dataframe(self):
"""Return data as a data frame

Returns:
Expand All @@ -15,12 +28,12 @@ def to_dataframe(self):
]

orig_df = pl.DataFrame(
data = self.formants.T,
data = self.formants[0:self.n_measured_formants].T,
schema=orig_names
)

smooth_df = pl.DataFrame(
data = self.smoothed_formants.T,
data = self.smoothed_formants[0:self.n_measured_formants].T,
schema=smooth_names
)

Expand All @@ -33,4 +46,63 @@ def to_dataframe(self):
smooth_method = pl.lit(self.smoother.smooth_fun.__name__)
)

return out_df
out_df = add_metadata(self, out_df)

return out_df

def param_to_dataframe(self):
    """Return the parameters as a data frame

    The transposed `self.parameters` array is used as the data, with one
    column `F1`, `F2`, ... per row of `self.parameters`. The `file_name`
    and `id` metadata columns are added when those attributes are set.

    Returns:
        (pl.DataFrame): A data frame
    """
    n_columns = self.parameters.shape[0]
    column_names = [f"F{number}" for number in range(1, n_columns + 1)]

    param_df = pl.DataFrame(data=self.parameters.T, schema=column_names)
    return add_metadata(self, param_df)

def get_big_df(self, output):
    """Stack the data frames of every candidate into one data frame.

    Args:
        self: Object whose `candidates` each provide `to_df(output=...)`.
        output: Forwarded to each candidate's `to_df`.

    Returns:
        The diagonal concatenation of all candidate data frames, each
        tagged with a 1-based `candidate` column.
    """
    numbered_frames = []
    for number, track in enumerate(self.candidates, start=1):
        frame = track.to_df(output=output)
        numbered_frames.append(frame.with_columns(candidate=number))

    return pl.concat(numbered_frames, how="diagonal")

def write_data(
        candidates,
        file: Path = None,
        destination: Path = None,
        which: str = "winner",
        output: str = "formants"
):
    """Write the data frame of `candidates` to a csv file.

    The output location is chosen in this order: `file` if given;
    otherwise `destination` joined with the candidates' `file_name`
    (suffix replaced by `.csv`); otherwise `destination/output.csv`.

    Args:
        candidates: Object providing `to_df(which=..., output=...)` and a
            `file_name` attribute.
        file (Path): Exact file to write to.
        destination (Path): Directory to write into. A `str` is accepted.
        which (str): Forwarded to `candidates.to_df`.
        output (str): Forwarded to `candidates.to_df`.

    Raises:
        ValueError: If neither `file` nor `destination` is set.
    """
    df = candidates.to_df(which = which, output = output)

    if file:
        target = file
    elif destination:
        # Accept plain strings as well as Path objects.
        destination = Path(destination)
        if candidates.file_name:
            # Use the same attribute that was just checked, rather than
            # reaching into `candidates.winner`.
            target = destination.joinpath(
                candidates.file_name
            ).with_suffix(".csv")
        else:
            target = destination.joinpath("output.csv")
    else:
        raise ValueError("Either 'file' or 'destination' needs to be set")

    df.write_csv(file = target)
Loading