Skip to content

Commit

Permalink
Merge branch 'release/5.1.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
hbredin committed Jan 12, 2025
2 parents b1fc349 + 258d5b3 commit fcb0604
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 24 deletions.
6 changes: 6 additions & 0 deletions doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
Changelog
#########

Version 5.1.1 (2025-01-12)
~~~~~~~~~~~~~~~~~~~~~~~~~~

- chore: remove deprecated use of `delim_whitespace`
- chore: use `importlib.metadata` instead of `pkg_resources`

Version 5.1.0 (2024-04-05)
~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
9 changes: 5 additions & 4 deletions pyannote/database/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,18 @@
from . import protocol as protocol_module

from pyannote.database.protocol.protocol import ProtocolFile
import yaml


import warnings
from numbers import Number
from typing import Text, Dict, Callable, Any, Union
import functools

from .protocol.protocol import Subset, Scope
from .protocol.protocol import Subset
from .protocol.segmentation import SegmentationProtocol
from .protocol.speaker_diarization import SpeakerDiarizationProtocol

import pkg_resources
from importlib.metadata import entry_points

from .util import get_annotated

Expand All @@ -68,7 +69,7 @@
# All "Loader" class types (e.g. RTTMLoader, UEMLoader, ...) retrieved from the entry point.
LOADERS = {
ep.name: ep
for ep in pkg_resources.iter_entry_points(group="pyannote.database.loader")
for ep in entry_points(group="pyannote.database.loader")
}


Expand Down
6 changes: 3 additions & 3 deletions pyannote/database/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def load_trial(file_trial):
"""

trials = pd.read_table(
file_trial, delim_whitespace=True, names=["reference", "uri1", "uri2"]
file_trial, sep="\s+", names=["reference", "uri1", "uri2"]
)

for _, reference, uri1, uri2 in trials.itertuples():
Expand Down Expand Up @@ -289,7 +289,7 @@ def __init__(self, ctm: Path):
"confidence": float,
}
self.data_ = pd.read_csv(
ctm, names=names, dtype=dtype, delim_whitespace=True
ctm, names=names, dtype=dtype, sep="\s+"
).groupby("uri")

def __call__(self, current_file: ProtocolFile) -> Union["Doc", None]:
Expand Down Expand Up @@ -354,7 +354,7 @@ def __init__(self, mapping: Path):
"uri": str,
}
self.data_ = pd.read_csv(
mapping, names=names, dtype=dtype, delim_whitespace=True
mapping, names=names, dtype=dtype, sep="\s+"
)

# get column 'value' dtype, allowing us to access it during subset
Expand Down
33 changes: 16 additions & 17 deletions pyannote/database/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,17 @@ def get_unique_identifier(item):
def get_annotated(current_file):
"""Get part of the file that is annotated.
Parameters
----------
current_file : `dict`
File generated by a `pyannote.database` protocol.
Returns
-------
annotated : `pyannote.core.Timeline`
Part of the file that is annotated. Defaults to
`current_file["annotated"]`. When it does not exist, try to use the
full audio extent. When that fails, use "annotation" extent.
Parameters
----------
current_file : `dict`
File generated by a `pyannote.database` protocol.
Returns
-------
annotated : `pyannote.core.Timeline`
Part of the file that is annotated. Defaults to
`current_file["annotated"]`. When it does not exist, try to use the
full audio extent. When that fails, use "annotation" extent.
"""

# if protocol provides 'annotated' key, use it
Expand Down Expand Up @@ -179,7 +179,7 @@ def load_rttm(file_rttm, keep_type="SPEAKER"):
file_rttm,
names=names,
dtype=dtype,
delim_whitespace=True,
sep="\s+",
keep_default_na=True,
)

Expand Down Expand Up @@ -213,7 +213,7 @@ def load_stm(file_stm):
dtype = {"uri": str, "speaker": str, "start": float, "end": float}
data = pd.read_csv(
file_stm,
delim_whitespace=True,
sep="\s+",
usecols=[0, 2, 3, 4],
dtype=dtype,
names=list(dtype),
Expand Down Expand Up @@ -250,7 +250,7 @@ def load_mdtm(file_mdtm):
file_mdtm,
names=names,
dtype=dtype,
delim_whitespace=True,
sep="\s+",
keep_default_na=False,
)

Expand Down Expand Up @@ -281,7 +281,7 @@ def load_uem(file_uem):

names = ["uri", "NA1", "start", "end"]
dtype = {"uri": str, "start": float, "end": float}
data = pd.read_csv(file_uem, names=names, dtype=dtype, delim_whitespace=True)
data = pd.read_csv(file_uem, names=names, dtype=dtype, sep="\s+")

timelines = dict()
for uri, parts in data.groupby("uri"):
Expand All @@ -306,7 +306,7 @@ def load_lab(path, uri: str = None) -> Annotation:

names = ["start", "end", "label"]
dtype = {"start": float, "end": float, "label": str}
data = pd.read_csv(path, names=names, dtype=dtype, delim_whitespace=True)
data = pd.read_csv(path, names=names, dtype=dtype, sep="\s+")

annotation = Annotation(uri=uri)
for i, turn in data.iterrows():
Expand Down Expand Up @@ -388,7 +388,6 @@ def __init__(self, mapping, keep_missing=False):
self.keep_missing = keep_missing

def __call__(self, current_file):

if not self.keep_missing:
missing = set(current_file["annotation"].labels()) - set(self.mapping)
if missing and not self.keep_missing:
Expand Down

0 comments on commit fcb0604

Please sign in to comment.