Merge branch 'release/5.1.1'

pyannote · Jan 12, 2025 · fcb0604
2 parents b1fc349 + 258d5b3
commit fcb0604
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 24 deletions.
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
@@ -2,6 +2,12 @@
 Changelog
 #########
 
+Version 5.1.1 (2025-01-12)
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ - chore: remove deprecated use of `delim_whitespace`
+ - chore: use `importlib.metadata` instead of `pkg_resources`
+
 Version 5.1.0 (2024-04-05)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/pyannote/database/custom.py b/pyannote/database/custom.py
@@ -49,17 +49,18 @@
 from . import protocol as protocol_module
 
 from pyannote.database.protocol.protocol import ProtocolFile
-import yaml
+
+
 import warnings
 from numbers import Number
 from typing import Text, Dict, Callable, Any, Union
 import functools
 
-from .protocol.protocol import Subset, Scope
+from .protocol.protocol import Subset
 from .protocol.segmentation import SegmentationProtocol
 from .protocol.speaker_diarization import SpeakerDiarizationProtocol
 
-import pkg_resources
+from importlib.metadata import entry_points
 
 from .util import get_annotated
 
@@ -68,7 +69,7 @@
 # All "Loader" classes types (eg RTTMLoader, UEMLoader, ...) retrieved from the entry point.
 LOADERS = {
     ep.name: ep
-    for ep in pkg_resources.iter_entry_points(group="pyannote.database.loader")
+    for ep in entry_points(group="pyannote.database.loader")
 }
 
 

diff --git a/pyannote/database/loader.py b/pyannote/database/loader.py
@@ -90,7 +90,7 @@ def load_trial(file_trial):
     """
 
     trials = pd.read_table(
-        file_trial, delim_whitespace=True, names=["reference", "uri1", "uri2"]
+        file_trial, sep="\s+", names=["reference", "uri1", "uri2"]
     )
 
     for _, reference, uri1, uri2 in trials.itertuples():
@@ -289,7 +289,7 @@ def __init__(self, ctm: Path):
             "confidence": float,
         }
         self.data_ = pd.read_csv(
-            ctm, names=names, dtype=dtype, delim_whitespace=True
+            ctm, names=names, dtype=dtype, sep="\s+"
         ).groupby("uri")
 
     def __call__(self, current_file: ProtocolFile) -> Union["Doc", None]:
@@ -354,7 +354,7 @@ def __init__(self, mapping: Path):
             "uri": str,
         }
         self.data_ = pd.read_csv(
-            mapping, names=names, dtype=dtype, delim_whitespace=True
+            mapping, names=names, dtype=dtype, sep="\s+"
         )
 
         # get colum 'value' dtype, allowing us to acces it during subset

diff --git a/pyannote/database/util.py b/pyannote/database/util.py
@@ -78,17 +78,17 @@ def get_unique_identifier(item):
 def get_annotated(current_file):
     """Get part of the file that is annotated.
 
-        Parameters
-        ----------
-        current_file : `dict`
-            File generated by a `pyannote.database` protocol.
-
-        Returns
-        -------
-        annotated : `pyannote.core.Timeline`
-            Part of the file that is annotated. Defaults to
-            `current_file["annotated"]`. When it does not exist, try to use the
-            full audio extent. When that fails, use "annotation" extent.
+    Parameters
+    ----------
+    current_file : `dict`
+        File generated by a `pyannote.database` protocol.
+
+    Returns
+    -------
+    annotated : `pyannote.core.Timeline`
+        Part of the file that is annotated. Defaults to
+        `current_file["annotated"]`. When it does not exist, try to use the
+        full audio extent. When that fails, use "annotation" extent.
     """
 
     # if protocol provides 'annotated' key, use it
@@ -179,7 +179,7 @@ def load_rttm(file_rttm, keep_type="SPEAKER"):
         file_rttm,
         names=names,
         dtype=dtype,
-        delim_whitespace=True,
+        sep="\s+",
         keep_default_na=True,
     )
 
@@ -213,7 +213,7 @@ def load_stm(file_stm):
     dtype = {"uri": str, "speaker": str, "start": float, "end": float}
     data = pd.read_csv(
         file_stm,
-        delim_whitespace=True,
+        sep="\s+",
         usecols=[0, 2, 3, 4],
         dtype=dtype,
         names=list(dtype),
@@ -250,7 +250,7 @@ def load_mdtm(file_mdtm):
         file_mdtm,
         names=names,
         dtype=dtype,
-        delim_whitespace=True,
+        sep="\s+",
         keep_default_na=False,
     )
 
@@ -281,7 +281,7 @@ def load_uem(file_uem):
 
     names = ["uri", "NA1", "start", "end"]
     dtype = {"uri": str, "start": float, "end": float}
-    data = pd.read_csv(file_uem, names=names, dtype=dtype, delim_whitespace=True)
+    data = pd.read_csv(file_uem, names=names, dtype=dtype, sep="\s+")
 
     timelines = dict()
     for uri, parts in data.groupby("uri"):
@@ -306,7 +306,7 @@ def load_lab(path, uri: str = None) -> Annotation:
 
     names = ["start", "end", "label"]
     dtype = {"start": float, "end": float, "label": str}
-    data = pd.read_csv(path, names=names, dtype=dtype, delim_whitespace=True)
+    data = pd.read_csv(path, names=names, dtype=dtype, sep="\s+")
 
     annotation = Annotation(uri=uri)
     for i, turn in data.iterrows():
@@ -388,7 +388,6 @@ def __init__(self, mapping, keep_missing=False):
         self.keep_missing = keep_missing
 
     def __call__(self, current_file):
-
         if not self.keep_missing:
             missing = set(current_file["annotation"].labels()) - set(self.mapping)
             if missing and not self.keep_missing: