Asynchronous microphone reading (#19)

* Make MicrophoneAudioSource read samples asynchronously (still compatible with drawing). Document sources. Optimize imports * Replace file audio source reliability subclasses with composition
juanmc2005 · Dec 27, 2021 · 35f60f1 · 35f60f1
1 parent 2cb1554
commit 35f60f1
Show file tree

Hide file tree

Showing 8 changed files with 167 additions and 68 deletions.
diff --git a/src/diart/demo.py b/src/diart/demo.py
@@ -1,11 +1,10 @@
-from pathlib import Path
 import argparse
+from pathlib import Path
 
 import diart.sources as src
 from diart.pipelines import OnlineSpeakerDiarization
 from diart.sinks import OutputBuilder
 
-
 # Define script arguments
 parser = argparse.ArgumentParser()
 parser.add_argument("source", type=str, help="Path to an audio file | 'microphone'")
@@ -43,13 +42,13 @@
     args.source = Path(args.source).expanduser()
     uri = args.source.name.split(".")[0]
     output_dir = args.source.parent if args.output is None else Path(args.output)
-    # Simulate an unreliable recording protocol yielding new audio with a varying refresh rate
-    audio_source = src.ReliableFileAudioSource(
+    audio_source = src.FileAudioSource(
         file=args.source,
         uri=uri,
         sample_rate=args.sample_rate,
-        window_duration=pipeline.duration,
-        step=args.step,
+        reader=src.RegularAudioFileReader(
+            args.sample_rate, pipeline.duration, args.step
+        ),
     )
 else:
     output_dir = Path("~/").expanduser() if args.output is None else Path(args.output)
@@ -62,7 +61,6 @@
     latency=args.latency,
     output_path=output_dir / "output.rttm",
     visualization="slide",
-    # reference=output_dir / f"{uri}.rttm",
 )
 # Build pipeline from audio source and stream results to the output builder
 pipeline.from_source(audio_source, output_waveform=True).subscribe(output_builder)

diff --git a/src/diart/functional.py b/src/diart/functional.py
@@ -1,9 +1,10 @@
-import torch
+from typing import Union, Optional, List, Literal, Iterable, Tuple
+
 import numpy as np
-from pyannote.core import Annotation, Segment, SlidingWindow, SlidingWindowFeature
-from pyannote.audio.utils.signal import Binarize as PyanBinarize
+import torch
 from pyannote.audio.pipelines.utils import PipelineModel, get_model, get_devices
-from typing import Union, Optional, List, Literal, Iterable, Tuple
+from pyannote.audio.utils.signal import Binarize as PyanBinarize
+from pyannote.core import Annotation, Segment, SlidingWindow, SlidingWindowFeature
 
 from .mapping import SpeakerMap, SpeakerMapBuilder
 

diff --git a/src/diart/mapping.py b/src/diart/mapping.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
+
+from typing import Callable, Iterable, List, Optional, Text, Tuple, Union
+
 import numpy as np
-from scipy.optimize import linear_sum_assignment
 from pyannote.core.utils.distance import cdist
-from typing import Callable, Iterable, List, Optional, Text, Tuple, Union
+from scipy.optimize import linear_sum_assignment
 
 
 class MappingMatrixObjective:

diff --git a/src/diart/operators.py b/src/diart/operators.py
@@ -1,11 +1,11 @@
-import rx
-from rx import operators as ops
-from rx.core import Observable
 from dataclasses import dataclass
 from typing import Callable, Optional, List, Any
+
 import numpy as np
+import rx
 from pyannote.core import SlidingWindow, SlidingWindowFeature
-
+from rx import operators as ops
+from rx.core import Observable
 
 Operator = Callable[[Observable], Observable]
 

diff --git a/src/diart/pipelines.py b/src/diart/pipelines.py
@@ -1,11 +1,12 @@
+from typing import Optional
+
 import rx
 import rx.operators as ops
-from typing import Optional
 from pyannote.audio.pipelines.utils import PipelineModel
 
-from .sources import AudioSource
-from . import operators as my_ops
 from . import functional as fn
+from . import operators as my_ops
+from .sources import AudioSource
 
 
 class OnlineSpeakerDiarization:

diff --git a/src/diart/sinks.py b/src/diart/sinks.py
@@ -1,12 +1,13 @@
-import numpy as np
-from rx.core import Observer
-from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
-from pyannote.metrics.diarization import DiarizationErrorRate
-from pyannote.database.util import load_rttm
-from typing import Literal, Union, Text, Optional, Tuple
 from pathlib import Path
 from traceback import print_exc
+from typing import Literal, Union, Text, Optional, Tuple
+
 import matplotlib.pyplot as plt
+import numpy as np
+from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
+from pyannote.database.util import load_rttm
+from pyannote.metrics.diarization import DiarizationErrorRate
+from rx.core import Observer
 
 
 class OutputBuilder(Observer):

diff --git a/src/diart/sources.py b/src/diart/sources.py
@@ -1,104 +1,147 @@
-from rx.subject import Subject
-from pyannote.audio.core.io import Audio, AudioFile
-from pyannote.core import SlidingWindowFeature, SlidingWindow
 import random
-from typing import Tuple
 import time
+from queue import SimpleQueue
+from typing import Tuple, Text, Optional, Iterable
+
 import sounddevice as sd
 from einops import rearrange
+from pyannote.audio.core.io import Audio, AudioFile
+from pyannote.core import SlidingWindowFeature, SlidingWindow
+from rx.subject import Subject
 
 
 class AudioSource:
-    def __init__(self, uri: str, sample_rate: int):
+    """Represents a source of audio that can start streaming via the `stream` property.
+
+    Parameters
+    ----------
+    uri: Text
+        Unique identifier of the audio source.
+    sample_rate: int
+        Sample rate of the audio source.
+    """
+    def __init__(self, uri: Text, sample_rate: int):
         self.uri = uri
         self.sample_rate = sample_rate
         self.stream = Subject()
         self.resolution = 1 / sample_rate
 
     @property
     def is_regular(self) -> bool:
+        """Whether the stream is regular. Defaults to False.
+        A regular stream always yields the same amount of samples per event.
+        """
         return False
 
     @property
-    def duration(self):
+    def duration(self) -> Optional[float]:
+        """The duration of the stream if known. Defaults to None (unknown duration)"""
         return None
 
     def read(self):
+        """Start reading the source and yielding samples through the stream"""
         raise NotImplementedError
 
 
-class FileAudioSource(AudioSource):
-    def __init__(self, file: AudioFile, uri: str, sample_rate: int):
-        super().__init__(uri, sample_rate)
+class AudioFileReader:
+    """Represents a method for reading an audio file.
+
+    Parameters
+    ----------
+    sample_rate: int
+        Sample rate of the audio file.
+    """
+    def __init__(self, sample_rate: int):
         self.audio = Audio(sample_rate=sample_rate, mono=True)
-        self._duration = self.audio.get_duration(file)
-        self.file = file
+        self.resolution = 1 / sample_rate
 
     @property
-    def duration(self):
-        return self._duration
+    def sample_rate(self) -> int:
+        return self.audio.sample_rate
 
-    def to_iterable(self):
+    @property
+    def is_regular(self) -> bool:
+        """Whether the reading is regular. Defaults to False.
+        A regular reading method always yields the same amount of samples."""
+        return False
+
+    def get_duration(self, file: AudioFile) -> float:
+        return self.audio.get_duration(file)
+
+    def iterate(self, file: AudioFile) -> Iterable[SlidingWindowFeature]:
+        """Return an iterable over the file's samples"""
         raise NotImplementedError
 
-    def read(self):
-        for waveform in self.to_iterable():
-            try:
-                self.stream.on_next(waveform)
-            except Exception as e:
-                self.stream.on_error(e)
-        self.stream.on_completed()
 
+class RegularAudioFileReader(AudioFileReader):
+    """Reads a file always yielding the same number of samples with a given step.
 
-class ReliableFileAudioSource(FileAudioSource):
+    Parameters
+    ----------
+    sample_rate: int
+        Sample rate of the audio file.
+    window_duration: float
+        Duration of each chunk of samples (window) in seconds.
+    step_duration: float
+        Step duration between chunks in seconds.
+    """
     def __init__(
         self,
-        file: AudioFile,
-        uri: str,
         sample_rate: int,
         window_duration: float,
-        step: float
+        step_duration: float,
     ):
-        super().__init__(file, uri, sample_rate)
+        super().__init__(sample_rate)
         self.window_duration = window_duration
-        self.step = step
+        self.step_duration = step_duration
         self.window_samples = int(round(self.window_duration * self.sample_rate))
-        self.step_samples = int(round(self.step * self.sample_rate))
+        self.step_samples = int(round(self.step_duration * self.sample_rate))
 
     @property
     def is_regular(self) -> bool:
         return True
 
-    def to_iterable(self):
-        waveform, _ = self.audio(self.file)
+    def iterate(self, file: AudioFile) -> Iterable[SlidingWindowFeature]:
+        waveform, _ = self.audio(file)
         chunks = rearrange(
             waveform.unfold(1, self.window_samples, self.step_samples),
             "channel chunk frame -> chunk channel frame",
         ).numpy()
         for i, chunk in enumerate(chunks):
             w = SlidingWindow(
-                start=i * self.step,
+                start=i * self.step_duration,
                 duration=self.resolution,
                 step=self.resolution
             )
             yield SlidingWindowFeature(chunk.T, w)
 
 
-class UnreliableFileAudioSource(FileAudioSource):
+class IrregularAudioFileReader(AudioFileReader):
+    """Reads an audio file yielding a different number of non-overlapping samples in each event.
+    This class is useful to simulate how a system would work in unreliable reading conditions.
+
+    Parameters
+    ----------
+    sample_rate: int
+        Sample rate of the audio file.
+    refresh_rate_range: (float, float)
+        Duration range within which to determine the number of samples to yield (in seconds).
+    simulate_delay: bool
+        Whether to simulate that the samples are being read in real time before they are yielded.
+        Defaults to False (no delay).
+    """
     def __init__(
         self,
-        file: AudioFile,
-        uri: str,
         sample_rate: int,
         refresh_rate_range: Tuple[float, float],
-        simulate_delay: bool = False
+        simulate_delay: bool = False,
     ):
-        super().__init__(file, uri, sample_rate)
+        super().__init__(sample_rate)
         self.start, self.end = refresh_rate_range
         self.delay = simulate_delay
 
-    def to_iterable(self):
-        waveform, _ = self.audio(self.file)
+    def iterate(self, file: AudioFile) -> Iterable[SlidingWindowFeature]:
+        waveform, _ = self.audio(file)
         total_samples = waveform.shape[1]
         i = 0
         while i < total_samples:
@@ -111,7 +154,55 @@ def to_iterable(self):
             yield waveform[:, last_i:i]
 
 
+class FileAudioSource(AudioSource):
+    """Represents an audio source tied to a file.
+
+    Parameters
+    ----------
+    file: AudioFile
+        The file to stream.
+    uri: Text
+        Unique identifier of the audio source.
+    sample_rate: int
+        Sample rate of the audio source.
+    reader: AudioFileReader
+        Determines how the file will be read.
+    """
+    def __init__(
+        self,
+        file: AudioFile,
+        uri: Text,
+        sample_rate: int,
+        reader: AudioFileReader
+    ):
+        super().__init__(uri, sample_rate)
+        self.reader = reader
+        self._duration = self.reader.get_duration(file)
+        self.file = file
+
+    @property
+    def is_regular(self) -> bool:
+        """The regularity depends on the reader"""
+        return self.reader.is_regular
+
+    @property
+    def duration(self) -> Optional[float]:
+        """The duration of a file is known"""
+        return self._duration
+
+    def read(self):
+        """Send each chunk of samples through the stream"""
+        for waveform in self.reader.iterate(self.file):
+            try:
+                self.stream.on_next(waveform)
+            except Exception as e:
+                self.stream.on_error(e)
+        self.stream.on_completed()
+
+
 class MicrophoneAudioSource(AudioSource):
+    """Represents an audio source tied to the default microphone available"""
+
     def __init__(self, sample_rate: int):
         super().__init__("live_recording", sample_rate)
         self.block_size = 1024
@@ -120,15 +211,19 @@ def __init__(self, sample_rate: int):
             samplerate=sample_rate,
             latency=0,
             blocksize=self.block_size,
+            callback=self._read_callback
         )
+        self.queue = SimpleQueue()
+
+    def _read_callback(self, samples, *args):
+        self.queue.put_nowait(samples[:, [0]].T)
 
     def read(self):
         self.mic_stream.start()
         while self.mic_stream:
             try:
-                samples = self.mic_stream.read(self.block_size)[0]
+                self.stream.on_next(self.queue.get())
             except Exception as e:
                 self.stream.on_error(e)
                 break
-            self.stream.on_next(samples[:, [0]].T)
         self.stream.on_completed()
diff --git a/src/diart/utils.py b/src/diart/utils.py
@@ -1,7 +1,8 @@
-from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
-import matplotlib.pyplot as plt
 from typing import Optional
 
+import matplotlib.pyplot as plt
+from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
+
 
 def visualize_feature(duration: Optional[float] = None):
     def apply(feature: SlidingWindowFeature):