Skip to content

Commit

Permalink
Asynchronous microphone reading (#19)
Browse files Browse the repository at this point in the history
* Make MicrophoneAudioSource read samples asynchronously (still compatible with drawing). Document sources. Optimize imports

* Replace file audio source reliability subclasses with composition
  • Loading branch information
juanmc2005 authored Dec 27, 2021
1 parent 2cb1554 commit 35f60f1
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 68 deletions.
12 changes: 5 additions & 7 deletions src/diart/demo.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from pathlib import Path
import argparse
from pathlib import Path

import diart.sources as src
from diart.pipelines import OnlineSpeakerDiarization
from diart.sinks import OutputBuilder


# Define script arguments
parser = argparse.ArgumentParser()
parser.add_argument("source", type=str, help="Path to an audio file | 'microphone'")
Expand Down Expand Up @@ -43,13 +42,13 @@
args.source = Path(args.source).expanduser()
uri = args.source.name.split(".")[0]
output_dir = args.source.parent if args.output is None else Path(args.output)
# Simulate an unreliable recording protocol yielding new audio with a varying refresh rate
audio_source = src.ReliableFileAudioSource(
audio_source = src.FileAudioSource(
file=args.source,
uri=uri,
sample_rate=args.sample_rate,
window_duration=pipeline.duration,
step=args.step,
reader=src.RegularAudioFileReader(
args.sample_rate, pipeline.duration, args.step
),
)
else:
output_dir = Path("~/").expanduser() if args.output is None else Path(args.output)
Expand All @@ -62,7 +61,6 @@
latency=args.latency,
output_path=output_dir / "output.rttm",
visualization="slide",
# reference=output_dir / f"{uri}.rttm",
)
# Build pipeline from audio source and stream results to the output builder
pipeline.from_source(audio_source, output_waveform=True).subscribe(output_builder)
Expand Down
9 changes: 5 additions & 4 deletions src/diart/functional.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import torch
from typing import Union, Optional, List, Literal, Iterable, Tuple

import numpy as np
from pyannote.core import Annotation, Segment, SlidingWindow, SlidingWindowFeature
from pyannote.audio.utils.signal import Binarize as PyanBinarize
import torch
from pyannote.audio.pipelines.utils import PipelineModel, get_model, get_devices
from typing import Union, Optional, List, Literal, Iterable, Tuple
from pyannote.audio.utils.signal import Binarize as PyanBinarize
from pyannote.core import Annotation, Segment, SlidingWindow, SlidingWindowFeature

from .mapping import SpeakerMap, SpeakerMapBuilder

Expand Down
6 changes: 4 additions & 2 deletions src/diart/mapping.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

from typing import Callable, Iterable, List, Optional, Text, Tuple, Union

import numpy as np
from scipy.optimize import linear_sum_assignment
from pyannote.core.utils.distance import cdist
from typing import Callable, Iterable, List, Optional, Text, Tuple, Union
from scipy.optimize import linear_sum_assignment


class MappingMatrixObjective:
Expand Down
8 changes: 4 additions & 4 deletions src/diart/operators.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import rx
from rx import operators as ops
from rx.core import Observable
from dataclasses import dataclass
from typing import Callable, Optional, List, Any

import numpy as np
import rx
from pyannote.core import SlidingWindow, SlidingWindowFeature

from rx import operators as ops
from rx.core import Observable

Operator = Callable[[Observable], Observable]

Expand Down
7 changes: 4 additions & 3 deletions src/diart/pipelines.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import Optional

import rx
import rx.operators as ops
from typing import Optional
from pyannote.audio.pipelines.utils import PipelineModel

from .sources import AudioSource
from . import operators as my_ops
from . import functional as fn
from . import operators as my_ops
from .sources import AudioSource


class OnlineSpeakerDiarization:
Expand Down
13 changes: 7 additions & 6 deletions src/diart/sinks.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import numpy as np
from rx.core import Observer
from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
from pyannote.metrics.diarization import DiarizationErrorRate
from pyannote.database.util import load_rttm
from typing import Literal, Union, Text, Optional, Tuple
from pathlib import Path
from traceback import print_exc
from typing import Literal, Union, Text, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
from pyannote.database.util import load_rttm
from pyannote.metrics.diarization import DiarizationErrorRate
from rx.core import Observer


class OutputBuilder(Observer):
Expand Down
175 changes: 135 additions & 40 deletions src/diart/sources.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,147 @@
from rx.subject import Subject
from pyannote.audio.core.io import Audio, AudioFile
from pyannote.core import SlidingWindowFeature, SlidingWindow
import random
from typing import Tuple
import time
from queue import SimpleQueue
from typing import Tuple, Text, Optional, Iterable

import sounddevice as sd
from einops import rearrange
from pyannote.audio.core.io import Audio, AudioFile
from pyannote.core import SlidingWindowFeature, SlidingWindow
from rx.subject import Subject


class AudioSource:
def __init__(self, uri: str, sample_rate: int):
"""Represents a source of audio that can start streaming via the `stream` property.
Parameters
----------
uri: Text
Unique identifier of the audio source.
sample_rate: int
Sample rate of the audio source.
"""
def __init__(self, uri: Text, sample_rate: int):
self.uri = uri
self.sample_rate = sample_rate
self.stream = Subject()
self.resolution = 1 / sample_rate

@property
def is_regular(self) -> bool:
"""Whether the stream is regular. Defaults to False.
A regular stream always yields the same amount of samples per event.
"""
return False

@property
def duration(self):
def duration(self) -> Optional[float]:
"""The duration of the stream if known. Defaults to None (unknown duration)"""
return None

def read(self):
"""Start reading the source and yielding samples through the stream"""
raise NotImplementedError


class FileAudioSource(AudioSource):
def __init__(self, file: AudioFile, uri: str, sample_rate: int):
super().__init__(uri, sample_rate)
class AudioFileReader:
"""Represents a method for reading an audio file.
Parameters
----------
sample_rate: int
Sample rate of the audio file.
"""
def __init__(self, sample_rate: int):
self.audio = Audio(sample_rate=sample_rate, mono=True)
self._duration = self.audio.get_duration(file)
self.file = file
self.resolution = 1 / sample_rate

@property
def duration(self):
return self._duration
def sample_rate(self) -> int:
return self.audio.sample_rate

def to_iterable(self):
@property
def is_regular(self) -> bool:
"""Whether the reading is regular. Defaults to False.
A regular reading method always yields the same amount of samples."""
return False

def get_duration(self, file: AudioFile) -> float:
return self.audio.get_duration(file)

def iterate(self, file: AudioFile) -> Iterable[SlidingWindowFeature]:
"""Return an iterable over the file's samples"""
raise NotImplementedError

def read(self):
for waveform in self.to_iterable():
try:
self.stream.on_next(waveform)
except Exception as e:
self.stream.on_error(e)
self.stream.on_completed()

class RegularAudioFileReader(AudioFileReader):
"""Reads a file always yielding the same number of samples with a given step.
class ReliableFileAudioSource(FileAudioSource):
Parameters
----------
sample_rate: int
Sample rate of the audio file.
window_duration: float
Duration of each chunk of samples (window) in seconds.
step_duration: float
Step duration between chunks in seconds.
"""
def __init__(
self,
file: AudioFile,
uri: str,
sample_rate: int,
window_duration: float,
step: float
step_duration: float,
):
super().__init__(file, uri, sample_rate)
super().__init__(sample_rate)
self.window_duration = window_duration
self.step = step
self.step_duration = step_duration
self.window_samples = int(round(self.window_duration * self.sample_rate))
self.step_samples = int(round(self.step * self.sample_rate))
self.step_samples = int(round(self.step_duration * self.sample_rate))

@property
def is_regular(self) -> bool:
return True

def to_iterable(self):
waveform, _ = self.audio(self.file)
def iterate(self, file: AudioFile) -> Iterable[SlidingWindowFeature]:
waveform, _ = self.audio(file)
chunks = rearrange(
waveform.unfold(1, self.window_samples, self.step_samples),
"channel chunk frame -> chunk channel frame",
).numpy()
for i, chunk in enumerate(chunks):
w = SlidingWindow(
start=i * self.step,
start=i * self.step_duration,
duration=self.resolution,
step=self.resolution
)
yield SlidingWindowFeature(chunk.T, w)


class UnreliableFileAudioSource(FileAudioSource):
class IrregularAudioFileReader(AudioFileReader):
"""Reads an audio file yielding a different number of non-overlapping samples in each event.
This class is useful to simulate how a system would work in unreliable reading conditions.
Parameters
----------
sample_rate: int
Sample rate of the audio file.
refresh_rate_range: (float, float)
Duration range within which to determine the number of samples to yield (in seconds).
simulate_delay: bool
Whether to simulate that the samples are being read in real time before they are yielded.
Defaults to False (no delay).
"""
def __init__(
self,
file: AudioFile,
uri: str,
sample_rate: int,
refresh_rate_range: Tuple[float, float],
simulate_delay: bool = False
simulate_delay: bool = False,
):
super().__init__(file, uri, sample_rate)
super().__init__(sample_rate)
self.start, self.end = refresh_rate_range
self.delay = simulate_delay

def to_iterable(self):
waveform, _ = self.audio(self.file)
def iterate(self, file: AudioFile) -> Iterable[SlidingWindowFeature]:
waveform, _ = self.audio(file)
total_samples = waveform.shape[1]
i = 0
while i < total_samples:
Expand All @@ -111,7 +154,55 @@ def to_iterable(self):
yield waveform[:, last_i:i]


class FileAudioSource(AudioSource):
"""Represents an audio source tied to a file.
Parameters
----------
file: AudioFile
The file to stream.
uri: Text
Unique identifier of the audio source.
sample_rate: int
Sample rate of the audio source.
reader: AudioFileReader
Determines how the file will be read.
"""
def __init__(
self,
file: AudioFile,
uri: Text,
sample_rate: int,
reader: AudioFileReader
):
super().__init__(uri, sample_rate)
self.reader = reader
self._duration = self.reader.get_duration(file)
self.file = file

@property
def is_regular(self) -> bool:
"""The regularity depends on the reader"""
return self.reader.is_regular

@property
def duration(self) -> Optional[float]:
"""The duration of a file is known"""
return self._duration

def read(self):
"""Send each chunk of samples through the stream"""
for waveform in self.reader.iterate(self.file):
try:
self.stream.on_next(waveform)
except Exception as e:
self.stream.on_error(e)
self.stream.on_completed()


class MicrophoneAudioSource(AudioSource):
"""Represents an audio source tied to the default microphone available"""

def __init__(self, sample_rate: int):
super().__init__("live_recording", sample_rate)
self.block_size = 1024
Expand All @@ -120,15 +211,19 @@ def __init__(self, sample_rate: int):
samplerate=sample_rate,
latency=0,
blocksize=self.block_size,
callback=self._read_callback
)
self.queue = SimpleQueue()

def _read_callback(self, samples, *args):
self.queue.put_nowait(samples[:, [0]].T)

def read(self):
self.mic_stream.start()
while self.mic_stream:
try:
samples = self.mic_stream.read(self.block_size)[0]
self.stream.on_next(self.queue.get())
except Exception as e:
self.stream.on_error(e)
break
self.stream.on_next(samples[:, [0]].T)
self.stream.on_completed()
5 changes: 3 additions & 2 deletions src/diart/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook
import matplotlib.pyplot as plt
from typing import Optional

import matplotlib.pyplot as plt
from pyannote.core import Annotation, Segment, SlidingWindowFeature, notebook


def visualize_feature(duration: Optional[float] = None):
def apply(feature: SlidingWindowFeature):
Expand Down

0 comments on commit 35f60f1

Please sign in to comment.