Skip to content

Commit

Permalink
Merge pull request #44 from rhasspy/synesthesiam-20240109-debug-recor…
Browse files Browse the repository at this point in the history
…ding

Add debug recording for wake/stt
  • Loading branch information
synesthesiam authored Jan 10, 2024
2 parents 350eee6 + f6e397a commit dc304e2
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 8 deletions.
8 changes: 8 additions & 0 deletions wyoming_satellite/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ async def main() -> None:
help="Host address for zeroconf discovery (default: detect)",
)
#
parser.add_argument(
"--debug-recording-dir", help="Directory to store audio for debugging"
)
parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
args = parser.parse_args()

Expand Down Expand Up @@ -258,6 +261,10 @@ async def main() -> None:
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
_LOGGER.debug(args)

if args.debug_recording_dir:
args.debug_recording_dir = Path(args.debug_recording_dir)
_LOGGER.info("Recording audio to %s", args.debug_recording_dir)

wyoming_info = Info(
satellite=Satellite(
name=args.name,
Expand Down Expand Up @@ -320,6 +327,7 @@ async def main() -> None:
tts_stop=split_command(args.tts_stop_command),
error=split_command(args.error_command),
),
debug_recording_dir=args.debug_recording_dir,
)

satellite: SatelliteBase
Expand Down
110 changes: 104 additions & 6 deletions wyoming_satellite/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from wyoming.wake import Detect, Detection, WakeProcessAsyncClient

from .settings import SatelliteSettings
from .utils import multiply_volume, run_event_command, wav_to_events
from .utils import DebugAudioWriter, multiply_volume, run_event_command, wav_to_events
from .vad import SileroVad
from .webrtc import WebRtcAudio

Expand Down Expand Up @@ -60,6 +60,19 @@ def __init__(self, settings: SatelliteSettings) -> None:
self._event_task: Optional[asyncio.Task] = None
self._event_queue: "Optional[asyncio.Queue[Event]]" = None

# Debug audio recording
self.wake_audio_writer: Optional[DebugAudioWriter] = None
self.stt_audio_writer: Optional[DebugAudioWriter] = None
if settings.debug_recording_dir:
self.wake_audio_writer = DebugAudioWriter(
settings.debug_recording_dir,
"wake",
ring_buffer_size=(2 * 16000 * 2 * 1), # last 2 sec
)
self.stt_audio_writer = DebugAudioWriter(
settings.debug_recording_dir, "stt"
)

@property
def is_running(self) -> bool:
"""True if not stopping/stopped."""
Expand Down Expand Up @@ -747,7 +760,16 @@ async def event_from_server(self, event: Event) -> None:
_LOGGER.info("Streaming audio")
await self._send_run_pipeline()
await self.trigger_streaming_start()
elif Detection.is_type(event.type):
# Start debug recording
if self.stt_audio_writer is not None:
self.stt_audio_writer.start()
elif Transcript.is_type(event.type):
# Stop debug recording
if self.stt_audio_writer is not None:
self.stt_audio_writer.stop()

# We're always streaming
_LOGGER.info("Streaming audio")

# Re-trigger streaming start even though we technically don't stop
Expand All @@ -764,6 +786,14 @@ async def event_from_mic(
# Forward to server
await self.event_to_server(event)

# Debug audio recording
if self.stt_audio_writer is not None:
if audio_bytes is None:
chunk = AudioChunk.from_event(event)
audio_bytes = chunk.audio

self.stt_audio_writer.write(audio_bytes)


# -----------------------------------------------------------------------------

Expand Down Expand Up @@ -801,13 +831,33 @@ async def event_from_server(self, event: Event) -> None:

if RunSatellite.is_type(event.type):
_LOGGER.info("Waiting for speech")
elif Detection.is_type(event.type):
# Start debug recording
if self.stt_audio_writer is not None:
self.stt_audio_writer.start()
elif Transcript.is_type(event.type):
# Stop debug recording
if self.stt_audio_writer is not None:
self.stt_audio_writer.stop()

async def event_from_mic(
self, event: Event, audio_bytes: Optional[bytes] = None
) -> None:
if not AudioChunk.is_type(event.type):
return

# Only unpack chunk once
chunk: Optional[AudioChunk] = None

# Debug audio recording
if self.stt_audio_writer is not None:
if audio_bytes is None:
# Need to unpack
chunk = AudioChunk.from_event(event)
audio_bytes = chunk.audio

self.stt_audio_writer.write(audio_bytes)

if (
self.is_streaming
and (self.timeout_seconds is not None)
Expand All @@ -817,6 +867,10 @@ async def event_from_mic(
self.is_streaming = False
self.timeout_seconds = None

# Stop debug recording
if self.stt_audio_writer is not None:
self.stt_audio_writer.stop()

# Stop pipeline
await self.event_to_server(AudioStop().event())

Expand All @@ -825,10 +879,11 @@ async def event_from_mic(

if not self.is_streaming:
# Check VAD
chunk: Optional[AudioChunk] = None
if audio_bytes is None:
# Need to unpack
chunk = AudioChunk.from_event(event)
if chunk is None:
# Need to unpack
chunk = AudioChunk.from_event(event)

audio_bytes = chunk.audio

if not self.vad(audio_bytes):
Expand Down Expand Up @@ -905,26 +960,61 @@ def __init__(self, settings: SatelliteSettings) -> None:
if settings.vad.enabled:
_LOGGER.warning("VAD is enabled but will not be used")

# Used for debug audio recording so both wake and stt WAV files have the
# same timestamp.
self._debug_recording_timestamp: Optional[int] = None

async def event_from_server(self, event: Event) -> None:
if Transcript.is_type(event.type):
# Only check event types once
is_run_satellite = False
is_transcript = False

if RunSatellite.is_type(event.type):
is_run_satellite = True
elif Transcript.is_type(event.type):
is_transcript = True

if is_transcript:
# Stop streaming before event_from_server is called because it will
# play the "done" WAV.
self.is_streaming = False

# Stop debug recording (stt)
if self.stt_audio_writer is not None:
self.stt_audio_writer.stop()

await super().event_from_server(event)

if RunSatellite.is_type(event.type) or Transcript.is_type(event.type):
if is_run_satellite or is_transcript:
# Stop streaming and go back to wake word detection
self.is_streaming = False
await self.trigger_streaming_stop()
await self._send_wake_detect()
_LOGGER.info("Waiting for wake word")

# Start debug recording (wake)
self._debug_recording_timestamp = time.monotonic_ns()
if self.wake_audio_writer is not None:
self.wake_audio_writer.start(timestamp=self._debug_recording_timestamp)

async def event_from_mic(
self, event: Event, audio_bytes: Optional[bytes] = None
) -> None:
if not AudioChunk.is_type(event.type):
return

# Debug audio recording
if (self.wake_audio_writer is not None) or (self.stt_audio_writer is not None):
if audio_bytes is None:
chunk = AudioChunk.from_event(event)
audio_bytes = chunk.audio

if self.wake_audio_writer is not None:
self.wake_audio_writer.write(audio_bytes)

if self.stt_audio_writer is not None:
self.stt_audio_writer.write(audio_bytes)

if self.is_streaming:
# Forward to server
await self.event_to_server(event)
Expand All @@ -947,6 +1037,14 @@ async def event_from_wake(self, event: Event) -> None:
_LOGGER.debug("Wake word detection occurred during refractory period")
return

# Stop debug recording (wake)
if self.wake_audio_writer is not None:
self.wake_audio_writer.stop()

# Start debug recording (stt)
if self.stt_audio_writer is not None:
self.stt_audio_writer.start(timestamp=self._debug_recording_timestamp)

self.is_streaming = True
_LOGGER.debug("Streaming audio")

Expand Down
5 changes: 5 additions & 0 deletions wyoming_satellite/settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Satellite settings."""
from abc import ABC
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional


Expand Down Expand Up @@ -160,4 +161,8 @@ class SatelliteSettings:
wake: WakeSettings = field(default_factory=WakeSettings)
snd: SndSettings = field(default_factory=SndSettings)
event: EventSettings = field(default_factory=EventSettings)

restart_timeout: float = 5.0

debug_recording_dir: Optional[Path] = None
"""Path to directory where debug audio is written."""
9 changes: 8 additions & 1 deletion wyoming_satellite/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
"""Utility methods."""
from .audio import AudioBuffer, chunk_samples, multiply_volume, wav_to_events
from .audio import (
AudioBuffer,
DebugAudioWriter,
chunk_samples,
multiply_volume,
wav_to_events,
)
from .misc import (
get_mac_address,
needs_silero,
Expand All @@ -11,6 +17,7 @@
__all__ = [
"AudioBuffer",
"chunk_samples",
"DebugAudioWriter",
"get_mac_address",
"multiply_volume",
"needs_silero",
Expand Down
75 changes: 74 additions & 1 deletion wyoming_satellite/utils/audio.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Audio utilities."""
import array
import logging
import time
import wave
from pathlib import Path
from typing import Iterable, Iterator, Union
from typing import Iterable, Iterator, Optional, Union

from pyring_buffer import RingBuffer
from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.event import Event

Expand Down Expand Up @@ -130,3 +132,74 @@ def wav_to_events(
audio_bytes = wav_file.readframes(samples_per_chunk)

yield AudioStop(timestamp=timestamp).event()


class DebugAudioWriter:
def __init__(
self,
dir_path: Union[str, Path],
suffix: str,
rate: int = 16000,
width: int = 2,
channels: int = 1,
ring_buffer_size: Optional[int] = None,
) -> None:
self.dir_path = Path(dir_path)
self.suffix = suffix
self.rate = rate
self.width = width
self.channels = channels

self._wav_path: Optional[Path] = None
self._wav_writer: Optional[wave.Wave_write] = None

# If ring buffer size is set, we will hold audio in a ring buffer
# instead of directly writing it to disk.
#
# This allows only the last few seconds of wake word audio to be stored.
self._ring_buffer: Optional[RingBuffer] = None
if ring_buffer_size is not None:
# Hold audio in a ring buffer before writing
self._ring_buffer = RingBuffer(ring_buffer_size)

def start(self, timestamp: Optional[int] = None) -> None:
self.stop()

if timestamp is None:
timestamp = time.monotonic_ns()

self._wav_path = self.dir_path / f"{timestamp}-{self.suffix}.wav"
self._wav_path.parent.mkdir(parents=True, exist_ok=True)

self._wav_writer = wave.open(str(self._wav_path), "wb")
self._wav_writer.setframerate(self.rate)
self._wav_writer.setsampwidth(self.width)
self._wav_writer.setnchannels(self.channels)

_LOGGER.debug("Started recording to %s", self._wav_path)

def write(self, audio: bytes) -> None:
if self._wav_writer is None:
return

if self._ring_buffer is not None:
# Hold audio in a ring buffer before writing
self._ring_buffer.put(audio)
else:
# Write directly to disk
self._wav_writer.writeframes(audio)

def stop(self) -> None:
if self._wav_writer is None:
return

if self._ring_buffer is not None:
# Write all audio now and reset buffer
self._wav_writer.writeframes(self._ring_buffer.getvalue())
self._ring_buffer = RingBuffer(self._ring_buffer.maxlen)

self._wav_writer.close()
self._wav_writer = None

_LOGGER.debug("Stopped recording to %s", self._wav_path)
self._wav_path = None

0 comments on commit dc304e2

Please sign in to comment.