Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reintroduce TTS WS #434

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 39 additions & 21 deletions deepgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,12 @@
from .client import (
SpeakOptions,
SpeakRESTOptions,
# SpeakWebSocketOptions,
SpeakWSOptions,
# FileSource,
SpeakRestSource,
SpeakSource,
)
from .client import SpeakWebSocketEvents
from .client import SpeakWebSocketEvents, SpeakWebSocketMessage

## speak REST
from .client import (
Expand All @@ -122,21 +122,23 @@
SpeakRESTResponse,
)

# ## speak WebSocket
# from .client import (
# SpeakWebSocketClient,
# AsyncSpeakWebSocketClient,
# )
# from .client import (
# SpeakWebSocketResponse,
# # OpenResponse,
# # MetadataResponse,
# FlushedResponse,
# # CloseResponse,
# # UnhandledResponse,
# WarningResponse,
# # ErrorResponse,
# )
## speak WebSocket
from .client import (
SpeakWebSocketClient,
AsyncSpeakWebSocketClient,
SpeakWSClient,
AsyncSpeakWSClient,
)
from .client import (
# OpenResponse,
# MetadataResponse,
FlushedResponse,
ClearedResponse,
# CloseResponse,
# UnhandledResponse,
WarningResponse,
# ErrorResponse,
)

# manage
from .client import ManageClient, AsyncManageClient
Expand Down Expand Up @@ -180,10 +182,26 @@
)

# utilities
# pylint: disable=wrong-import-position
from .audio import Microphone, DeepgramMicrophoneError
from .audio import (
LOGGING,
CHANNELS,
RATE,
CHUNK,
INPUT_LOGGING,
INPUT_CHANNELS,
INPUT_RATE,
INPUT_CHUNK,
)

LOGGING = INPUT_LOGGING
CHANNELS = INPUT_CHANNELS
RATE = INPUT_RATE
CHUNK = INPUT_CHUNK

from .audio import Speaker
from .audio import (
OUTPUT_LOGGING,
OUTPUT_CHANNELS,
OUTPUT_RATE,
OUTPUT_CHUNK,
)

# pylint: enable=wrong-import-position
16 changes: 15 additions & 1 deletion deepgram/audio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,19 @@
# SPDX-License-Identifier: MIT

from .microphone import Microphone
from .microphone import LOGGING, CHANNELS, RATE, CHUNK
from .microphone import DeepgramMicrophoneError
from .microphone import (
LOGGING as INPUT_LOGGING,
CHANNELS as INPUT_CHANNELS,
RATE as INPUT_RATE,
CHUNK as INPUT_CHUNK,
)

from .speaker import Speaker
from .speaker import DeepgramSpeakerError
from .speaker import (
LOGGING as OUTPUT_LOGGING,
CHANNELS as OUTPUT_CHANNELS,
RATE as OUTPUT_RATE,
CHUNK as OUTPUT_CHUNK,
)
1 change: 0 additions & 1 deletion deepgram/audio/microphone/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from ...utils import verboselogs

# Constants for microphone

LOGGING = verboselogs.WARNING
CHANNELS = 1
RATE = 16000
Expand Down
83 changes: 60 additions & 23 deletions deepgram/audio/microphone/microphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import inspect
import asyncio
import threading
from typing import Optional, Callable, TYPE_CHECKING
from typing import Optional, Callable, Union, TYPE_CHECKING
import logging

from ...utils import verboselogs
Expand All @@ -21,10 +21,10 @@ class Microphone: # pylint: disable=too-many-instance-attributes
"""

_logger: verboselogs.VerboseLogger
_exit: threading.Event

_audio: "pyaudio.PyAudio"
_stream: "pyaudio.Stream"

_chunk: int
_rate: int
_format: int
Expand All @@ -34,9 +34,10 @@ class Microphone: # pylint: disable=too-many-instance-attributes

_asyncio_loop: asyncio.AbstractEventLoop
_asyncio_thread: threading.Thread
_exit: threading.Event

_push_callback_org: object
_push_callback: object
_push_callback_org: Optional[Callable] = None
_push_callback: Optional[Callable] = None

def __init__(
self,
Expand All @@ -53,6 +54,7 @@ def __init__(
self._logger = verboselogs.VerboseLogger(__name__)
self._logger.addHandler(logging.StreamHandler())
self._logger.setLevel(verbose)

self._exit = threading.Event()

self._audio = pyaudio.PyAudio()
Expand All @@ -71,9 +73,16 @@ def _start_asyncio_loop(self) -> None:

def is_active(self) -> bool:
"""
returns True if the stream is active, False otherwise
is_active - returns the state of the stream
Args:
None
Returns:
True if the stream is active, False otherwise
"""
self._logger.debug("Microphone.is_active ENTER")

if self._stream is None:
self._logger.error("stream is None")
self._logger.debug("Microphone.is_active LEAVE")
Expand All @@ -87,24 +96,34 @@ def is_active(self) -> bool:

def set_callback(self, push_callback: Callable) -> None:
"""
Set the callback function to be called when data is received.
set_callback - sets the callback function to be called when data is received.
Args:
push_callback (Callable): The callback function to be called when data is received.
This should be the websocket send function.
Returns:
None
"""
self._push_callback_org = push_callback

def start(self) -> bool:
"""
starts the microphone stream
starts - starts the microphone stream
Returns:
bool: True if the stream was started, False otherwise
"""
self._logger.debug("Microphone.start ENTER")

self._logger.info("format: %s", self._format)
self._logger.info("channels: %d", self._channels)
self._logger.info("rate: %d", self._rate)
self._logger.info("chunk: %d", self._chunk)
self._logger.info("input_device_id: %d", self._input_device_index)
# self._logger.info("input_device_id: %d", self._input_device_index)

if self._push_callback_org is None:
self._logger.error("start() failed. No callback set.")
self._logger.error("start failed. No callback set.")
self._logger.debug("Microphone.start LEAVE")
return False

Expand All @@ -114,9 +133,13 @@ def start(self) -> bool:
self._asyncio_thread = threading.Thread(target=self._start_asyncio_loop)
self._asyncio_thread.start()

self._push_callback = lambda data: asyncio.run_coroutine_threadsafe(
self._push_callback_org(data), self._asyncio_loop
).result()
self._push_callback = lambda data: (
asyncio.run_coroutine_threadsafe(
self._push_callback_org(data), self._asyncio_loop
).result()
if self._push_callback_org
else None
)
else:
self._logger.verbose("regular threaded callback")
self._push_callback = self._push_callback_org
Expand All @@ -134,7 +157,7 @@ def start(self) -> bool:
self._exit.clear()
self._stream.start_stream()

self._logger.notice("start() succeeded")
self._logger.notice("start succeeded")
self._logger.debug("Microphone.start LEAVE")
return True

Expand Down Expand Up @@ -176,41 +199,50 @@ def _callback(

def mute(self) -> bool:
"""
Mutes the microphone stream
mute - mutes the microphone stream
Returns:
bool: True if the stream was muted, False otherwise
"""
self._logger.debug("Microphone.mute ENTER")

if self._stream is None:
self._logger.error("mute() failed. Library not initialized.")
self._logger.error("mute failed. Library not initialized.")
self._logger.debug("Microphone.mute LEAVE")
return False

self._is_muted = True

self._logger.notice("mute() succeeded")
self._logger.notice("mute succeeded")
self._logger.debug("Microphone.mute LEAVE")
return True

def unmute(self) -> bool:
"""
Unmutes the microphone stream
unmute - unmutes the microphone stream
Returns:
bool: True if the stream was unmuted, False otherwise
"""
self._logger.debug("Microphone.unmute ENTER")

if self._stream is None:
self._logger.error("unmute() failed. Library not initialized.")
self._logger.error("unmute failed. Library not initialized.")
self._logger.debug("Microphone.unmute LEAVE")
return False

self._is_muted = False

self._logger.notice("unmute() succeeded")
self._logger.notice("unmute succeeded")
self._logger.debug("Microphone.unmute LEAVE")
return True

def finish(self) -> bool:
"""
Stops the microphone stream
finish - stops the microphone stream
Returns:
bool: True if the stream was stopped, False otherwise
"""
self._logger.debug("Microphone.finish ENTER")

Expand All @@ -219,19 +251,24 @@ def finish(self) -> bool:

# Stop the stream.
if self._stream is not None:
self._logger.notice("stopping stream...")
self._stream.stop_stream()
self._stream.close()
self._stream = None # type: ignore
self._logger.notice("stream stopped")

# clean up the thread
if (
inspect.iscoroutinefunction(self._push_callback_org)
and self._asyncio_thread is not None
# inspect.iscoroutinefunction(self._push_callback_org)
# and
self._asyncio_thread
is not None
):
self._logger.notice("stopping asyncio loop...")
self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
self._asyncio_thread.join()
self._asyncio_thread = None # type: ignore
self._logger.notice("stream/recv thread joined")
self._logger.notice("_asyncio_thread joined")

self._logger.notice("finish succeeded")
self._logger.debug("Microphone.finish LEAVE")
Expand Down
7 changes: 7 additions & 0 deletions deepgram/audio/speaker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from .speaker import Speaker
from .errors import DeepgramSpeakerError
from .constants import LOGGING, CHANNELS, RATE, CHUNK
12 changes: 12 additions & 0 deletions deepgram/audio/speaker/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from ...utils import verboselogs

# Constants for speaker.
# deepgram/audio/__init__.py re-exports LOGGING, CHANNELS, RATE and CHUNK
# under OUTPUT_-prefixed aliases so they do not clash with the microphone's
# INPUT_-prefixed constants of the same names.
LOGGING = verboselogs.WARNING
# presumably seconds (i.e. 50 ms) — TODO confirm against the code that reads it
TIMEOUT = 0.050
CHANNELS = 1
RATE = 16000
# NOTE(review): 8194 is not a power of two — confirm 8192 was not intended
CHUNK = 8194
21 changes: 21 additions & 0 deletions deepgram/audio/speaker/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT


# exceptions for speaker
class DeepgramSpeakerError(Exception):
    """
    Exception raised for known errors related to the Speaker library.

    Attributes:
        name (str): Fixed label, "DeepgramSpeakerError", used as the prefix
            of the string form of the exception.
        message (str): The error message describing the exception.
    """

    def __init__(self, message: str) -> None:
        # Forward the message to Exception so that ``args`` is populated.
        super().__init__(message)
        self.name = "DeepgramSpeakerError"
        self.message = message

    def __str__(self) -> str:
        # Rendered as "<name>: <message>".
        return f"{self.name}: {self.message}"
Loading
Loading