From a338fde3b22e9ccd5af17a837c8b38a85f4d769b Mon Sep 17 00:00:00 2001
From: Maxwin-z
Date: Fri, 14 Jun 2024 23:53:57 +0800
Subject: [PATCH 1/3] fix: no voice when use cache

---
 manim_voiceover/voiceover_scene.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/manim_voiceover/voiceover_scene.py b/manim_voiceover/voiceover_scene.py
index 687bb29..8030aa6 100644
--- a/manim_voiceover/voiceover_scene.py
+++ b/manim_voiceover/voiceover_scene.py
@@ -68,6 +68,7 @@ def add_voiceover_text(
 
         dict_ = self.speech_service._wrap_generate_from_text(text, **kwargs)
         tracker = VoiceoverTracker(self, dict_, self.speech_service.cache_dir)
+        self.renderer.skip_animations = self.renderer._original_skipping_status
         self.add_sound(str(Path(self.speech_service.cache_dir) / dict_["final_audio"]))
         self.current_tracker = tracker
 

From e66dc7e16701fd3a106c251aa4388b83bb895082 Mon Sep 17 00:00:00 2001
From: Maxwin-z
Date: Sat, 15 Jun 2024 00:18:14 +0800
Subject: [PATCH 2/3] feat: add edge_tts service

---
 manim_voiceover/services/edge.py | 87 ++++++++++++++++++++++++++++++++
 pyproject.toml                   |  5 +-
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 manim_voiceover/services/edge.py

diff --git a/manim_voiceover/services/edge.py b/manim_voiceover/services/edge.py
new file mode 100644
index 0000000..56aa735
--- /dev/null
+++ b/manim_voiceover/services/edge.py
@@ -0,0 +1,87 @@
+from manim_voiceover.services.base import SpeechService
+from manim_voiceover.helper import prompt_ask_missing_extras, remove_bookmarks
+from pathlib import Path
+from manim import logger
+from typing import Optional
+
+try:
+    from edge_tts import Communicate
+except ImportError:
+    logger.error(
+        'Missing packages. Run `pip install "manim-voiceover[edge]"` to use EdgeTTSService.'
+    )
+
+
+class EdgeTTSService(SpeechService):
+    """EdgeTTSService class allows you to use Microsoft Edge's online text-to-speech service.
+    This is a wrapper for the edge-tts library.
+    See the `edge-tts documentation <https://github.com/rany2/edge-tts>`__
+    for more information."""
+
+    def __init__(
+        self,
+        voice: str = "en-US-AriaNeural",
+        rate: str = "+0%",
+        volume: str = "+0%",
+        pitch: str = "+0Hz",
+        proxy: Optional[str] = None,
+        **kwargs
+    ):
+        prompt_ask_missing_extras("edge_tts", "edge", "EdgeTTSService")
+        SpeechService.__init__(self, **kwargs)
+        self.voice = voice
+        self.rate = rate
+        self.volume = volume
+        self.pitch = pitch
+        self.proxy = proxy
+
+    def generate_from_text(
+        self, text: str, cache_dir: str = None, path: str = None, **kwargs
+    ) -> dict:
+        """Generate speech audio for ``text``, reusing a cached result if one exists."""
+        if cache_dir is None:
+            cache_dir = self.cache_dir
+
+        input_text = remove_bookmarks(text)
+        input_data = {
+            "input_text": input_text,
+            "service": "edge",
+            "config": {
+                "voice": self.voice,
+                "rate": self.rate,
+                "volume": self.volume,
+                "pitch": self.pitch,
+            },
+        }
+
+        cached_result = self.get_cached_result(input_data, cache_dir)
+        if cached_result is not None:
+            return cached_result
+
+        if path is None:
+            audio_path = self.get_audio_basename(input_data) + ".mp3"
+        else:
+            audio_path = path
+
+        comm = Communicate(
+            input_text,
+            voice=self.voice,
+            rate=self.rate,
+            volume=self.volume,
+            pitch=self.pitch,
+            proxy=self.proxy,
+        )
+
+        output_file = str(Path(cache_dir) / audio_path)
+        with open(output_file, "wb") as f:
+            for chunk in comm.stream_sync():
+                if chunk["type"] == "audio":
+                    f.write(chunk["data"])
+
+        json_dict = {
+            "input_text": text,
+            "input_data": input_data,
+            "original_audio": audio_path,
+        }
+
+        return json_dict
diff --git a/pyproject.toml b/pyproject.toml
index 70d3419..38d0698 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,7 @@ openai-whisper = { version = "^20230314", optional = true }
 stable-ts = { version ="^2.6.2", optional = true }
 python-slugify = "^8.0.1"
 elevenlabs = {version = "^0.2.27", optional = true}
+edge_tts = { version = "^6.1.12", optional = true }
 
 [tool.poetry.extras]
 azure = ["azure-cognitiveservices-speech"]
@@ -74,6 +75,7 @@ recorder = ["PyAudio", "pynput"]
 translate = ["deepl"]
 elevenlabs = ["elevenlabs"]
 transcribe = ["openai-whisper", "stable-ts"]
+edge = ["edge_tts"]
 all = [
     "azure-cognitiveservices-speech",
     "gTTS",
@@ -86,7 +88,8 @@ all = [
     "deepl",
     "openai-whisper",
     "stable-ts",
-    "elevenlabs"
+    "elevenlabs",
+    "edge_tts"
 ]
 
 [tool.poetry.group.dev.dependencies]

From 91e1a001608bc2bee836d89f9f83f7456dab8421 Mon Sep 17 00:00:00 2001
From: Maxwin-z
Date: Mon, 17 Jun 2024 10:36:15 +0800
Subject: [PATCH 3/3] Revert "fix: no voice when use cache"

This reverts commit a338fde3b22e9ccd5af17a837c8b38a85f4d769b.
---
 manim_voiceover/voiceover_scene.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/manim_voiceover/voiceover_scene.py b/manim_voiceover/voiceover_scene.py
index 8030aa6..687bb29 100644
--- a/manim_voiceover/voiceover_scene.py
+++ b/manim_voiceover/voiceover_scene.py
@@ -68,7 +68,6 @@ def add_voiceover_text(
 
         dict_ = self.speech_service._wrap_generate_from_text(text, **kwargs)
         tracker = VoiceoverTracker(self, dict_, self.speech_service.cache_dir)
-        self.renderer.skip_animations = self.renderer._original_skipping_status
         self.add_sound(str(Path(self.speech_service.cache_dir) / dict_["final_audio"]))
         self.current_tracker = tracker
 