From 8cc85e8b33e7716d0139a3820c9cfb863890d278 Mon Sep 17 00:00:00 2001 From: daviirodrig <30713947+daviirodrig@users.noreply.github.com> Date: Thu, 22 Feb 2024 03:57:57 -0300 Subject: [PATCH] update voice settings and option to delete voice --- backend/prompts.json | 9 +++- backend/src/components/tabs/elevenlabs.py | 59 +++++++++++++++++++++++ backend/src/config.py | 4 ++ backend/src/tts.py | 6 ++- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/backend/prompts.json b/backend/prompts.json index bf2220c..972e8c1 100644 --- a/backend/prompts.json +++ b/backend/prompts.json @@ -22,10 +22,15 @@ "voice_id": "9Fa9ozDyMkNFPnyRbRZD", "model": "eleven_multilingual_v2" }, + "waldemar_puto": { + "prompt_id": "puto_arrombado_xinga_demais", + "voice_id": "9Fa9ozDyMkNFPnyRbRZD", + "model": "eleven_multilingual_v2" + }, "pescador_puto": { "prompt_id": "objetivo_pescador", "voice_id": "t0jbNlBVZ17f02VDIeMI", - "model": "eleven_multilingual_v1" + "model": "eleven_multilingual_v2" }, "asmr_puto": { "prompt_id": "objetivo_relaxar_asmr", @@ -35,7 +40,7 @@ "padeiro_puto": { "prompt_id": "objetivo_padeiro", "voice_id": "ZQe5CZNOzWyzPSCn5a3c", - "model": "eleven_multilingual_v1" + "model": "eleven_multilingual_v2" } }, "current_personality_id": "waldemar" diff --git a/backend/src/components/tabs/elevenlabs.py b/backend/src/components/tabs/elevenlabs.py index 939c9f4..fa1b354 100644 --- a/backend/src/components/tabs/elevenlabs.py +++ b/backend/src/components/tabs/elevenlabs.py @@ -1,5 +1,7 @@ +from elevenlabs import Voice import gradio as gr from src.tts import tts +from src.config import global_config from loguru import logger @@ -41,6 +43,25 @@ def gen_voices_html(): return gr.HTML(full_html) +def delete_voice(voice_id: str) -> str: + voice = Voice(voice_id=voice_id) + try: + voice.delete() + logger.info(f"Voice {voice_id} deleted") + return "Voice deleted with success!" + except Exception as e: + logger.info(f"Failed to delete {voice_id} error: {e}") + return f"Failed to delete voice error: {e}" + + +def set_voice_settings(stability: float, style: float, similarity_boost: float): + global_config.voice_stability = stability + global_config.voice_style = style + global_config.voice_similarity_boost = similarity_boost + global_config.save() + return "Voice settings updated with success!" + + def elevenlabs_tab(): with gr.Tab("Elevenlabs") as tab: with gr.Tab("Available Voices"): @@ -68,4 +89,42 @@ def elevenlabs_tab(): allow_flagging="never", ) + with gr.Tab("Delete Voice Clone"): + gr.Interface( + fn=delete_voice, + inputs=[gr.Textbox(label="Voice ID to delete")], + outputs="text", + allow_flagging="never", + ) + + with gr.Tab("Voice Settings"): + gr.Interface( + fn=set_voice_settings, + inputs=[ + gr.Slider( + label="Stability", + value=global_config.voice_stability, + minimum=0.01, + maximum=1.00, + step=0.01, + ), + gr.Slider( + label="Style", + value=global_config.voice_style, + minimum=0.01, + maximum=1.00, + step=0.01, + ), + gr.Slider( + label="Similarity Boost", + value=global_config.voice_similarity_boost, + minimum=0.01, + maximum=1.00, + step=0.01, + ), + ], + outputs="text", + allow_flagging="never", + ) + return tab diff --git a/backend/src/config.py b/backend/src/config.py index 732216e..5a723d3 100644 --- a/backend/src/config.py +++ b/backend/src/config.py @@ -49,6 +49,10 @@ class GlobalConfig: elevenlabs_voice_id: str = env_or_default("ELEVENLABS_VOICE_ID") elevenlabs_model: Optional[str] = env_or_default("ELEVENLABS_MODEL", "eleven_multilingual_v2") + voice_stability: Optional[float] = float(env_or_default("VOICE_STABILITY", "0.30")) + voice_similarity_boost: Optional[float] = float(env_or_default("VOICE_SIMILARITY_BOOST", "0.75")) + voice_style: Optional[float] = float(env_or_default("VOICE_STYLE", "0.40")) + narrator_volume: int = int(env_or_default("NARRATOR_VOLUME", "100")) discord_webhook_key: Optional[str] = env_or_default("DISCORD_WEBHOOK_KEY") diff --git a/backend/src/tts.py b/backend/src/tts.py index 4e35542..33849f8 100644 --- a/backend/src/tts.py +++ b/backend/src/tts.py @@ -79,7 +79,11 @@ def non_stream(): logger.info(f"Using {global_config.elevenlabs_buffer_size} elevenlabs buffer size") voice = Voice( voice_id=global_config.elevenlabs_voice_id, - settings=VoiceSettings(stability=0.05, similarity_boost=0.75, style=0.75), + settings=VoiceSettings( + stability=global_config.voice_stability, # type: ignore + similarity_boost=global_config.voice_similarity_boost, # type: ignore + style=global_config.voice_style, + ), ) gen = generate(