From 8cc85e8b33e7716d0139a3820c9cfb863890d278 Mon Sep 17 00:00:00 2001
From: daviirodrig <30713947+daviirodrig@users.noreply.github.com>
Date: Thu, 22 Feb 2024 03:57:57 -0300
Subject: [PATCH] Make voice settings configurable and add an option to delete a voice

---
 backend/prompts.json                      |  9 +++-
 backend/src/components/tabs/elevenlabs.py | 59 +++++++++++++++++++++++
 backend/src/config.py                     |  4 ++
 backend/src/tts.py                        |  6 ++-
 4 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/backend/prompts.json b/backend/prompts.json
index bf2220c..972e8c1 100644
--- a/backend/prompts.json
+++ b/backend/prompts.json
@@ -22,10 +22,15 @@
         "voice_id": "9Fa9ozDyMkNFPnyRbRZD",
         "model": "eleven_multilingual_v2"
       },
+      "waldemar_puto": {
+        "prompt_id": "puto_arrombado_xinga_demais",
+        "voice_id": "9Fa9ozDyMkNFPnyRbRZD",
+        "model": "eleven_multilingual_v2"
+      },
       "pescador_puto": {
         "prompt_id": "objetivo_pescador",
         "voice_id": "t0jbNlBVZ17f02VDIeMI",
-        "model": "eleven_multilingual_v1"
+        "model": "eleven_multilingual_v2"
       },
       "asmr_puto": {
         "prompt_id": "objetivo_relaxar_asmr",
@@ -35,7 +40,7 @@
       "padeiro_puto": {
         "prompt_id": "objetivo_padeiro",
         "voice_id": "ZQe5CZNOzWyzPSCn5a3c",
-        "model": "eleven_multilingual_v1"
+        "model": "eleven_multilingual_v2"
       }
     },
     "current_personality_id": "waldemar"
diff --git a/backend/src/components/tabs/elevenlabs.py b/backend/src/components/tabs/elevenlabs.py
index 939c9f4..fa1b354 100644
--- a/backend/src/components/tabs/elevenlabs.py
+++ b/backend/src/components/tabs/elevenlabs.py
@@ -1,5 +1,7 @@
+from elevenlabs import Voice
 import gradio as gr
 from src.tts import tts
+from src.config import global_config
 from loguru import logger
 
 
@@ -41,6 +43,25 @@ def gen_voices_html():
     return gr.HTML(full_html)
 
 
+def delete_voice(voice_id: str) -> str:
+    """Delete the voice identified by voice_id; failures are caught and returned as a status message."""
+    try:
+        Voice(voice_id=voice_id).delete()  # built inside the try so construction errors are reported too
+        logger.info(f"Voice {voice_id} deleted")
+        return "Voice deleted with success!"
+    except Exception as e:  # UI boundary: surface any failure as text instead of raising
+        logger.error(f"Failed to delete {voice_id} error: {e}")
+        return f"Failed to delete voice error: {e}"
+
+
+def set_voice_settings(stability: float, style: float, similarity_boost: float):
+    """Copy the three values onto global_config, call its save(), and return a confirmation message."""
+    cfg = global_config
+    cfg.voice_stability, cfg.voice_style, cfg.voice_similarity_boost = stability, style, similarity_boost
+    cfg.save()
+    return "Voice settings updated with success!"
+
+
 def elevenlabs_tab():
     with gr.Tab("Elevenlabs") as tab:
         with gr.Tab("Available Voices"):
@@ -68,4 +89,42 @@ def elevenlabs_tab():
                 allow_flagging="never",
             )
 
+        with gr.Tab("Delete Voice Clone"):
+            gr.Interface(
+                fn=delete_voice,
+                inputs=[gr.Textbox(label="Voice ID to delete")],
+                outputs="text",
+                allow_flagging="never",
+            )
+
+        with gr.Tab("Voice Settings"):
+            gr.Interface(
+                fn=set_voice_settings,
+                inputs=[
+                    gr.Slider(
+                        label="Stability",
+                        value=global_config.voice_stability,
+                        minimum=0.01,
+                        maximum=1.00,
+                        step=0.01,
+                    ),
+                    gr.Slider(
+                        label="Style",
+                        value=global_config.voice_style,
+                        minimum=0.01,
+                        maximum=1.00,
+                        step=0.01,
+                    ),
+                    gr.Slider(
+                        label="Similarity Boost",
+                        value=global_config.voice_similarity_boost,
+                        minimum=0.01,
+                        maximum=1.00,
+                        step=0.01,
+                    ),
+                ],
+                outputs="text",
+                allow_flagging="never",
+            )
+
     return tab
diff --git a/backend/src/config.py b/backend/src/config.py
index 732216e..5a723d3 100644
--- a/backend/src/config.py
+++ b/backend/src/config.py
@@ -49,6 +49,10 @@ class GlobalConfig:
     elevenlabs_voice_id: str = env_or_default("ELEVENLABS_VOICE_ID")
     elevenlabs_model: Optional[str] = env_or_default("ELEVENLABS_MODEL", "eleven_multilingual_v2")
 
+    voice_stability: Optional[float] = float(env_or_default("VOICE_STABILITY", "0.30"))
+    voice_similarity_boost: Optional[float] = float(env_or_default("VOICE_SIMILARITY_BOOST", "0.75"))
+    voice_style: Optional[float] = float(env_or_default("VOICE_STYLE", "0.40"))
+
     narrator_volume: int = int(env_or_default("NARRATOR_VOLUME", "100"))
 
     discord_webhook_key: Optional[str] = env_or_default("DISCORD_WEBHOOK_KEY")
diff --git a/backend/src/tts.py b/backend/src/tts.py
index 4e35542..33849f8 100644
--- a/backend/src/tts.py
+++ b/backend/src/tts.py
@@ -79,7 +79,11 @@ def non_stream():
         logger.info(f"Using {global_config.elevenlabs_buffer_size} elevenlabs buffer size")
         voice = Voice(
             voice_id=global_config.elevenlabs_voice_id,
-            settings=VoiceSettings(stability=0.05, similarity_boost=0.75, style=0.75),
+            settings=VoiceSettings(
+                stability=global_config.voice_stability,  # type: ignore
+                similarity_boost=global_config.voice_similarity_boost,  # type: ignore
+                style=global_config.voice_style,
+            ),
         )
 
         gen = generate(