From b2d59ef7226aabb240d2ebc2f1a3b12525b0d868 Mon Sep 17 00:00:00 2001 From: erew123 <35898566+erew123@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:21:08 +0000 Subject: [PATCH 1/5] Update AllTalk Integration Update AllTalk Integration * Added Standard Generation mode as an alternative to Streaming * Integrated RVC (voice conversion) support with voice selection * Added RVC pitch adjustment (-24 to +24) * RVC controls automatically disable when using Streaming mode * Standard generation mode set as default This now opens up Kobold to all the TTS engines that AllTalk supports, as well as the RVC/Voice2voice pipeline. So for example, you can use Piper TTS, which is very low on GPU/CPU RAM and resource, then use the RVC/Voice2voice pipeline to change the TTS output to sound like any RVC based voice you want. Full details here https://github.com/erew123/alltalk_tts/wiki/RVC-(Retrieval%E2%80%90based-Voice-Conversion) along with a link to 100,000+ voices. Streaming generation only works with the Coqui XTTS engine, --- index.html | 251 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 196 insertions(+), 55 deletions(-) diff --git a/index.html b/index.html index 97ce280..de0776c 100644 --- a/index.html +++ b/index.html @@ -4181,7 +4181,7 @@ var current_wi = []; //each item stores a wi object. var wi_insertlocation = 0; //after memory var wi_searchdepth = 0; //search everything - var generateimagesinterval = 750; //if generated images is enabled, it will trigger after every 700 new characters in context. + var generateimagesinterval = 700; //if generated images is enabled, it will trigger after every 700 new characters in context. var nextgeneratedimagemilestone = generateimagesinterval; //used to keep track of when to generate the next image var image_db = {}; //stores a dictionary of pending images var interrogation_db = {}; @@ -11653,7 +11653,7 @@ function do_auto_gen_image(truncated_context) { var tclen = truncated_context.length; - var sentence = truncated_context.substring(tclen - 400, tclen); + var sentence = truncated_context.substring(tclen - 380, tclen); sentence = start_trim_to_sentence(sentence); sentence = end_trim_to_sentence(sentence,true); if (sentence.length > 0) { @@ -11881,44 +11881,115 @@ } },false); } - - function toggle_tts_mode() - { + function toggle_tts_mode() { document.getElementById("xtts_container").classList.add("hidden"); document.getElementById("oai_tts_container").classList.add("hidden"); - if(document.getElementById("ttsselect").value==XTTS_ID || document.getElementById("ttsselect").value==ALLTALK_ID) - { + document.getElementById("alltalk_specific_controls").classList.add("hidden"); + + const selectedTTS = document.getElementById("ttsselect").value; + + if(selectedTTS == XTTS_ID || selectedTTS == ALLTALK_ID) { document.getElementById("xtts_container").classList.remove("hidden"); - fetch_xtts_voices(true, document.getElementById("ttsselect").value==XTTS_ID); + + if(selectedTTS == ALLTALK_ID) { + document.getElementById("alltalk_specific_controls").classList.remove("hidden"); + if(localsettings.saved_alltalk_url) { + fetch_rvc_voices(localsettings.saved_alltalk_url); + } + update_rvc_state(); + setupAllTalkControls(); // Add this line to setup the controls + } + fetch_xtts_voices(true, selectedTTS == XTTS_ID); } - else if(document.getElementById("ttsselect").value==OAI_TTS_ID) - { + else if(selectedTTS == OAI_TTS_ID) { document.getElementById("oai_tts_container").classList.remove("hidden"); } } - function set_xtts_url() - { + + // 
Call setupAllTalkControls when the page loads + document.addEventListener('DOMContentLoaded', function() { + setupAllTalkControls(); + }); + // Fetch RVC voices for AllTalk + function fetch_rvc_voices(url) { + fetch(url + "/api/rvcvoices") + .then(response => response.json()) + .then(data => { + console.log("RVC voices response:", data); // Debug log + const rvcSelect = document.getElementById("alltalk_rvc_voice"); + rvcSelect.innerHTML = ''; + + if (data.status === "success" && Array.isArray(data.rvcvoices)) { // Changed from data.voices to data.rvcvoices + data.rvcvoices.forEach(voice => { // Changed from data.voices to data.rvcvoices + if (voice !== "Disabled") { + const option = document.createElement("option"); + option.value = voice; + option.textContent = voice.split("\\").pop().replace(".pth", ""); + rvcSelect.appendChild(option); + } + }); + } + }) + .catch(error => { + console.log("Error fetching RVC voices:", error); + }); + } + // Function to handle RVC dropdown state (Disable if Streaming Selected) + function update_rvc_state() { + const streamingMode = document.getElementById("alltalk_streaming").value === "true"; + const rvcSelect = document.getElementById("alltalk_rvc_voice"); + const rvcPitch = document.getElementById("alltalk_rvc_pitch"); + + console.log("Updating RVC state, streaming mode:", streamingMode); + + // Update disabled state + rvcSelect.disabled = streamingMode; + rvcPitch.disabled = streamingMode; + + // Only reset to Disabled if switching to streaming mode + if (streamingMode) { + rvcSelect.value = "Disabled"; + } + } + // Make sure RVC Pitch Slider works + function setupAllTalkControls() { + const pitchSlider = document.getElementById("alltalk_rvc_pitch"); + const pitchValue = document.getElementById("alltalk_rvc_pitch_value"); + + if (pitchSlider && pitchValue) { + pitchSlider.addEventListener("input", function() { + pitchValue.textContent = this.value; + }); + } + } + // Listener for AllTalk streaming mode changes + document.addEventListener('DOMContentLoaded', function() { + const streamingSelect = document.getElementById("alltalk_streaming"); + streamingSelect.addEventListener("change", function() { + console.log("Streaming mode changed to:", this.value); // Debug log + update_rvc_state(); + }); + }); + // Update set_xtts_url to use the new fetch_rvc_voices function + function set_xtts_url() { let is_xtts = (document.getElementById("ttsselect").value==XTTS_ID); let epname = (is_xtts?"XTTS":"AllTalk"); inputBox("Enter "+epname+" API Server URL.",epname+" API Server URL",(is_xtts?localsettings.saved_xtts_url:localsettings.saved_alltalk_url),"Input "+epname+" API Server URL", ()=>{ let userinput = getInputBoxValue(); userinput = userinput.trim(); - if(userinput!="" && userinput.slice(-1)=="/") - { + if(userinput!="" && userinput.slice(-1)=="/") { userinput = userinput.slice(0, -1); } - if(userinput=="") - { + if(userinput=="") { userinput = (is_xtts?default_xtts_base:default_alltalk_base); } if (userinput != null && userinput!="") { - if(is_xtts) - { + if(is_xtts) { localsettings.saved_xtts_url = userinput.trim(); - } - else - { + } else { localsettings.saved_alltalk_url = userinput.trim(); + // Fetch RVC voices with new URL + fetch_rvc_voices(userinput.trim()); } xtts_is_connected = false; @@ -12044,48 +12115,100 @@ console.log("XTTS Speak Error: " + error); }); } - else - { + else { //alltalk - const formData = new FormData(); - formData.append("text_input", text); // max 2000 chars - formData.append("text_filtering", "none"); // (none|standard|html) - 
formData.append("character_voice_gen", document.getElementById("xtts_voices").value); - formData.append("narrator_enabled", false); - formData.append("narrator_voice_gen", document.getElementById("xtts_voices").value); - formData.append("text_not_inside", "character"); // character or narrator, determines which to use - formData.append("language", document.getElementById("xtts_lang").value.trim().toLowerCase()); - formData.append("output_file_name", "audiofile"); // NOTE: file name only, with no extension and no dashes! - formData.append("output_file_timestamp", true); - formData.append("autoplay", false); //to play in browser - formData.append("autoplay_volume", 1.0); // (0.1..2.0) - formData.append("streaming", true); // unknown why - - fetch(localsettings.saved_alltalk_url + alltalk_gen_endpoint, { - method: 'POST', - body: formData, // send payload as FormData - }) - .then(response => response.arrayBuffer()) - .then(data => { - return audioContext.decodeAudioData(data); - }) - .then(decodedData => { - const playSound = audioContext.createBufferSource(); - playSound.buffer = decodedData; - playSound.connect(audioContext.destination); + const isStreaming = document.getElementById("alltalk_streaming").value === "true"; + + if (isStreaming) { + // Create a URLSearchParams object for streaming + const params = new URLSearchParams({ + text: text, + voice: document.getElementById("xtts_voices").value, + language: document.getElementById("xtts_lang").value.trim().toLowerCase(), + output_file: "stream_output.wav", + }); + + // Create streaming URL + const streamingUrl = `${localsettings.saved_alltalk_url}/api/tts-generate-streaming?${params.toString()}`; + + // Use the existing Web Audio API setup + const audioElement = new Audio(streamingUrl); xtts_is_playing = true; update_submit_button(false); - playSound.start(audioContext.currentTime); - playSound.onended = function() { + + audioElement.onended = function() { setTimeout(() => { xtts_is_playing = false; update_submit_button(false); console.log("Audio finished playing"); - },300); + }, 300); }; - }).catch((error) => { - console.log("AllTalk Speak Error: " + error); - }); + + audioElement.onerror = function(error) { + console.log("AllTalk Streaming Error:", error); + xtts_is_playing = false; + update_submit_button(false); + }; + + audioElement.play(); + } else { + // Standard mode using FormData + const formData = new FormData(); + formData.append("text_input", text); + formData.append("text_filtering", "none"); + formData.append("character_voice_gen", document.getElementById("xtts_voices").value); + formData.append("narrator_enabled", false); + formData.append("narrator_voice_gen", document.getElementById("xtts_voices").value); + formData.append("text_not_inside", "character"); + formData.append("language", document.getElementById("xtts_lang").value.trim().toLowerCase()); + formData.append("output_file_name", "audiofile"); + formData.append("output_file_timestamp", true); + formData.append("autoplay", false); + formData.append("autoplay_volume", 1.0); + formData.append("rvccharacter_voice_gen", document.getElementById("alltalk_rvc_voice").value); + formData.append("rvccharacter_pitch", document.getElementById("alltalk_rvc_pitch").value); + formData.append("rvcnarrator_voice_gen", document.getElementById("alltalk_rvc_voice").value); + formData.append("rvcnarrator_pitch", document.getElementById("alltalk_rvc_pitch").value); + + fetch(localsettings.saved_alltalk_url + "/api/tts-generate", { + method: 'POST', + body: formData, + }) + 
.then(response => response.json()) + .then(data => { + if (data.status === "generate-success") { + const audioUrl = `${localsettings.saved_alltalk_url}${data.output_file_url}`; + const audioElement = new Audio(audioUrl); + xtts_is_playing = true; + update_submit_button(false); + + audioElement.onended = function() { + setTimeout(() => { + xtts_is_playing = false; + update_submit_button(false); + console.log("Audio finished playing"); + }, 300); + }; + + audioElement.onerror = function(error) { + console.log("AllTalk Playback Error:", error); + xtts_is_playing = false; + update_submit_button(false); + }; + + audioElement.play(); + } else { + console.log("AllTalk Generation Error:", data); + xtts_is_playing = false; + update_submit_button(false); + } + }) + .catch((error) => { + console.log("AllTalk Request Error:", error); + xtts_is_playing = false; + update_submit_button(false); + }); + } } } } @@ -19601,6 +19724,24 @@ +
Narrate Both Sides
[HTML hunk at +19724: markup not preserved here — it adds the hidden "alltalk_specific_controls" block (streaming-mode selector alltalk_streaming, RVC voice select alltalk_rvc_voice, RVC pitch slider alltalk_rvc_pitch with value readout alltalk_rvc_pitch_value) beside the existing "Narrate Both Sides" option]
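For reference, a minimal standalone sketch of the standard (non-streaming) generation call this patch wires up, including the new RVC fields. This is not KoboldAI Lite code: the base URL/port and the voice file name are placeholder assumptions, while the form field names mirror the FormData built in the diff above.

    // Hedged sketch: standard AllTalk generation with RVC voice conversion.
    // "http://localhost:7851" and "female_01.wav" are assumed placeholder values.
    const base = "http://localhost:7851";
    const form = new FormData();
    form.append("text_input", "Hello there.");
    form.append("text_filtering", "none");
    form.append("character_voice_gen", "female_01.wav");
    form.append("narrator_enabled", false);
    form.append("language", "en");
    form.append("output_file_name", "audiofile");
    form.append("output_file_timestamp", true);
    form.append("autoplay", false);
    form.append("rvccharacter_voice_gen", "Disabled"); // or an RVC .pth voice name
    form.append("rvccharacter_pitch", 0);              // -24 to +24
    fetch(base + "/api/tts-generate", { method: "POST", body: form })
      .then(r => r.json())
      .then(d => {
        if (d.status === "generate-success") {
          // v2-style JSON reply: play the generated file from the server.
          new Audio(base + d.output_file_url).play();
        }
      });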
From c7c02a4d4642811ddf399d7913d226653d8f55c3 Mon Sep 17 00:00:00 2001 From: erew123 <35898566+erew123@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:29:09 +0000 Subject: [PATCH 2/5] AllTalk match PR to current Dev branch Line 4184 & 11656 --- index.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.html b/index.html index de0776c..2b35301 100644 --- a/index.html +++ b/index.html @@ -4181,7 +4181,7 @@ var current_wi = []; //each item stores a wi object. var wi_insertlocation = 0; //after memory var wi_searchdepth = 0; //search everything - var generateimagesinterval = 700; //if generated images is enabled, it will trigger after every 700 new characters in context. + var generateimagesinterval = 750; //if generated images is enabled, it will trigger after every 700 new characters in context. var nextgeneratedimagemilestone = generateimagesinterval; //used to keep track of when to generate the next image var image_db = {}; //stores a dictionary of pending images var interrogation_db = {}; @@ -11653,7 +11653,7 @@ function do_auto_gen_image(truncated_context) { var tclen = truncated_context.length; - var sentence = truncated_context.substring(tclen - 380, tclen); + var sentence = truncated_context.substring(tclen - 400, tclen); sentence = start_trim_to_sentence(sentence); sentence = end_trim_to_sentence(sentence,true); if (sentence.length > 0) { From b1c1f7d4dfc746fd331d528765e4a184159f1a96 Mon Sep 17 00:00:00 2001 From: Concedo Date: Sat, 14 Dec 2024 10:21:15 +0800 Subject: [PATCH 3/5] changed global dom events to direct function calls from the ui control event --- index.html | 144 +++++++++++++++++++++++------------------------------ 1 file changed, 61 insertions(+), 83 deletions(-) diff --git a/index.html b/index.html index 2b35301..4bee238 100644 --- a/index.html +++ b/index.html @@ -11881,23 +11881,22 @@ } },false); } - function toggle_tts_mode() { + + function toggle_tts_mode() + { document.getElementById("xtts_container").classList.add("hidden"); document.getElementById("oai_tts_container").classList.add("hidden"); document.getElementById("alltalk_specific_controls").classList.add("hidden"); - + const selectedTTS = document.getElementById("ttsselect").value; - + if(selectedTTS == XTTS_ID || selectedTTS == ALLTALK_ID) { document.getElementById("xtts_container").classList.remove("hidden"); - + if(selectedTTS == ALLTALK_ID) { document.getElementById("alltalk_specific_controls").classList.remove("hidden"); - if(localsettings.saved_alltalk_url) { - fetch_rvc_voices(localsettings.saved_alltalk_url); - } - update_rvc_state(); - setupAllTalkControls(); // Add this line to setup the controls + fetch_rvc_voices(); + adjust_alltalk_controls(); } fetch_xtts_voices(true, selectedTTS == XTTS_ID); } @@ -11906,19 +11905,18 @@ } } - // Call setupAllTalkControls when the page loads - document.addEventListener('DOMContentLoaded', function() { - setupAllTalkControls(); - }); // Fetch RVC voices for AllTalk - function fetch_rvc_voices(url) { - fetch(url + "/api/rvcvoices") + function fetch_rvc_voices() + { + if(!xtts_is_connected) //prevent it from constantly fetching, will only fetch once before connecting + { + fetch(localsettings.saved_alltalk_url + "/api/rvcvoices") .then(response => response.json()) .then(data => { console.log("RVC voices response:", data); // Debug log const rvcSelect = document.getElementById("alltalk_rvc_voice"); rvcSelect.innerHTML = ''; - + if (data.status === "success" && Array.isArray(data.rvcvoices)) { // Changed from data.voices to 
data.rvcvoices data.rvcvoices.forEach(voice => { // Changed from data.voices to data.rvcvoices if (voice !== "Disabled") { @@ -11933,43 +11931,21 @@ .catch(error => { console.log("Error fetching RVC voices:", error); }); + } } - // Function to handle RVC dropdown state (Disable if Streaming Selected) - function update_rvc_state() { + + //single callback to update alltalk controls on any alltalk UI event. + function adjust_alltalk_controls() { + const pitchSlider = document.getElementById("alltalk_rvc_pitch"); + const pitchValue = document.getElementById("alltalk_rvc_pitch_value"); + pitchValue.textContent = pitchSlider.value; const streamingMode = document.getElementById("alltalk_streaming").value === "true"; const rvcSelect = document.getElementById("alltalk_rvc_voice"); const rvcPitch = document.getElementById("alltalk_rvc_pitch"); - - console.log("Updating RVC state, streaming mode:", streamingMode); - - // Update disabled state rvcSelect.disabled = streamingMode; rvcPitch.disabled = streamingMode; - - // Only reset to Disabled if switching to streaming mode - if (streamingMode) { - rvcSelect.value = "Disabled"; - } } - // Make sure RVC Pitch Slider works - function setupAllTalkControls() { - const pitchSlider = document.getElementById("alltalk_rvc_pitch"); - const pitchValue = document.getElementById("alltalk_rvc_pitch_value"); - - if (pitchSlider && pitchValue) { - pitchSlider.addEventListener("input", function() { - pitchValue.textContent = this.value; - }); - } - } - // Listener for AllTalk streaming mode changes - document.addEventListener('DOMContentLoaded', function() { - const streamingSelect = document.getElementById("alltalk_streaming"); - streamingSelect.addEventListener("change", function() { - console.log("Streaming mode changed to:", this.value); // Debug log - update_rvc_state(); - }); - }); + // Update set_xtts_url to use the new fetch_rvc_voices function function set_xtts_url() { let is_xtts = (document.getElementById("ttsselect").value==XTTS_ID); @@ -11984,15 +11960,14 @@ userinput = (is_xtts?default_xtts_base:default_alltalk_base); } if (userinput != null && userinput!="") { + xtts_is_connected = false; if(is_xtts) { localsettings.saved_xtts_url = userinput.trim(); } else { localsettings.saved_alltalk_url = userinput.trim(); // Fetch RVC voices with new URL - fetch_rvc_voices(userinput.trim()); + fetch_rvc_voices(); } - - xtts_is_connected = false; fetch_xtts_voices(false, is_xtts); } },false); @@ -12115,10 +12090,11 @@ console.log("XTTS Speak Error: " + error); }); } - else { + else + { //alltalk const isStreaming = document.getElementById("alltalk_streaming").value === "true"; - + if (isStreaming) { // Create a URLSearchParams object for streaming const params = new URLSearchParams({ @@ -12130,12 +12106,12 @@ // Create streaming URL const streamingUrl = `${localsettings.saved_alltalk_url}/api/tts-generate-streaming?${params.toString()}`; - + // Use the existing Web Audio API setup const audioElement = new Audio(streamingUrl); xtts_is_playing = true; update_submit_button(false); - + audioElement.onended = function() { setTimeout(() => { xtts_is_playing = false; @@ -12143,13 +12119,13 @@ console.log("Audio finished playing"); }, 300); }; - + audioElement.onerror = function(error) { console.log("AllTalk Streaming Error:", error); xtts_is_playing = false; update_submit_button(false); }; - + audioElement.play(); } else { // Standard mode using FormData @@ -12181,7 +12157,7 @@ const audioElement = new Audio(audioUrl); xtts_is_playing = true; update_submit_button(false); - 
+ audioElement.onended = function() { setTimeout(() => { xtts_is_playing = false; @@ -12189,13 +12165,13 @@ console.log("Audio finished playing"); }, 300); }; - + audioElement.onerror = function(error) { console.log("AllTalk Playback Error:", error); xtts_is_playing = false; update_submit_button(false); }; - + audioElement.play(); } else { console.log("AllTalk Generation Error:", data); @@ -19707,13 +19683,33 @@
Narrate Both Sides
[HTML hunk at +19683: markup not preserved here — it reworks and expands the same AllTalk controls block near the "Narrate Both Sides" option so the UI elements trigger fetch_rvc_voices()/adjust_alltalk_controls() from their own events rather than from page-load listeners]
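A note on the design change in this patch: the first revision registered its handlers through global DOMContentLoaded listeners, while this one lets the controls call a single consolidated callback directly. A rough sketch of the two wirings (the inline onchange attribute is an assumption, since the HTML hunk is not preserved above):

    // Before (patch 1): wire handlers once at page load.
    document.addEventListener("DOMContentLoaded", function() {
      document.getElementById("alltalk_streaming")
        .addEventListener("change", update_rvc_state);
    });

    // After (this patch): the control itself invokes the consolidated callback,
    // e.g. <select id="alltalk_streaming" onchange="adjust_alltalk_controls()">,
    // so nothing needs to run at page load.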
From 5edcd34f6b468c932652122103ebdb5088f810b0 Mon Sep 17 00:00:00 2001 From: Concedo Date: Sat, 14 Dec 2024 17:46:21 +0800 Subject: [PATCH 4/5] fixed functionality of non-streaming to handle both alltalk v1 and v2 --- index.html | 142 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 60 deletions(-) diff --git a/index.html b/index.html index 4bee238..09dfd8f 100644 --- a/index.html +++ b/index.html @@ -4125,7 +4125,9 @@ const xtts_gen_endpoint = "/tts_to_audio/"; const xtts_voices_endpoint = "/speakers_list"; const alltalk_gen_endpoint = "/api/tts-generate"; + const alltalk_stream_endpoint = "/api/tts-generate-streaming"; const alltalk_voices_endpoint = "/api/voices"; + const alltalk_rvc_voices_endpoint = "/api/rvcvoices"; //support for quick news updates const horde_news_endpoint = "https://hordenews.concedo.workers.dev" @@ -11910,13 +11912,12 @@ { if(!xtts_is_connected) //prevent it from constantly fetching, will only fetch once before connecting { - fetch(localsettings.saved_alltalk_url + "/api/rvcvoices") + fetch(localsettings.saved_alltalk_url + alltalk_rvc_voices_endpoint) .then(response => response.json()) .then(data => { console.log("RVC voices response:", data); // Debug log const rvcSelect = document.getElementById("alltalk_rvc_voice"); rvcSelect.innerHTML = ''; - if (data.status === "success" && Array.isArray(data.rvcvoices)) { // Changed from data.voices to data.rvcvoices data.rvcvoices.forEach(voice => { // Changed from data.voices to data.rvcvoices if (voice !== "Disabled") { @@ -11939,7 +11940,7 @@ const pitchSlider = document.getElementById("alltalk_rvc_pitch"); const pitchValue = document.getElementById("alltalk_rvc_pitch_value"); pitchValue.textContent = pitchSlider.value; - const streamingMode = document.getElementById("alltalk_streaming").value === "true"; + const streamingMode = (document.getElementById("alltalk_streaming").checked ? true : false); const rvcSelect = document.getElementById("alltalk_rvc_voice"); const rvcPitch = document.getElementById("alltalk_rvc_pitch"); rvcSelect.disabled = streamingMode; @@ -11972,6 +11973,7 @@ } },false); } + function tts_speak(text, speech_synth_override=null) { if(!text || text=="" || text.trim()=="") @@ -12087,13 +12089,15 @@ },300); }; }).catch((error) => { + xtts_is_playing = false; + update_submit_button(false); console.log("XTTS Speak Error: " + error); }); } else { //alltalk - const isStreaming = document.getElementById("alltalk_streaming").value === "true"; + const isStreaming = (document.getElementById("alltalk_streaming").checked ? 
true : false); if (isStreaming) { // Create a URLSearchParams object for streaming @@ -12105,28 +12109,9 @@ }); // Create streaming URL - const streamingUrl = `${localsettings.saved_alltalk_url}/api/tts-generate-streaming?${params.toString()}`; + const streamingUrl = `${localsettings.saved_alltalk_url}${alltalk_stream_endpoint}?${params.toString()}`; - // Use the existing Web Audio API setup - const audioElement = new Audio(streamingUrl); - xtts_is_playing = true; - update_submit_button(false); - - audioElement.onended = function() { - setTimeout(() => { - xtts_is_playing = false; - update_submit_button(false); - console.log("Audio finished playing"); - }, 300); - }; - - audioElement.onerror = function(error) { - console.log("AllTalk Streaming Error:", error); - xtts_is_playing = false; - update_submit_button(false); - }; - audioElement.play(); } else { // Standard mode using FormData const formData = new FormData(); @@ -12146,41 +12131,77 @@ formData.append("rvcnarrator_voice_gen", document.getElementById("alltalk_rvc_voice").value); formData.append("rvcnarrator_pitch", document.getElementById("alltalk_rvc_pitch").value); - fetch(localsettings.saved_alltalk_url + "/api/tts-generate", { + fetch(localsettings.saved_alltalk_url + alltalk_gen_endpoint, { method: 'POST', - body: formData, - }) - .then(response => response.json()) - .then(data => { - if (data.status === "generate-success") { - const audioUrl = `${localsettings.saved_alltalk_url}${data.output_file_url}`; - const audioElement = new Audio(audioUrl); + body: formData, // send payload as FormData + }).then(response => { + //content type can be JSON (alltalk v2) or raw audio (v1) + const contentType = response.headers.get("Content-Type"); + + let playDecodedAllTalkData = function(decodedData) + { + const playSound = audioContext.createBufferSource(); + playSound.buffer = decodedData; + playSound.connect(audioContext.destination); xtts_is_playing = true; update_submit_button(false); - - audioElement.onended = function() { + playSound.start(audioContext.currentTime); + playSound.onended = function() { setTimeout(() => { xtts_is_playing = false; update_submit_button(false); console.log("Audio finished playing"); - }, 300); + },300); }; + } - audioElement.onerror = function(error) { - console.log("AllTalk Playback Error:", error); + //alltalk v2 json + if (contentType && contentType.toLowerCase().includes("application/json")) + { + return response.json().then(data => { + if (data && data.output_file_url && data.status === "generate-success") + { + const audioUrl = `${localsettings.saved_alltalk_url}${data.output_file_url}`; + fetch(audioUrl) + .then(response => response.arrayBuffer()) + .then(data => { + return audioContext.decodeAudioData(data); + }) + .then(decodedData => { + playDecodedAllTalkData(decodedData); + }) + .catch((error) => { + console.log("AllTalk v2 Speak Error:", data); + xtts_is_playing = false; + update_submit_button(false); + }); + } else { + console.log("AllTalk Generation Error:", data); + xtts_is_playing = false; + update_submit_button(false); + } + }) + .catch((error) => { + console.log("AllTalk Request Error:", error); xtts_is_playing = false; update_submit_button(false); - }; - - audioElement.play(); - } else { - console.log("AllTalk Generation Error:", data); - xtts_is_playing = false; - update_submit_button(false); + }); } - }) - .catch((error) => { - console.log("AllTalk Request Error:", error); + else //alltalk v1 audio + { + return response.arrayBuffer().then(data => { + return 
audioContext.decodeAudioData(data); + }) + .then(decodedData => { + playDecodedAllTalkData(decodedData); + }).catch((error) => { + console.log("AllTalk v1 Speak Error: " + error); + xtts_is_playing = false; + update_submit_button(false); + }); + } + }).catch((error) => { + console.log("AllTalk Non-Stream Req Error: " + error); xtts_is_playing = false; update_submit_button(false); }); @@ -19692,22 +19713,23 @@ Language
[HTML hunk at +19713: markup not preserved here — it revises the AllTalk controls below the "Language" selector: the streaming option becomes a checkbox (alltalk_streaming), alongside the "RVC Voice" select (alltalk_rvc_voice) and the "RVC Pitch" slider (alltalk_rvc_pitch) with its "0" value readout (alltalk_rvc_pitch_value)]
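The substance of this patch is branching on the reply's Content-Type, since AllTalk v2 answers /api/tts-generate with JSON ({status, output_file_url}) while v1 returns the raw audio bytes directly. A stripped-down sketch of that decision, written as a self-contained helper; the parameters stand in for the audioContext and the playDecodedAllTalkData closure used in the diff:

    // Sketch: handle both AllTalk v1 (raw audio) and v2 (JSON) responses.
    function generateAndPlay(base, form, audioContext, playDecoded) {
      return fetch(base + "/api/tts-generate", { method: "POST", body: form })
        .then(response => {
          const type = (response.headers.get("Content-Type") || "").toLowerCase();
          if (type.includes("application/json")) {
            // AllTalk v2: JSON pointing at the generated audio file.
            return response.json()
              .then(d => fetch(base + d.output_file_url))
              .then(r => r.arrayBuffer());
          }
          // AllTalk v1: the response body is already the WAV data.
          return response.arrayBuffer();
        })
        .then(buf => audioContext.decodeAudioData(buf))
        .then(playDecoded);
    }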
From 6fd0bbcb265ca93b142ad13018d06f0eabdbb684 Mon Sep 17 00:00:00 2001 From: Concedo Date: Sat, 14 Dec 2024 18:46:20 +0800 Subject: [PATCH 5/5] support streaming endpoint without creating a media player (since that gets blocked by autoplay plugins) --- index.html | 53 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/index.html b/index.html index 09dfd8f..4884f0b 100644 --- a/index.html +++ b/index.html @@ -12099,18 +12099,47 @@ //alltalk const isStreaming = (document.getElementById("alltalk_streaming").checked ? true : false); + let playDecodedAllTalkData = function(decodedData) + { + const playSound = audioContext.createBufferSource(); + playSound.buffer = decodedData; + playSound.connect(audioContext.destination); + xtts_is_playing = true; + update_submit_button(false); + playSound.start(audioContext.currentTime); + playSound.onended = function() { + setTimeout(() => { + xtts_is_playing = false; + update_submit_button(false); + console.log("Audio finished playing"); + },300); + }; + } + if (isStreaming) { // Create a URLSearchParams object for streaming const params = new URLSearchParams({ text: text, voice: document.getElementById("xtts_voices").value, language: document.getElementById("xtts_lang").value.trim().toLowerCase(), - output_file: "stream_output.wav", + output_file: "klite_stream_output.wav", }); - // Create streaming URL + // Create streaming URL, but right now it's as good as sync const streamingUrl = `${localsettings.saved_alltalk_url}${alltalk_stream_endpoint}?${params.toString()}`; - + fetch(streamingUrl) + .then(response => response.arrayBuffer()) + .then(data => { + return audioContext.decodeAudioData(data); + }) + .then(decodedData => { + playDecodedAllTalkData(decodedData); + }) + .catch((error) => { + console.log("AllTalk v2 Speak Error:", data); + xtts_is_playing = false; + update_submit_button(false); + }); } else { // Standard mode using FormData @@ -12137,24 +12166,6 @@ }).then(response => { //content type can be JSON (alltalk v2) or raw audio (v1) const contentType = response.headers.get("Content-Type"); - - let playDecodedAllTalkData = function(decodedData) - { - const playSound = audioContext.createBufferSource(); - playSound.buffer = decodedData; - playSound.connect(audioContext.destination); - xtts_is_playing = true; - update_submit_button(false); - playSound.start(audioContext.currentTime); - playSound.onended = function() { - setTimeout(() => { - xtts_is_playing = false; - update_submit_button(false); - console.log("Audio finished playing"); - },300); - }; - } - //alltalk v2 json if (contentType && contentType.toLowerCase().includes("application/json")) {
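To round this off, a compact sketch of the approach the final patch settles on for the streaming endpoint: fetch the URL, decode the bytes with the Web Audio API, and play through an AudioBufferSourceNode instead of constructing an Audio element (which autoplay blockers can suppress). The base URL and the voice/language values are placeholders:

    // Sketch of the /api/tts-generate-streaming playback path from patch 5.
    const ctx = new AudioContext(); // may need a prior user gesture to resume
    const params = new URLSearchParams({
      text: "Hello there.",
      voice: "female_01.wav",       // placeholder voice
      language: "en",
      output_file: "klite_stream_output.wav",
    });
    fetch("http://localhost:7851/api/tts-generate-streaming?" + params.toString())
      .then(r => r.arrayBuffer())
      .then(buf => ctx.decodeAudioData(buf))
      .then(decoded => {
        const src = ctx.createBufferSource();
        src.buffer = decoded;
        src.connect(ctx.destination);
        src.start(ctx.currentTime);
      });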