Skip to content

Commit

Permalink
Merge pull request #18 from Didymos-IO/release/v0.7.0
Browse files Browse the repository at this point in the history
TTS Robot Option for Chat.
  • Loading branch information
cssquirrel authored Aug 15, 2023
2 parents 14e2817 + a7f1ef2 commit ae90b09
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 39 deletions.
18 changes: 9 additions & 9 deletions backend/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
},
"saveFileVersion": "1.0.0",
"tts": {
"engine": "WebSpeech",
"engine": "Coqui-AI",
"optionsCoquiAi": {
"emotion": "Dull",
"model": "tts_models/en/vctk/vits",
Expand Down Expand Up @@ -91,18 +91,18 @@
"user": ""
},
{
"id": 1691534775689,
"description": "About Riot",
"type": "command",
"action": "say",
"command": "!aboutriot",
"rewardId": "",
"cooldown": 0,
"description": "About Riot",
"id": 1691534775689,
"isActive": true,
"isBoundToRole": true,
"rewardId": "",
"role": "everyone",
"action": "say",
"text": "Hello, I'm Riot! I was made by Whysper with art by Adara, and code by Kyle, and I run on OpenAI's ChatGPT.",
"user": "",
"isActive": true,
"cooldown": 0
"type": "command",
"user": ""
}
]
}
Expand Down
17 changes: 17 additions & 0 deletions src/components/icons/icons.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,22 @@ const QuestionCircle = () => {
);
};

// Bootstrap Icons "robot" glyph as an inline 16x16 SVG component.
// Inherits its colour from the surrounding text via fill="currentColor".
const Robot = () => (
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="16"
    height="16"
    fill="currentColor"
    className="bi bi-robot"
    viewBox="0 0 16 16"
  >
    {/* Face: mouth bar plus eyes/antenna detail */}
    <path d="M6 12.5a.5.5 0 0 1 .5-.5h3a.5.5 0 0 1 0 1h-3a.5.5 0 0 1-.5-.5ZM3 8.062C3 6.76 4.235 5.765 5.53 5.886a26.58 26.58 0 0 0 4.94 0C11.765 5.765 13 6.76 13 8.062v1.157a.933.933 0 0 1-.765.935c-.845.147-2.34.346-4.235.346-1.895 0-3.39-.2-4.235-.346A.933.933 0 0 1 3 9.219V8.062Zm4.542-.827a.25.25 0 0 0-.217.068l-.92.9a24.767 24.767 0 0 1-1.871-.183.25.25 0 0 0-.068.495c.55.076 1.232.149 2.02.193a.25.25 0 0 0 .189-.071l.754-.736.847 1.71a.25.25 0 0 0 .404.062l.932-.97a25.286 25.286 0 0 0 1.922-.188.25.25 0 0 0-.068-.495c-.538.074-1.207.145-1.98.189a.25.25 0 0 0-.166.076l-.754.785-.842-1.7a.25.25 0 0 0-.182-.135Z" />
    {/* Head outline and body frame */}
    <path d="M8.5 1.866a1 1 0 1 0-1 0V3h-2A4.5 4.5 0 0 0 1 7.5V8a1 1 0 0 0-1 1v2a1 1 0 0 0 1 1v1a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2v-1a1 1 0 0 0 1-1V9a1 1 0 0 0-1-1v-.5A4.5 4.5 0 0 0 10.5 3h-2V1.866ZM14 7.5V13a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V7.5A3.5 3.5 0 0 1 5.5 4h5A3.5 3.5 0 0 1 14 7.5Z" />
  </svg>
);

const Send = (props: IconProps) => {
const { isActive } = props;

Expand Down Expand Up @@ -357,6 +373,7 @@ export const Icons = {
PlusCircle,
Power,
QuestionCircle,
Robot,
Send,
Speaker,
Terminal,
Expand Down
13 changes: 9 additions & 4 deletions src/components/stage/children/buttonBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ export const ButtonBar = (props: ButtonBarProps) => {
} = props;
const {
isActive,
isChatReadOutloud,
isSpeechSynthesisActive,
isTwitchActive,
haveRequestedStop,
onlyRespondWhenSpokenTo,
setIsChatReadOutloud,
setOnlyRespondWhenSpokenTo,
} = useContext(StageContext)!;
const isTranscriptionActive = !haveRequestedStop;
Expand Down Expand Up @@ -75,11 +77,14 @@ export const ButtonBar = (props: ButtonBarProps) => {
<Icons.Speaker isActive={isSpeechSynthesisActive} />
</button>
<button
className={`btn btn-primary d-inline-block bg-gradient custom-shadow-sm me-2 px-2 text-dark`}
title="Log speech to chat"
disabled={true}
className={`btn btn-primary d-inline-block bg-gradient custom-shadow-sm me-2 px-2 ${
isChatReadOutloud ? "" : "text-dark"
}`}
title={isChatReadOutloud ? "Don't read outloud" : "Read chat outloud"}
disabled={!isActive}
onClick={() => setIsChatReadOutloud(!isChatReadOutloud)}
>
<Icons.Terminal />
<Icons.Robot />
</button>
{/*
<button
Expand Down
88 changes: 67 additions & 21 deletions src/components/stage/stage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@
import { useContext, useEffect } from "react";
import { ReactMic } from "react-mic";

import { ChatMessage, Brain, Ears, Mouth, Personality } from "@/modules";
import {
ChatMessage,
ChatResponse,
Brain,
Ears,
Mouth,
Personality,
} from "@/modules";
import { SettingsContext, StageContext, TwitchContext } from "@/state";

import {
Expand All @@ -18,15 +25,8 @@ export const Stage = () => {
const settingsContext = useContext(SettingsContext)!;
const twitchContext = useContext(TwitchContext)!;
const { index, loadSettings, settings } = settingsContext;
const {
channel,
disconnect,
isTwitchConnected,
setTriggers,
triggerLog,
twitchLog,
joinChannel,
} = twitchContext;
const { disconnect, setTriggers, triggerLog, twitchLog, joinChannel } =
twitchContext;
const profiles = settings.profiles;
const triggers = profiles[index].twitch.triggers;
const { identity, openAiApi, tts } = profiles[index];
Expand Down Expand Up @@ -85,7 +85,10 @@ export const Stage = () => {
talk(ttsMessage);
break;
case "response":
thinkUpResponse(`${message.userName} says, '${message.message}'`);
thinkUpResponse(
`${message.userName} says, '${message.message}'`,
context.isChatReadOutloud
);
break;
case "say":
const sayMessage = {
Expand Down Expand Up @@ -130,7 +133,10 @@ export const Stage = () => {
};

const handleChatBoxSubmit = (text: string) => {
thinkUpResponse(Ears.cleanUpTextHeard(text, identity)); // nosonar
thinkUpResponse(
Ears.cleanUpTextHeard(text, identity),
context.isChatReadOutloud
); // nosonar
};

const handleMicData = (recordedBlob: any) => {
Expand Down Expand Up @@ -201,18 +207,58 @@ export const Stage = () => {
context.setIsTTSSpeaking(false);
};

const thinkUpResponse = async (text: string) => {
context.setIsThinking(true);
const thinkUpResponse = async (
text: string,
shouldSpeakTextFirst?: boolean
) => {
context.setTranscribedText("");
const userMessage = { role: "user", content: text };
context.setChatHistory([...context.chatHistory, userMessage]);
const responseFromBrain = await Brain.thinkUpResponse(
text,
context.chatHistory,
identity,
openAiApi,
true
);
let responseFromBrain: ChatResponse;
if (shouldSpeakTextFirst) {
const talking = Mouth.speak(
text,
{
engine: "WebSpeech",
optionsCoquiAi: {
emotion: "Dull",
model: "tts_models/en/vctk/vits",
rate: 1,
voice: "p233",
},
optionsWebSpeech: {
pitch: 1,
rate: 1,
voice: 0,
},
},
() => {},
() => {
context.setIsThinking(true);
}
);
const thinking = Brain.thinkUpResponse(
text,
context.chatHistory,
identity,
openAiApi,
true
);
const waiting = [await talking, await thinking];
responseFromBrain =
typeof waiting[0] !== "string"
? (waiting[0] as ChatResponse)
: (waiting[1] as ChatResponse);
} else {
context.setIsThinking(true);
responseFromBrain = await Brain.thinkUpResponse(
text,
context.chatHistory,
identity,
openAiApi,
true
);
}
const { message, time, tokensUsed: newTokensUsed } = responseFromBrain;
context.setAiTime(time);
context.setTokensUsed(context.tokensUsed + newTokensUsed);
Expand Down
19 changes: 14 additions & 5 deletions src/modules/mouth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ const shutUp = () => {
const speak = (
text: string,
settings: TtsSettings,
onStartSpeaking?: () => void
onStartSpeaking?: () => void,
onStopSpeaking?: () => void
): Promise<string> => {
if (settings.engine === "Coqui-AI") {
const { optionsCoquiAi } = settings;
Expand All @@ -41,13 +42,13 @@ const speak = (
emotion: optionsCoquiAi.emotion,
};

return speakViaCoqui(text, coquiOptions, onStartSpeaking);
return speakViaCoqui(text, coquiOptions, onStartSpeaking, onStopSpeaking);
} else {
const { optionsWebSpeech } = settings;
if (onStartSpeaking) {
onStartSpeaking();
}
return speakViaWebSpeech(text, optionsWebSpeech);
return speakViaWebSpeech(text, optionsWebSpeech, onStopSpeaking);
}
};

Expand All @@ -57,7 +58,8 @@ const speak = (
const speakViaCoqui = async (
text: string,
options?: TTSCoquiOptions,
onStartSpeaking?: () => void
onStartSpeaking?: () => void,
onStopSpeaking?: () => void
): Promise<string> => {
const start = Date.now();
let end: number = 0;
Expand All @@ -70,6 +72,9 @@ const speakViaCoqui = async (
onStartSpeaking();
}
await _playAudioBuffer(wav);
if (onStopSpeaking) {
onStopSpeaking();
}
const time = convertMsToSeconds(end - start);
return time;
});
Expand All @@ -80,7 +85,8 @@ const speakViaCoqui = async (
*/
const speakViaWebSpeech = (
text: string,
options?: TTSWebSpeechOptions
options?: TTSWebSpeechOptions,
onStopSpeaking?: () => void
): Promise<string> => {
return new Promise<string>((resolve) => {
const utterance = new SpeechSynthesisUtterance(text);
Expand All @@ -95,6 +101,9 @@ const speakViaWebSpeech = (
utterance.rate = options.rate;
}
utterance.onend = () => {
if (onStopSpeaking) {
onStopSpeaking();
}
resolve("00.00");
};
synthesis.speak(utterance);
Expand Down
4 changes: 4 additions & 0 deletions src/state/stageContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export const StageProvider = (props: StageProviderProps) => {
const [isTTSSpeaking, setIsTTSSpeaking] = useState(false);
const [isTwitchActive, setIsTwitchActive] = useState(false);
const [isTranscribing, setIsTranscribing] = useState(false);
const [isChatReadOutloud, setIsChatReadOutloud] = useState(false);
const [location, setLocation] = useState("");
const [newestBlob, setNewestBlob] = useState<Blob>();
const [onlyRespondWhenSpokenTo, setOnlyRespondWhenSpokenTo] = useState(false);
Expand All @@ -44,6 +45,8 @@ export const StageProvider = (props: StageProviderProps) => {
setHaveRequestedStop,
isActive,
setIsActive,
isChatReadOutloud,
setIsChatReadOutloud,
isRecording,
setIsRecording,
isSpeaking,
Expand Down Expand Up @@ -82,6 +85,7 @@ export const StageProvider = (props: StageProviderProps) => {
chatHistory,
haveRequestedStop,
isActive,
isChatReadOutloud,
isRecording,
isSpeaking,
isSpeechSynthesisActive,
Expand Down
2 changes: 2 additions & 0 deletions src/types/contextTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ export type StageContextType = {
setHaveRequestedStop: React.Dispatch<React.SetStateAction<boolean>>;
isActive: boolean;
setIsActive: React.Dispatch<React.SetStateAction<boolean>>;
isChatReadOutloud: boolean;
setIsChatReadOutloud: React.Dispatch<React.SetStateAction<boolean>>;
isRecording: boolean;
setIsRecording: React.Dispatch<React.SetStateAction<boolean>>;
isSpeaking: boolean;
Expand Down

0 comments on commit ae90b09

Please sign in to comment.