From 9119a333ae26ba61fb385ac147ba6bacc5ec0dc1 Mon Sep 17 00:00:00 2001
From: Spycsh
Date: Wed, 16 Oct 2024 23:11:22 -0700
Subject: [PATCH 1/3] fix

---
 ChatQnA/chatqna.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index 5fe6000f6b..25dacc0e70 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -136,14 +136,14 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
             prompt_template = PromptTemplate.from_template(chat_template)
             input_variables = prompt_template.input_variables
             if sorted(input_variables) == ["context", "question"]:
-                prompt = prompt_template.format(question=prompt, context="\n".join(docs))
+                prompt = prompt_template.format(question=prompt, context="\n".join(reranked_docs))
             elif input_variables == ["question"]:
                 prompt = prompt_template.format(question=prompt)
             else:
                 print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
-                prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
+                prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
         else:
-            prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
+            prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
 
         next_data["inputs"] = prompt
 
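Patch 1 swaps `docs` for `reranked_docs`, so the RAG prompt is assembled from the reranker's output rather than the raw retrieved set. For reference, a minimal standalone sketch of the template branch this hunk touches, assuming LangChain's `PromptTemplate` (the class chatqna.py calls `from_template` on); the template string and documents below are illustrative, not from the repo:

```python
# Minimal sketch of the chat_template path; assumes langchain-core is installed.
from langchain_core.prompts import PromptTemplate

chat_template = "Answer from the context.\nContext: {context}\nQuestion: {question}"
prompt_template = PromptTemplate.from_template(chat_template)

# chatqna.py keys its branching off the template's declared variables:
assert sorted(prompt_template.input_variables) == ["context", "question"]

reranked_docs = ["OPEA is an open platform for enterprise AI.", "ChatQnA is a RAG example."]
prompt = prompt_template.format(question="What is ChatQnA?", context="\n".join(reranked_docs))
print(prompt)  # context slots are filled from the reranker's output, as in the hunk
```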
int(os.getenv("WHISPER_SERVER_PORT", 7066)) +GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0") +GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088)) +LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0") +LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888)) + + +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + print(inputs) + if self.services[cur_node].service_type == ServiceType.ASR: + # {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'} + inputs["audio"] = inputs["byte_str"] + del inputs["byte_str"] + elif self.services[cur_node].service_type == ServiceType.LLM: + # convert TGI/vLLM to unified OpenAI /v1/chat/completions format + next_inputs = {} + next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified + next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}] + next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"] + next_inputs["top_p"] = llm_parameters_dict["top_p"] + next_inputs["stream"] = inputs["streaming"] # False as default + next_inputs["frequency_penalty"] = inputs["frequency_penalty"] + # next_inputs["presence_penalty"] = inputs["presence_penalty"] + # next_inputs["repetition_penalty"] = inputs["repetition_penalty"] + next_inputs["temperature"] = inputs["temperature"] + inputs = next_inputs + elif self.services[cur_node].service_type == ServiceType.TTS: + next_inputs = {} + next_inputs["text"] = inputs["choices"][0]["message"]["content"] + next_inputs["text_language"] = kwargs["tts_text_language"] if "tts_text_language" in kwargs else "zh" + inputs = next_inputs + return inputs + +def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs): + if self.services[cur_node].service_type == ServiceType.TTS: + audio_base64 = base64.b64encode(data).decode('utf-8') + return {"byte_str": audio_base64} + return data + + +class AudioQnAService: + def __init__(self, host="0.0.0.0", port=8000): + self.host = host + self.port = port + ServiceOrchestrator.align_inputs = align_inputs + ServiceOrchestrator.align_outputs = align_outputs + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + asr = MicroService( + name="asr", + host=WHISPER_SERVER_HOST_IP, + port=WHISPER_SERVER_PORT, + # endpoint="/v1/audio/transcriptions", + endpoint="/v1/asr", + use_remote_service=True, + service_type=ServiceType.ASR, + ) + llm = MicroService( + name="llm", + host=LLM_SERVER_HOST_IP, + port=LLM_SERVER_PORT, + endpoint="/v1/chat/completions", + use_remote_service=True, + service_type=ServiceType.LLM, + ) + tts = MicroService( + name="tts", + host=GPT_SOVITS_SERVER_HOST_IP, + port=GPT_SOVITS_SERVER_PORT, + # endpoint="/v1/audio/speech", + endpoint="/", + use_remote_service=True, + service_type=ServiceType.TTS, + ) + self.megaservice.add(asr).add(llm).add(tts) + self.megaservice.flow_to(asr, llm) + self.megaservice.flow_to(llm, tts) + self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + audioqna.add_remote_service() diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md index d08061284d..29440d0868 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md @@ -131,5 +131,5 @@ curl 
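The new `sed`/`base64 -d` tail of the curl command exists because the gateway returns the synthesized audio as a JSON-quoted base64 string (align_outputs base64-encodes the raw TTS bytes into "byte_str"). A hedged Python equivalent of that pipeline, as a sketch only; `requests` being installed and the host/port values (taken from the compose file below) are assumptions:

```python
# Hypothetical client mirroring the README's curl | sed | base64 -d pipeline.
import base64
import json

import requests

host_ip = "localhost"  # substitute your ${host_ip}
payload = {
    "audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA",
    "max_tokens": 64,
}
resp = requests.post(f"http://{host_ip}:3008/v1/audioqna", json=payload, timeout=300)
audio_b64 = json.loads(resp.text)  # unquotes the JSON string, like the sed step
with open("output.wav", "wb") as f:
    f.write(base64.b64decode(audio_b64))  # like the base64 -d step
```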
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
new file mode 100644
index 0000000000..d8ca1d7f8c
--- /dev/null
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
@@ -0,0 +1,64 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+    command: --language "zh"
+  gpt-sovits-service:
+    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
+    container_name: gpt-sovits-service
+    ports:
+      - "9880:9880"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  tgi-service:
+    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    container_name: tgi-service
+    ports:
+      - "3006:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+  audioqna-xeon-backend-server:
+    image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
+    container_name: audioqna-xeon-backend-server
+    ports:
+      - "3008:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
+      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
+      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
+      - GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
+      - GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git a/AudioQnA/docker_image_build/build.yaml b/AudioQnA/docker_image_build/build.yaml
index 50c5140739..98ec0ccaa8 100644
--- a/AudioQnA/docker_image_build/build.yaml
+++ b/AudioQnA/docker_image_build/build.yaml
@@ -53,3 +53,9 @@ services:
       dockerfile: comps/tts/speecht5/Dockerfile
     extends: audioqna
     image: ${REGISTRY:-opea}/tts:${TAG:-latest}
+  gpt-sovits:
+    build:
+      context: GenAIComps
+      dockerfile: comps/tts/gpt-sovits/Dockerfile
+    extends: audioqna
+    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
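With the stack up, the TTS hop can also be smoke-tested in isolation against gpt-sovits-service. The sketch below is hypothetical and rests on assumptions from the code above: the payload shape mirrors what align_inputs sends to the GPT-SoVITS endpoint "/", and port 9880 follows the compose mapping (audioqna_multilang.py defaults GPT_SOVITS_SERVER_PORT to 9088, so export that variable to match the deployment):

```python
# Hypothetical smoke test for the gpt-sovits-service container.
import requests

resp = requests.post(
    "http://localhost:9880/",  # compose publishes 9880:9880
    json={"text": "你好，欢迎使用多语言语音问答。", "text_language": "zh"},
    timeout=120,
)
with open("tts_sample.wav", "wb") as f:
    f.write(resp.content)  # raw audio bytes; align_outputs base64-encodes these
```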
From aaa785402b8b61e81aa1764776e23d1a20b68cae Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 18 Oct 2024 08:18:04 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 AudioQnA/audioqna_multilang.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/AudioQnA/audioqna_multilang.py b/AudioQnA/audioqna_multilang.py
index 5fb5e2a458..8a4ffdd01a 100644
--- a/AudioQnA/audioqna_multilang.py
+++ b/AudioQnA/audioqna_multilang.py
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import asyncio
-import os
 import base64
+import os
 
 from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
 
@@ -31,7 +31,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
         next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
-        next_inputs["stream"] = inputs["streaming"] # False as default
+        next_inputs["stream"] = inputs["streaming"]  # False as default
         next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
         # next_inputs["presence_penalty"] = inputs["presence_penalty"]
         # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
@@ -44,9 +44,10 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
         inputs = next_inputs
     return inputs
 
+
 def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
     if self.services[cur_node].service_type == ServiceType.TTS:
-        audio_base64 = base64.b64encode(data).decode('utf-8')
+        audio_base64 = base64.b64encode(data).decode("utf-8")
         return {"byte_str": audio_base64}
     return data
 