From 947cc0bf0dd417b52415cab0cab37cf8d6b69bba Mon Sep 17 00:00:00 2001 From: htagourti Date: Wed, 6 Nov 2024 13:28:10 +0000 Subject: [PATCH 01/18] added redis broker to llm-gateway --- scripts/build-services.sh | 3 ++- services/llm/llm-gateway/template.jsonnet | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/build-services.sh b/scripts/build-services.sh index 8a53100..e2de9fe 100755 --- a/scripts/build-services.sh +++ b/scripts/build-services.sh @@ -69,7 +69,8 @@ build_main_service() { build_llm() { echo "Building LLM..." generate_yaml_files "services/llm/llm-gateway" $1 $2 - generate_yaml_files "services/llm/vllm" + generate_yaml_files "services/stt/task-broker-redis" + #generate_yaml_files "services/llm/vllm" } build_studio() { diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet index e7c5036..cfc0dd0 100644 --- a/services/llm/llm-gateway/template.jsonnet +++ b/services/llm/llm-gateway/template.jsonnet @@ -3,6 +3,7 @@ local config = import 'config.jsonnet'; local service = base.Service(config); local shared_mount = std.extVar('LINTO_SHARED_MOUNT'); local network = std.extVar('DOCKER_NETWORK'); +local redis_password = std.extVar('REDIS_PASSWORD'); local patch = { services: { @@ -13,12 +14,13 @@ local patch = { ], networks: [ 'net_llm_services', + 'task_broker_services', network, ], environment: { PYTHONUNBUFFERED:1, SERVICE_NAME:'LLM_Gateway', - OPENAI_API_BASE:'http://vllm-service:8000/v1', + OPENAI_API_BASE: 'https://chat.ai.linagora.exaion.com/v1/', OPENAI_API_TOKEN:'EMPTY', HTTP_PORT:80, CONCURRENCY:1, @@ -27,6 +29,8 @@ local patch = { SWAGGER_URL: '/llm-gateway', SWAGGER_PATH:'../document/swagger_llm_gateway.yml', RESULT_DB_PATH:'./results.sqlite', + SERVICES_BROKER: 'redis://task-broker-redis:6379', + BROKER_PASS: redis_password, }, }, }, @@ -34,6 +38,9 @@ local patch = { net_llm_services: { external: true, }, + task_broker_services: { + external: true, + }, }, }; From 
8bb372bc32ce78c4a66cefee9206513f15cd7f29 Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 8 Nov 2024 10:24:32 +0000 Subject: [PATCH 02/18] removed api url from llm config --- .envdefault | 4 ++++ services/llm/llm-gateway/template.jsonnet | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.envdefault b/.envdefault index 6d2d104..0f4ff7d 100644 --- a/.envdefault +++ b/.envdefault @@ -24,3 +24,7 @@ LINTO_FRONT_THEME=LinTO-green ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session SUPER_ADMIN_EMAIL=superadmin@mail.com SUPER_ADMIN_PWD=superadmin + +# OpenAI +OPENAI_API_TOKEN=sk*** +OPENAI_API_BASE=*** \ No newline at end of file diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet index cfc0dd0..4bc5796 100644 --- a/services/llm/llm-gateway/template.jsonnet +++ b/services/llm/llm-gateway/template.jsonnet @@ -4,6 +4,8 @@ local service = base.Service(config); local shared_mount = std.extVar('LINTO_SHARED_MOUNT'); local network = std.extVar('DOCKER_NETWORK'); local redis_password = std.extVar('REDIS_PASSWORD'); +local openai_api_base = std.extVar('OPENAI_API_BASE'); +local open_api_token = std.extVar('OPENAI_API_TOKEN'); local patch = { services: { @@ -20,8 +22,8 @@ local patch = { environment: { PYTHONUNBUFFERED:1, SERVICE_NAME:'LLM_Gateway', - OPENAI_API_BASE: 'https://chat.ai.linagora.exaion.com/v1/', - OPENAI_API_TOKEN:'EMPTY', + OPENAI_API_BASE: openai_api_base, + OPENAI_API_TOKEN: open_api_token, HTTP_PORT:80, CONCURRENCY:1, TIMEOUT:60, From 54a3a36b50b8239bc436b9aab19514e521f2e367 Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 8 Nov 2024 12:35:14 +0000 Subject: [PATCH 03/18] added the possibility to enable/disable vllm deployment --- scripts/build-services.sh | 9 +++++++-- scripts/dialog.sh | 20 ++++++++++++++++++++ scripts/setup-services.sh | 8 +++++++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/scripts/build-services.sh b/scripts/build-services.sh index 
e2de9fe..0a3344d 100755 --- a/scripts/build-services.sh +++ b/scripts/build-services.sh @@ -56,6 +56,8 @@ generate_yaml_files() { -V DIARIZATION_DEFAULT=$diarization_service \ -V GPU_MODE=$gpu_mode \ -V ENABLE_SESSION_STUDIO=$enable_session_studio \ + -V OPENAI_API_BASE=$OPENAI_API_BASE \ + -V OPENAI_API_TOKEN=$OPENAI_API_TOKEN \ "${service_dir}/template.jsonnet" | yq eval -P - >"$RUNNING_DIR/$FILE_NAME.yaml" fi } @@ -70,7 +72,9 @@ build_llm() { echo "Building LLM..." generate_yaml_files "services/llm/llm-gateway" $1 $2 generate_yaml_files "services/stt/task-broker-redis" - #generate_yaml_files "services/llm/vllm" + if [ "$3" = "true" ]; then + generate_yaml_files "services/llm/vllm" + fi } build_studio() { @@ -161,6 +165,7 @@ main() { gpu_enable="${6:-false}" diarization_enable="${7:-false}" speaker_identification="${8:-false}" + vllm_enable="${9:-false}" case "$1" in stt-fr) @@ -173,7 +178,7 @@ main() { build_diarization $gpu_enable $speaker_identification ;; llm) - build_llm $traefik_exposed $gateway_exposed + build_llm $traefik_exposed $gateway_exposed $vllm_enable ;; studio) # Special rule for studio on param 4 who containing the information about live-streaming diff --git a/scripts/dialog.sh b/scripts/dialog.sh index 7f44696..382a00f 100755 --- a/scripts/dialog.sh +++ b/scripts/dialog.sh @@ -120,6 +120,23 @@ streaming_service() { echo "$selected_streaming_services" } +dialog_vllm() { + vllm=$(dialog --title "vLLM Backend deployment" --radiolist \ + "Do you want to deploy the vLLM service?" 
"$DIALOG_HEIGHT" "$DIALOG_WIDTH" 2 \ + 1 "Yes" off \ + 2 "No" off \ + 3>&1 1>&2 2>&3) + + case "$vllm" in + 1) + vllm_enable="true" + ;; + 2) + vllm_enable="false" + ;; + esac + echo "$vllm_enable" +} main() { case "$1" in @@ -144,6 +161,9 @@ main() { streaming_service) streaming_service ;; + vllm) + dialog_vllm + ;; *) echo "Usage: $0 {expose|transcription|deployment|gpu|domain|speaker_identification|streaming_service}" exit 1 diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 564c7b5..7560760 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -82,6 +82,7 @@ trigger_build_service() { #TODO: we expose to the gateway when studio is selected gpu_enable=false + enable_vllm=false diarization_enable="" live_streaming_enable=false speaker_identification="false" @@ -93,6 +94,8 @@ trigger_build_service() { else diarization_enable="stt-diarization-pyannote" fi + if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then + diarization_enable="stt-diarization-pyannote" fi if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then echo "Studio is selected, forcing API Gateway" @@ -102,6 +105,9 @@ trigger_build_service() { echo "Studio is selected, forcing API Gateway" live_streaming_enable=true fi + if [[ "$services" =~ (^|[[:space:]])4($|[[:space:]]) ]]; then + vllm_enable=$(./scripts/dialog.sh "vllm") + fi ./scripts/build-services.sh "main" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" @@ -136,7 +142,7 @@ trigger_build_service() { 4) ./scripts/build-config.sh "llm" - ./scripts/build-services.sh "llm" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" "$expose_traefik" "$expose_api_gateway" + ./scripts/build-services.sh "llm" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" "$expose_traefik" "$expose_api_gateway" "" "" "" "$vllm_enable" ;; 5) From 487ea1d8c790bebaf6870c05773cd09f86cd3dfa Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 8 Nov 2024 13:45:45 +0000 Subject: [PATCH 04/18] corrected vllm_enable variable name --- scripts/setup-services.sh | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 7560760..e049367 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -82,7 +82,7 @@ trigger_build_service() { #TODO: we expose to the gateway when studio is selected gpu_enable=false - enable_vllm=false + vllm_enable=false diarization_enable="" live_streaming_enable=false speaker_identification="false" @@ -94,6 +94,7 @@ trigger_build_service() { else diarization_enable="stt-diarization-pyannote" fi + fi if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then diarization_enable="stt-diarization-pyannote" fi From 16dc68f97b23f89bdb22fa9da4dc4b043d7bc7ca Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 8 Nov 2024 13:49:42 +0000 Subject: [PATCH 05/18] removed redundant if statement --- scripts/setup-services.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index e049367..4e2efa5 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -95,9 +95,6 @@ trigger_build_service() { diarization_enable="stt-diarization-pyannote" fi fi - if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then - diarization_enable="stt-diarization-pyannote" - fi if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then echo "Studio is selected, forcing API Gateway" expose_api_gateway=true From d1cb0cb9a8c591018c37d4a250e656321ce98eaf Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 13 Dec 2024 15:48:35 +0000 Subject: [PATCH 06/18] added missing env & corrected error in session-api template --- .envdefault | 6 +++++- services/live-session/session-api/template.jsonnet | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.envdefault b/.envdefault index 0f4ff7d..09eee96 100644 --- a/.envdefault +++ b/.envdefault @@ -27,4 +27,8 @@ SUPER_ADMIN_PWD=superadmin # OpenAI OPENAI_API_TOKEN=sk*** -OPENAI_API_BASE=*** \ No newline at end of file +OPENAI_API_BASE=*** 
+ +ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session +SUPER_ADMIN_EMAIL=admin@mail.com +SUPER_ADMIN_PWD=superadminpassword \ No newline at end of file diff --git a/services/live-session/session-api/template.jsonnet b/services/live-session/session-api/template.jsonnet index e0031e2..e5050df 100644 --- a/services/live-session/session-api/template.jsonnet +++ b/services/live-session/session-api/template.jsonnet @@ -20,6 +20,8 @@ local patch = { # check with JS these env configuration SESSION_API_WEBSERVER_HTTP_PORT:'80', + STREAMING_WS_SECURE: 'true', + STREAMING_WS_SECURE: 'true', STREAMING_PASSPHRASE:'false', From 35ec0207890f9993219af4e9c68d621424aa2c33 Mon Sep 17 00:00:00 2001 From: yhoupert Date: Tue, 12 Nov 2024 15:46:31 +0100 Subject: [PATCH 07/18] Handle new studio api environment settings --- test.ipynb | 244 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 test.ipynb diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..c124640 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,244 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", + " warnings.warn(\n", + "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. 
See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "200\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'stop' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 35\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mprint\u001b[39m(res_check\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m 34\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/transcribe\u001b[39m\u001b[38;5;124m\"\u001b[39m, files\u001b[38;5;241m=\u001b[39mfiles, data\u001b[38;5;241m=\u001b[39mdata, headers\u001b[38;5;241m=\u001b[39mheaders, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 35\u001b[0m \u001b[43mstop\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m201\u001b[39m:\n\u001b[1;32m 37\u001b[0m result \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n", + "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined" + ] + } + ], + "source": [ + "import requests\n", + "import json\n", + "import time\n", + "url = \"http://localhost/stt-french-whisper-v3/transcribe\"\n", + "\n", + "audio_path = \"/home/ubuntu/projects/linto-diarization/data_SUMMRE/004b_PADH.wav\"\n", + "\n", + "\n", + "files = {\n", + " 'file': ('audio.wav', open(audio_path, 'rb'), 'audio/wav')\n", + "}\n", + "diariz_conf = {\n", + "\"punctuationConfig\": {\n", + " \"enablePunctuation\": False, # Applies punctuation\n", + 
" \"serviceName\": None # Force serviceName (See SubService resolution)\n", + "},\n", + "\"enablePunctuation\": False, # Applies punctuation (Do not use, kept for backward compatibility)\n", + "\"diarizationConfig\": {\n", + " \"enableDiarization\": True, #Enables speaker diarization\n", + "}\n", + "}\n", + "data = {\n", + " 'transcriptionConfig': json.dumps(diariz_conf)\n", + "}\n", + "\n", + "headers = {\n", + " 'Accept': 'application/json'\n", + "}\n", + "endpoint = \"https://localhost/stt-french-whisper-v3\"\n", + "res_check = requests.get(f\"{endpoint}/healthcheck\", headers=headers, verify=False)\n", + "print(res_check.text)\n", + "print(res_check.status_code)\n", + "\n", + "response = requests.post(f\"{endpoint}/transcribe\", files=files, data=data, headers=headers, verify=False)\n", + "stop\n", + "if response.status_code == 201:\n", + " result = response.json()\n", + " print(\"Transcription lancée avec succès. Job ID:\", result['jobid'])\n", + "else:\n", + " print(\"Erreur lors de la requête:\", response.status_code, response.text)\n", + "\n", + "job_id = result['jobid']\n", + "job_status_url = f\"{endpoint}/job/{job_id}\"\n", + "\n", + "job_response = requests.get(job_status_url,headers=headers, verify=False)\n", + "if job_response.status_code == 202 or job_response.status_code == 201:\n", + " job_status = job_response.json()\n", + " print(\"Statut du job:\", job_status)\n", + " while (status := requests.get(job_status_url,headers=headers, verify=False)).json()['state'] != 'done' :\n", + " time.sleep(30)\n", + " print(status)\n", + " if status == 'failed':\n", + " break\n", + " job_response = requests.get(job_status_url,headers=headers,verify=False)\n", + " job_status = job_response.json()\n", + " \n", + "else:\n", + " print(\"Erreur lors de la vérification du statut:\", job_response.status_code, job_response.text)\n", + "result_id = job_status['result_id']\n", + "result_url = f\"{endpoint}/results/{result_id}\"\n", + "result_response = 
requests.get(result_url,headers=headers, verify=False)\n", + "print(result_response.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Gateway Timeout'" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "status.text" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transcription lancée avec succès. Job ID: 52fee215-919b-4544-85a0-c1ffc7d03754\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "'{\"state\": \"started\", \"steps\": {\"diarization\": {\"progress\": 0.0, \"required\": true, \"status\": \"started\"}, \"postprocessing\": {\"progress\": 0.0, \"required\": true, \"status\": \"pending\"}, \"preprocessing\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}, \"punctuation\": {\"required\": false}, \"transcription\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}}}'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "if response.status_code == 201:\n", + " result = response.json()\n", + " print(\"Transcription lancée avec succès. 
Job ID:\", result['jobid'])\n", + "else:\n", + " print(\"Erreur lors de la requête:\", response.status_code, response.text)\n", + "\n", + "job_id = result['jobid']\n", + "job_status_url = f\"{endpoint}/job/{job_id}\"\n", + "job_response = requests.get(job_status_url,headers=headers, verify=False)\n", + "job_response.text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "result_id = job_response.json()['result_id']\n", + "\n", + "result_url = f\"{endpoint}/results/{result_id}\"\n", + "result_response = requests.get(result_url,headers=headers, verify=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. 
See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "job_status_url = f\"{endpoint}/job/{job_id}\"\n", + "\n", + "job_response = requests.get(job_status_url,headers=headers, verify=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llm-gateway", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From b2d4b1efc54d5f07807419dabd7ad6456e2b02d0 Mon Sep 17 00:00:00 2001 From: yhoupert Date: Thu, 21 Nov 2024 16:52:33 +0100 Subject: [PATCH 08/18] Added streaming service khaldi and whisper --- .gitignore | 1 + scripts/build-config.sh | 35 ++++++++++ scripts/dialog.sh | 13 ++++ scripts/setup-services.sh | 1 + .../config.jsonnet | 65 +++++++++++++++++++ .../template.jsonnet | 37 +++++++++++ 6 files changed, 152 insertions(+) create mode 100644 services/live-session/stt-khaldi-french-streaming/config.jsonnet create mode 100644 services/live-session/stt-khaldi-french-streaming/template.jsonnet diff --git a/.gitignore b/.gitignore index 90dfe27..737f5a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ **/running/*.yaml +websocket.pcap \ No newline at end of file diff --git a/scripts/build-config.sh b/scripts/build-config.sh index 37b2b85..13278c8 100755 --- a/scripts/build-config.sh +++ b/scripts/build-config.sh @@ -115,6 +115,41 @@ build_session() { mkdir -p ${LINTO_LOCAL_MOUNT}/database/postgres/db-session-database/ } +build_khaldi-french-streaming() { + echo "Building Live streaming..." + TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/AMs/french" + + if [ ! 
-d "$TARGET_FOLDER" ]; then + ZIP_URL="https://dl.linto.ai/downloads/model-distribution/acoustic-models/fr-FR/linSTT_AM_fr-FR_v2.2.0.zip" + ZIP_FILE="${TARGET_FOLDER}/linSTT_AM_fr-FR_v2.2.0.zip" + + echo "Creating target folder: $TARGET_FOLDER" + mkdir -p "$TARGET_FOLDER" + curl -L -o "$ZIP_FILE" "$ZIP_URL" + unzip -o "$ZIP_FILE" -d "$TARGET_FOLDER" + rm "$ZIP_FILE" + fi + + TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/LMs/french" + + if [ ! -d "$TARGET_FOLDER" ]; then + ZIP_URL="https://dl.linto.ai/downloads/model-distribution/decoding-graphs/LVCSR/fr-FR/decoding_graph_fr-FR_Big_v2.2.0.zip" + ZIP_FILE="${TARGET_FOLDER}/linSTT_AM_fr-FR_v2.2.0.zip" + echo "Creating target folder: $TARGET_FOLDER" + mkdir -p "$TARGET_FOLDER" + curl -L -o "$ZIP_FILE" "$ZIP_URL" + unzip -o "$ZIP_FILE" -d "$TARGET_FOLDER" + rm "$ZIP_FILE" + fi +} + +build_whisper-streaming() { + echo "Building whisper..." + + mkdir -p ${LINTO_SHARED_MOUNT}/audios/api_uploads \ + ${LINTO_SHARED_MOUNT}/models/ +} + build_kaldi-french-streaming() { echo "Building Live streaming..." TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/AMs/french" diff --git a/scripts/dialog.sh b/scripts/dialog.sh index 382a00f..462a182 100755 --- a/scripts/dialog.sh +++ b/scripts/dialog.sh @@ -138,6 +138,16 @@ dialog_vllm() { echo "$vllm_enable" } +streaming_service() { + selected_streaming_services=$(dialog --title "Streaming Services" --checklist \ + "Streaming service selection?" 
"$DIALOG_HEIGHT" "$DIALOG_WIDTH" 2 \ + 1 "Linto french kaldi streaming service" off \ + 2 "Linto whisper streaming service" off \ + 3>&1 1>&2 2>&3) + + echo "$selected_streaming_services" +} + main() { case "$1" in expose) @@ -164,6 +174,9 @@ main() { vllm) dialog_vllm ;; + streaming_service) + streaming_service + ;; *) echo "Usage: $0 {expose|transcription|deployment|gpu|domain|speaker_identification|streaming_service}" exit 1 diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 4e2efa5..3bfa47a 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -89,6 +89,7 @@ trigger_build_service() { if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) && "$services" =~ (^|[[:space:]])(1|2)($|[[:space:]]) ]]; then speaker_identification=$(./scripts/dialog.sh "speaker_identification") + if [[ "$speaker_identification" == "true" ]]; then diarization_enable="stt-diarization-pyannote-qdrant" else diff --git a/services/live-session/stt-khaldi-french-streaming/config.jsonnet b/services/live-session/stt-khaldi-french-streaming/config.jsonnet new file mode 100644 index 0000000..26f7cc4 --- /dev/null +++ b/services/live-session/stt-khaldi-french-streaming/config.jsonnet @@ -0,0 +1,65 @@ +local tag = std.extVar('LINTO_IMAGE_TAG'); +local repo = std.extVar('DOCKER_REGISTRY'); +local domain = std.extVar('LINTO_DOMAIN'); + +local expose_with_traefik = std.extVar('EXPOSE_TRAEFIK') == "true"; +local expose_with_gateway = std.extVar('EXPOSE_GATEWAY') == "true"; + + +{ + //Generals + build_me: true, //Set to false to disable this build as a YAML file in ./running dir + service_name: 'stt-khaldi-french-streaming', + image: 'lintoai/linto-stt-kaldi:' + tag, + reserve_memory: '', //128M + reserve_cpu: '', //0.5 + limit_cpu: '', //1 + limit_memory: '', //512M + replicas: 1, + + //Main blocks + use_env_file: '', //Set to specified env file (.dockerenv) or leave blank + expose_with_traefik: expose_with_traefik, // TODO : set this to false after API GATEWAY 
tests + healthcheck: true, + expose_with_api_gateway: expose_with_gateway, + + //Traefik + traefik_endpoint: '/stt-khaldi-french-streaming', + traefik_strip_prefix: '/stt-khaldi-french-streaming', + traefik_server_port: 80, + traefik_domain: domain, + use_basic_auth: true, + + //Healthcheck + healthcheck_interval: '15s', + healthcheck_timeout: '10s', + healthcheck_retries: 4, + healthcheck_start_period: '10s', + restart_policy: false, + restart_condition: 'on-failure', + restart_delay: '5s', + restart_max_attempts: 3, + + //swarm node label constraints + swarm_node_label_constraints: [], //[['ip', 'ingress'], ['mongo', true]...] + + //swarm node role constraints + swarm_node_role_constraints: '', // worker, manager, or leave blank for none + + //API Gateway + gateway_server_port: 80, + gateway_server_desc:{ en: "Linto streaming service",fr:"Service de streaming Linto"}, + gateway_server_scope: 'llm', + + gateway_define_endpoints: [ + { + endpoint: 'stt-khaldi-french-streaming', + middlewares_order: 'logs', + middlewares: [ + { name: 'logs', params: { debug: '*' } } + ], + }, + ], + //Override command + command: [], +} \ No newline at end of file diff --git a/services/live-session/stt-khaldi-french-streaming/template.jsonnet b/services/live-session/stt-khaldi-french-streaming/template.jsonnet new file mode 100644 index 0000000..3970b2f --- /dev/null +++ b/services/live-session/stt-khaldi-french-streaming/template.jsonnet @@ -0,0 +1,37 @@ +local base = import '../../../jsonnet/base.libsonnet'; +local config = import 'config.jsonnet'; +local service = base.Service(config); +local shared_mount = std.extVar('LINTO_SHARED_MOUNT'); +local network = std.extVar('DOCKER_NETWORK'); + + +local patch = { + services: { + [config.service_name]: { + volumes: [ + shared_mount + '/audios/api_uploads/:/opt/audio', + shared_mount + '/models/AMs/french:/opt/AM', + shared_mount + '/models/LMs/french:/opt/LM', + ], + networks: [ + network, + 'session_network', + ], + environment: { + 
SERVICE_MODE: 'websocket', // task | http | websocket + MODEL_TYPE: 'lin', // lin | vosk + ENABLE_STREAMING: 'true', + STREAMING_PORT: '80', + CONCURRENCY: '1', + LANGUAGE: 'fr-FR', + }, + }, + }, + networks: { + session_network: { + external: true, + }, + }, +}; + +std.mergePatch(service, patch) From a480e553016eb7286cbca32588c98e7dd351c131 Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 8 Nov 2024 10:24:32 +0000 Subject: [PATCH 09/18] removed api url from llm config --- .envdefault | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.envdefault b/.envdefault index 09eee96..847692e 100644 --- a/.envdefault +++ b/.envdefault @@ -30,5 +30,9 @@ OPENAI_API_TOKEN=sk*** OPENAI_API_BASE=*** ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session -SUPER_ADMIN_EMAIL=admin@mail.com -SUPER_ADMIN_PWD=superadminpassword \ No newline at end of file +SUPER_ADMIN_EMAIL=superadmin@mail.com +SUPER_ADMIN_PWD=superadmin + +# OpenAI +OPENAI_API_BASE: http://vllm-service:8000/v1 +OPENAI_API_TOKEN : EMPTY \ No newline at end of file From 1d823d2720dfced4bba26037ac9218148c98389c Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 8 Nov 2024 12:35:14 +0000 Subject: [PATCH 10/18] added the possibility to enable/disable vllm deployment --- .envdefault | 4 ++-- scripts/setup-services.sh | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.envdefault b/.envdefault index 847692e..9791aaf 100644 --- a/.envdefault +++ b/.envdefault @@ -34,5 +34,5 @@ SUPER_ADMIN_EMAIL=superadmin@mail.com SUPER_ADMIN_PWD=superadmin # OpenAI -OPENAI_API_BASE: http://vllm-service:8000/v1 -OPENAI_API_TOKEN : EMPTY \ No newline at end of file +OPENAI_API_BASE=http://vllm-service:8000/v1 +OPENAI_API_TOKEN=EMPTY \ No newline at end of file diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 3bfa47a..58a80b5 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -83,6 +83,7 @@ trigger_build_service() { #TODO: we expose to the gateway 
when studio is selected gpu_enable=false vllm_enable=false + enable_vllm=false diarization_enable="" live_streaming_enable=false speaker_identification="false" @@ -95,7 +96,9 @@ trigger_build_service() { else diarization_enable="stt-diarization-pyannote" fi - fi + if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then + diarization_enable="stt-diarization-pyannote" + fi if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then echo "Studio is selected, forcing API Gateway" expose_api_gateway=true From 980d5a25e3e7458301d7e6b6a960ddc330a35398 Mon Sep 17 00:00:00 2001 From: htagourti Date: Fri, 13 Dec 2024 15:48:35 +0000 Subject: [PATCH 11/18] added missing env & corrected error in session-api template --- .envdefault | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.envdefault b/.envdefault index 9791aaf..a58e0d1 100644 --- a/.envdefault +++ b/.envdefault @@ -34,5 +34,9 @@ SUPER_ADMIN_EMAIL=superadmin@mail.com SUPER_ADMIN_PWD=superadmin # OpenAI -OPENAI_API_BASE=http://vllm-service:8000/v1 -OPENAI_API_TOKEN=EMPTY \ No newline at end of file +OPENAI_API_TOKEN=sk*** +OPENAI_API_BASE=*** + +ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session +SUPER_ADMIN_EMAIL=admin@mail.com +SUPER_ADMIN_PWD=superadminpassword \ No newline at end of file From 633fc0538f89e204b9ecf782a24001cf3ef3924f Mon Sep 17 00:00:00 2001 From: htagourti Date: Tue, 17 Dec 2024 16:01:15 +0000 Subject: [PATCH 12/18] added LLM gateway volume handling --- .gitignore | 3 +- conf-templates/llm/.hydra-conf/config.yaml | 45 +++++++++++++++++++ .../llm/.hydra-conf/services/en.yaml | 20 +++++++++ .../llm/.hydra-conf/services/fr.yaml | 20 +++++++++ conf-templates/llm/prompts/summarize-en.txt | 16 +++++++ .../{summary.txt => prompts/summarize-fr.txt} | 28 ++++++------ conf-templates/llm/summary.json | 23 ---------- scripts/build-config.sh | 6 +-- scripts/setup-services.sh | 1 + services/llm/llm-gateway/template.jsonnet | 3 +- 10 files changed, 123 insertions(+), 42 
deletions(-) create mode 100644 conf-templates/llm/.hydra-conf/config.yaml create mode 100644 conf-templates/llm/.hydra-conf/services/en.yaml create mode 100644 conf-templates/llm/.hydra-conf/services/fr.yaml create mode 100644 conf-templates/llm/prompts/summarize-en.txt rename conf-templates/llm/{summary.txt => prompts/summarize-fr.txt} (98%) delete mode 100644 conf-templates/llm/summary.json diff --git a/.gitignore b/.gitignore index 737f5a3..54e5554 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ **/running/*.yaml -websocket.pcap \ No newline at end of file +websocket.pcap +.env \ No newline at end of file diff --git a/conf-templates/llm/.hydra-conf/config.yaml b/conf-templates/llm/.hydra-conf/config.yaml new file mode 100644 index 0000000..cd99493 --- /dev/null +++ b/conf-templates/llm/.hydra-conf/config.yaml @@ -0,0 +1,45 @@ +defaults : + - _self_ + - services : + - en + - fr + +prompt_path: ./prompts/ +backend_defaults : + name: null + modelName: null + totalContextLength: null + maxGenerationLength: null + tokenizerClass: null + createNewTurnAfter: null + summaryTurns: null + maxNewTurns: null + temperature: null + top_p: null + reduceSummary: null + consolidateSummary: null + service_name: ${oc.env:SERVICE_NAME,LLM_Gateway} + +api_params: + api_base: ${oc.env:OPENAI_API_BASE,http://localhost:9000/v1} + api_key: ${oc.env:OPENAI_API_TOKEN,EMPTY} + max_retries: ${oc.decode:${oc.env:MAX_RETRIES,6}} + max_retry_delay: ${oc.decode:${oc.env:MAX_RETRY_DELAY,10}} + service_port: ${oc.decode:${oc.env:HTTP_PORT,8000}} + workers: ${oc.decode:${oc.env:CONCURRENCY,1}} + timeout: ${oc.decode:${oc.env:TIMEOUT,60}} + ws_polling_interval: ${oc.decode:${oc.env:WS_POLLING_INTERVAL,3}} + +semaphore: + max_concurrent_inferences: ${oc.decode:${oc.env:MAX_CONCURRENT_INFERENCES,3}} + +swagger: + url: ${oc.env:SWAGGER_URL,/docs} + title: ${oc.env:SWAGGER_TITLE,STT API Documentation} + description: ${oc.env:SWAGGER_DESCRIPTION,API to make summary of text using LLMs.} + 
+services_broker: + url: ${oc.env:SERVICES_BROKER,redis://localhost:6379} + password: ${oc.env:BROKER_PASS,EMPTY} + +debug: false \ No newline at end of file diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml new file mode 100644 index 0000000..1945087 --- /dev/null +++ b/conf-templates/llm/.hydra-conf/services/en.yaml @@ -0,0 +1,20 @@ +summarize/en: + type: summary + fields: 2 + name: summarize-en + description: + fr: English summary + backend: vLLM + flavor: + - name: llama + modelName: meta-llama-31-8b-it + totalContextLength: 128000 + maxGenerationLength: 2048 + tokenizerClass: LlamaTokenizer + createNewTurnAfter: 250 + summaryTurns: 3 + maxNewTurns: 9 + temperature: 0.2 + top_p: 0.7 + reduceSummary: false + consolidateSummary: false diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml new file mode 100644 index 0000000..b52947d --- /dev/null +++ b/conf-templates/llm/.hydra-conf/services/fr.yaml @@ -0,0 +1,20 @@ +summarize/fr: + type: summary + fields: 2 + name: summarize-fr + description: + fr: Résumé français + backend: vLLM + flavor: + - name: llama + modelName: meta-llama-31-8b-it + totalContextLength: 128000 + maxGenerationLength: 2048 + tokenizerClass: LlamaTokenizer + createNewTurnAfter: 250 + summaryTurns: 3 + maxNewTurns: 9 + temperature: 0.2 + top_p: 0.7 + reduceSummary: false + consolidateSummary: false diff --git a/conf-templates/llm/prompts/summarize-en.txt b/conf-templates/llm/prompts/summarize-en.txt new file mode 100644 index 0000000..f4b24db --- /dev/null +++ b/conf-templates/llm/prompts/summarize-en.txt @@ -0,0 +1,16 @@ +You must summarize a transcript following these guidelines: +Always use standard spelling conventions. +Rely strictly on the text to be processed without including external information. +Remove the mention of the speaker followed by ":" in the summary. 
+Explain the content without using the first-person narrative. +Never write anything other than the summary of the processed speech turns, do not provide information about the reduction and processing carried out, never present the summarized text out of context (no "Here is the summary of the speech turns:"). +Never include in the summary any statements from the speech turns summarized so far. +The speech turns can be in any language and must be translated into English. + +### Speech turns summarized so far (do not repeat or summarize again) +{} + +### Speech turns to process +{} + +### Speech turns summarized (in English) \ No newline at end of file diff --git a/conf-templates/llm/summary.txt b/conf-templates/llm/prompts/summarize-fr.txt similarity index 98% rename from conf-templates/llm/summary.txt rename to conf-templates/llm/prompts/summarize-fr.txt index f98c37e..d706ac6 100644 --- a/conf-templates/llm/summary.txt +++ b/conf-templates/llm/prompts/summarize-fr.txt @@ -1,15 +1,15 @@ -Vous devez résumer une transcription en suivant les directives suivantes : -Toujours utiliser les conventions orthographiques standard du français. -S'appuyer strictement sur le texte à traiter sans inclure d'informations externes. -Enlever la mention du locuteur suivie de ":" dans le résumé. -Expliquer le propos sans reprendre le tour de parole à la première personne. -Ne jamais rien écrire d'autre que le résumé des tours de parole traités, ne pas donner d'informations sur la réduction et les traitements réalisés, ne jamais présenter le texte résumé en sortant du contexte (pas de "Voici le résumé des tours de parole : "). -Ne jamais inclure dans le résumé des propos issus des tours de paroles résumé jusque là. 
- -### Tours de parole résumés jusque là (ne surtout pas répéter ou résumer à nouveau) -{} - -### Tours de parole à traiter -{} - +Vous devez résumer une transcription en suivant les directives suivantes : +Toujours utiliser les conventions orthographiques standard du français. +S'appuyer strictement sur le texte à traiter sans inclure d'informations externes. +Enlever la mention du locuteur suivie de ":" dans le résumé. +Expliquer le propos sans reprendre le tour de parole à la première personne. +Ne jamais rien écrire d'autre que le résumé des tours de parole traités, ne pas donner d'informations sur la réduction et les traitements réalisés, ne jamais présenter le texte résumé en sortant du contexte (pas de "Voici le résumé des tours de parole : "). +Ne jamais inclure dans le résumé des propos issus des tours de paroles résumé jusque là. + +### Tours de parole résumés jusque là (ne surtout pas répéter ou résumer à nouveau) +{} + +### Tours de parole à traiter +{} + ### Tours de parole résumés (en français) \ No newline at end of file diff --git a/conf-templates/llm/summary.json b/conf-templates/llm/summary.json deleted file mode 100644 index 388d5f0..0000000 --- a/conf-templates/llm/summary.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "type": "summary", - "fields": 2, - "name": "summary", - "description": { - "fr": "Résumé des tours" - }, - "backend": "vLLM", - "flavor": [ - { - "name": "llama3", - "modelName": "casperhansen/llama-3-8b-instruct-awq", - "totalContextLength": 8192, - "maxGenerationLength": 2048, - "tokenizerClass": "LlamaTokenizer", - "createNewTurnAfter": 300, - "summaryTurns": 2, - "maxNewTurns": 10, - "temperature": 0.1, - "top_p": 0.8 - } - ] -} diff --git a/scripts/build-config.sh b/scripts/build-config.sh index 13278c8..522889b 100755 --- a/scripts/build-config.sh +++ b/scripts/build-config.sh @@ -52,9 +52,9 @@ build_stt() { build_llm() { echo "Building LLM..." 
- mkdir -p "${LINTO_SHARED_MOUNT}/llm_services/" \ - ${LINTO_SHARED_MOUNT}/models/ - cp -r "${CONFIG_TEMPLATES}/llm/"* "${LINTO_SHARED_MOUNT}/llm_services/" + mkdir -p ${LINTO_SHARED_MOUNT}/models/ + + cp -r "${CONFIG_TEMPLATES}/llm" "${LINTO_SHARED_MOUNT}" create_networks "net_llm_services" } diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 58a80b5..9451f01 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -96,6 +96,7 @@ trigger_build_service() { else diarization_enable="stt-diarization-pyannote" fi + fi if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then diarization_enable="stt-diarization-pyannote" fi diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet index 4bc5796..02c77f2 100644 --- a/services/llm/llm-gateway/template.jsonnet +++ b/services/llm/llm-gateway/template.jsonnet @@ -12,7 +12,8 @@ local patch = { [config.service_name]: { volumes: [ shared_mount + '/models/:/root/.cache', - shared_mount + '/llm_services/:/usr/src/services/' + shared_mount + '/llm/.hydra-conf:/usr/src/.hydra-conf', + shared_mount + '/llm/prompts:/usr/src/prompts' ], networks: [ 'net_llm_services', From 4c313e4b012a288c52fab77ed2026a9568065a9a Mon Sep 17 00:00:00 2001 From: htagourti Date: Wed, 18 Dec 2024 16:29:24 +0000 Subject: [PATCH 13/18] removed extra / on studio-api --- services/studio/studio-api/template.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/studio/studio-api/template.jsonnet b/services/studio/studio-api/template.jsonnet index a5e4e09..6842efc 100644 --- a/services/studio/studio-api/template.jsonnet +++ b/services/studio/studio-api/template.jsonnet @@ -28,7 +28,7 @@ local patch = { DB_NAME: 'conversations', GATEWAY_SERVICES: 'http://api-gateway', - LLM_GATEWAY_SERVICES: 'http://llm-gateway/', + LLM_GATEWAY_SERVICES: 'http://llm-gateway', CORS_ENABLED:'true', CORS_API_WHITELIST: 'https://'+domain, From 
05d33b024757f597f0a50fad3a973f2899168ff1 Mon Sep 17 00:00:00 2001 From: htagourti Date: Wed, 18 Dec 2024 19:01:13 +0000 Subject: [PATCH 14/18] removed duplicate field in session-api --- services/live-session/session-api/template.jsonnet | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/live-session/session-api/template.jsonnet b/services/live-session/session-api/template.jsonnet index e5050df..e0031e2 100644 --- a/services/live-session/session-api/template.jsonnet +++ b/services/live-session/session-api/template.jsonnet @@ -20,8 +20,6 @@ local patch = { # check with JS these env configuration SESSION_API_WEBSERVER_HTTP_PORT:'80', - STREAMING_WS_SECURE: 'true', - STREAMING_WS_SECURE: 'true', STREAMING_PASSPHRASE:'false', From d373e60ce09ce338bc0f38f13b84a44a1b720d84 Mon Sep 17 00:00:00 2001 From: htagourti Date: Wed, 18 Dec 2024 19:08:31 +0000 Subject: [PATCH 15/18] added new hydra conf to conf-template --- conf-templates/llm/.hydra-conf/services/en.yaml | 3 ++- conf-templates/llm/.hydra-conf/services/fr.yaml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml index 1945087..ef211fb 100644 --- a/conf-templates/llm/.hydra-conf/services/en.yaml +++ b/conf-templates/llm/.hydra-conf/services/en.yaml @@ -1,7 +1,8 @@ -summarize/en: +en: type: summary fields: 2 name: summarize-en + route: summarize/en description: fr: English summary backend: vLLM diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml index b52947d..07c34e2 100644 --- a/conf-templates/llm/.hydra-conf/services/fr.yaml +++ b/conf-templates/llm/.hydra-conf/services/fr.yaml @@ -1,7 +1,8 @@ -summarize/fr: +fr: type: summary fields: 2 name: summarize-fr + route: summarize/fr description: fr: Résumé français backend: vLLM From 07264929d30bd2270453952b102102aebc6ebfe4 Mon Sep 17 00:00:00 2001 From: htagourti Date: Tue, 7 Jan 2025 
11:33:04 +0000 Subject: [PATCH 16/18] removed unused variable --- scripts/setup-services.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 9451f01..4451394 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -83,7 +83,6 @@ trigger_build_service() { #TODO: we expose to the gateway when studio is selected gpu_enable=false vllm_enable=false - enable_vllm=false diarization_enable="" live_streaming_enable=false speaker_identification="false" From 6b6f559247c1bec1337cf66ce4df1bd3d2de9d23 Mon Sep 17 00:00:00 2001 From: htagourti Date: Tue, 7 Jan 2025 11:58:09 +0000 Subject: [PATCH 17/18] Remove test file --- test.ipynb | 244 ----------------------------------------------------- 1 file changed, 244 deletions(-) delete mode 100644 test.ipynb diff --git a/test.ipynb b/test.ipynb deleted file mode 100644 index c124640..0000000 --- a/test.ipynb +++ /dev/null @@ -1,244 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised.
See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n", - "200\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'stop' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 35\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mprint\u001b[39m(res_check\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m 34\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/transcribe\u001b[39m\u001b[38;5;124m\"\u001b[39m, files\u001b[38;5;241m=\u001b[39mfiles, data\u001b[38;5;241m=\u001b[39mdata, headers\u001b[38;5;241m=\u001b[39mheaders, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 35\u001b[0m \u001b[43mstop\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m201\u001b[39m:\n\u001b[1;32m 37\u001b[0m result \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n", - "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined" - ] - } - ], - "source": [ - "import requests\n", - "import json\n", - "import time\n", - "url = \"http://localhost/stt-french-whisper-v3/transcribe\"\n", - "\n", - "audio_path = \"/home/ubuntu/projects/linto-diarization/data_SUMMRE/004b_PADH.wav\"\n", - "\n", - "\n", - "files = {\n", - " 'file': ('audio.wav', open(audio_path, 'rb'), 'audio/wav')\n", - "}\n", - "diariz_conf = {\n", - "\"punctuationConfig\": {\n", - " \"enablePunctuation\": False, # Applies punctuation\n", - 
" \"serviceName\": None # Force serviceName (See SubService resolution)\n", - "},\n", - "\"enablePunctuation\": False, # Applies punctuation (Do not use, kept for backward compatibility)\n", - "\"diarizationConfig\": {\n", - " \"enableDiarization\": True, #Enables speaker diarization\n", - "}\n", - "}\n", - "data = {\n", - " 'transcriptionConfig': json.dumps(diariz_conf)\n", - "}\n", - "\n", - "headers = {\n", - " 'Accept': 'application/json'\n", - "}\n", - "endpoint = \"https://localhost/stt-french-whisper-v3\"\n", - "res_check = requests.get(f\"{endpoint}/healthcheck\", headers=headers, verify=False)\n", - "print(res_check.text)\n", - "print(res_check.status_code)\n", - "\n", - "response = requests.post(f\"{endpoint}/transcribe\", files=files, data=data, headers=headers, verify=False)\n", - "stop\n", - "if response.status_code == 201:\n", - " result = response.json()\n", - " print(\"Transcription lancée avec succès. Job ID:\", result['jobid'])\n", - "else:\n", - " print(\"Erreur lors de la requête:\", response.status_code, response.text)\n", - "\n", - "job_id = result['jobid']\n", - "job_status_url = f\"{endpoint}/job/{job_id}\"\n", - "\n", - "job_response = requests.get(job_status_url,headers=headers, verify=False)\n", - "if job_response.status_code == 202 or job_response.status_code == 201:\n", - " job_status = job_response.json()\n", - " print(\"Statut du job:\", job_status)\n", - " while (status := requests.get(job_status_url,headers=headers, verify=False)).json()['state'] != 'done' :\n", - " time.sleep(30)\n", - " print(status)\n", - " if status == 'failed':\n", - " break\n", - " job_response = requests.get(job_status_url,headers=headers,verify=False)\n", - " job_status = job_response.json()\n", - " \n", - "else:\n", - " print(\"Erreur lors de la vérification du statut:\", job_response.status_code, job_response.text)\n", - "result_id = job_status['result_id']\n", - "result_url = f\"{endpoint}/results/{result_id}\"\n", - "result_response = 
requests.get(result_url,headers=headers, verify=False)\n", - "print(result_response.text)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Gateway Timeout'" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "status.text" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Transcription lancée avec succès. Job ID: 52fee215-919b-4544-85a0-c1ffc7d03754\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "'{\"state\": \"started\", \"steps\": {\"diarization\": {\"progress\": 0.0, \"required\": true, \"status\": \"started\"}, \"postprocessing\": {\"progress\": 0.0, \"required\": true, \"status\": \"pending\"}, \"preprocessing\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}, \"punctuation\": {\"required\": false}, \"transcription\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}}}'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "if response.status_code == 201:\n", - " result = response.json()\n", - " print(\"Transcription lancée avec succès. 
Job ID:\", result['jobid'])\n", - "else:\n", - " print(\"Erreur lors de la requête:\", response.status_code, response.text)\n", - "\n", - "job_id = result['jobid']\n", - "job_status_url = f\"{endpoint}/job/{job_id}\"\n", - "job_response = requests.get(job_status_url,headers=headers, verify=False)\n", - "job_response.text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "result_id = job_response.json()['result_id']\n", - "\n", - "result_url = f\"{endpoint}/results/{result_id}\"\n", - "result_response = requests.get(result_url,headers=headers, verify=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. 
See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "job_status_url = f\"{endpoint}/job/{job_id}\"\n", - "\n", - "job_response = requests.get(job_status_url,headers=headers, verify=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "llm-gateway", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 7b9c3649220d2184a3ea2738124758de71a6f833 Mon Sep 17 00:00:00 2001 From: htagourti Date: Mon, 13 Jan 2025 09:14:01 +0000 Subject: [PATCH 18/18] updated default llm config --- conf-templates/llm/.hydra-conf/config.yaml | 1 + conf-templates/llm/.hydra-conf/services/en.yaml | 6 ++++-- conf-templates/llm/.hydra-conf/services/fr.yaml | 6 ++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/conf-templates/llm/.hydra-conf/config.yaml b/conf-templates/llm/.hydra-conf/config.yaml index cd99493..83da579 100644 --- a/conf-templates/llm/.hydra-conf/config.yaml +++ b/conf-templates/llm/.hydra-conf/config.yaml @@ -18,6 +18,7 @@ backend_defaults : top_p: null reduceSummary: null consolidateSummary: null + reduce_prompt: null service_name: ${oc.env:SERVICE_NAME,LLM_Gateway} api_params: diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml index ef211fb..2143b71 100644 --- a/conf-templates/llm/.hydra-conf/services/en.yaml +++ b/conf-templates/llm/.hydra-conf/services/en.yaml @@ -2,14 +2,14 @@ en: type: summary fields: 2 name: summarize-en - route: summarize/en + route: summarize-en description: fr: English 
summary backend: vLLM flavor: - name: llama modelName: meta-llama-31-8b-it - totalContextLength: 128000 + totalContextLength: 32000 maxGenerationLength: 2048 tokenizerClass: LlamaTokenizer createNewTurnAfter: 250 @@ -19,3 +19,5 @@ en: top_p: 0.7 reduceSummary: false consolidateSummary: false + reduce_prompt: null + type: abstractive diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml index 07c34e2..ca71662 100644 --- a/conf-templates/llm/.hydra-conf/services/fr.yaml +++ b/conf-templates/llm/.hydra-conf/services/fr.yaml @@ -2,14 +2,14 @@ fr: type: summary fields: 2 name: summarize-fr - route: summarize/fr + route: summarize-fr description: fr: Résumé français backend: vLLM flavor: - name: llama modelName: meta-llama-31-8b-it - totalContextLength: 128000 + totalContextLength: 32000 maxGenerationLength: 2048 tokenizerClass: LlamaTokenizer createNewTurnAfter: 250 @@ -19,3 +19,5 @@ fr: top_p: 0.7 reduceSummary: false consolidateSummary: false + reduce_prompt: null + type: abstractive