From 947cc0bf0dd417b52415cab0cab37cf8d6b69bba Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Wed, 6 Nov 2024 13:28:10 +0000
Subject: [PATCH 01/18] added redis broker to llm-gateway

---
 scripts/build-services.sh                 | 3 ++-
 services/llm/llm-gateway/template.jsonnet | 9 ++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/scripts/build-services.sh b/scripts/build-services.sh
index 8a53100..e2de9fe 100755
--- a/scripts/build-services.sh
+++ b/scripts/build-services.sh
@@ -69,7 +69,8 @@ build_main_service() {
 build_llm() {
     echo "Building LLM..."
     generate_yaml_files "services/llm/llm-gateway" $1 $2
-    generate_yaml_files "services/llm/vllm"
+    generate_yaml_files "services/stt/task-broker-redis"
+    #generate_yaml_files "services/llm/vllm"
 }
 
 build_studio() {
diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet
index e7c5036..cfc0dd0 100644
--- a/services/llm/llm-gateway/template.jsonnet
+++ b/services/llm/llm-gateway/template.jsonnet
@@ -3,6 +3,7 @@ local config = import 'config.jsonnet';
 local service = base.Service(config);
 local shared_mount = std.extVar('LINTO_SHARED_MOUNT');
 local network = std.extVar('DOCKER_NETWORK');
+local redis_password = std.extVar('REDIS_PASSWORD');
 
 local patch = {
   services: {
@@ -13,12 +14,13 @@ local patch = {
       ],
       networks: [
         'net_llm_services',
+        'task_broker_services',
         network,
       ],
       environment: {
         PYTHONUNBUFFERED:1,
         SERVICE_NAME:'LLM_Gateway',
-        OPENAI_API_BASE:'http://vllm-service:8000/v1',
+        OPENAI_API_BASE: 'https://chat.ai.linagora.exaion.com/v1/',
         OPENAI_API_TOKEN:'EMPTY',
         HTTP_PORT:80,
         CONCURRENCY:1,
@@ -27,6 +29,8 @@ local patch = {
         SWAGGER_URL: '/llm-gateway',
         SWAGGER_PATH:'../document/swagger_llm_gateway.yml',
         RESULT_DB_PATH:'./results.sqlite',
+        SERVICES_BROKER: 'redis://task-broker-redis:6379',
+        BROKER_PASS: redis_password,
       },
     },
   },
@@ -34,6 +38,9 @@ local patch = {
     net_llm_services: {
       external: true,
     },
+    task_broker_services: {
+      external: true,
+    },
   },
 };
 

From 8bb372bc32ce78c4a66cefee9206513f15cd7f29 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 8 Nov 2024 10:24:32 +0000
Subject: [PATCH 02/18] removed api url from llm config

---
 .envdefault                               | 4 ++++
 services/llm/llm-gateway/template.jsonnet | 6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.envdefault b/.envdefault
index 6d2d104..0f4ff7d 100644
--- a/.envdefault
+++ b/.envdefault
@@ -24,3 +24,7 @@ LINTO_FRONT_THEME=LinTO-green
 ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session
 SUPER_ADMIN_EMAIL=superadmin@mail.com
 SUPER_ADMIN_PWD=superadmin
+
+# OpenAI
+OPENAI_API_TOKEN=sk***
+OPENAI_API_BASE=***
\ No newline at end of file
diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet
index cfc0dd0..4bc5796 100644
--- a/services/llm/llm-gateway/template.jsonnet
+++ b/services/llm/llm-gateway/template.jsonnet
@@ -4,6 +4,8 @@ local service = base.Service(config);
 local shared_mount = std.extVar('LINTO_SHARED_MOUNT');
 local network = std.extVar('DOCKER_NETWORK');
 local redis_password = std.extVar('REDIS_PASSWORD');
+local openai_api_base = std.extVar('OPENAI_API_BASE');
+local open_api_token = std.extVar('OPENAI_API_TOKEN');
 
 local patch = {
   services: {
@@ -20,8 +22,8 @@ local patch = {
       environment: {
         PYTHONUNBUFFERED:1,
         SERVICE_NAME:'LLM_Gateway',
-        OPENAI_API_BASE: 'https://chat.ai.linagora.exaion.com/v1/',
-        OPENAI_API_TOKEN:'EMPTY',
+        OPENAI_API_BASE: openai_api_base,
+        OPENAI_API_TOKEN: open_api_token,
         HTTP_PORT:80,
         CONCURRENCY:1,
         TIMEOUT:60,

From 54a3a36b50b8239bc436b9aab19514e521f2e367 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 8 Nov 2024 12:35:14 +0000
Subject: [PATCH 03/18] added the possibility to enable/disable vllm deployment

---
 scripts/build-services.sh |  9 +++++++--
 scripts/dialog.sh         | 20 ++++++++++++++++++++
 scripts/setup-services.sh |  8 +++++++-
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/scripts/build-services.sh b/scripts/build-services.sh
index e2de9fe..0a3344d 100755
--- a/scripts/build-services.sh
+++ b/scripts/build-services.sh
@@ -56,6 +56,8 @@ generate_yaml_files() {
             -V DIARIZATION_DEFAULT=$diarization_service \
             -V GPU_MODE=$gpu_mode \
             -V ENABLE_SESSION_STUDIO=$enable_session_studio \
+            -V OPENAI_API_BASE=$OPENAI_API_BASE \
+            -V OPENAI_API_TOKEN=$OPENAI_API_TOKEN \
             "${service_dir}/template.jsonnet" | yq eval -P - >"$RUNNING_DIR/$FILE_NAME.yaml"
     fi
 }
@@ -70,7 +72,9 @@ build_llm() {
     echo "Building LLM..."
     generate_yaml_files "services/llm/llm-gateway" $1 $2
     generate_yaml_files "services/stt/task-broker-redis"
-    #generate_yaml_files "services/llm/vllm"
+    if [ "$3" = "true" ]; then
+        generate_yaml_files "services/llm/vllm"
+    fi
 }
 
 build_studio() {
@@ -161,6 +165,7 @@ main() {
     gpu_enable="${6:-false}"
     diarization_enable="${7:-false}"
     speaker_identification="${8:-false}"
+    vllm_enable="${9:-false}"
 
     case "$1" in
     stt-fr)
@@ -173,7 +178,7 @@ main() {
         build_diarization $gpu_enable $speaker_identification
         ;;
     llm)
-        build_llm $traefik_exposed $gateway_exposed
+        build_llm $traefik_exposed $gateway_exposed $vllm_enable
         ;;
     studio)
         # Special rule for studio on param 4 who containing the information about live-streaming
diff --git a/scripts/dialog.sh b/scripts/dialog.sh
index 7f44696..382a00f 100755
--- a/scripts/dialog.sh
+++ b/scripts/dialog.sh
@@ -120,6 +120,23 @@ streaming_service() {
 
     echo "$selected_streaming_services"
 }
+dialog_vllm() { # Ask whether to deploy the vLLM service; always prints "true" or "false" on stdout.
+    vllm=$(dialog --title "vLLM Backend deployment" --radiolist \
+        "Do you want to deploy the vLLM service?" "$DIALOG_HEIGHT" "$DIALOG_WIDTH" 2 \
+        1 "Yes" off \
+        2 "No" off \
+        3>&1 1>&2 2>&3) # stdout and stderr are swapped so the substitution captures what dialog writes to stderr
+
+    case "$vllm" in
+    1)
+        vllm_enable="true"
+        ;;
+    *) # "No", an empty selection or a cancelled dialog: never echo an unset value
+        vllm_enable="false"
+        ;;
+    esac
+    echo "$vllm_enable"
+}
 
 main() {
     case "$1" in
@@ -144,6 +161,9 @@ main() {
     streaming_service)
         streaming_service
         ;;
+    vllm)
+        dialog_vllm
+        ;;
     *)
         echo "Usage: $0 {expose|transcription|deployment|gpu|domain|speaker_identification|streaming_service}"
         exit 1
diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index 564c7b5..7560760 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -82,6 +82,7 @@ trigger_build_service() {
 
     #TODO: we expose to the gateway when studio is selected
     gpu_enable=false
+    enable_vllm=false
     diarization_enable=""
     live_streaming_enable=false
     speaker_identification="false"
@@ -93,6 +94,8 @@ trigger_build_service() {
         else
             diarization_enable="stt-diarization-pyannote"
         fi
+    if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then
+        diarization_enable="stt-diarization-pyannote"
     fi
     if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then
         echo "Studio is selected, forcing API Gateway"
@@ -102,6 +105,9 @@ trigger_build_service() {
         echo "Studio is selected, forcing API Gateway"
         live_streaming_enable=true
     fi
+    if [[ "$services" =~ (^|[[:space:]])4($|[[:space:]]) ]]; then
+        vllm_enable=$(./scripts/dialog.sh "vllm")
+    fi
 
     ./scripts/build-services.sh "main" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE"
 
@@ -136,7 +142,7 @@ trigger_build_service() {
 
         4)
             ./scripts/build-config.sh "llm"
-            ./scripts/build-services.sh "llm" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" "$expose_traefik" "$expose_api_gateway"
+            ./scripts/build-services.sh "llm" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" "$expose_traefik" "$expose_api_gateway" "" "" "" "$vllm_enable"
             ;;
         5)
 

From 487ea1d8c790bebaf6870c05773cd09f86cd3dfa Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 8 Nov 2024 13:45:45 +0000
Subject: [PATCH 04/18] corrected vllm_enable variable name

---
 scripts/setup-services.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index 7560760..e049367 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -82,7 +82,7 @@ trigger_build_service() {
 
     #TODO: we expose to the gateway when studio is selected
     gpu_enable=false
-    enable_vllm=false
+    vllm_enable=false
     diarization_enable=""
     live_streaming_enable=false
     speaker_identification="false"
@@ -94,6 +94,7 @@ trigger_build_service() {
         else
             diarization_enable="stt-diarization-pyannote"
         fi
+    fi    
     if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then
         diarization_enable="stt-diarization-pyannote"
     fi

From 16dc68f97b23f89bdb22fa9da4dc4b043d7bc7ca Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 8 Nov 2024 13:49:42 +0000
Subject: [PATCH 05/18] removed redundant if statement

---
 scripts/setup-services.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index e049367..4e2efa5 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -95,9 +95,6 @@ trigger_build_service() {
             diarization_enable="stt-diarization-pyannote"
         fi
     fi    
-    if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then
-        diarization_enable="stt-diarization-pyannote"
-    fi
     if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then
         echo "Studio is selected, forcing API Gateway"
         expose_api_gateway=true

From d1cb0cb9a8c591018c37d4a250e656321ce98eaf Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 13 Dec 2024 15:48:35 +0000
Subject: [PATCH 06/18] added missing env & corrected error in session-api
 template

---
 .envdefault                                        | 6 +++++-
 services/live-session/session-api/template.jsonnet | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/.envdefault b/.envdefault
index 0f4ff7d..09eee96 100644
--- a/.envdefault
+++ b/.envdefault
@@ -27,4 +27,8 @@ SUPER_ADMIN_PWD=superadmin
 
 # OpenAI
 OPENAI_API_TOKEN=sk***
-OPENAI_API_BASE=***
\ No newline at end of file
+OPENAI_API_BASE=***
+
+ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session
+SUPER_ADMIN_EMAIL=admin@mail.com
+SUPER_ADMIN_PWD=superadminpassword
\ No newline at end of file
diff --git a/services/live-session/session-api/template.jsonnet b/services/live-session/session-api/template.jsonnet
index e0031e2..e5050df 100644
--- a/services/live-session/session-api/template.jsonnet
+++ b/services/live-session/session-api/template.jsonnet
@@ -20,6 +20,8 @@ local patch = {
 
         # check with JS these env configuration
         SESSION_API_WEBSERVER_HTTP_PORT:'80',
+        # STREAMING_WS_SECURE is declared once, on the line below (jsonnet rejects duplicate object fields).
+
         STREAMING_WS_SECURE: 'true',
         STREAMING_PASSPHRASE:'false',
 

From 35ec0207890f9993219af4e9c68d621424aa2c33 Mon Sep 17 00:00:00 2001
From: yhoupert <yhoupert@linagora.com>
Date: Tue, 12 Nov 2024 15:46:31 +0100
Subject: [PATCH 07/18] Handle new studio api environment settings

---
 test.ipynb | 244 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100644 test.ipynb

diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 0000000..c124640
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "200\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'stop' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 35\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[38;5;28mprint\u001b[39m(res_check\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m     34\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/transcribe\u001b[39m\u001b[38;5;124m\"\u001b[39m, files\u001b[38;5;241m=\u001b[39mfiles, data\u001b[38;5;241m=\u001b[39mdata, headers\u001b[38;5;241m=\u001b[39mheaders, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 35\u001b[0m \u001b[43mstop\u001b[49m\n\u001b[1;32m     36\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m201\u001b[39m:\n\u001b[1;32m     37\u001b[0m     result \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "import time\n",
+    "url = \"http://localhost/stt-french-whisper-v3/transcribe\"\n",
+    "\n",
+    "audio_path = \"/home/ubuntu/projects/linto-diarization/data_SUMMRE/004b_PADH.wav\"\n",
+    "\n",
+    "\n",
+    "files = {\n",
+    "    'file': ('audio.wav', open(audio_path, 'rb'), 'audio/wav')\n",
+    "}\n",
+    "diariz_conf = {\n",
+    "\"punctuationConfig\": {\n",
+    "    \"enablePunctuation\": False, # Applies punctuation\n",
+    "    \"serviceName\": None # Force serviceName (See SubService resolution)\n",
+    "},\n",
+    "\"enablePunctuation\": False, # Applies punctuation (Do not use, kept for backward compatibility)\n",
+    "\"diarizationConfig\": {\n",
+    "    \"enableDiarization\": True, #Enables speaker diarization\n",
+    "}\n",
+    "}\n",
+    "data = {\n",
+    "    'transcriptionConfig': json.dumps(diariz_conf)\n",
+    "}\n",
+    "\n",
+    "headers = {\n",
+    "    'Accept': 'application/json'\n",
+    "}\n",
+    "endpoint = \"https://localhost/stt-french-whisper-v3\"\n",
+    "res_check = requests.get(f\"{endpoint}/healthcheck\", headers=headers, verify=False)\n",
+    "print(res_check.text)\n",
+    "print(res_check.status_code)\n",
+    "\n",
+    "response = requests.post(f\"{endpoint}/transcribe\", files=files, data=data, headers=headers, verify=False)\n",
+    "stop\n",
+    "if response.status_code == 201:\n",
+    "    result = response.json()\n",
+    "    print(\"Transcription lancée avec succès. Job ID:\", result['jobid'])\n",
+    "else:\n",
+    "    print(\"Erreur lors de la requête:\", response.status_code, response.text)\n",
+    "\n",
+    "job_id = result['jobid']\n",
+    "job_status_url = f\"{endpoint}/job/{job_id}\"\n",
+    "\n",
+    "job_response = requests.get(job_status_url,headers=headers, verify=False)\n",
+    "if job_response.status_code == 202 or job_response.status_code == 201:\n",
+    "    job_status = job_response.json()\n",
+    "    print(\"Statut du job:\", job_status)\n",
+    "    while (status := requests.get(job_status_url,headers=headers, verify=False)).json()['state'] != 'done' :\n",
+    "        time.sleep(30)\n",
+    "        print(status)\n",
+    "        if status == 'failed':\n",
+    "            break\n",
+    "    job_response = requests.get(job_status_url,headers=headers,verify=False)\n",
+    "    job_status = job_response.json()\n",
+    "    \n",
+    "else:\n",
+    "    print(\"Erreur lors de la vérification du statut:\", job_response.status_code, job_response.text)\n",
+    "result_id = job_status['result_id']\n",
+    "result_url = f\"{endpoint}/results/{result_id}\"\n",
+    "result_response = requests.get(result_url,headers=headers, verify=False)\n",
+    "print(result_response.text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Gateway Timeout'"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "status.text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Transcription lancée avec succès. Job ID: 52fee215-919b-4544-85a0-c1ffc7d03754\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'{\"state\": \"started\", \"steps\": {\"diarization\": {\"progress\": 0.0, \"required\": true, \"status\": \"started\"}, \"postprocessing\": {\"progress\": 0.0, \"required\": true, \"status\": \"pending\"}, \"preprocessing\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}, \"punctuation\": {\"required\": false}, \"transcription\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}}}'"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "if response.status_code == 201:\n",
+    "    result = response.json()\n",
+    "    print(\"Transcription lancée avec succès. Job ID:\", result['jobid'])\n",
+    "else:\n",
+    "    print(\"Erreur lors de la requête:\", response.status_code, response.text)\n",
+    "\n",
+    "job_id = result['jobid']\n",
+    "job_status_url = f\"{endpoint}/job/{job_id}\"\n",
+    "job_response = requests.get(job_status_url,headers=headers, verify=False)\n",
+    "job_response.text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "result_id = job_response.json()['result_id']\n",
+    "\n",
+    "result_url = f\"{endpoint}/results/{result_id}\"\n",
+    "result_response = requests.get(result_url,headers=headers, verify=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "job_status_url = f\"{endpoint}/job/{job_id}\"\n",
+    "\n",
+    "job_response = requests.get(job_status_url,headers=headers, verify=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm-gateway",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From b2d4b1efc54d5f07807419dabd7ad6456e2b02d0 Mon Sep 17 00:00:00 2001
From: yhoupert <yhoupert@linagora.com>
Date: Thu, 21 Nov 2024 16:52:33 +0100
Subject: [PATCH 08/18] Added Kaldi and Whisper streaming services

---
 .gitignore                                    |  1 +
 scripts/build-config.sh                       | 35 ++++++++++
 scripts/dialog.sh                             | 13 ++++
 scripts/setup-services.sh                     |  1 +
 .../config.jsonnet                            | 65 +++++++++++++++++++
 .../template.jsonnet                          | 37 +++++++++++
 6 files changed, 152 insertions(+)
 create mode 100644 services/live-session/stt-khaldi-french-streaming/config.jsonnet
 create mode 100644 services/live-session/stt-khaldi-french-streaming/template.jsonnet

diff --git a/.gitignore b/.gitignore
index 90dfe27..737f5a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 **/running/*.yaml
+websocket.pcap
\ No newline at end of file
diff --git a/scripts/build-config.sh b/scripts/build-config.sh
index 37b2b85..13278c8 100755
--- a/scripts/build-config.sh
+++ b/scripts/build-config.sh
@@ -115,6 +115,41 @@ build_session() {
     mkdir -p ${LINTO_LOCAL_MOUNT}/database/postgres/db-session-database/
 }
 
+build_khaldi-french-streaming() { # Download and unpack the French acoustic model and decoding graph when their folders are missing.
+    echo "Building Live streaming..."
+    TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/AMs/french"
+
+    if [ ! -d "$TARGET_FOLDER" ]; then # an existing folder is taken to mean the model is already installed
+        ZIP_URL="https://dl.linto.ai/downloads/model-distribution/acoustic-models/fr-FR/linSTT_AM_fr-FR_v2.2.0.zip"
+        ZIP_FILE="${TARGET_FOLDER}/linSTT_AM_fr-FR_v2.2.0.zip"
+
+        echo "Creating target folder: $TARGET_FOLDER"
+        mkdir -p "$TARGET_FOLDER"
+        curl -fL -o "$ZIP_FILE" "$ZIP_URL" # -f: fail on HTTP errors instead of saving the error page as the zip
+        unzip -o "$ZIP_FILE" -d "$TARGET_FOLDER"
+        rm "$ZIP_FILE"
+    fi
+
+    TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/LMs/french"
+
+    if [ ! -d "$TARGET_FOLDER" ]; then # same check for the decoding graph
+        ZIP_URL="https://dl.linto.ai/downloads/model-distribution/decoding-graphs/LVCSR/fr-FR/decoding_graph_fr-FR_Big_v2.2.0.zip"
+        ZIP_FILE="${TARGET_FOLDER}/decoding_graph_fr-FR_Big_v2.2.0.zip" # named after the archive actually downloaded
+        echo "Creating target folder: $TARGET_FOLDER"
+        mkdir -p "$TARGET_FOLDER"
+        curl -fL -o "$ZIP_FILE" "$ZIP_URL" # -f: fail on HTTP errors instead of saving the error page as the zip
+        unzip -o "$ZIP_FILE" -d "$TARGET_FOLDER"
+        rm "$ZIP_FILE"
+    fi
+}
+
+build_whisper-streaming() { # Create the shared upload and model folders under LINTO_SHARED_MOUNT.
+    echo "Building whisper..."
+
+    mkdir -p "${LINTO_SHARED_MOUNT}/audios/api_uploads" \
+        "${LINTO_SHARED_MOUNT}/models/" # quoted so a mount path containing spaces stays one argument
+}
+
 build_kaldi-french-streaming() {
     echo "Building Live streaming..."
     TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/AMs/french"
diff --git a/scripts/dialog.sh b/scripts/dialog.sh
index 382a00f..462a182 100755
--- a/scripts/dialog.sh
+++ b/scripts/dialog.sh
@@ -138,6 +138,16 @@ dialog_vllm() {
     echo "$vllm_enable"
 }
 
+streaming_service() { # NOTE(review): dialog.sh appears to define streaming_service already, above dialog_vllm; this later copy would override it — confirm and keep only one.
+    selected_streaming_services=$(dialog --title "Streaming Services" --checklist \
+        "Streaming service selection?" "$DIALOG_HEIGHT" "$DIALOG_WIDTH" 2 \
+        1 "Linto french kaldi streaming service" off \
+        2 "Linto whisper streaming service" off \
+        3>&1 1>&2 2>&3) # stdout and stderr are swapped so the substitution captures what dialog writes to stderr
+
+    echo "$selected_streaming_services" # print the captured selection for the calling script
+}
+
 main() {
     case "$1" in
     expose)
@@ -164,6 +174,9 @@ main() {
     vllm)
         dialog_vllm
         ;;
+    streaming_service)
+        streaming_service
+        ;;
     *)
         echo "Usage: $0 {expose|transcription|deployment|gpu|domain|speaker_identification|streaming_service}"
         exit 1
diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index 4e2efa5..3bfa47a 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -89,6 +89,7 @@ trigger_build_service() {
     if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) && "$services" =~ (^|[[:space:]])(1|2)($|[[:space:]]) ]]; then
         speaker_identification=$(./scripts/dialog.sh "speaker_identification")
 
+
         if [[ "$speaker_identification" == "true" ]]; then
             diarization_enable="stt-diarization-pyannote-qdrant"
         else
diff --git a/services/live-session/stt-khaldi-french-streaming/config.jsonnet b/services/live-session/stt-khaldi-french-streaming/config.jsonnet
new file mode 100644
index 0000000..26f7cc4
--- /dev/null
+++ b/services/live-session/stt-khaldi-french-streaming/config.jsonnet
@@ -0,0 +1,65 @@
+local tag = std.extVar('LINTO_IMAGE_TAG');  // appended to the image name below
+local repo = std.extVar('DOCKER_REGISTRY');  // NOTE(review): not referenced anywhere in this file — confirm whether image should be prefixed with it
+local domain = std.extVar('LINTO_DOMAIN');  // used as traefik_domain
+
+local expose_with_traefik = std.extVar('EXPOSE_TRAEFIK') == "true";  // true only when the variable is exactly the string "true"
+local expose_with_gateway = std.extVar('EXPOSE_GATEWAY') == "true";  // true only when the variable is exactly the string "true"
+
+
+{  // Settings object for the French streaming STT service; template.jsonnet passes it to base.Service()
+  //Generals
+  build_me: true,  //Set to false to disable this build as a YAML file in ./running dir
+  service_name: 'stt-khaldi-french-streaming',  // NOTE(review): spelled "khaldi" here but "kaldi" in the image name — confirm the intended spelling
+  image: 'lintoai/linto-stt-kaldi:' + tag,
+  reserve_memory: '',  //128M
+  reserve_cpu: '',  //0.5
+  limit_cpu: '',  //1
+  limit_memory: '',  //512M
+  replicas: 1,
+
+  //Main blocks
+  use_env_file: '',  //Set to specified env file (.dockerenv) or leave blank
+  expose_with_traefik: expose_with_traefik, // TODO : set this to false after API GATEWAY tests
+  healthcheck: true,
+  expose_with_api_gateway: expose_with_gateway,
+
+  //Traefik
+  traefik_endpoint: '/stt-khaldi-french-streaming',
+  traefik_strip_prefix: '/stt-khaldi-french-streaming',
+  traefik_server_port: 80,
+  traefik_domain: domain,
+  use_basic_auth: true,
+
+  //Healthcheck
+  healthcheck_interval: '15s',
+  healthcheck_timeout: '10s',
+  healthcheck_retries: 4,
+  healthcheck_start_period: '10s',
+  restart_policy: false,
+  restart_condition: 'on-failure',
+  restart_delay: '5s',
+  restart_max_attempts: 3,
+
+  //swarm node label constraints
+  swarm_node_label_constraints: [],  //[['ip', 'ingress'], ['mongo', true]...]
+
+  //swarm node role constraints
+  swarm_node_role_constraints: '',  // worker, manager, or leave blank for none
+
+  //API Gateway
+  gateway_server_port: 80,
+  gateway_server_desc:{ en: "Linto streaming service",fr:"Service de streaming Linto"},
+  gateway_server_scope: 'llm',  // NOTE(review): 'llm' scope on a speech-to-text service looks copied from the LLM config — confirm
+
+  gateway_define_endpoints: [
+    {
+      endpoint: 'stt-khaldi-french-streaming',
+      middlewares_order: 'logs',
+      middlewares: [
+        { name: 'logs', params: { debug: '*' } }
+      ],
+    },
+  ],
+  //Override command
+  command: [],
+}
\ No newline at end of file
diff --git a/services/live-session/stt-khaldi-french-streaming/template.jsonnet b/services/live-session/stt-khaldi-french-streaming/template.jsonnet
new file mode 100644
index 0000000..3970b2f
--- /dev/null
+++ b/services/live-session/stt-khaldi-french-streaming/template.jsonnet
@@ -0,0 +1,37 @@
+local base = import '../../../jsonnet/base.libsonnet';
+local config = import 'config.jsonnet';
+local service = base.Service(config);  // base service definition built from config.jsonnet
+local shared_mount = std.extVar('LINTO_SHARED_MOUNT');  // host path prefix for the volumes below
+local network = std.extVar('DOCKER_NETWORK');
+
+
+local patch = {  // service-specific additions merged over the base definition at the end of the file
+  services: {
+    [config.service_name]: {
+      volumes: [
+        shared_mount + '/audios/api_uploads/:/opt/audio',
+        shared_mount + '/models/AMs/french:/opt/AM',  // host folder build-config.sh unpacks the acoustic model into
+        shared_mount + '/models/LMs/french:/opt/LM',  // host folder build-config.sh unpacks the decoding graph into
+      ],
+      networks: [
+        network,
+        'session_network',
+      ],
+      environment: {
+        SERVICE_MODE: 'websocket',  // task | http | websocket
+        MODEL_TYPE: 'lin',  // lin | vosk
+        ENABLE_STREAMING: 'true',
+        STREAMING_PORT: '80',
+        CONCURRENCY: '1',
+        LANGUAGE: 'fr-FR',
+      },
+    },
+  },
+  networks: {
+    session_network: {
+      external: true,  // the network is referenced, not defined, by this file
+    },
+  },
+};
+
+std.mergePatch(service, patch)  // output document: the base service with patch applied (NOTE(review): merge-patch replaces arrays such as volumes/networks wholesale — confirm against base.libsonnet)

From a480e553016eb7286cbca32588c98e7dd351c131 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 8 Nov 2024 10:24:32 +0000
Subject: [PATCH 09/18] removed api url from llm config

---
 .envdefault | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.envdefault b/.envdefault
index 09eee96..847692e 100644
--- a/.envdefault
+++ b/.envdefault
@@ -30,5 +30,9 @@ OPENAI_API_TOKEN=sk***
 OPENAI_API_BASE=***
 
 ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session
-SUPER_ADMIN_EMAIL=admin@mail.com
-SUPER_ADMIN_PWD=superadminpassword
\ No newline at end of file
+SUPER_ADMIN_EMAIL=superadmin@mail.com
+SUPER_ADMIN_PWD=superadmin
+
+# OpenAI
+OPENAI_API_BASE: http://vllm-service:8000/v1
+OPENAI_API_TOKEN : EMPTY
\ No newline at end of file

From 1d823d2720dfced4bba26037ac9218148c98389c Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 8 Nov 2024 12:35:14 +0000
Subject: [PATCH 10/18] added the possibility to enable/disable vllm deployment

---
 .envdefault               | 4 ++--
 scripts/setup-services.sh | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.envdefault b/.envdefault
index 847692e..9791aaf 100644
--- a/.envdefault
+++ b/.envdefault
@@ -34,5 +34,5 @@ SUPER_ADMIN_EMAIL=superadmin@mail.com
 SUPER_ADMIN_PWD=superadmin
 
 # OpenAI
-OPENAI_API_BASE: http://vllm-service:8000/v1
-OPENAI_API_TOKEN : EMPTY
\ No newline at end of file
+OPENAI_API_BASE=http://vllm-service:8000/v1
+OPENAI_API_TOKEN=EMPTY
\ No newline at end of file
diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index 3bfa47a..58a80b5 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -83,6 +83,7 @@ trigger_build_service() {
     #TODO: we expose to the gateway when studio is selected
     gpu_enable=false
     vllm_enable=false
+    enable_vllm=false
     diarization_enable=""
     live_streaming_enable=false
     speaker_identification="false"
@@ -95,7 +96,9 @@ trigger_build_service() {
         else
             diarization_enable="stt-diarization-pyannote"
         fi
-    fi    
+    if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then
+        diarization_enable="stt-diarization-pyannote"
+    fi
     if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then
         echo "Studio is selected, forcing API Gateway"
         expose_api_gateway=true

From 980d5a25e3e7458301d7e6b6a960ddc330a35398 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Fri, 13 Dec 2024 15:48:35 +0000
Subject: [PATCH 11/18] added missing env & corrected error in session-api
 template

---
 .envdefault | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.envdefault b/.envdefault
index 9791aaf..a58e0d1 100644
--- a/.envdefault
+++ b/.envdefault
@@ -34,5 +34,9 @@ SUPER_ADMIN_EMAIL=superadmin@mail.com
 SUPER_ADMIN_PWD=superadmin
 
 # OpenAI
-OPENAI_API_BASE=http://vllm-service:8000/v1
-OPENAI_API_TOKEN=EMPTY
\ No newline at end of file
+OPENAI_API_TOKEN=sk***
+OPENAI_API_BASE=***
+
+ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session
+SUPER_ADMIN_EMAIL=admin@mail.com
+SUPER_ADMIN_PWD=superadminpassword
\ No newline at end of file

From 633fc0538f89e204b9ecf782a24001cf3ef3924f Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Tue, 17 Dec 2024 16:01:15 +0000
Subject: [PATCH 12/18] added LLM gateway volume handling

---
 .gitignore                                    |  3 +-
 conf-templates/llm/.hydra-conf/config.yaml    | 45 +++++++++++++++++++
 .../llm/.hydra-conf/services/en.yaml          | 20 +++++++++
 .../llm/.hydra-conf/services/fr.yaml          | 20 +++++++++
 conf-templates/llm/prompts/summarize-en.txt   | 16 +++++++
 .../{summary.txt => prompts/summarize-fr.txt} | 28 ++++++------
 conf-templates/llm/summary.json               | 23 ----------
 scripts/build-config.sh                       |  6 +--
 scripts/setup-services.sh                     |  1 +
 services/llm/llm-gateway/template.jsonnet     |  3 +-
 10 files changed, 123 insertions(+), 42 deletions(-)
 create mode 100644 conf-templates/llm/.hydra-conf/config.yaml
 create mode 100644 conf-templates/llm/.hydra-conf/services/en.yaml
 create mode 100644 conf-templates/llm/.hydra-conf/services/fr.yaml
 create mode 100644 conf-templates/llm/prompts/summarize-en.txt
 rename conf-templates/llm/{summary.txt => prompts/summarize-fr.txt} (98%)
 delete mode 100644 conf-templates/llm/summary.json

diff --git a/.gitignore b/.gitignore
index 737f5a3..54e5554 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 **/running/*.yaml
-websocket.pcap
\ No newline at end of file
+websocket.pcap
+.env
\ No newline at end of file
diff --git a/conf-templates/llm/.hydra-conf/config.yaml b/conf-templates/llm/.hydra-conf/config.yaml
new file mode 100644
index 0000000..cd99493
--- /dev/null
+++ b/conf-templates/llm/.hydra-conf/config.yaml
@@ -0,0 +1,45 @@
+defaults :
+  - _self_
+  - services :
+    - en
+    - fr
+
+prompt_path: ./prompts/
+backend_defaults :
+  name: null
+  modelName: null
+  totalContextLength: null
+  maxGenerationLength: null
+  tokenizerClass: null
+  createNewTurnAfter: null
+  summaryTurns: null
+  maxNewTurns: null
+  temperature: null
+  top_p: null
+  reduceSummary: null
+  consolidateSummary: null
+  service_name: ${oc.env:SERVICE_NAME,LLM_Gateway}
+
+api_params:
+  api_base: ${oc.env:OPENAI_API_BASE,http://localhost:9000/v1}
+  api_key: ${oc.env:OPENAI_API_TOKEN,EMPTY}
+  max_retries: ${oc.decode:${oc.env:MAX_RETRIES,6}}
+  max_retry_delay: ${oc.decode:${oc.env:MAX_RETRY_DELAY,10}}
+  service_port: ${oc.decode:${oc.env:HTTP_PORT,8000}}
+  workers: ${oc.decode:${oc.env:CONCURRENCY,1}}
+  timeout: ${oc.decode:${oc.env:TIMEOUT,60}}
+  ws_polling_interval: ${oc.decode:${oc.env:WS_POLLING_INTERVAL,3}}
+
+semaphore:
+  max_concurrent_inferences: ${oc.decode:${oc.env:MAX_CONCURRENT_INFERENCES,3}}
+
+swagger:
+  url: ${oc.env:SWAGGER_URL,/docs}
+  title: ${oc.env:SWAGGER_TITLE,LLM Gateway API Documentation}
+  description: ${oc.env:SWAGGER_DESCRIPTION,API to make summary of text using LLMs.}
+
+services_broker:
+  url: ${oc.env:SERVICES_BROKER,redis://localhost:6379}
+  password: ${oc.env:BROKER_PASS,EMPTY}
+
+debug: false
\ No newline at end of file
diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml
new file mode 100644
index 0000000..1945087
--- /dev/null
+++ b/conf-templates/llm/.hydra-conf/services/en.yaml
@@ -0,0 +1,20 @@
+summarize/en:
+  type: summary
+  fields: 2
+  name: summarize-en
+  description:
+    fr: English summary
+  backend: vLLM
+  flavor:
+    - name: llama
+      modelName: meta-llama-31-8b-it
+      totalContextLength: 128000
+      maxGenerationLength: 2048
+      tokenizerClass: LlamaTokenizer
+      createNewTurnAfter: 250
+      summaryTurns: 3
+      maxNewTurns: 9
+      temperature: 0.2
+      top_p: 0.7
+      reduceSummary: false
+      consolidateSummary: false
diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml
new file mode 100644
index 0000000..b52947d
--- /dev/null
+++ b/conf-templates/llm/.hydra-conf/services/fr.yaml
@@ -0,0 +1,20 @@
+summarize/fr:
+  type: summary
+  fields: 2
+  name: summarize-fr
+  description:
+    fr: Résumé français
+  backend: vLLM
+  flavor:
+    - name: llama
+      modelName: meta-llama-31-8b-it
+      totalContextLength: 128000
+      maxGenerationLength: 2048
+      tokenizerClass: LlamaTokenizer
+      createNewTurnAfter: 250
+      summaryTurns: 3
+      maxNewTurns: 9
+      temperature: 0.2
+      top_p: 0.7
+      reduceSummary: false
+      consolidateSummary: false
diff --git a/conf-templates/llm/prompts/summarize-en.txt b/conf-templates/llm/prompts/summarize-en.txt
new file mode 100644
index 0000000..f4b24db
--- /dev/null
+++ b/conf-templates/llm/prompts/summarize-en.txt
@@ -0,0 +1,16 @@
+You must summarize a transcript following these guidelines:
+Always use standard spelling conventions.
+Rely strictly on the text to be processed without including external information.
+Remove the mention of the speaker followed by ":" in the summary.
+Explain the content without using the first-person narrative.
+Never write anything other than the summary of the processed speech turns, do not provide information about the reduction and processing carried out, never present the summarized text out of context (no "Here is the summary of the speech turns:").
+Never include in the summary any statements from the speech turns summarized so far.
+The speech turns can be in any language and must be translated into English.
+
+### Speech turns summarized so far (do not repeat or summarize again)
+{}
+
+### Speech turns to process
+{}
+
+### Speech turns summarized (in English)
\ No newline at end of file
diff --git a/conf-templates/llm/summary.txt b/conf-templates/llm/prompts/summarize-fr.txt
similarity index 98%
rename from conf-templates/llm/summary.txt
rename to conf-templates/llm/prompts/summarize-fr.txt
index f98c37e..d706ac6 100644
--- a/conf-templates/llm/summary.txt
+++ b/conf-templates/llm/prompts/summarize-fr.txt
@@ -1,15 +1,15 @@
-Vous devez résumer une transcription en suivant les directives suivantes :
-Toujours utiliser les conventions orthographiques standard du français.
-S'appuyer strictement sur le texte à traiter sans inclure d'informations externes.
-Enlever la mention du locuteur suivie de ":" dans le résumé.
-Expliquer le propos sans reprendre le tour de parole à la première personne.
-Ne jamais rien écrire d'autre que le résumé des tours de parole traités, ne pas donner d'informations sur la réduction et les traitements réalisés, ne jamais présenter le texte résumé en sortant du contexte (pas de "Voici le résumé des tours de parole : ").
-Ne jamais inclure dans le résumé des propos issus des tours de paroles résumé jusque là.
-
-### Tours de parole résumés jusque là (ne surtout pas répéter ou résumer à nouveau)
-{}
-
-### Tours de parole à traiter
-{}
-
+Vous devez résumer une transcription en suivant les directives suivantes :
+Toujours utiliser les conventions orthographiques standard du français.
+S'appuyer strictement sur le texte à traiter sans inclure d'informations externes.
+Enlever la mention du locuteur suivie de ":" dans le résumé.
+Expliquer le propos sans reprendre le tour de parole à la première personne.
+Ne jamais rien écrire d'autre que le résumé des tours de parole traités, ne pas donner d'informations sur la réduction et les traitements réalisés, ne jamais présenter le texte résumé en sortant du contexte (pas de "Voici le résumé des tours de parole : ").
+Ne jamais inclure dans le résumé des propos issus des tours de parole résumés jusque là.
+
+### Tours de parole résumés jusque là (ne surtout pas répéter ou résumer à nouveau)
+{}
+
+### Tours de parole à traiter
+{}
+
 ### Tours de parole résumés (en français)
\ No newline at end of file
diff --git a/conf-templates/llm/summary.json b/conf-templates/llm/summary.json
deleted file mode 100644
index 388d5f0..0000000
--- a/conf-templates/llm/summary.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "type": "summary",
-  "fields": 2,
-  "name": "summary",
-  "description": {
-    "fr": "Résumé des tours"
-  },
-  "backend": "vLLM",
-  "flavor": [
-    {
-      "name": "llama3",
-      "modelName": "casperhansen/llama-3-8b-instruct-awq",
-      "totalContextLength": 8192,
-      "maxGenerationLength": 2048,
-      "tokenizerClass": "LlamaTokenizer",
-      "createNewTurnAfter": 300,
-      "summaryTurns": 2,
-      "maxNewTurns": 10,
-      "temperature": 0.1,
-      "top_p": 0.8
-    }
-  ]
-}
diff --git a/scripts/build-config.sh b/scripts/build-config.sh
index 13278c8..522889b 100755
--- a/scripts/build-config.sh
+++ b/scripts/build-config.sh
@@ -52,9 +52,9 @@ build_stt() {
 build_llm() {
     echo "Building LLM..."
 
-    mkdir -p "${LINTO_SHARED_MOUNT}/llm_services/" \
-        ${LINTO_SHARED_MOUNT}/models/
-    cp -r "${CONFIG_TEMPLATES}/llm/"* "${LINTO_SHARED_MOUNT}/llm_services/"
+    mkdir -p ${LINTO_SHARED_MOUNT}/models/
+    
+    cp -r "${CONFIG_TEMPLATES}/llm" "${LINTO_SHARED_MOUNT}"
 
     create_networks "net_llm_services"
 }
diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index 58a80b5..9451f01 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -96,6 +96,7 @@ trigger_build_service() {
         else
             diarization_enable="stt-diarization-pyannote"
         fi
+    fi
     if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) ]]; then
         diarization_enable="stt-diarization-pyannote"
     fi
diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet
index 4bc5796..02c77f2 100644
--- a/services/llm/llm-gateway/template.jsonnet
+++ b/services/llm/llm-gateway/template.jsonnet
@@ -12,7 +12,8 @@ local patch = {
     [config.service_name]: {
       volumes: [
         shared_mount + '/models/:/root/.cache',
-        shared_mount + '/llm_services/:/usr/src/services/'
+        shared_mount + '/llm/.hydra-conf:/usr/src/.hydra-conf',
+        shared_mount + '/llm/prompts:/usr/src/prompts'
       ],
       networks: [
         'net_llm_services',

From 4c313e4b012a288c52fab77ed2026a9568065a9a Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Wed, 18 Dec 2024 16:29:24 +0000
Subject: [PATCH 13/18] removed extra / on studio-api

---
 services/studio/studio-api/template.jsonnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/studio/studio-api/template.jsonnet b/services/studio/studio-api/template.jsonnet
index a5e4e09..6842efc 100644
--- a/services/studio/studio-api/template.jsonnet
+++ b/services/studio/studio-api/template.jsonnet
@@ -28,7 +28,7 @@ local patch = {
         DB_NAME: 'conversations',
 
         GATEWAY_SERVICES: 'http://api-gateway',
-        LLM_GATEWAY_SERVICES: 'http://llm-gateway/',
+        LLM_GATEWAY_SERVICES: 'http://llm-gateway',
 
         CORS_ENABLED:'true',
         CORS_API_WHITELIST: 'https://'+domain,

From 05d33b024757f597f0a50fad3a973f2899168ff1 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Wed, 18 Dec 2024 19:01:13 +0000
Subject: [PATCH 14/18] removed duplicate field in session-api

---
 services/live-session/session-api/template.jsonnet | 2 --
 1 file changed, 2 deletions(-)

diff --git a/services/live-session/session-api/template.jsonnet b/services/live-session/session-api/template.jsonnet
index e5050df..e0031e2 100644
--- a/services/live-session/session-api/template.jsonnet
+++ b/services/live-session/session-api/template.jsonnet
@@ -20,8 +20,6 @@ local patch = {
 
         # check with JS these env configuration
         SESSION_API_WEBSERVER_HTTP_PORT:'80',
-        STREAMING_WS_SECURE: 'true',
-        
         STREAMING_WS_SECURE: 'true',
         STREAMING_PASSPHRASE:'false',
 

From d373e60ce09ce338bc0f38f13b84a44a1b720d84 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Wed, 18 Dec 2024 19:08:31 +0000
Subject: [PATCH 15/18] added new hydra conf to conf-template

---
 conf-templates/llm/.hydra-conf/services/en.yaml | 3 ++-
 conf-templates/llm/.hydra-conf/services/fr.yaml | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml
index 1945087..ef211fb 100644
--- a/conf-templates/llm/.hydra-conf/services/en.yaml
+++ b/conf-templates/llm/.hydra-conf/services/en.yaml
@@ -1,7 +1,8 @@
-summarize/en:
+en:
   type: summary
   fields: 2
   name: summarize-en
+  route: summarize/en
   description:
     fr: English summary
   backend: vLLM
diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml
index b52947d..07c34e2 100644
--- a/conf-templates/llm/.hydra-conf/services/fr.yaml
+++ b/conf-templates/llm/.hydra-conf/services/fr.yaml
@@ -1,7 +1,8 @@
-summarize/fr:
+fr:
   type: summary
   fields: 2
   name: summarize-fr
+  route: summarize/fr
   description:
     fr: Résumé français
   backend: vLLM

From 07264929d30bd2270453952b102102aebc6ebfe4 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Tue, 7 Jan 2025 11:33:04 +0000
Subject: [PATCH 16/18] removed unused variable

---
 scripts/setup-services.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh
index 9451f01..4451394 100755
--- a/scripts/setup-services.sh
+++ b/scripts/setup-services.sh
@@ -83,7 +83,6 @@ trigger_build_service() {
     #TODO: we expose to the gateway when studio is selected
     gpu_enable=false
     vllm_enable=false
-    enable_vllm=false
     diarization_enable=""
     live_streaming_enable=false
     speaker_identification="false"

From 6b6f559247c1bec1337cf66ce4df1bd3d2de9d23 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Tue, 7 Jan 2025 11:58:09 +0000
Subject: [PATCH 17/18] Remove test file

---
 test.ipynb | 244 -----------------------------------------------------
 1 file changed, 244 deletions(-)
 delete mode 100644 test.ipynb

diff --git a/test.ipynb b/test.ipynb
deleted file mode 100644
index c124640..0000000
--- a/test.ipynb
+++ /dev/null
@@ -1,244 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n",
-      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1\n",
-      "200\n"
-     ]
-    },
-    {
-     "ename": "NameError",
-     "evalue": "name 'stop' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[1], line 35\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[38;5;28mprint\u001b[39m(res_check\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m     34\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/transcribe\u001b[39m\u001b[38;5;124m\"\u001b[39m, files\u001b[38;5;241m=\u001b[39mfiles, data\u001b[38;5;241m=\u001b[39mdata, headers\u001b[38;5;241m=\u001b[39mheaders, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 35\u001b[0m \u001b[43mstop\u001b[49m\n\u001b[1;32m     36\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m201\u001b[39m:\n\u001b[1;32m     37\u001b[0m     result \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined"
-     ]
-    }
-   ],
-   "source": [
-    "import requests\n",
-    "import json\n",
-    "import time\n",
-    "url = \"http://localhost/stt-french-whisper-v3/transcribe\"\n",
-    "\n",
-    "audio_path = \"/home/ubuntu/projects/linto-diarization/data_SUMMRE/004b_PADH.wav\"\n",
-    "\n",
-    "\n",
-    "files = {\n",
-    "    'file': ('audio.wav', open(audio_path, 'rb'), 'audio/wav')\n",
-    "}\n",
-    "diariz_conf = {\n",
-    "\"punctuationConfig\": {\n",
-    "    \"enablePunctuation\": False, # Applies punctuation\n",
-    "    \"serviceName\": None # Force serviceName (See SubService resolution)\n",
-    "},\n",
-    "\"enablePunctuation\": False, # Applies punctuation (Do not use, kept for backward compatibility)\n",
-    "\"diarizationConfig\": {\n",
-    "    \"enableDiarization\": True, #Enables speaker diarization\n",
-    "}\n",
-    "}\n",
-    "data = {\n",
-    "    'transcriptionConfig': json.dumps(diariz_conf)\n",
-    "}\n",
-    "\n",
-    "headers = {\n",
-    "    'Accept': 'application/json'\n",
-    "}\n",
-    "endpoint = \"https://localhost/stt-french-whisper-v3\"\n",
-    "res_check = requests.get(f\"{endpoint}/healthcheck\", headers=headers, verify=False)\n",
-    "print(res_check.text)\n",
-    "print(res_check.status_code)\n",
-    "\n",
-    "response = requests.post(f\"{endpoint}/transcribe\", files=files, data=data, headers=headers, verify=False)\n",
-    "stop\n",
-    "if response.status_code == 201:\n",
-    "    result = response.json()\n",
-    "    print(\"Transcription lancée avec succès. Job ID:\", result['jobid'])\n",
-    "else:\n",
-    "    print(\"Erreur lors de la requête:\", response.status_code, response.text)\n",
-    "\n",
-    "job_id = result['jobid']\n",
-    "job_status_url = f\"{endpoint}/job/{job_id}\"\n",
-    "\n",
-    "job_response = requests.get(job_status_url,headers=headers, verify=False)\n",
-    "if job_response.status_code == 202 or job_response.status_code == 201:\n",
-    "    job_status = job_response.json()\n",
-    "    print(\"Statut du job:\", job_status)\n",
-    "    while (status := requests.get(job_status_url,headers=headers, verify=False)).json()['state'] != 'done' :\n",
-    "        time.sleep(30)\n",
-    "        print(status)\n",
-    "        if status == 'failed':\n",
-    "            break\n",
-    "    job_response = requests.get(job_status_url,headers=headers,verify=False)\n",
-    "    job_status = job_response.json()\n",
-    "    \n",
-    "else:\n",
-    "    print(\"Erreur lors de la vérification du statut:\", job_response.status_code, job_response.text)\n",
-    "result_id = job_status['result_id']\n",
-    "result_url = f\"{endpoint}/results/{result_id}\"\n",
-    "result_response = requests.get(result_url,headers=headers, verify=False)\n",
-    "print(result_response.text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Gateway Timeout'"
-      ]
-     },
-     "execution_count": 40,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "status.text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Transcription lancée avec succès. Job ID: 52fee215-919b-4544-85a0-c1ffc7d03754\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'{\"state\": \"started\", \"steps\": {\"diarization\": {\"progress\": 0.0, \"required\": true, \"status\": \"started\"}, \"postprocessing\": {\"progress\": 0.0, \"required\": true, \"status\": \"pending\"}, \"preprocessing\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}, \"punctuation\": {\"required\": false}, \"transcription\": {\"progress\": 1.0, \"required\": true, \"status\": \"done\"}}}'"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "if response.status_code == 201:\n",
-    "    result = response.json()\n",
-    "    print(\"Transcription lancée avec succès. Job ID:\", result['jobid'])\n",
-    "else:\n",
-    "    print(\"Erreur lors de la requête:\", response.status_code, response.text)\n",
-    "\n",
-    "job_id = result['jobid']\n",
-    "job_status_url = f\"{endpoint}/job/{job_id}\"\n",
-    "job_response = requests.get(job_status_url,headers=headers, verify=False)\n",
-    "job_response.text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
-   "source": [
-    "result_id = job_response.json()['result_id']\n",
-    "\n",
-    "result_url = f\"{endpoint}/results/{result_id}\"\n",
-    "result_response = requests.get(result_url,headers=headers, verify=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ubuntu/miniconda3/envs/llm-gateway/lib/python3.12/site-packages/urllib3/connectionpool.py:1099: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
-   "source": [
-    "job_status_url = f\"{endpoint}/job/{job_id}\"\n",
-    "\n",
-    "job_response = requests.get(job_status_url,headers=headers, verify=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "llm-gateway",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

From 7b9c3649220d2184a3ea2738124758de71a6f833 Mon Sep 17 00:00:00 2001
From: htagourti <htagourti@linagora.com>
Date: Mon, 13 Jan 2025 09:14:01 +0000
Subject: [PATCH 18/18] updated default llm config

---
 conf-templates/llm/.hydra-conf/config.yaml      | 1 +
 conf-templates/llm/.hydra-conf/services/en.yaml | 6 ++++--
 conf-templates/llm/.hydra-conf/services/fr.yaml | 6 ++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/conf-templates/llm/.hydra-conf/config.yaml b/conf-templates/llm/.hydra-conf/config.yaml
index cd99493..83da579 100644
--- a/conf-templates/llm/.hydra-conf/config.yaml
+++ b/conf-templates/llm/.hydra-conf/config.yaml
@@ -18,6 +18,7 @@ backend_defaults :
   top_p: null
   reduceSummary: null
   consolidateSummary: null
+  reduce_prompt: null
   service_name: ${oc.env:SERVICE_NAME,LLM_Gateway}
 
 api_params:
diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml
index ef211fb..2143b71 100644
--- a/conf-templates/llm/.hydra-conf/services/en.yaml
+++ b/conf-templates/llm/.hydra-conf/services/en.yaml
@@ -2,14 +2,14 @@ en:
   type: summary
   fields: 2
   name: summarize-en
-  route: summarize/en
+  route: summarize-en
   description:
     fr: English summary
   backend: vLLM
   flavor:
     - name: llama
       modelName: meta-llama-31-8b-it
-      totalContextLength: 128000
+      totalContextLength: 32000
       maxGenerationLength: 2048
       tokenizerClass: LlamaTokenizer
       createNewTurnAfter: 250
@@ -19,3 +19,5 @@ en:
       top_p: 0.7
       reduceSummary: false
       consolidateSummary: false
+      reduce_prompt: null
+      type: abstractive
diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml
index 07c34e2..ca71662 100644
--- a/conf-templates/llm/.hydra-conf/services/fr.yaml
+++ b/conf-templates/llm/.hydra-conf/services/fr.yaml
@@ -2,14 +2,14 @@ fr:
   type: summary
   fields: 2
   name: summarize-fr
-  route: summarize/fr
+  route: summarize-fr
   description:
     fr: Résumé français
   backend: vLLM
   flavor:
     - name: llama
       modelName: meta-llama-31-8b-it
-      totalContextLength: 128000
+      totalContextLength: 32000
       maxGenerationLength: 2048
       tokenizerClass: LlamaTokenizer
       createNewTurnAfter: 250
@@ -19,3 +19,5 @@ fr:
       top_p: 0.7
       reduceSummary: false
       consolidateSummary: false
+      reduce_prompt: null
+      type: abstractive