diff --git a/.envdefault b/.envdefault index 6d2d104..a58e0d1 100644 --- a/.envdefault +++ b/.envdefault @@ -24,3 +24,7 @@ LINTO_FRONT_THEME=LinTO-green ORGANIZATION_DEFAULT_PERMISSIONS=upload,summary,session SUPER_ADMIN_EMAIL=superadmin@mail.com SUPER_ADMIN_PWD=superadmin + +# OpenAI +OPENAI_API_TOKEN=sk*** +OPENAI_API_BASE=*** \ No newline at end of file diff --git a/.gitignore b/.gitignore index 90dfe27..54e5554 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ **/running/*.yaml +websocket.pcap +.env \ No newline at end of file diff --git a/conf-templates/llm/.hydra-conf/config.yaml b/conf-templates/llm/.hydra-conf/config.yaml new file mode 100644 index 0000000..83da579 --- /dev/null +++ b/conf-templates/llm/.hydra-conf/config.yaml @@ -0,0 +1,46 @@ +defaults : + - _self_ + - services : + - en + - fr + +prompt_path: ./prompts/ +backend_defaults : + name: null + modelName: null + totalContextLength: null + maxGenerationLength: null + tokenizerClass: null + createNewTurnAfter: null + summaryTurns: null + maxNewTurns: null + temperature: null + top_p: null + reduceSummary: null + consolidateSummary: null + reduce_prompt: null + service_name: ${oc.env:SERVICE_NAME,LLM_Gateway} + +api_params: + api_base: ${oc.env:OPENAI_API_BASE,http://localhost:9000/v1} + api_key: ${oc.env:OPENAI_API_TOKEN,EMPTY} + max_retries: ${oc.decode:${oc.env:MAX_RETRIES,6}} + max_retry_delay: ${oc.decode:${oc.env:MAX_RETRY_DELAY,10}} + service_port: ${oc.decode:${oc.env:HTTP_PORT,8000}} + workers: ${oc.decode:${oc.env:CONCURRENCY,1}} + timeout: ${oc.decode:${oc.env:TIMEOUT,60}} + ws_polling_interval: ${oc.decode:${oc.env:WS_POLLING_INTERVAL,3}} + +semaphore: + 
max_concurrent_inferences: ${oc.decode:${oc.env:MAX_CONCURRENT_INFERENCES,3}} + +swagger: + url: ${oc.env:SWAGGER_URL,/docs} + title: ${oc.env:SWAGGER_TITLE,LLM Gateway API Documentation} + description: ${oc.env:SWAGGER_DESCRIPTION,API to make summary of text using LLMs.} + +services_broker: + url: ${oc.env:SERVICES_BROKER,redis://localhost:6379} + password: ${oc.env:BROKER_PASS,EMPTY} + +debug: false \ No newline at end of file diff --git a/conf-templates/llm/.hydra-conf/services/en.yaml b/conf-templates/llm/.hydra-conf/services/en.yaml new file mode 100644 index 0000000..2143b71 --- /dev/null +++ b/conf-templates/llm/.hydra-conf/services/en.yaml @@ -0,0 +1,23 @@ +en: + type: summary + fields: 2 + name: summarize-en + route: summarize-en + description: + fr: English summary + backend: vLLM + flavor: + - name: llama + modelName: meta-llama-31-8b-it + totalContextLength: 32000 + maxGenerationLength: 2048 + tokenizerClass: LlamaTokenizer + createNewTurnAfter: 250 + summaryTurns: 3 + maxNewTurns: 9 + temperature: 0.2 + top_p: 0.7 + reduceSummary: false + consolidateSummary: false + reduce_prompt: null + type: abstractive diff --git a/conf-templates/llm/.hydra-conf/services/fr.yaml b/conf-templates/llm/.hydra-conf/services/fr.yaml new file mode 100644 index 0000000..ca71662 --- /dev/null +++ b/conf-templates/llm/.hydra-conf/services/fr.yaml @@ -0,0 +1,23 @@ +fr: + type: summary + fields: 2 + name: summarize-fr + route: summarize-fr + description: + fr: Résumé français + backend: vLLM + flavor: + - name: llama + modelName: meta-llama-31-8b-it + totalContextLength: 32000 + maxGenerationLength: 2048 + tokenizerClass: LlamaTokenizer + createNewTurnAfter: 250 + summaryTurns: 3 + maxNewTurns: 9 + temperature: 0.2 + top_p: 0.7 + reduceSummary: false + consolidateSummary: false + reduce_prompt: null + type: abstractive diff --git a/conf-templates/llm/prompts/summarize-en.txt b/conf-templates/llm/prompts/summarize-en.txt new file mode 100644 index 0000000..f4b24db --- /dev/null +++ 
b/conf-templates/llm/prompts/summarize-en.txt @@ -0,0 +1,16 @@ +You must summarize a transcript following these guidelines: +Always use standard spelling conventions. +Rely strictly on the text to be processed without including external information. +Remove the mention of the speaker followed by ":" in the summary. +Explain the content without using the first-person narrative. +Never write anything other than the summary of the processed speech turns, do not provide information about the reduction and processing carried out, never present the summarized text out of context (no "Here is the summary of the speech turns:"). +Never include in the summary any statements from the speech turns summarized so far. +The speech turns can be in any language and must be translated into English. + +### Speech turns summarized so far (do not repeat or summarize again) +{} + +### Speech turns to process +{} + +### Speech turns summarized (in English) \ No newline at end of file diff --git a/conf-templates/llm/summary.txt b/conf-templates/llm/prompts/summarize-fr.txt similarity index 98% rename from conf-templates/llm/summary.txt rename to conf-templates/llm/prompts/summarize-fr.txt index f98c37e..d706ac6 100644 --- a/conf-templates/llm/summary.txt +++ b/conf-templates/llm/prompts/summarize-fr.txt @@ -1,15 +1,15 @@ -Vous devez résumer une transcription en suivant les directives suivantes : -Toujours utiliser les conventions orthographiques standard du français. -S'appuyer strictement sur le texte à traiter sans inclure d'informations externes. -Enlever la mention du locuteur suivie de ":" dans le résumé. -Expliquer le propos sans reprendre le tour de parole à la première personne. -Ne jamais rien écrire d'autre que le résumé des tours de parole traités, ne pas donner d'informations sur la réduction et les traitements réalisés, ne jamais présenter le texte résumé en sortant du contexte (pas de "Voici le résumé des tours de parole : "). 
-Ne jamais inclure dans le résumé des propos issus des tours de paroles résumé jusque là. - -### Tours de parole résumés jusque là (ne surtout pas répéter ou résumer à nouveau) -{} - -### Tours de parole à traiter -{} - +Vous devez résumer une transcription en suivant les directives suivantes : +Toujours utiliser les conventions orthographiques standard du français. +S'appuyer strictement sur le texte à traiter sans inclure d'informations externes. +Enlever la mention du locuteur suivie de ":" dans le résumé. +Expliquer le propos sans reprendre le tour de parole à la première personne. +Ne jamais rien écrire d'autre que le résumé des tours de parole traités, ne pas donner d'informations sur la réduction et les traitements réalisés, ne jamais présenter le texte résumé en sortant du contexte (pas de "Voici le résumé des tours de parole : "). +Ne jamais inclure dans le résumé des propos issus des tours de paroles résumé jusque là. + +### Tours de parole résumés jusque là (ne surtout pas répéter ou résumer à nouveau) +{} + +### Tours de parole à traiter +{} + ### Tours de parole résumés (en français) \ No newline at end of file diff --git a/conf-templates/llm/summary.json b/conf-templates/llm/summary.json deleted file mode 100644 index 388d5f0..0000000 --- a/conf-templates/llm/summary.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "type": "summary", - "fields": 2, - "name": "summary", - "description": { - "fr": "Résumé des tours" - }, - "backend": "vLLM", - "flavor": [ - { - "name": "llama3", - "modelName": "casperhansen/llama-3-8b-instruct-awq", - "totalContextLength": 8192, - "maxGenerationLength": 2048, - "tokenizerClass": "LlamaTokenizer", - "createNewTurnAfter": 300, - "summaryTurns": 2, - "maxNewTurns": 10, - "temperature": 0.1, - "top_p": 0.8 - } - ] -} diff --git a/scripts/build-config.sh b/scripts/build-config.sh index 37b2b85..522889b 100755 --- a/scripts/build-config.sh +++ b/scripts/build-config.sh @@ -52,9 +52,9 @@ build_stt() { build_llm() { echo "Building 
LLM..." - mkdir -p "${LINTO_SHARED_MOUNT}/llm_services/" \ - ${LINTO_SHARED_MOUNT}/models/ - cp -r "${CONFIG_TEMPLATES}/llm/"* "${LINTO_SHARED_MOUNT}/llm_services/" + mkdir -p ${LINTO_SHARED_MOUNT}/models/ + + cp -r "${CONFIG_TEMPLATES}/llm" "${LINTO_SHARED_MOUNT}" create_networks "net_llm_services" } @@ -115,6 +115,13 @@ build_session() { mkdir -p ${LINTO_LOCAL_MOUNT}/database/postgres/db-session-database/ } +build_whisper-streaming() { + echo "Building whisper..." + + mkdir -p ${LINTO_SHARED_MOUNT}/audios/api_uploads \ + ${LINTO_SHARED_MOUNT}/models/ +} + build_kaldi-french-streaming() { echo "Building Live streaming..." 
 TARGET_FOLDER="${LINTO_SHARED_MOUNT}/models/AMs/french" diff --git a/scripts/build-services.sh b/scripts/build-services.sh index 8a53100..0a3344d 100755 --- a/scripts/build-services.sh +++ b/scripts/build-services.sh @@ -56,6 +56,8 @@ generate_yaml_files() { -V DIARIZATION_DEFAULT=$diarization_service \ -V GPU_MODE=$gpu_mode \ -V ENABLE_SESSION_STUDIO=$enable_session_studio \ + -V OPENAI_API_BASE=$OPENAI_API_BASE \ + -V OPENAI_API_TOKEN=$OPENAI_API_TOKEN \ "${service_dir}/template.jsonnet" | yq eval -P - >"$RUNNING_DIR/$FILE_NAME.yaml" fi } @@ -69,7 +71,10 @@ build_main_service() { build_llm() { echo "Building LLM..." generate_yaml_files "services/llm/llm-gateway" $1 $2 - generate_yaml_files "services/llm/vllm" + generate_yaml_files "services/stt/task-broker-redis" + if [ "$3" = "true" ]; then + generate_yaml_files "services/llm/vllm" + fi } build_studio() { @@ -160,6 +165,7 @@ main() { gpu_enable="${6:-false}" diarization_enable="${7:-false}" speaker_identification="${8:-false}" + vllm_enable="${9:-false}" case "$1" in stt-fr) @@ -172,7 +178,7 @@ main() { build_diarization $gpu_enable $speaker_identification ;; llm) - build_llm $traefik_exposed $gateway_exposed + build_llm $traefik_exposed $gateway_exposed $vllm_enable ;; studio) # Special rule for studio on param 4 who containing the information about live-streaming diff --git a/scripts/dialog.sh b/scripts/dialog.sh index 7f44696..462a182 100755 --- a/scripts/dialog.sh +++ b/scripts/dialog.sh @@ -111,6 +111,24 @@ dialog_gpu_mode() { fi } +dialog_vllm() { + vllm=$(dialog --title "vLLM Backend deployment" --radiolist \ + "Do you want to deploy the vLLM service?" 
"$DIALOG_HEIGHT" "$DIALOG_WIDTH" 2 \ + 1 "Yes" off \ + 2 "No" off \ + 3>&1 1>&2 2>&3) + + case "$vllm" in + 1) + vllm_enable="true" + ;; + 2) + vllm_enable="false" + ;; + esac + echo "$vllm_enable" +} + streaming_service() { selected_streaming_services=$(dialog --title "Streaming Services" --checklist \ "Streaming service selection?" "$DIALOG_HEIGHT" "$DIALOG_WIDTH" 2 \ @@ -144,6 +171,9 @@ main() { streaming_service) streaming_service ;; + vllm) + dialog_vllm + ;; *) echo "Usage: $0 {expose|transcription|deployment|gpu|domain|speaker_identification|streaming_service}" exit 1 diff --git a/scripts/setup-services.sh b/scripts/setup-services.sh index 564c7b5..4451394 100755 --- a/scripts/setup-services.sh +++ b/scripts/setup-services.sh @@ -82,18 +82,23 @@ trigger_build_service() { #TODO: we expose to the gateway when studio is selected gpu_enable=false + vllm_enable=false diarization_enable="" live_streaming_enable=false speaker_identification="false" if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) && "$services" =~ (^|[[:space:]])(1|2)($|[[:space:]]) ]]; then speaker_identification=$(./scripts/dialog.sh "speaker_identification") + if [[ "$speaker_identification" == "true" ]]; then diarization_enable="stt-diarization-pyannote-qdrant" else diarization_enable="stt-diarization-pyannote" fi fi + if [[ "$services" =~ (^|[[:space:]])3($|[[:space:]]) && -z "$diarization_enable" ]]; then + diarization_enable="stt-diarization-pyannote" + fi if [[ "$services" =~ (^|[[:space:]])6($|[[:space:]]) ]]; then echo "Studio is selected, forcing API Gateway" expose_api_gateway=true @@ -102,6 +107,9 @@ trigger_build_service() { echo "Studio is selected, forcing API Gateway" live_streaming_enable=true fi + if [[ "$services" =~ (^|[[:space:]])4($|[[:space:]]) ]]; then + vllm_enable=$(./scripts/dialog.sh "vllm") + fi ./scripts/build-services.sh "main" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" @@ -136,7 +144,7 @@ trigger_build_service() { 4) ./scripts/build-config.sh "llm" - 
./scripts/build-services.sh "llm" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" "$expose_traefik" "$expose_api_gateway" + ./scripts/build-services.sh "llm" "$LINTO_DOMAIN" "$DEPLOYMENT_MODE" "$expose_traefik" "$expose_api_gateway" "" "" "" "$vllm_enable" ;; 5) diff --git a/services/live-session/stt-khaldi-french-streaming/config.jsonnet b/services/live-session/stt-khaldi-french-streaming/config.jsonnet new file mode 100644 index 0000000..26f7cc4 --- /dev/null +++ b/services/live-session/stt-khaldi-french-streaming/config.jsonnet @@ -0,0 +1,65 @@ +local tag = std.extVar('LINTO_IMAGE_TAG'); +local repo = std.extVar('DOCKER_REGISTRY'); +local domain = std.extVar('LINTO_DOMAIN'); + +local expose_with_traefik = std.extVar('EXPOSE_TRAEFIK') == "true"; +local expose_with_gateway = std.extVar('EXPOSE_GATEWAY') == "true"; + + +{ + //Generals + build_me: true, //Set to false to disable this build as a YAML file in ./running dir + service_name: 'stt-khaldi-french-streaming', + image: 'lintoai/linto-stt-kaldi:' + tag, + reserve_memory: '', //128M + reserve_cpu: '', //0.5 + limit_cpu: '', //1 + limit_memory: '', //512M + replicas: 1, + + //Main blocks + use_env_file: '', //Set to specified env file (.dockerenv) or leave blank + expose_with_traefik: expose_with_traefik, // TODO : set this to false after API GATEWAY tests + healthcheck: true, + expose_with_api_gateway: expose_with_gateway, + + //Traefik + traefik_endpoint: '/stt-khaldi-french-streaming', + traefik_strip_prefix: '/stt-khaldi-french-streaming', + traefik_server_port: 80, + traefik_domain: domain, + use_basic_auth: true, + + //Healthcheck + healthcheck_interval: '15s', + healthcheck_timeout: '10s', + healthcheck_retries: 4, + healthcheck_start_period: '10s', + restart_policy: false, + restart_condition: 'on-failure', + restart_delay: '5s', + restart_max_attempts: 3, + + //swarm node label constraints + swarm_node_label_constraints: [], //[['ip', 'ingress'], ['mongo', true]...] 
+ + //swarm node role constraints + swarm_node_role_constraints: '', // worker, manager, or leave blank for none + + //API Gateway + gateway_server_port: 80, + gateway_server_desc:{ en: "Linto streaming service",fr:"Service de streaming Linto"}, + gateway_server_scope: 'llm', + + gateway_define_endpoints: [ + { + endpoint: 'stt-khaldi-french-streaming', + middlewares_order: 'logs', + middlewares: [ + { name: 'logs', params: { debug: '*' } } + ], + }, + ], + //Override command + command: [], +} \ No newline at end of file diff --git a/services/live-session/stt-khaldi-french-streaming/template.jsonnet b/services/live-session/stt-khaldi-french-streaming/template.jsonnet new file mode 100644 index 0000000..3970b2f --- /dev/null +++ b/services/live-session/stt-khaldi-french-streaming/template.jsonnet @@ -0,0 +1,37 @@ +local base = import '../../../jsonnet/base.libsonnet'; +local config = import 'config.jsonnet'; +local service = base.Service(config); +local shared_mount = std.extVar('LINTO_SHARED_MOUNT'); +local network = std.extVar('DOCKER_NETWORK'); + + +local patch = { + services: { + [config.service_name]: { + volumes: [ + shared_mount + '/audios/api_uploads/:/opt/audio', + shared_mount + '/models/AMs/french:/opt/AM', + shared_mount + '/models/LMs/french:/opt/LM', + ], + networks: [ + network, + 'session_network', + ], + environment: { + SERVICE_MODE: 'websocket', // task | http | websocket + MODEL_TYPE: 'lin', // lin | vosk + ENABLE_STREAMING: 'true', + STREAMING_PORT: '80', + CONCURRENCY: '1', + LANGUAGE: 'fr-FR', + }, + }, + }, + networks: { + session_network: { + external: true, + }, + }, +}; + +std.mergePatch(service, patch) diff --git a/services/llm/llm-gateway/template.jsonnet b/services/llm/llm-gateway/template.jsonnet index e7c5036..02c77f2 100644 --- a/services/llm/llm-gateway/template.jsonnet +++ b/services/llm/llm-gateway/template.jsonnet @@ -3,23 +3,28 @@ local config = import 'config.jsonnet'; local service = base.Service(config); local shared_mount = 
std.extVar('LINTO_SHARED_MOUNT'); local network = std.extVar('DOCKER_NETWORK'); +local redis_password = std.extVar('REDIS_PASSWORD'); +local openai_api_base = std.extVar('OPENAI_API_BASE'); +local open_api_token = std.extVar('OPENAI_API_TOKEN'); local patch = { services: { [config.service_name]: { volumes: [ shared_mount + '/models/:/root/.cache', - shared_mount + '/llm_services/:/usr/src/services/' + shared_mount + '/llm/.hydra-conf:/usr/src/.hydra-conf', + shared_mount + '/llm/prompts:/usr/src/prompts' ], networks: [ 'net_llm_services', + 'task_broker_services', network, ], environment: { PYTHONUNBUFFERED:1, SERVICE_NAME:'LLM_Gateway', - OPENAI_API_BASE:'http://vllm-service:8000/v1', - OPENAI_API_TOKEN:'EMPTY', + OPENAI_API_BASE: openai_api_base, + OPENAI_API_TOKEN: open_api_token, HTTP_PORT:80, CONCURRENCY:1, TIMEOUT:60, @@ -27,6 +32,8 @@ local patch = { SWAGGER_URL: '/llm-gateway', SWAGGER_PATH:'../document/swagger_llm_gateway.yml', RESULT_DB_PATH:'./results.sqlite', + SERVICES_BROKER: 'redis://task-broker-redis:6379', + BROKER_PASS: redis_password, }, }, }, @@ -34,6 +41,9 @@ local patch = { net_llm_services: { external: true, }, + task_broker_services: { + external: true, + }, }, }; diff --git a/services/studio/studio-api/template.jsonnet b/services/studio/studio-api/template.jsonnet index a5e4e09..6842efc 100644 --- a/services/studio/studio-api/template.jsonnet +++ b/services/studio/studio-api/template.jsonnet @@ -28,7 +28,7 @@ local patch = { DB_NAME: 'conversations', GATEWAY_SERVICES: 'http://api-gateway', - LLM_GATEWAY_SERVICES: 'http://llm-gateway/', + LLM_GATEWAY_SERVICES: 'http://llm-gateway', CORS_ENABLED:'true', CORS_API_WHITELIST: 'https://'+domain,