diff --git a/AudioQnA/kubernetes/manifests/README.md b/AudioQnA/kubernetes/manifests/README.md new file mode 100644 index 000000000..ab1ce2cbc --- /dev/null +++ b/AudioQnA/kubernetes/manifests/README.md @@ -0,0 +1,32 @@ +# Deploy VisualQnA in a Kubernetes Cluster + +> [NOTE] +> The following values must be set before you can deploy: +> HUGGINGFACEHUB_API_TOKEN +> You can also customize the "MODEL_ID" and "model-volume" + +## Deploy On Xeon +``` +cd GenAIExamples/AudioQnA/kubernetes/manifests/xeon +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml +kubectl apply -f audioqna.yaml +``` +## Deploy On Gaudi +``` +cd GenAIExamples/AudioQnA/kubernetes/manifests/gaudi +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml +kubectl apply -f audioqna.yaml +``` + + +## Verify Services + +Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary. + +```bash +kubectl get pods + +curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json' +``` diff --git a/AudioQnA/kubernetes/manifests/gaudi/audioqna.yaml b/AudioQnA/kubernetes/manifests/gaudi/audioqna.yaml new file mode 100644 index 000000000..5a824ef3e --- /dev/null +++ b/AudioQnA/kubernetes/manifests/gaudi/audioqna.yaml @@ -0,0 +1,439 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: audio-qna-config + namespace: default +data: + ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 + TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 + MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc + ASR_SERVICE_HOST_IP: asr-svc + ASR_SERVICE_PORT: "3001" + LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVICE_PORT: "3007" + TTS_SERVICE_HOST_IP: tts-svc + TTS_SERVICE_PORT: "3002" + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: asr-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: asr-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: asr-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: asr-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/asr:latest + imagePullPolicy: IfNotPresent + name: asr-deploy + args: null + ports: + - containerPort: 9099 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: asr-svc +spec: + type: ClusterIP + selector: + app: asr-deploy + ports: + - name: service + port: 3001 + targetPort: 9099 +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: whisper-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: whisper-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: whisper-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: whisper-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/whisper-gaudi:latest + imagePullPolicy: IfNotPresent + name: whisper-deploy + args: null + ports: + - containerPort: 7066 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: whisper-svc +spec: + type: ClusterIP + selector: + app: whisper-deploy + ports: + - name: service + port: 7066 + targetPort: 7066 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tts-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: tts-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: tts-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: tts-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/tts:latest + imagePullPolicy: IfNotPresent + name: tts-deploy + args: null + ports: + - containerPort: 9088 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: tts-svc +spec: + type: ClusterIP + selector: + app: tts-deploy + ports: + - name: service + port: 3002 + targetPort: 9088 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: speecht5-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: speecht5-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: speecht5-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: speecht5-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/speecht5-gaudi:latest + imagePullPolicy: IfNotPresent + name: speecht5-deploy + args: null + ports: + - containerPort: 7055 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: speecht5-svc +spec: + type: ClusterIP + selector: + app: speecht5-deploy + ports: + - name: service + port: 7055 + targetPort: 7055 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: PREFILL_BATCH_BUCKET_SIZE + value: "1" + - name: BATCH_BUCKET_SIZE + value: "8" + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /home/sdp/cesg + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 3006 + targetPort: 80 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 3007 + targetPort: 9000 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: audioqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: audioqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: audioqna-backend-server-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: audioqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/audioqna:latest + imagePullPolicy: IfNotPresent + name: audioqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: audioqna-backend-server-svc +spec: + type: NodePort + selector: + app: audioqna-backend-server-deploy + ports: + - name: service + port: 3008 + targetPort: 8888 + nodePort: 30666 diff --git a/AudioQnA/kubernetes/manifests/xeon/audioqna.yaml b/AudioQnA/kubernetes/manifests/xeon/audioqna.yaml new file mode 100644 index 000000000..89b3e6b1a --- /dev/null +++ b/AudioQnA/kubernetes/manifests/xeon/audioqna.yaml @@ -0,0 +1,395 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: audio-qna-config + namespace: default +data: + ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 + TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 + MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc + ASR_SERVICE_HOST_IP: asr-svc + ASR_SERVICE_PORT: "3001" + LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVICE_PORT: "3007" + TTS_SERVICE_HOST_IP: tts-svc + TTS_SERVICE_PORT: "3002" + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: asr-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: asr-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: asr-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: asr-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/asr:latest + imagePullPolicy: IfNotPresent + name: asr-deploy + args: null + ports: + - containerPort: 9099 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: asr-svc +spec: + type: ClusterIP + selector: + app: asr-deploy + ports: + - name: service + port: 3001 + targetPort: 9099 +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: whisper-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: whisper-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: whisper-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: whisper-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/whisper:latest + imagePullPolicy: IfNotPresent + name: whisper-deploy + args: null + ports: + - containerPort: 7066 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: whisper-svc +spec: + type: ClusterIP + selector: + app: whisper-deploy + ports: + - name: service + port: 7066 + targetPort: 7066 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tts-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: tts-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: tts-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: tts-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/tts:latest + imagePullPolicy: IfNotPresent + name: tts-deploy + args: null + ports: + - containerPort: 9088 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: tts-svc +spec: + type: ClusterIP + selector: + app: tts-deploy + ports: + - name: service + port: 3002 + targetPort: 9088 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: speecht5-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: speecht5-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: speecht5-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: speecht5-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/speecht5:latest + imagePullPolicy: IfNotPresent + name: speecht5-deploy + args: null + ports: + - containerPort: 7055 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: speecht5-svc +spec: + type: ClusterIP + selector: + app: speecht5-deploy + ports: + - name: service + port: 7055 + targetPort: 7055 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: ghcr.io/huggingface/text-generation-inference:2.2.0 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /home/sdp/cesg + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 3006 + targetPort: 80 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 3007 + targetPort: 9000 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: audioqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: audioqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: audioqna-backend-server-deploy + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: audioqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: audio-qna-config + image: opea/audioqna:latest + imagePullPolicy: IfNotPresent + name: audioqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: audioqna-backend-server-svc +spec: + type: NodePort + selector: + app: audioqna-backend-server-deploy + ports: + - name: service + port: 3008 + targetPort: 8888 + nodePort: 30666