diff --git a/VisualQnA/docker/gaudi/README.md b/VisualQnA/docker/gaudi/README.md index aa12c0820..72d5efb07 100644 --- a/VisualQnA/docker/gaudi/README.md +++ b/VisualQnA/docker/gaudi/README.md @@ -116,7 +116,7 @@ curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d { "type": "image_url", "image_url": { - "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" } } ] diff --git a/VisualQnA/docker/xeon/README.md b/VisualQnA/docker/xeon/README.md index 225bb9639..3d53661d0 100644 --- a/VisualQnA/docker/xeon/README.md +++ b/VisualQnA/docker/xeon/README.md @@ -68,15 +68,20 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt cd ../../../.. ``` -### 4. Pull TGI image +### 4. Build TGI Xeon Image + +Since TGI official image has not supported llava-next for CPU, we'll need to build it based on Dockerfile_intel. ```bash -docker pull ghcr.io/huggingface/text-generation-inference:2.2.0 +git clone https://github.com/huggingface/text-generation-inference +cd text-generation-inference/ +docker build -t opea/llava-tgi-xeon:latest --build-arg PLATFORM=cpu --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} . -f Dockerfile_intel +cd ../ ``` Then run the command `docker images`, you will have the following 4 Docker Images: -1. `ghcr.io/huggingface/text-generation-inference:2.2.0` +1. `opea/llava-tgi-xeon:latest` 2. `opea/lvm-tgi:latest` 3. `opea/visualqna:latest` 4. 
`opea/visualqna-ui:latest` @@ -152,7 +157,7 @@ curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d { "type": "image_url", "image_url": { - "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" } } ] diff --git a/VisualQnA/docker/xeon/compose.yaml b/VisualQnA/docker/xeon/compose.yaml index 9b9501242..b43e30f31 100644 --- a/VisualQnA/docker/xeon/compose.yaml +++ b/VisualQnA/docker/xeon/compose.yaml @@ -6,7 +6,7 @@ version: "3.8" services: llava-tgi-service: - image: ghcr.io/huggingface/text-generation-inference:2.2.0 + image: opea/llava-tgi-xeon:latest container_name: tgi-llava-xeon-server ports: - "9399:80" @@ -19,7 +19,7 @@ services: https_proxy: ${https_proxy} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - command: --model-id ${LVM_MODEL_ID} + command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --cuda-graphs 0 lvm-tgi: image: opea/lvm-tgi:latest container_name: lvm-tgi-server diff --git a/VisualQnA/kubernetes/README.md b/VisualQnA/kubernetes/README.md new file mode 100644 index 000000000..808b0764b --- /dev/null +++ b/VisualQnA/kubernetes/README.md @@ -0,0 +1,57 @@ +# Deploy VisualQnA in a Kubernetes Cluster + +This document outlines the deployment process for a Visual Question Answering (VisualQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines. + +Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install. 
+ +If you have only Intel Xeon machines, you can use the visualqna_xeon.yaml file; if you have a Gaudi cluster, you can use visualqna_gaudi.yaml. +The example below uses Xeon. + +## Deploy the VisualQnA application + +1. Create the desired namespace if it does not already exist and deploy the application + ```bash + export APP_NAMESPACE=CT + kubectl create ns $APP_NAMESPACE + sed -i "s|namespace: visualqna|namespace: $APP_NAMESPACE|g" ./visualqna_xeon.yaml + kubectl apply -f ./visualqna_xeon.yaml + ``` + +2. Check if the application is up and ready + ```bash + kubectl get pods -n $APP_NAMESPACE + ``` + +3. Deploy a client pod for testing + ```bash + kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity + ``` + +4. Check that the client pod is ready + ```bash + kubectl get pods -n $APP_NAMESPACE + ``` + +5. Send a request to the application + ```bash + export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='visualqna')].status.accessUrl}") + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 128}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_visualqna.log + ``` diff --git a/VisualQnA/kubernetes/manifests/README.md b/VisualQnA/kubernetes/manifests/README.md new file mode 100644 index 000000000..9973b15e7 --- /dev/null +++ b/VisualQnA/kubernetes/manifests/README.md @@ -0,0 +1,51 @@ +# Deploy VisualQnA in Kubernetes Cluster + +> [NOTE] +> You can also customize the "LVM_MODEL_ID" if needed. 
+ +> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the visualqna workload is running. Otherwise, you need to modify the `visualqna.yaml` file to change the `model-volume` to a directory that exists on the node. + +## Deploy On Xeon + +``` +cd GenAIExamples/VisualQnA/kubernetes/manifests/xeon +kubectl apply -f visualqna.yaml +``` + +## Deploy On Gaudi + +``` +cd GenAIExamples/VisualQnA/kubernetes/manifests/gaudi +kubectl apply -f visualqna.yaml +``` + +## Verify Services + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/visualqna 8888:8888` to expose the visualqna service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:8888/v1/visualqna \ + -H 'Content-Type: application/json' \ + -d '{"messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 128}' +``` diff --git a/VisualQnA/kubernetes/manifests/gaudi/visualqna.yaml b/VisualQnA/kubernetes/manifests/gaudi/visualqna.yaml new file mode 100644 index 000000000..6f00bfb8c --- /dev/null +++ b/VisualQnA/kubernetes/manifests/gaudi/visualqna.yaml @@ -0,0 +1,298 @@ +--- +# Source: visualqna/charts/lvm-uservice/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: visualqna-lvm-uservice-config + labels: + helm.sh/chart: lvm-uservice-0.8.0 + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + LVM_ENDPOINT: "http://visualqna-tgi" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: "" + https_proxy: "" + no_proxy: "" +--- +# Source: visualqna/charts/tgi/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: visualqna-tgi-config + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf" + PORT: "8399" + MAX_INPUT_TOKENS: "4096" + MAX_TOTAL_TOKENS: "8192" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: visualqna/charts/lvm-uservice/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: visualqna-lvm-uservice + labels: + helm.sh/chart: lvm-uservice-0.8.0 + app.kubernetes.io/name: 
lvm-uservice + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9399 + targetPort: 9399 + protocol: TCP + name: lvm-uservice + selector: + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna +--- +# Source: visualqna/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: visualqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 8399 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna +--- +# Source: visualqna/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: visualqna + labels: + helm.sh/chart: visualqna-0.8.0 + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: visualqna + selector: + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna +--- +# Source: visualqna/charts/lvm-uservice/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: visualqna-lvm-uservice + labels: + helm.sh/chart: lvm-uservice-0.8.0 + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: lvm-uservice + 
app.kubernetes.io/instance: visualqna + template: + metadata: + labels: + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + spec: + securityContext: + {} + containers: + - name: visualqna + envFrom: + - configMapRef: + name: visualqna-lvm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/lvm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: lvm-uservice + containerPort: 9399 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: visualqna/charts/tgi/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: visualqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: visualqna-tgi-config + securityContext: + {} + image: "opea/llava-tgi:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 8399 + protocol: TCP + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: tmp + emptyDir: {} +--- +# Source: visualqna/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: visualqna + labels: + helm.sh/chart: visualqna-0.8.0 + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + template: + metadata: + labels: + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + spec: + securityContext: + null + containers: + - name: visualqna + env: + - name: LVM_SERVICE_HOST_IP + value: visualqna-lvm-uservice + #- name: MEGA_SERVICE_PORT + # value: 8888 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/visualqna:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: visualqna + containerPort: 8888 + protocol: TCP + resources: + null + volumes: + - name: tmp + emptyDir: {} diff --git a/VisualQnA/kubernetes/manifests/xeon/visualqna.yaml b/VisualQnA/kubernetes/manifests/xeon/visualqna.yaml new file mode 100644 index 000000000..744853d81 --- /dev/null +++ b/VisualQnA/kubernetes/manifests/xeon/visualqna.yaml @@ -0,0 +1,298 @@ +--- +# Source: visualqna/charts/lvm-uservice/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: visualqna-lvm-uservice-config + labels: + helm.sh/chart: lvm-uservice-0.8.0 + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + LVM_ENDPOINT: "http://visualqna-tgi" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: "" + https_proxy: "" + no_proxy: "" +--- +# Source: visualqna/charts/tgi/templates/configmap.yaml +# 
Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: visualqna-tgi-config + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf" + PORT: "8399" + MAX_INPUT_TOKENS: "4096" + MAX_TOTAL_TOKENS: "8192" + CUDA_GRAPHS: "0" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: visualqna/charts/lvm-uservice/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: visualqna-lvm-uservice + labels: + helm.sh/chart: lvm-uservice-0.8.0 + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9399 + targetPort: 9399 + protocol: TCP + name: lvm-uservice + selector: + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna +--- +# Source: visualqna/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: visualqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 8399 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna +--- +# Source: visualqna/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 
+kind: Service +metadata: + name: visualqna + labels: + helm.sh/chart: visualqna-0.8.0 + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: visualqna + selector: + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna +--- +# Source: visualqna/charts/lvm-uservice/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: visualqna-lvm-uservice + labels: + helm.sh/chart: lvm-uservice-0.8.0 + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + template: + metadata: + labels: + app.kubernetes.io/name: lvm-uservice + app.kubernetes.io/instance: visualqna + spec: + securityContext: + {} + containers: + - name: visualqna + envFrom: + - configMapRef: + name: visualqna-lvm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/lvm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: lvm-uservice + containerPort: 9399 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: visualqna/charts/tgi/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: visualqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + 
app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: visualqna + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: visualqna-tgi-config + securityContext: + {} + image: "ghcr.io/huggingface/text-generation-inference:2.2.0" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 8399 + protocol: TCP + resources: + {} + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: tmp + emptyDir: {} +--- +# Source: visualqna/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: visualqna + labels: + helm.sh/chart: visualqna-0.8.0 + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + template: + metadata: + labels: + app.kubernetes.io/name: visualqna + app.kubernetes.io/instance: visualqna + spec: + securityContext: + null + containers: + - name: visualqna + env: + - name: LVM_SERVICE_HOST_IP + value: visualqna-lvm-uservice + #- name: MEGA_SERVICE_PORT + # value: 8888 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/visualqna:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: visualqna + containerPort: 8888 + 
protocol: TCP + resources: + null + volumes: + - name: tmp + emptyDir: {} diff --git a/VisualQnA/kubernetes/visualqna_gaudi.yaml b/VisualQnA/kubernetes/visualqna_gaudi.yaml new file mode 100644 index 000000000..4bd824802 --- /dev/null +++ b/VisualQnA/kubernetes/visualqna_gaudi.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: gmc.opea.io/v1alpha3 +kind: GMConnector +metadata: + labels: + app.kubernetes.io/name: gmconnector + app.kubernetes.io/managed-by: kustomize + gmc/platform: gaudi + name: visualqna + namespace: visualqna +spec: + routerConfig: + name: router + serviceName: router-service + nodes: + root: + routerType: Sequence + steps: + - name: Lvm + data: $response + internalService: + serviceName: visualqna-service + config: + endpoint: /v1/lvm + LVM_ENDPOINT: visualqna-tgi-svc + - name: TgiGaudi + internalService: + serviceName: visualqna-tgi-svc + config: + MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf + endpoint: /generate + isDownstreamService: true diff --git a/VisualQnA/kubernetes/visualqna_xeon.yaml b/VisualQnA/kubernetes/visualqna_xeon.yaml new file mode 100644 index 000000000..c789c09da --- /dev/null +++ b/VisualQnA/kubernetes/visualqna_xeon.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: gmc.opea.io/v1alpha3 +kind: GMConnector +metadata: + labels: + app.kubernetes.io/name: gmconnector + app.kubernetes.io/managed-by: kustomize + gmc/platform: xeon + name: visualqna + namespace: visualqna +spec: + routerConfig: + name: router + serviceName: router-service + nodes: + root: + routerType: Sequence + steps: + - name: Lvm + data: $response + internalService: + serviceName: visualqna-service + config: + endpoint: /v1/lvm + LVM_ENDPOINT: visualqna-tgi-svc + - name: Tgi + internalService: + serviceName: visualqna-tgi-svc + config: + MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf + endpoint: /generate + isDownstreamService: true