diff --git a/CodeGen/README.md b/CodeGen/README.md index 3de96461a..1eef8bac8 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -29,3 +29,7 @@ Refer to the [Gaudi Guide](./docker-composer/gaudi/README.md) for instructions o ## Deploy CodeGen on Xeon Refer to the [Xeon Guide](./docker-composer/xeon/README.md) for instructions on deploying CodeGen on Xeon. + +## Deploy CodeGen into Kubernetes on Xeon & Gaudi + +Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi. diff --git a/CodeGen/kubernetes/manifests/README.md b/CodeGen/kubernetes/manifests/README.md index e69de29bb..b7ca064e9 100644 --- a/CodeGen/kubernetes/manifests/README.md +++ b/CodeGen/kubernetes/manifests/README.md @@ -0,0 +1,35 @@ +

Deploy CodeGen in Kubernetes Cluster

+ +> [!NOTE] +> The following value must be set before you can deploy: +> `HUGGINGFACEHUB_API_TOKEN`. +> You can also customize the `MODEL_ID` and `model-volume` settings in `codegen.yaml`. + +## Deploy On Xeon + +``` +cd GenAIExamples/CodeGen/kubernetes/manifests/xeon +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml +kubectl apply -f codegen.yaml +``` + +## Deploy On Gaudi + +``` +cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml +kubectl apply -f codegen.yaml +``` + +## Verify Services + +Make sure all the pods are running, and restart the codegen-xxxx pod if necessary. + +``` +kubectl get pods +curl http://codegen:6666/v1/codegen -H "Content-Type: application/json" -d '{ + "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception." 
+ }' +``` diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml new file mode 100644 index 000000000..e92c64a74 --- /dev/null +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -0,0 +1,207 @@ +--- +# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: codegen-tgi + labels: + helm.sh/chart: tgi-0.1.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.4" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 80 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen +--- +apiVersion: v1 +kind: Service +metadata: + name: codegen-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.1.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen +--- +apiVersion: v1 +kind: Service +metadata: + name: codegen + labels: + helm.sh/chart: codegen-0.1.0 + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6666 + targetPort: 6666 + protocol: TCP + name: codegen + selector: + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: codegen-tgi + labels: + helm.sh/chart: tgi-0.1.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.4" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 
+ selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + spec: + securityContext: {} + containers: + - name: tgi + env: + - name: MODEL_ID + value: ise-uiuc/Magicoder-S-DS-6.7B + - name: PORT + value: "80" + securityContext: {} + image: "ghcr.io/huggingface/tgi-gaudi:1.2.1" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + hostPath: + path: /mnt + type: Directory +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: codegen-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.1.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + spec: + securityContext: {} + containers: + - name: codegen + env: + - name: TGI_LLM_ENDPOINT + value: "http://codegen-tgi:80" + - name: HUGGINGFACEHUB_API_TOKEN + value: "insert-your-huggingface-token-here" + - name: http_proxy + value: + - name: https_proxy + value: + - name: no_proxy + value: + + securityContext: {} + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + startupProbe: + exec: + command: + - curl + - http://codegen-tgi:80 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: {} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: codegen + labels: + helm.sh/chart: codegen-0.1.0 + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen 
+ app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + template: + metadata: + labels: + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + spec: + securityContext: null + containers: + - name: codegen + env: + - name: LLM_SERVICE_HOST_IP + value: codegen-llm-uservice + securityContext: null + image: "opea/codegen:latest" + imagePullPolicy: IfNotPresent + ports: + - name: codegen + containerPort: 6666 + protocol: TCP + resources: null diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml new file mode 100644 index 000000000..1887741cf --- /dev/null +++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml @@ -0,0 +1,205 @@ +--- +# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: codegen-tgi + labels: + helm.sh/chart: tgi-0.1.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.4" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 80 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen +--- +apiVersion: v1 +kind: Service +metadata: + name: codegen-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.1.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen +--- +apiVersion: v1 +kind: Service +metadata: + name: codegen + labels: + helm.sh/chart: codegen-0.1.0 + 
app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6666 + targetPort: 6666 + protocol: TCP + name: codegen + selector: + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: codegen-tgi + labels: + helm.sh/chart: tgi-0.1.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.4" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: codegen + spec: + securityContext: {} + containers: + - name: tgi + env: + - name: MODEL_ID + value: ise-uiuc/Magicoder-S-DS-6.7B + - name: PORT + value: "80" + securityContext: {} + image: "ghcr.io/huggingface/text-generation-inference:1.4" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: {} + volumes: + - name: model-volume + hostPath: + path: /mnt + type: Directory +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: codegen-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.1.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: codegen + spec: + securityContext: {} + containers: + - name: codegen + env: + - name: TGI_LLM_ENDPOINT + value: "http://codegen-tgi:80" + - name: HUGGINGFACEHUB_API_TOKEN + value: 
"insert-your-huggingface-token-here" + - name: http_proxy + value: + - name: https_proxy + value: + - name: no_proxy + value: + + securityContext: {} + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + startupProbe: + exec: + command: + - curl + - http://codegen-tgi:80 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: {} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: codegen + labels: + helm.sh/chart: codegen-0.1.0 + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + template: + metadata: + labels: + app.kubernetes.io/name: codegen + app.kubernetes.io/instance: codegen + spec: + securityContext: null + containers: + - name: codegen + env: + - name: LLM_SERVICE_HOST_IP + value: codegen-llm-uservice + securityContext: null + image: "opea/codegen:latest" + imagePullPolicy: IfNotPresent + ports: + - name: codegen + containerPort: 6666 + protocol: TCP + resources: null