diff --git a/CodeGen/README.md b/CodeGen/README.md
index 3de96461a..1eef8bac8 100644
--- a/CodeGen/README.md
+++ b/CodeGen/README.md
@@ -29,3 +29,7 @@ Refer to the [Gaudi Guide](./docker-composer/gaudi/README.md) for instructions o
## Deploy CodeGen on Xeon
Refer to the [Xeon Guide](./docker-composer/xeon/README.md) for instructions on deploying CodeGen on Xeon.
+
+## Deploy CodeGen into Kubernetes on Xeon & Gaudi
+
+Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi.
diff --git a/CodeGen/kubernetes/manifests/README.md b/CodeGen/kubernetes/manifests/README.md
index e69de29bb..b7ca064e9 100644
--- a/CodeGen/kubernetes/manifests/README.md
+++ b/CodeGen/kubernetes/manifests/README.md
@@ -0,0 +1,35 @@
+# Deploy CodeGen in Kubernetes Cluster
+
+> [!NOTE]
+> The following values must be set before you can deploy:
+> `HUGGINGFACEHUB_API_TOKEN`.
+> You can also customize `MODEL_ID` and the `model-volume` volume in `codegen.yaml`.
+
+## Deploy On Xeon
+
+```
+cd GenAIExamples/CodeGen/kubernetes/manifests/xeon
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
+kubectl apply -f codegen.yaml
+```
+
+## Deploy On Gaudi
+
+```
+cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
+kubectl apply -f codegen.yaml
+```
+
+## Verify Services
+
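+Make sure all the pods are in the `Running` state, and restart the `codegen-xxxx` pod if necessary. The `codegen` Service is of type `ClusterIP`, so run the `curl` command below from inside the cluster (for example, from a pod).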
+
+```
+kubectl get pods
+curl http://codegen:6666/v1/codegen -H "Content-Type: application/json" -d '{
+ "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."
+ }'
+```
diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml
new file mode 100644
index 000000000..e92c64a74
--- /dev/null
+++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml
@@ -0,0 +1,207 @@
+---
+# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the TGI inference server
+metadata:
+  name: codegen-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 80  # forwarded to port 80 on the selected pods
+      protocol: TCP
+      name: tgi
+  selector:  # traffic goes to pods carrying both labels below
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: codegen
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the LLM microservice
+metadata:
+  name: codegen-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000  # forwarded to port 9000 on the selected pods
+      protocol: TCP
+      name: llm-uservice
+  selector:  # traffic goes to pods carrying both labels below
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: codegen
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the CodeGen service (port 6666)
+metadata:
+  name: codegen
+  labels:
+    helm.sh/chart: codegen-0.1.0
+    app.kubernetes.io/name: codegen
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 6666
+      targetPort: 6666  # forwarded to port 6666 on the selected pods
+      protocol: TCP
+      name: codegen
+  selector:  # traffic goes to pods carrying both labels below
+    app.kubernetes.io/name: codegen
+    app.kubernetes.io/instance: codegen
+---
+apiVersion: apps/v1
+kind: Deployment  # runs the tgi-gaudi inference server with one Gaudi device
+metadata:
+  name: codegen-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: codegen
+  template:
+    metadata:
+      labels:  # must match spec.selector.matchLabels above
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: codegen
+    spec:
+      securityContext: {}
+      containers:
+        - name: tgi
+          env:
+            - name: MODEL_ID  # change to serve a different model
+              value: ise-uiuc/Magicoder-S-DS-6.7B
+            - name: PORT
+              value: "80"
+          securityContext: {}
+          image: "ghcr.io/huggingface/tgi-gaudi:1.2.1"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+          ports:
+            - name: http
+              containerPort: 80
+              protocol: TCP
+          resources:
+            limits:
+              habana.ai/gaudi: 1  # requests one Gaudi accelerator
+      volumes:
+        - name: model-volume  # host directory mounted at /data in the container
+          hostPath:
+            path: /mnt
+            type: Directory  # the directory must already exist on the node
+---
+apiVersion: apps/v1
+kind: Deployment  # runs the LLM microservice that calls the TGI endpoint
+metadata:
+  name: codegen-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: codegen
+  template:
+    metadata:
+      labels:  # must match spec.selector.matchLabels above
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: codegen
+    spec:
+      securityContext: {}
+      containers:
+        - name: codegen
+          env:
+            - name: TGI_LLM_ENDPOINT  # the codegen-tgi Service, port 80
+              value: "http://codegen-tgi:80"
+            - name: HUGGINGFACEHUB_API_TOKEN  # placeholder; replace before applying
+              value: "insert-your-huggingface-token-here"
+            # Proxy variables are explicit empty strings; set them if a proxy is required.
+            - name: http_proxy
+              value: ""
+            - name: https_proxy
+              value: ""
+            - name: no_proxy
+              value: ""
+          securityContext: {}
+          image: "opea/llm-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:  # holds startup until the TGI endpoint answers
+            exec:
+              command:
+                - curl
+                - http://codegen-tgi:80
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120  # 120 tries x 5 s = up to about 10 minutes
+          resources: {}
+---
+apiVersion: apps/v1
+kind: Deployment  # runs the CodeGen service container on port 6666
+metadata:
+  name: codegen
+  labels:
+    helm.sh/chart: codegen-0.1.0
+    app.kubernetes.io/name: codegen
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: codegen
+      app.kubernetes.io/instance: codegen
+  template:
+    metadata:
+      labels:  # must match spec.selector.matchLabels above
+        app.kubernetes.io/name: codegen
+        app.kubernetes.io/instance: codegen
+    spec:
+      securityContext: {}  # explicit empty mapping, as in the sibling Deployments
+      containers:
+        - name: codegen
+          env:
+            - name: LLM_SERVICE_HOST_IP  # name of the LLM microservice Service
+              value: codegen-llm-uservice
+          securityContext: {}
+          image: "opea/codegen:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: codegen
+              containerPort: 6666
+              protocol: TCP
+          resources: {}  # no requests or limits set
diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml
new file mode 100644
index 000000000..1887741cf
--- /dev/null
+++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml
@@ -0,0 +1,205 @@
+---
+# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the TGI inference server
+metadata:
+  name: codegen-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 80  # forwarded to port 80 on the selected pods
+      protocol: TCP
+      name: tgi
+  selector:  # traffic goes to pods carrying both labels below
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: codegen
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the LLM microservice
+metadata:
+  name: codegen-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000  # forwarded to port 9000 on the selected pods
+      protocol: TCP
+      name: llm-uservice
+  selector:  # traffic goes to pods carrying both labels below
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: codegen
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the CodeGen service (port 6666)
+metadata:
+  name: codegen
+  labels:
+    helm.sh/chart: codegen-0.1.0
+    app.kubernetes.io/name: codegen
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 6666
+      targetPort: 6666  # forwarded to port 6666 on the selected pods
+      protocol: TCP
+      name: codegen
+  selector:  # traffic goes to pods carrying both labels below
+    app.kubernetes.io/name: codegen
+    app.kubernetes.io/instance: codegen
+---
+apiVersion: apps/v1
+kind: Deployment  # runs the text-generation-inference server
+metadata:
+  name: codegen-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: codegen
+  template:
+    metadata:
+      labels:  # must match spec.selector.matchLabels above
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: codegen
+    spec:
+      securityContext: {}
+      containers:
+        - name: tgi
+          env:
+            - name: MODEL_ID  # change to serve a different model
+              value: ise-uiuc/Magicoder-S-DS-6.7B
+            - name: PORT
+              value: "80"
+          securityContext: {}
+          image: "ghcr.io/huggingface/text-generation-inference:1.4"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+          ports:
+            - name: http
+              containerPort: 80
+              protocol: TCP
+          resources: {}  # no requests or limits set
+      volumes:
+        - name: model-volume  # host directory mounted at /data in the container
+          hostPath:
+            path: /mnt
+            type: Directory  # the directory must already exist on the node
+---
+apiVersion: apps/v1
+kind: Deployment  # runs the LLM microservice that calls the TGI endpoint
+metadata:
+  name: codegen-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: codegen
+  template:
+    metadata:
+      labels:  # must match spec.selector.matchLabels above
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: codegen
+    spec:
+      securityContext: {}
+      containers:
+        - name: codegen
+          env:
+            - name: TGI_LLM_ENDPOINT  # the codegen-tgi Service, port 80
+              value: "http://codegen-tgi:80"
+            - name: HUGGINGFACEHUB_API_TOKEN  # placeholder; replace before applying
+              value: "insert-your-huggingface-token-here"
+            # Proxy variables are explicit empty strings; set them if a proxy is required.
+            - name: http_proxy
+              value: ""
+            - name: https_proxy
+              value: ""
+            - name: no_proxy
+              value: ""
+          securityContext: {}
+          image: "opea/llm-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:  # holds startup until the TGI endpoint answers
+            exec:
+              command:
+                - curl
+                - http://codegen-tgi:80
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120  # 120 tries x 5 s = up to about 10 minutes
+          resources: {}
+---
+apiVersion: apps/v1
+kind: Deployment  # runs the CodeGen service container on port 6666
+metadata:
+  name: codegen
+  labels:
+    helm.sh/chart: codegen-0.1.0
+    app.kubernetes.io/name: codegen
+    app.kubernetes.io/instance: codegen
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: codegen
+      app.kubernetes.io/instance: codegen
+  template:
+    metadata:
+      labels:  # must match spec.selector.matchLabels above
+        app.kubernetes.io/name: codegen
+        app.kubernetes.io/instance: codegen
+    spec:
+      securityContext: {}  # explicit empty mapping, as in the sibling Deployments
+      containers:
+        - name: codegen
+          env:
+            - name: LLM_SERVICE_HOST_IP  # name of the LLM microservice Service
+              value: codegen-llm-uservice
+          securityContext: {}
+          image: "opea/codegen:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: codegen
+              containerPort: 6666
+              protocol: TCP
+          resources: {}  # no requests or limits set