diff --git a/comps/3rd_parties/tei/deployment/kubernetes/README.md b/comps/3rd_parties/tei/deployment/kubernetes/README.md new file mode 100644 index 0000000000..1650330214 --- /dev/null +++ b/comps/3rd_parties/tei/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy TEI on a Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tei oci://ghcr.io/opea-project/charts/tei --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install tei oci://ghcr.io/opea-project/charts/tei --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/3rd_parties/tei/deployment/kubernetes/cpu-values.yaml b/comps/3rd_parties/tei/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..5eaa0d2744 --- /dev/null +++ b/comps/3rd_parties/tei/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: ghcr.io/huggingface/text-embeddings-inference diff --git a/comps/3rd_parties/tei/deployment/kubernetes/gaudi-values.yaml b/comps/3rd_parties/tei/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..aa8c36da48 --- /dev/null +++ b/comps/3rd_parties/tei/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +MAX_WARMUP_SEQUENCE_LENGTH: "512" +image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + 
+securityContext: + readOnlyRootFilesystem: false + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1 diff --git a/comps/3rd_parties/teirerank/deployment/kubernetes/README.md b/comps/3rd_parties/teirerank/deployment/kubernetes/README.md new file mode 100644 index 0000000000..b67de89cb0 --- /dev/null +++ b/comps/3rd_parties/teirerank/deployment/kubernetes/README.md @@ -0,0 +1,18 @@ +# Deploy TEIRERANK on a Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). + +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install teirerank oci://ghcr.io/opea-project/charts/teirerank --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install teirerank oci://ghcr.io/opea-project/charts/teirerank --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/comps/3rd_parties/teirerank/deployment/kubernetes/cpu-values.yaml b/comps/3rd_parties/teirerank/deployment/kubernetes/cpu-values.yaml new file mode 100644 index 0000000000..5eaa0d2744 --- /dev/null +++ b/comps/3rd_parties/teirerank/deployment/kubernetes/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: ghcr.io/huggingface/text-embeddings-inference diff --git a/comps/3rd_parties/teirerank/deployment/kubernetes/gaudi-values.yaml b/comps/3rd_parties/teirerank/deployment/kubernetes/gaudi-values.yaml new file mode 100644 index 0000000000..aa8c36da48 --- /dev/null +++ b/comps/3rd_parties/teirerank/deployment/kubernetes/gaudi-values.yaml @@ -0,0 +1,22 @@ +# Copyright 
(C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +MAX_WARMUP_SEQUENCE_LENGTH: "512" +image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + +securityContext: + readOnlyRootFilesystem: false + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1