From d9946180a2372652136bd46a21aab308cda31d7e Mon Sep 17 00:00:00 2001
From: Ruoyu Ying
Date: Thu, 18 Jul 2024 16:21:28 +0800
Subject: [PATCH] doc: fix minor issue in GMC doc (#383)

Signed-off-by: Ruoyu Ying
---
 ChatQnA/kubernetes/manifests/README.md | 6 +++---
 DocSum/kubernetes/README.md            | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ChatQnA/kubernetes/manifests/README.md b/ChatQnA/kubernetes/manifests/README.md
index bb68a9fad..041b8afcd 100644
--- a/ChatQnA/kubernetes/manifests/README.md
+++ b/ChatQnA/kubernetes/manifests/README.md
@@ -64,7 +64,7 @@ kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep i
 5. Access the application using the above URL from the client pod
 
 ```sh
-export CLIENT_POD=$(kubectl get pod -l app=client-test -o jsonpath={.items..metadata.name})
+export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name})
 export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
 kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
 ```
@@ -79,7 +79,7 @@ For example, to use Llama-2-7b-chat-hf make the following edit:
 ```yaml
 - name: Tgi
   internalService:
-    serviceName: tgi-svc
+    serviceName: tgi-service-m
     config:
       LLM_MODEL_ID: Llama-2-7b-chat-hf
 ```
@@ -92,7 +92,7 @@ kubectl apply -f $(pwd)/chatQnA_xeon.yaml
 8. Check that the tgi-svc-deployment has been changed to use the new LLM Model
 
 ```sh
-kubectl get deployment tgi-svc-deployment -n chatqa -o jsonpath="{.spec.template.spec.containers[*].env[?(@.name=='LLM_MODEL_ID')].value}"
+kubectl get deployment tgi-service-m-deployment -n chatqa -o jsonpath="{.spec.template.spec.containers[*].env[?(@.name=='LLM_MODEL_ID')].value}"
 ```
 
 9. Access the updated pipeline using the same URL from above using the client pod
diff --git a/DocSum/kubernetes/README.md b/DocSum/kubernetes/README.md
index 19f58319d..06e492f36 100644
--- a/DocSum/kubernetes/README.md
+++ b/DocSum/kubernetes/README.md
@@ -58,7 +58,7 @@ kubectl create deployment client-test -n ${ns} --image=python:3.8.13 -- sleep in
 6. Access the pipeline using the above URL from the client pod and execute a request
 
 ```bash
-export CLIENT_POD=$(kubectl get pod -l app=client-test -o jsonpath={.items..metadata.name})
+export CLIENT_POD=$(kubectl get pod -n ${ns} -l app=client-test -o jsonpath={.items..metadata.name})
 export accessUrl=$(kubectl get gmc -n $ns -o jsonpath="{.items[?(@.metadata.name=='docsum')].status.accessUrl}")
 kubectl exec "$CLIENT_POD" -n $ns -- curl $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
 ```