From 2f03a3a894b7ab8a6012edd030ecab01b55af253 Mon Sep 17 00:00:00 2001
From: XinyaoWa
Date: Thu, 19 Sep 2024 14:15:25 +0800
Subject: [PATCH] Align parameters for "max_tokens, repetition_penalty, presence_penalty, frequency_penalty" (#726)

Signed-off-by: Xinyao Wang
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .../docker_compose/intel/cpu/xeon/README.md   |  2 +-
 .../docker_compose/intel/hpu/gaudi/README.md  |  2 +-
 AudioQnA/tests/test_gmc_on_gaudi.sh           |  2 +-
 AudioQnA/tests/test_gmc_on_xeon.sh            |  2 +-
 ChatQnA/benchmark/benchmark.yaml              |  2 +-
 ChatQnA/chatqna_no_wrapper.py                 |  6 ++-
 .../docker_compose/intel/cpu/aipc/README.md   |  2 +-
 .../docker_compose/intel/cpu/xeon/README.md   | 21 +++++++--
 .../intel/cpu/xeon/README_qdrant.md           |  2 +-
 .../docker_compose/intel/hpu/gaudi/README.md  | 29 ++++++++++--
 .../hpu/gaudi/how_to_validate_service.md      |  2 +-
 ChatQnA/docker_compose/nvidia/gpu/README.md   |  2 +-
 CodeGen/README.md                             |  2 +-
 .../docker_compose/intel/cpu/xeon/README.md   |  2 +-
 .../docker_compose/intel/hpu/gaudi/README.md  |  2 +-
 CodeGen/tests/test_gmc_on_gaudi.sh            |  2 +-
 CodeGen/tests/test_gmc_on_xeon.sh             |  2 +-
 CodeTrans/README.md                           |  2 +-
 DocSum/README.md                              |  2 +-
 .../docker_compose/intel/cpu/xeon/README.md   |  2 +-
 .../docker_compose/intel/cpu/xeon/README.md   |  2 +-
 .../docker_compose/intel/hpu/gaudi/README.md  |  2 +-
 .../docker_compose/intel/cpu/xeon/README.md   | 44 +++++++++----------
 .../docker_compose/intel/hpu/gaudi/README.md  | 44 +++++++++----------
 24 files changed, 110 insertions(+), 72 deletions(-)

diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md
index 338771dd0..d08061284 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
 # llm microservice
 curl http://${host_ip}:3007/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
   -H 'Content-Type: application/json'

 # speecht5 service
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
index 28ec3f402..842227ee5 100644
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
 # llm microservice
 curl http://${host_ip}:3007/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
   -H 'Content-Type: application/json'

 # speecht5 service
diff --git a/AudioQnA/tests/test_gmc_on_gaudi.sh b/AudioQnA/tests/test_gmc_on_gaudi.sh
index 898a91524..d90bd3624 100755
--- a/AudioQnA/tests/test_gmc_on_gaudi.sh
+++ b/AudioQnA/tests/test_gmc_on_gaudi.sh
@@ -34,7 +34,7 @@ function validate_audioqa() {
   export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
   echo "$CLIENT_POD"
   accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o
jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}") - byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) + byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) echo "$byte_str" > $LOG_PATH/curl_audioqa.log if [ -z "$byte_str" ]; then echo "audioqa failed, please check the logs in ${LOG_PATH}!" diff --git a/AudioQnA/tests/test_gmc_on_xeon.sh b/AudioQnA/tests/test_gmc_on_xeon.sh index ed6adddd2..15e04e62c 100755 --- a/AudioQnA/tests/test_gmc_on_xeon.sh +++ b/AudioQnA/tests/test_gmc_on_xeon.sh @@ -34,7 +34,7 @@ function validate_audioqa() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}") - byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) + byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) echo "$byte_str" > $LOG_PATH/curl_audioqa.log if [ -z "$byte_str" ]; then echo "audioqa failed, please check the logs in ${LOG_PATH}!" 
diff --git a/ChatQnA/benchmark/benchmark.yaml b/ChatQnA/benchmark/benchmark.yaml
index f1eb86a37..851a3e11a 100644
--- a/ChatQnA/benchmark/benchmark.yaml
+++ b/ChatQnA/benchmark/benchmark.yaml
@@ -41,7 +41,7 @@ test_cases:
     run_test: false
     service_name: "llm-svc" # Replace with your service name
     parameters:
-      max_new_tokens: 128
+      max_tokens: 128
       temperature: 0.01
       top_k: 10
       top_p: 0.95
diff --git a/ChatQnA/chatqna_no_wrapper.py b/ChatQnA/chatqna_no_wrapper.py
index 2780c7486..c08c6a2f3 100644
--- a/ChatQnA/chatqna_no_wrapper.py
+++ b/ChatQnA/chatqna_no_wrapper.py
@@ -69,10 +69,12 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
         next_inputs = {}
         next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
         next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
-        next_inputs["max_tokens"] = llm_parameters_dict["max_new_tokens"]
+        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
         next_inputs["stream"] = inputs["streaming"]
-        next_inputs["frequency_penalty"] = inputs["repetition_penalty"]
+        next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
+        next_inputs["presence_penalty"] = inputs["presence_penalty"]
+        next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
         next_inputs["temperature"] = inputs["temperature"]
         inputs = next_inputs
diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
index 3c28d1c10..9b13d8185 100644
--- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -229,7 +229,7 @@ OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 7eb75431a..5eca0d284 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -438,18 +438,31 @@ docker compose -f compose_vllm.yaml up -d

   This service depends on the LLM backend service above; on the first startup it can take a long time to become ready, so please wait.

   ```bash
+  # TGI service
   curl http://${host_ip}:9000/v1/chat/completions\
     -X POST \
-    -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+    -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
     -H 'Content-Type: application/json'
   ```
+
+  For parameters in TGI mode, please refer to the [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (note that "max_new_tokens" has been renamed to "max_tokens").
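  For a quick sanity check of the rename, the request body is unchanged except for the field name (a minimal sketch reusing the endpoint and port from the example above; the old field name is shown only for contrast):

  ```bash
  # Before this patch, the generation limit was passed as "max_new_tokens":
  #   -d '{"query":"What is Deep Learning?","max_new_tokens":17,"streaming":false}'
  # After the alignment, the same limit is passed as "max_tokens":
  curl http://${host_ip}:9000/v1/chat/completions \
    -X POST \
    -d '{"query":"What is Deep Learning?","max_tokens":17,"streaming":false}' \
    -H 'Content-Type: application/json'
  ```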
+
+  ```bash
+  # vLLM Service
+  curl http://${host_ip}:9000/v1/chat/completions \
+    -X POST \
+    -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0,"streaming":false}' \
+    -H 'Content-Type: application/json'
+  ```
+
+  For parameters in vLLM mode, please refer to the [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
+
8. MegaService

   ```bash
-  curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
-    "messages": "What is the revenue of Nike in 2023?"
-    }'
+  curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
+     "messages": "What is the revenue of Nike in 2023?"
+     }'
   ```

9. Dataprep Microservice (Optional)
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
index 25ba15c3f..c11ab8e9f 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
@@ -304,7 +304,7 @@ docker compose -f compose_qdrant.yaml up -d
 ```bash
 curl http://${host_ip}:6047/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
index bc41c782a..ec8e3ad09 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -442,18 +442,41 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
7. LLM Microservice

   ```bash
+  # TGI service
+  curl http://${host_ip}:9000/v1/chat/completions\
+    -X POST \
+    -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+    -H 'Content-Type: application/json'
+  ```
+
+  For parameters in TGI mode, please refer to the [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (note that "max_new_tokens" has been renamed to "max_tokens").
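  Note that the penalty parameters aligned by this patch are not interchangeable: TGI's `repetition_penalty` is a multiplicative factor (1.0 disables it; values slightly above 1.0 discourage repetition), while the OpenAI-style `frequency_penalty` and `presence_penalty` used in vLLM mode are additive offsets, typically in the range -2.0 to 2.0. A sketch of one request per mode, with illustrative values and the endpoint reused from the examples above:

  ```bash
  # TGI mode: multiplicative repetition_penalty (1.0 = no penalty)
  curl http://${host_ip}:9000/v1/chat/completions \
    -X POST \
    -d '{"query":"What is Deep Learning?","max_tokens":17,"repetition_penalty":1.03,"streaming":false}' \
    -H 'Content-Type: application/json'

  # vLLM mode: additive OpenAI-style penalties (0 = no penalty)
  curl http://${host_ip}:9000/v1/chat/completions \
    -X POST \
    -d '{"query":"What is Deep Learning?","max_tokens":17,"frequency_penalty":0.5,"presence_penalty":0.5,"streaming":false}' \
    -H 'Content-Type: application/json'
  ```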
+
+  ```bash
+  # vLLM Service
   curl http://${host_ip}:9000/v1/chat/completions \
+    -X POST \
+    -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0,"streaming":false}' \
+    -H 'Content-Type: application/json'
+  ```
+
+  For parameters in vLLM mode, please refer to the [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
+
+  ```bash
+  # vLLM-on-Ray Service
+  curl http://${host_ip}:9000/v1/chat/completions \
    -X POST \
-   -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+   -d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03,"streaming":false}' \
    -H 'Content-Type: application/json'
   ```
+
+  For parameters in vLLM-on-Ray mode, please refer to the [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)

8. MegaService

   ```bash
   curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
-    "messages": "What is the revenue of Nike in 2023?"
-    }'
+     "messages": "What is the revenue of Nike in 2023?"
+     }'
   ```

9. Dataprep Microservice (Optional)
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md
index 2e2d3d023..8ada1e525 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md
@@ -278,7 +278,7 @@ and the log shows model warm up, please wait for a while and try it later.
 ```
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```
diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md
index cfdda158f..7e3966a7f 100644
--- a/ChatQnA/docker_compose/nvidia/gpu/README.md
+++ b/ChatQnA/docker_compose/nvidia/gpu/README.md
@@ -280,7 +280,7 @@ docker compose up -d
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions \
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```
diff --git a/CodeGen/README.md b/CodeGen/README.md
index bc93ff473..cbf94bbb8 100644
--- a/CodeGen/README.md
+++ b/CodeGen/README.md
@@ -132,7 +132,7 @@ Two ways of consuming CodeGen Service:
 http_proxy="" curl http://${host_ip}:8028/generate \
   -X POST \
-  -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' \
+  -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place.
If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index d7dc3376e..8bdde1f75 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -138,7 +138,7 @@ docker compose up -d ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_new_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 74afd54ae..2a5040ea0 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -119,7 +119,7 @@ docker compose up -d ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_new_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/CodeGen/tests/test_gmc_on_gaudi.sh b/CodeGen/tests/test_gmc_on_gaudi.sh index ad16e2108..805237208 100755 --- a/CodeGen/tests/test_gmc_on_gaudi.sh +++ b/CodeGen/tests/test_gmc_on_gaudi.sh @@ -34,7 +34,7 @@ function validate_codegen() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log exit_code=$? 
if [ $exit_code -ne 0 ]; then
    echo "codegen failed, please check the logs in ${LOG_PATH}!"
diff --git a/CodeGen/tests/test_gmc_on_xeon.sh b/CodeGen/tests/test_gmc_on_xeon.sh
index 92f620365..5f3ff0eae 100755
--- a/CodeGen/tests/test_gmc_on_xeon.sh
+++ b/CodeGen/tests/test_gmc_on_xeon.sh
@@ -34,7 +34,7 @@ function validate_codegen() {
   export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
   echo "$CLIENT_POD"
   accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
-  kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
+  kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
   exit_code=$?
   if [ $exit_code -ne 0 ]; then
     echo "codegen failed, please check the logs in ${LOG_PATH}!"
diff --git a/CodeTrans/README.md b/CodeTrans/README.md
index 0a00ca902..a1b95b154 100644
--- a/CodeTrans/README.md
+++ b/CodeTrans/README.md
@@ -127,7 +127,7 @@ By default, the UI runs on port 5173 internally.
 http_proxy="" curl http://${host_ip}:8008/generate \
   -X POST \
-  -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes.
### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` diff --git a/DocSum/README.md b/DocSum/README.md index a6fb0ea9f..ca1ebfeba 100644 --- a/DocSum/README.md +++ b/DocSum/README.md @@ -149,7 +149,7 @@ Two ways of consuming Document Summarization Service: http_proxy="" curl http://${host_ip}:8008/generate \ -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index dd1f59f27..312f191ff 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -271,7 +271,7 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/README.md b/SearchQnA/docker_compose/intel/cpu/xeon/README.md index f31975ac6..5dbd77464 100644 --- a/SearchQnA/docker_compose/intel/cpu/xeon/README.md +++ b/SearchQnA/docker_compose/intel/cpu/xeon/README.md @@ -140,7 +140,7 @@ curl http://${host_ip}:3006/generate \ # llm microservice curl http://${host_ip}:3007/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md index b34398c35..6021c7938 100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md @@ -150,7 +150,7 @@ curl http://${host_ip}:3006/generate \ # llm microservice curl http://${host_ip}:3007/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md index 3a6058e0c..8f0d5b6b3 100644 --- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md +++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md @@ -138,28 +138,28 @@ Follow the instructions to validate MicroServices. 2. 
MegaService - ```bash - curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - ``` +```bash +curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 300 + }' +``` ## 🚀 Launch the UI diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md index 2a8f3a276..84783353a 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md @@ -95,28 +95,28 @@ Follow the instructions to validate MicroServices. 2. MegaService - ```bash - curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - ``` +```bash +curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 300 + }' +``` ## 🚀 Launch the UI