
Align parameters for "max_tokens, repetition_penalty, presence_penalty, frequency_penalty" #726

Merged (21 commits) on Sep 19, 2024.

Changes from 1 commit
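Every hunk in this PR makes the same substitution: the request-body key `max_tokens` becomes `max_new_tokens`. A minimal sketch of that rename as a helper function (hypothetical, not part of the PR — shown only to summarize the change):

```python
def align_generation_params(payload: dict) -> dict:
    """Return a copy of `payload` with the legacy "max_tokens" key
    renamed to "max_new_tokens", as this PR does across the examples."""
    aligned = dict(payload)
    if "max_tokens" in aligned:
        aligned["max_new_tokens"] = aligned.pop("max_tokens")
    return aligned
```

Other sampling parameters named in the PR title (`repetition_penalty`, `presence_penalty`, `frequency_penalty`) pass through unchanged.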
2 changes: 1 addition & 1 deletion AudioQnA/docker/gaudi/README.md
@@ -130,6 +130,6 @@ curl http://${host_ip}:3002/v1/audio/speech \

````diff
 ```bash
 curl http://${host_ip}:3008/v1/audioqna \
   -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
+  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_new_tokens":64}' \
   -H 'Content-Type: application/json'
 ```
````
2 changes: 1 addition & 1 deletion AudioQnA/docker/ui/svelte/src/lib/modules/chat/network.ts
@@ -26,7 +26,7 @@ export async function fetchAudioText(file) {

````diff
   const url = `${CHAT_URL}`;
   const requestBody = {
     audio: file,
-    max_tokens: 64,
+    max_new_tokens: 64,
   };

   const init: RequestInit = {
````
2 changes: 1 addition & 1 deletion AudioQnA/docker/xeon/README.md
@@ -130,6 +130,6 @@ curl http://${host_ip}:3002/v1/audio/speech \

````diff
 ```bash
 curl http://${host_ip}:3008/v1/audioqna \
   -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
+  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_new_tokens":64}' \
   -H 'Content-Type: application/json'
 ```
````
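The AudioQnA request body used in the curl examples above can be assembled programmatically; a sketch (the base64 string is the short WAV sample from the READMEs, and `audioqna_body` is a hypothetical helper):

```python
import json

def audioqna_body(audio_b64: str, max_new_tokens: int = 64) -> str:
    """Serialize the JSON body sent to the /v1/audioqna endpoint."""
    return json.dumps({"audio": audio_b64, "max_new_tokens": max_new_tokens})

body = audioqna_body("UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA")
```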
2 changes: 1 addition & 1 deletion AudioQnA/kubernetes/manifests/README.md
@@ -28,5 +28,5 @@ Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary.

````diff
 ```bash
 kubectl get pods

-curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json'
+curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_new_tokens":64}' -H 'Content-Type: application/json'
 ```
````
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_audioqna_on_gaudi.sh
@@ -72,7 +72,7 @@ function start_services() {

````diff


 function validate_megaservice() {
-    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
+    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_new_tokens":64}' -H 'Content-Type: application/json')
     echo "result is === $result"
     if [[ $result == *"AAA"* ]]; then
         echo "Result correct."
````
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_audioqna_on_xeon.sh
@@ -61,7 +61,7 @@ function start_services() {

````diff


 function validate_megaservice() {
-    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
+    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_new_tokens":64}' -H 'Content-Type: application/json')
     echo $result
     if [[ $result == *"AAA"* ]]; then
         echo "Result correct."
````
2 changes: 1 addition & 1 deletion ChatQnA/docker/gaudi/README.md
@@ -337,7 +337,7 @@ curl http://${host_ip}:8007/v1/completions \

````diff
   -d '{
     "model": "${LLM_MODEL_ID}",
     "prompt": "What is Deep Learning?",
-    "max_tokens": 32,
+    "max_new_tokens": 32,
     "temperature": 0
   }'
 ```
````
2 changes: 1 addition & 1 deletion ChatQnA/docker/xeon/README.md
@@ -331,7 +331,7 @@ curl http://${host_ip}:9009/generate \

````diff
 # vLLM Service
 curl http://${host_ip}:9009/v1/completions \
   -H "Content-Type: application/json" \
-  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
+  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_new_tokens": 32, "temperature": 0}'
 ```

 7. LLM Microservice
````
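The vLLM completion payload from the ChatQnA examples above, assembled as a sketch (model name and values taken verbatim from the diff; serialization only, no request is sent):

```python
import json

# Completion request body matching the ChatQnA vLLM examples above,
# using the aligned "max_new_tokens" parameter name.
payload = {
    "model": "Intel/neural-chat-7b-v3-3",
    "prompt": "What is Deep Learning?",
    "max_new_tokens": 32,
    "temperature": 0,
}
body = json.dumps(payload)
```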
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_chatqna_vllm_on_gaudi.sh
@@ -143,7 +143,7 @@ function validate_microservices() {

````diff
         "text" \
         "vllm-llm" \
         "vllm-gaudi-server" \
-        '{"model": "Intel/neural-chat-7b-v3-3","prompt": "What is Deep Learning?","max_tokens": 32,"temperature": 0}'
+        '{"model": "Intel/neural-chat-7b-v3-3","prompt": "What is Deep Learning?","max_new_tokens": 32,"temperature": 0}'

     # llm microservice
     validate_services \
````
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_chatqna_vllm_on_xeon.sh
@@ -145,7 +145,7 @@ function validate_microservices() {

````diff
         "text" \
         "vllm-llm" \
         "vllm-service" \
-        '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
+        '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_new_tokens": 32, "temperature": 0}'

     # llm microservice
     validate_services \
````
2 changes: 1 addition & 1 deletion VisualQnA/docker/gaudi/README.md
@@ -121,7 +121,7 @@ curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d

````diff
       ]
     }
   ],
-  "max_tokens": 300
+  "max_new_tokens": 300
 }'
 ```
````
2 changes: 1 addition & 1 deletion VisualQnA/docker/xeon/README.md
@@ -158,7 +158,7 @@ curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d

````diff
       ]
     }
   ],
-  "max_tokens": 300
+  "max_new_tokens": 300
 }'
 ```
````
2 changes: 1 addition & 1 deletion VisualQnA/kubernetes/README.md
@@ -52,5 +52,5 @@ In the below example we illustrate on Xeon.

````diff
       ]
     }
   ],
-  "max_tokens": 128}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_visualqna.log
+  "max_new_tokens": 128}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_visualqna.log
 ```
````
2 changes: 1 addition & 1 deletion VisualQnA/kubernetes/manifests/README.md
@@ -47,5 +47,5 @@ curl http://localhost:8888/v1/visualqna \

````diff
       ]
     }
   ],
-  "max_tokens": 128}'
+  "max_new_tokens": 128}'
 ```
````
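The VisualQnA hunks above show only the tail of an OpenAI-style `messages` payload; a sketch of a full body (the message content is a hypothetical stand-in, since the diff elides it — only the `max_new_tokens` key is taken from the hunks):

```python
import json

# VisualQnA-style request body; the "messages" list is illustrative.
payload = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/sample.png"}},
            ],
        }
    ],
    "max_new_tokens": 128,
}
body = json.dumps(payload)
```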