diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml
index 805707dc4..3262b5420 100644
--- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml
+++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml
@@ -76,8 +76,8 @@ data:
   no_proxy: ""
   LOGFLAG: ""
   vLLM_ENDPOINT: "insert-your-remote-vllm-inference-endpoint"
-  LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
   CLIENTID: ""
   CLIENT_SECRET: ""
   TOKEN_URL: ""
@@ -174,6 +174,10 @@ data:
       proxy_set_header X-Real-IP $remote_addr;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto $scheme;
+      proxy_buffering off;
+      proxy_cache off;
+      proxy_request_buffering off;
+      gzip off;
     }

     location /v1/dataprep {
@@ -459,6 +463,9 @@ spec:
         {}
       containers:
         - name: chatqna-ui
+          env:
+            - name: MODEL_ID
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             {}
           image: "opea/chatqna-ui:latest"
@@ -981,7 +988,7 @@ spec:
             - name: EMBEDDING_SERVICE_HOST_IP
               value: chatqna-embedding-usvc
             - name: MODEL_ID
-              value: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:
@@ -993,7 +1000,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/chatqna-wrapper:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           volumeMounts:
             - mountPath: /tmp
               name: tmp