ChatQnA: Update kubernetes xeon chatqna remote inference and svelte UI (#1215)

Signed-off-by: sgurunat <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
sgurunat and pre-commit-ci[bot] authored Dec 4, 2024
1 parent 3299e5c commit 031cf6e
Showing 2 changed files with 21 additions and 9 deletions.
@@ -70,9 +70,8 @@ data:
   no_proxy: ""
   LOGFLAG: ""
   vLLM_ENDPOINT: "insert-your-remote-inference-endpoint"
-  LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
   CLIENTID: ""
   CLIENT_SECRET: ""
   TOKEN_URL: ""
@@ -216,6 +215,10 @@ data:
       proxy_set_header X-Real-IP $remote_addr;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto $scheme;
+      proxy_buffering off;
+      proxy_cache off;
+      proxy_request_buffering off;
+      gzip off;
     }

     location /v1/dataprep {
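
Editorial note on the hunk above: the four added directives turn off response buffering, caching, request buffering, and compression in the UI's nginx proxy, which is what lets streamed (SSE) answers reach the browser chunk by chunk instead of arriving in one buffered block once generation finishes. As a hedged illustration only (the endpoint path, base URL, and payload shape below are assumptions, not taken from this commit), a TypeScript client consuming such a streamed response could look like this:

// Hypothetical streaming client, for illustration only: the endpoint path,
// payload shape, and base URL are assumptions and not part of this commit.
async function streamAnswer(baseUrl: string, question: string): Promise<void> {
  const response = await fetch(`${baseUrl}/v1/chatqna`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: question }),
  });
  if (!response.body) {
    throw new Error("Expected a streaming response body");
  }
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  // With proxy_buffering/proxy_cache/gzip off, nginx forwards each chunk as
  // soon as the backend emits it, so tokens can be rendered live.
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    console.log(decoder.decode(value, { stream: true }));
  }
}

// Example usage (assumed URL): streamAnswer("http://localhost:80", "What is OPEA?");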
@@ -552,6 +555,9 @@ spec:
         {}
       containers:
         - name: chatqna-ui
+          env:
+            - name: MODEL_ID
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             {}
           image: "opea/chatqna-ui:latest"
@@ -691,7 +697,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/embedding-tei:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           ports:
             - name: embedding-usvc
               containerPort: 6000
@@ -769,7 +775,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/llm-vllm:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           ports:
             - name: llm-uservice
               containerPort: 9000
@@ -919,7 +925,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/reranking-tei:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           ports:
             - name: reranking-usvc
               containerPort: 8000
@@ -1257,7 +1263,7 @@ spec:
             - name: EMBEDDING_SERVICE_HOST_IP
               value: chatqna-embedding-usvc
             - name: MODEL_ID
-              value: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:
@@ -1269,7 +1275,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/chatqna-wrapper:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           volumeMounts:
             - mountPath: /tmp
               name: tmp
8 changes: 7 additions & 1 deletion ChatQnA/ui/svelte/src/lib/network/chat/Network.ts
@@ -16,13 +16,19 @@ import { env } from "$env/dynamic/public";
 import { SSE } from "sse.js";

 const CHAT_BASE_URL = env.CHAT_BASE_URL;
+const MODEL_ID = env.MODEL_ID;

 export async function fetchTextStream(query: string) {
   let payload = {};
   let url = "";
+  let modelId = "Intel/neural-chat-7b-v3-3";

+  if (MODEL_ID) {
+    modelId = MODEL_ID;
+  }
+
   payload = {
-    model: "Intel/neural-chat-7b-v3-3",
+    model: `${modelId}`,
     messages: query,
   };
   url = `${CHAT_BASE_URL}`;
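
For context on how the two changed files fit together: the Kubernetes manifest now injects a MODEL_ID environment variable into the chatqna-ui container, and Network.ts reads it, falling back to the previous hard-coded model when it is absent. The standalone TypeScript sketch below illustrates just that fallback; the resolveModelId helper is hypothetical, and the real file reads MODEL_ID through SvelteKit's $env/dynamic/public inside fetchTextStream.

// Hypothetical standalone sketch of the fallback added in Network.ts; the
// helper name is illustrative and does not exist in the repository.
const DEFAULT_MODEL = "Intel/neural-chat-7b-v3-3";

function resolveModelId(envModelId: string | undefined): string {
  // Prefer the environment-supplied model, otherwise keep the old default.
  return envModelId ? envModelId : DEFAULT_MODEL;
}

// With MODEL_ID unset, behaviour is unchanged ...
console.log(resolveModelId(undefined)); // "Intel/neural-chat-7b-v3-3"
// ... and with the manifest's new value, the larger model is requested.
console.log(resolveModelId("meta-llama/Meta-Llama-3.1-70B-Instruct"));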
