Commit 4c01e14

[ChatQnA] Remove enforce-eager to enable HPU graphs for better vLLM perf (#1210)

Signed-off-by: Wang, Kai Lawrence <[email protected]>
wangkl2 authored Dec 10, 2024
1 parent 6f9f6f0 commit 4c01e14
Showing 3 changed files with 2 additions and 3 deletions.
ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml (2 changes: 1 addition & 1 deletion)

@@ -97,7 +97,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
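For context: vLLM's --enforce-eager flag forces eager-mode execution and disables graph capture, so dropping it lets the Gaudi backend capture HPU graphs during warmup, trading a longer startup for better steady-state performance. A minimal shell sketch of bringing the stack up and waiting for warmup to finish, reusing the container name and log marker from the test script changed below; the poll interval is an assumption:

# Sketch: start the vLLM stack and wait for HPU graph warmup to complete.
# Container name and the "Warmup finished" marker come from the test script
# in this commit; the 10-second poll interval is an assumption.
docker compose -f ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml up -d
until docker logs vllm-gaudi-server 2>&1 | grep -q "Warmup finished"; do
    sleep 10   # warmup with graph capture takes noticeably longer than eager mode
done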
[second changed file; name not shown in this view] (1 change: 1 deletion)

@@ -1286,7 +1286,6 @@ spec:
               type: RuntimeDefault
           image: "opea/vllm-gaudi:latest"
           args:
-            - "--enforce-eager"
             - "--model"
             - "$(MODEL_ID)"
             - "--tensor-parallel-size"
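The same flag is removed from the Kubernetes manifest's container args. One hedged way to confirm a redeployed pod no longer receives it; the label selector below is an assumption and should be matched to the actual Deployment:

# Sketch: inspect the vLLM container's args after redeploying the manifest.
# The label selector is an assumption; adjust it to the real Deployment.
kubectl get pods -l app=vllm -o jsonpath='{.items[0].spec.containers[0].args}'
# The output should list --model, --tensor-parallel-size, etc., with no --enforce-eager.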
ChatQnA/tests/test_compose_vllm_on_gaudi.sh (2 changes: 1 addition & 1 deletion)

@@ -39,7 +39,7 @@ function start_services() {
     # Start Docker Containers
     docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
-    until [[ "$n" -ge 100 ]]; do
+    until [[ "$n" -ge 160 ]]; do
         echo "n=$n"
         docker logs vllm-gaudi-server > vllm_service_start.log
         if grep -q "Warmup finished" vllm_service_start.log; then
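Since HPU graph capture lengthens warmup, the wait loop's iteration cap rises from 100 to 160. The hunk ends before the loop body completes; below is a sketch of the full wait, where the sleep interval and counter increment are assumptions because they fall outside the diff:

# Sketch of the complete wait loop; the sleep interval and increment are
# assumed, since the hunk cuts off before them.
n=0
until [[ "$n" -ge 160 ]]; do
    docker logs vllm-gaudi-server > vllm_service_start.log
    if grep -q "Warmup finished" vllm_service_start.log; then
        break
    fi
    sleep 10        # assumed interval; the 160-iteration cap bounds the total wait
    n=$((n + 1))
done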
