Skip to content

Commit

Permalink
fix vllm docker command (#388)
Browse files Browse the repository at this point in the history
Signed-off-by: Xinyao Wang <[email protected]>
  • Loading branch information
XinyaoWa authored Aug 5, 2024
1 parent fb0ea3d commit c1a5883
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion comps/llms/text-generation/vllm/launch_vllm_service.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ volume=$PWD/data
if [ "$hw_mode" = "hpu" ]; then
docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture "
else
docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model $model_name --host 0.0.0.0 --port 80"
docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 vllm:cpu --model $model_name --host 0.0.0.0 --port 80
fi

0 comments on commit c1a5883

Please sign in to comment.