diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 0cfe6be9ac767..fc12b6e82c836 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1284,6 +1284,10 @@ def _run_engine( if use_tqdm: pbar.close() + + # Make sure that all workers are finished. + self.llm_engine.stop_remote_worker_execution_loop() + # Sort the outputs by request ID. # This is necessary because some requests may be finished earlier than # its previous requests.