Skip to content

Commit

Permalink
[Bugfix] fix race condition that leads to wrong order of token returned (vllm-project#10802)
Browse files Browse the repository at this point in the history

Signed-off-by: Jannis Schönleber <[email protected]>
  • Loading branch information
joennlae authored and abmfy committed Jan 24, 2025
1 parent 81d75e1 commit b8b8ee1
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions vllm/engine/multiprocessing/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,14 @@ async def setup(self):
"""Setup the client before it starts sending server requests."""

# Start output_loop
self.output_loop = asyncio.create_task(self.run_output_handler_loop())
if self.output_loop is None:
# Create the output loop only once: setup() is called multiple times
# during the startup of the engine, and creating a new task on every
# call would leave multiple output_loops running concurrently. That
# leads to race conditions and to tokens being returned by the engine
# in the wrong order.
self.output_loop = asyncio.create_task(
self.run_output_handler_loop())

with self.get_data_socket() as socket:
# Wait until server is ready.
Expand All @@ -271,8 +278,9 @@ async def setup(self):
self.tracing_flag = response.tracing_enabled

# Start health_loop.
self.health_loop = asyncio.create_task(
self.run_heartbeat_loop(timeout=VLLM_RPC_TIMEOUT))
if self.health_loop is None:
self.health_loop = asyncio.create_task(
self.run_heartbeat_loop(timeout=VLLM_RPC_TIMEOUT))

def close(self):
"""Destroy the ZeroMQ Context."""
Expand Down

0 comments on commit b8b8ee1

Please sign in to comment.