From 411abd224fe460405f67a7686ea67e6f28fa8ef2 Mon Sep 17 00:00:00 2001 From: Rui Qiao <161574667+ruisearch42@users.noreply.github.com> Date: Tue, 14 Jan 2025 21:19:55 -0800 Subject: [PATCH] [core] Turn off GPU communication overlap for Ray executor (#12051) Signed-off-by: Rui Qiao --- vllm/envs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index c4a568c680db0..b7b597ea15af3 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -45,7 +45,7 @@ VLLM_USE_RAY_SPMD_WORKER: bool = False VLLM_USE_RAY_COMPILED_DAG: bool = False VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL: bool = True - VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = True + VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False VLLM_WORKER_MULTIPROC_METHOD: str = "fork" VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets") VLLM_IMAGE_FETCH_TIMEOUT: int = 5 @@ -340,11 +340,11 @@ def get_default_config_root(): lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL", "1")) ), - # If the env var is set, it enables GPU communication overlap in - # Ray's compiled DAG. This flag is ignored if + # If the env var is set, it enables GPU communication overlap + # (experimental feature) in Ray's compiled DAG. This flag is ignored if # VLLM_USE_RAY_COMPILED_DAG is not set. "VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM": - lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "1")) + lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "0")) ), # Use dedicated multiprocess context for workers.