From b4f4f1495a05a33ab7b91dbea0e745bbfa1df7f6 Mon Sep 17 00:00:00 2001 From: Rui Qiao Date: Tue, 14 Jan 2025 18:49:38 +0000 Subject: [PATCH 1/2] [core] Turn off GPU communication overlap for Ray executor Signed-off-by: Rui Qiao --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index c4a568c680db0..2a9a819258de8 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -45,7 +45,7 @@ VLLM_USE_RAY_SPMD_WORKER: bool = False VLLM_USE_RAY_COMPILED_DAG: bool = False VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL: bool = True - VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = True + VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False VLLM_WORKER_MULTIPROC_METHOD: str = "fork" VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets") VLLM_IMAGE_FETCH_TIMEOUT: int = 5 @@ -344,7 +344,7 @@ def get_default_config_root(): # Ray's compiled DAG. This flag is ignored if # VLLM_USE_RAY_COMPILED_DAG is not set. "VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM": - lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "1")) + lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "0")) ), # Use dedicated multiprocess context for workers. From 535ea41b855ad75f1203c14060c94aecd7ec9195 Mon Sep 17 00:00:00 2001 From: Rui Qiao Date: Tue, 14 Jan 2025 18:52:39 +0000 Subject: [PATCH 2/2] up Signed-off-by: Rui Qiao --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 2a9a819258de8..b7b597ea15af3 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -340,8 +340,8 @@ def get_default_config_root(): lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL", "1")) ), - # If the env var is set, it enables GPU communication overlap in - # Ray's compiled DAG. This flag is ignored if + # If the env var is set, it enables GPU communication overlap + # (experimental feature) in Ray's compiled DAG. This flag is ignored if # VLLM_USE_RAY_COMPILED_DAG is not set. "VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM": lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "0"))