diff --git a/examples/env_check.yaml b/examples/env_check.yaml
index 30adaefb3b0..7d2ed57ab8f 100644
--- a/examples/env_check.yaml
+++ b/examples/env_check.yaml
@@ -8,6 +8,11 @@ workdir: .
 setup: |
   echo "here"
   echo "export TEST_VAR=test" >> ~/.bashrc
+  # CUDA_VISIBLE_DEVICES must be unset when the user's setup commands start.
+  # Only exit on failure here; an `exit 0` would skip the checks below.
+  if [[ -v CUDA_VISIBLE_DEVICES ]]; then exit 1; fi
+  export CUDA_VISIBLE_DEVICES=1
+  [[ -v CUDA_VISIBLE_DEVICES ]] && exit 0 || exit 1
 
 run: |
   if [[ -z "${TEST_VAR}" ]]; then
diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py
index 8082dbd7cad..79907734596 100644
--- a/sky/backends/cloud_vm_ray_backend.py
+++ b/sky/backends/cloud_vm_ray_backend.py
@@ -208,6 +208,10 @@ def add_prologue(self,
         self._code += [
             textwrap.dedent(f"""\
                 _SETUP_CPUS = 0.0001
+                # The setup command will be run as a ray task with num_cpus=_SETUP_CPUS as the
+                # requirement; this means Ray will set CUDA_VISIBLE_DEVICES to an empty string.
+                # We unset it so that user setup command may properly use this env var.
+                setup_cmd = 'unset CUDA_VISIBLE_DEVICES; ' + setup_cmd
                 job_lib.set_status({job_id!r}, job_lib.JobStatus.SETTING_UP)
                 print({_CTRL_C_TIP_MESSAGE!r}, file=sys.stderr, flush=True)
                 total_num_nodes = len(ray.nodes())
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 9f4a736bfb2..2c7ec4576f5 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -229,7 +229,7 @@ def test_env_check():
     test = Test(
         'env_check',
         [
-            f'sky launch -y -c {name} examples/env_check.yaml',
+            f'sky launch -y -c {name} --detach-setup examples/env_check.yaml',
             f'sky logs {name} 1 --status',  # Ensure the job succeeded.
         ],
         f'sky down -y {name}',