diff --git a/sky/provision/docker_utils.py b/sky/provision/docker_utils.py index ab6c92d558a..8de7beab2e7 100644 --- a/sky/provision/docker_utils.py +++ b/sky/provision/docker_utils.py @@ -166,8 +166,7 @@ def _run(self, rc, cmd, error_msg='Failed to run docker setup commands', - stderr=stdout + stderr, - stream_logs=False) + stderr=stdout + stderr) return stdout.strip() def initialize(self) -> str: @@ -231,6 +230,8 @@ def initialize(self) -> str: # issue with nvidia container toolkit: # https://github.com/NVIDIA/nvidia-container-toolkit/issues/48 self._run( + '[ -f /etc/docker/daemon.json ] || ' + 'echo "{}" | sudo tee /etc/docker/daemon.json;' 'sudo jq \'.["exec-opts"] = ["native.cgroupdriver=cgroupfs"]\' ' '/etc/docker/daemon.json > /tmp/daemon.json;' 'sudo mv /tmp/daemon.json /etc/docker/daemon.json;' diff --git a/sky/skylet/providers/command_runner.py b/sky/skylet/providers/command_runner.py index ae54abe4a6b..06c5d6d48af 100644 --- a/sky/skylet/providers/command_runner.py +++ b/sky/skylet/providers/command_runner.py @@ -234,6 +234,8 @@ def run_init(self, *, as_head: bool, file_mounts: Dict[str, str], # issue with nvidia container toolkit: # https://github.com/NVIDIA/nvidia-container-toolkit/issues/48 self.run( + '[ -f /etc/docker/daemon.json ] || ' + 'echo "{}" | sudo tee /etc/docker/daemon.json;' 'sudo jq \'.["exec-opts"] = ["native.cgroupdriver=cgroupfs"]\' ' '/etc/docker/daemon.json > /tmp/daemon.json;' 'sudo mv /tmp/daemon.json /etc/docker/daemon.json;' diff --git a/tests/test_smoke.py b/tests/test_smoke.py index bcfe96cdbac..af05d8c3b62 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1464,6 +1464,7 @@ def test_ibm_job_queue_multinode(): @pytest.mark.no_scp # Doesn't support SCP for now @pytest.mark.no_oci # Doesn't support OCI for now @pytest.mark.no_kubernetes # Doesn't support Kubernetes for now +# TODO(zhwu): we should fix this for kubernetes def test_docker_preinstalled_package(generic_cloud: str): name = _get_cluster_name() test = Test(