From 4c3780c48a87c0cc63cef94fdef4ed2b9721fcb3 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 3 Oct 2024 04:20:39 -0700 Subject: [PATCH 1/2] Update Docker files to CUDA 12.5 and UCX 1.17 --- docker/UCXPy-rdma-core.dockerfile | 10 ++++------ docker/{ucx-py-cuda11.5.yml => ucx-py-cuda12.5.yml} | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) rename docker/{ucx-py-cuda11.5.yml => ucx-py-cuda12.5.yml} (90%) diff --git a/docker/UCXPy-rdma-core.dockerfile b/docker/UCXPy-rdma-core.dockerfile index 46020e9b..66915e05 100644 --- a/docker/UCXPy-rdma-core.dockerfile +++ b/docker/UCXPy-rdma-core.dockerfile @@ -1,15 +1,15 @@ -ARG CUDA_VERSION=11.5.2 -ARG DISTRIBUTION_VERSION=ubuntu20.04 +ARG CUDA_VERSION=12.5.1 +ARG DISTRIBUTION_VERSION=ubuntu22.04 FROM nvidia/cuda:${CUDA_VERSION}-devel-${DISTRIBUTION_VERSION} # Tag to checkout from UCX repository -ARG UCX_VERSION_TAG=v1.13.0 +ARG UCX_VERSION_TAG=v1.17.0 # Where to install conda, and what to name the created environment ARG CONDA_HOME=/opt/conda ARG CONDA_ENV=ucx # Name of conda spec file in the current working directory that # will be used to build the conda environment. -ARG CONDA_ENV_SPEC=ucx-py-cuda11.5.yml +ARG CONDA_ENV_SPEC=ucx-py-cuda12.5.yml ENV CONDA_ENV="${CONDA_ENV}" ENV CONDA_HOME="${CONDA_HOME}" @@ -32,8 +32,6 @@ RUN apt-get update -y \ pkg-config \ udev \ curl \ - librdmacm-dev \ - rdma-core \ && apt-get autoremove -y \ && apt-get clean diff --git a/docker/ucx-py-cuda11.5.yml b/docker/ucx-py-cuda12.5.yml similarity index 90% rename from docker/ucx-py-cuda11.5.yml rename to docker/ucx-py-cuda12.5.yml index c69f8620..db614076 100644 --- a/docker/ucx-py-cuda11.5.yml +++ b/docker/ucx-py-cuda12.5.yml @@ -5,7 +5,7 @@ channels: dependencies: - python=3.10 - - cudatoolkit=11.5 + - cuda-version=12.5 - setuptools - cython>=3.0.0 - pytest From a40bb972a49429b68a80f3d9f783c1ff46ebd173 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 3 Oct 2024 05:09:30 -0700 Subject: [PATCH 2/2] Add `librdmacm-dev`/`rdma-core` dependencies back --- docker/UCXPy-rdma-core.dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/UCXPy-rdma-core.dockerfile b/docker/UCXPy-rdma-core.dockerfile index 66915e05..5895d04a 100644 --- a/docker/UCXPy-rdma-core.dockerfile +++ b/docker/UCXPy-rdma-core.dockerfile @@ -32,6 +32,8 @@ RUN apt-get update -y \ pkg-config \ udev \ curl \ + librdmacm-dev \ + rdma-core \ && apt-get autoremove -y \ && apt-get clean