diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 5cb903d30..d24950e73 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,4 +15,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - run: docker build --build-arg=KUBERNETES_MINOR_VERSION=latest --file Dockerfile.kubetest2 .
\ No newline at end of file
+      - run: docker build --build-arg=KUBERNETES_MINOR_VERSION=latest --file Dockerfile.kubetest2 .
+  build-nccl:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - run: docker build --file e2e2/test/images/Dockerfile.aws-efa-nccl-tests .
\ No newline at end of file
diff --git a/e2e2/test/images/Dockerfile.aws-efa-nccl-tests b/e2e2/test/images/Dockerfile.aws-efa-nccl-tests
index 0b2b638f8..09d9fb15d 100644
--- a/e2e2/test/images/Dockerfile.aws-efa-nccl-tests
+++ b/e2e2/test/images/Dockerfile.aws-efa-nccl-tests
@@ -1,8 +1,9 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
+# Start with the NVIDIA CUDA base image
+FROM nvidia/cuda:12.5.0-devel-ubuntu22.04
 
 ARG EFA_INSTALLER_VERSION=latest
 # 1.7.4+ is required, to enforce proper EFA function with OFI_NCCL_DISABLE_GDR_REQUIRED_CHECK=0
-ARG AWS_OFI_NCCL_VERSION=1.7.4
+ARG AWS_OFI_NCCL_VERSION=1.9.1
 ARG NCCL_TESTS_VERSION=master
 
 # Install necessary dependencies