From 72f2fc7429f5bfcb5cc71b8ed93b06b0b0d8271d Mon Sep 17 00:00:00 2001
From: Benson Ma
Date: Thu, 26 Jan 2023 13:41:32 -0800
Subject: [PATCH] [build] Fix the nightly builds

- Replace installation of `pytorch-cuda` with `cuda`, as the former package has been recently slimmed down to remove `nvcc` and other CUDA dependencies required for FBGEMM build (see https://github.com/pytorch/pytorch/issues/91334)
- Add `pytorch` package installation checks to make sure we are installing the GPU variant of the package
- Add `cuda` package installation checks to make sure we can access `nvcc` and `cuda_runtime.h`
- Remove Python 3.7 builds from the nightlies
---
 .github/scripts/build_wheel.bash           |  2 +-
 .github/scripts/test_torchrec.bash         |  2 +-
 .github/scripts/test_wheel.bash            |  2 +-
 .github/workflows/fbgemm_nightly_build.yml | 28 ++++++++++++++++------
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/.github/scripts/build_wheel.bash b/.github/scripts/build_wheel.bash
index dbf473da05..e4c7022ca8 100644
--- a/.github/scripts/build_wheel.bash
+++ b/.github/scripts/build_wheel.bash
@@ -18,7 +18,7 @@ usage () {
   echo "-v : verbose"
   echo "-h : help"
   echo "PACKAGE_NAME : output package name (e.g., fbgemm_gpu_nightly)"
-  echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)"
+  echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)"
   echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
   echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)"
   echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)"
diff --git a/.github/scripts/test_torchrec.bash b/.github/scripts/test_torchrec.bash
index 1d96e85d67..d0b2f25fdd 100644
--- a/.github/scripts/test_torchrec.bash
+++ b/.github/scripts/test_torchrec.bash
@@ -20,7 +20,7 @@ usage () {
   echo "PACKAGE_NAME : output package name of TorchRec (e.g., torchrec_nightly)"
   echo " Note: TorchRec is sensitive to its package name"
   echo " e.g., torchrec needs fbgemm-gpu while torchrec_nightly needs fbgemm-gpu-nightly"
-  echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)"
+  echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)"
   echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
   echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)"
   echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file"
diff --git a/.github/scripts/test_wheel.bash b/.github/scripts/test_wheel.bash
index 5168d5a0a7..ce78fd097c 100644
--- a/.github/scripts/test_wheel.bash
+++ b/.github/scripts/test_wheel.bash
@@ -17,7 +17,7 @@ usage () {
   echo "Usage: bash test_wheel.bash -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c PYTORCH_CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]"
   echo "-v : verbose"
   echo "-h : help"
-  echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)"
+  echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)"
   echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
   echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)"
   echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file"
diff --git a/.github/workflows/fbgemm_nightly_build.yml b/.github/workflows/fbgemm_nightly_build.yml
index 58238e5f77..a96a574d8f 100644
--- a/.github/workflows/fbgemm_nightly_build.yml
+++ b/.github/workflows/fbgemm_nightly_build.yml
@@ -19,10 +19,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: linux.2xlarge
-            python-version: "3.7"
-            python-tag: "py37"
-            cuda-tag: "cu11"
           - os: linux.2xlarge
             python-version: "3.8"
             python-tag: "py38"
@@ -70,7 +66,16 @@ jobs:
     - name: Install PyTorch and CUDA
       shell: bash
       run: |
-        conda install -n build_binary -y pytorch pytorch-cuda=11.7 -c pytorch-nightly -c nvidia
+        conda install -n build_binary -y pytorch cuda -c pytorch-nightly -c "nvidia/label/cuda-11.7.1"
+
+        # Ensure that CUDA is properly installed
+        conda run -n build_binary which nvcc
+        conda run -n build_binary test -n "$(find $HOME/miniconda -name cuda_runtime.h)"
+        echo "Successfully installed CUDA"
+
+        # Ensure that the PyTorch nightly build is the GPU variant (i.e. contains cuDNN)
+        conda list -n build_binary pytorch | grep cudnn
+        echo "Successfully installed pytorch-nightly"
     - name: Install Other Dependencies
       shell: bash
       run: |
@@ -130,7 +135,7 @@ jobs:
     strategy:
       matrix:
         os: [linux.g5.4xlarge.nvidia.gpu]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10"]
         cuda-tag: ["cu11"]
     needs: build_on_cpu
     steps:
@@ -217,7 +222,16 @@ jobs:
     - name: Install PyTorch using Conda
       shell: bash
       run: |
-        conda install -n build_binary -y pytorch pytorch-cuda=11.7 -c pytorch-nightly -c nvidia
+        conda install -n build_binary -y pytorch cuda -c pytorch-nightly -c "nvidia/label/cuda-11.7.1"
+
+        # Ensure that CUDA is properly installed
+        conda run -n build_binary which nvcc
+        conda run -n build_binary test -n "$(find $HOME/miniconda -name cuda_runtime.h)"
+        echo "Successfully installed CUDA"
+
+        # Ensure that the PyTorch nightly build is the GPU variant (i.e. contains cuDNN)
+        conda list -n build_binary pytorch | grep cudnn
+        echo "Successfully installed pytorch-nightly"
     # download wheel from GHA
     - name: Download wheel
       uses: actions/download-artifact@v2