From 1aff99e219a96e70e1e58cf7502649f8cd41b019 Mon Sep 17 00:00:00 2001 From: Benson Ma Date: Fri, 27 Jan 2023 21:32:27 -0800 Subject: [PATCH] Fix the nightly builds (#1562) Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/1562 Reviewed By: shintaro-iwasaki Differential Revision: D42824013 Pulled By: q10 fbshipit-source-id: eb330c4e31a4df5f21b1fc6a1f93a26d567dea7d --- .github/scripts/build_wheel.bash | 2 +- .github/scripts/test_torchrec.bash | 2 +- .github/scripts/test_wheel.bash | 2 +- .github/workflows/fbgemm_nightly_build.yml | 28 ++++++++++++++++------ 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/.github/scripts/build_wheel.bash b/.github/scripts/build_wheel.bash index dbf473da0..e4c7022ca 100644 --- a/.github/scripts/build_wheel.bash +++ b/.github/scripts/build_wheel.bash @@ -18,7 +18,7 @@ usage () { echo "-v : verbose" echo "-h : help" echo "PACKAGE_NAME : output package name (e.g., fbgemm_gpu_nightly)" - echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)" + echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)" echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))" echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)" echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)" diff --git a/.github/scripts/test_torchrec.bash b/.github/scripts/test_torchrec.bash index 1d96e85d6..d0b2f25fd 100644 --- a/.github/scripts/test_torchrec.bash +++ b/.github/scripts/test_torchrec.bash @@ -20,7 +20,7 @@ usage () { echo "PACKAGE_NAME : output package name of TorchRec (e.g., torchrec_nightly)" echo " Note: TorchRec is sensitive to its package name" echo " e.g., torchrec needs fbgemm-gpu while torchrec_nightly needs fbgemm-gpu-nightly" - echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)" + echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)" echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))" echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)" echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file" diff --git a/.github/scripts/test_wheel.bash b/.github/scripts/test_wheel.bash index 5168d5a0a..ce78fd097 100644 --- a/.github/scripts/test_wheel.bash +++ b/.github/scripts/test_wheel.bash @@ -17,7 +17,7 @@ usage () { echo "Usage: bash test_wheel.bash -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c PYTORCH_CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]" echo "-v : verbose" echo "-h : help" - echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)" + echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)" echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))" echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)" echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file" diff --git a/.github/workflows/fbgemm_nightly_build.yml b/.github/workflows/fbgemm_nightly_build.yml index 58238e5f7..a96a574d8 100644 --- a/.github/workflows/fbgemm_nightly_build.yml +++ b/.github/workflows/fbgemm_nightly_build.yml @@ -19,10 +19,6 @@ jobs: strategy: matrix: include: - - os: linux.2xlarge - python-version: "3.7" - python-tag: "py37" - cuda-tag: "cu11" - os: linux.2xlarge python-version: "3.8" python-tag: "py38" @@ -70,7 +66,16 @@ jobs: - name: Install PyTorch and CUDA shell: bash run: | - conda install -n build_binary -y pytorch pytorch-cuda=11.7 -c pytorch-nightly -c nvidia + conda install -n build_binary -y pytorch cuda -c pytorch-nightly -c "nvidia/label/cuda-11.7.1" + + # Ensure that CUDA is properly installed + conda run -n build_binary which nvcc + conda run -n build_binary test -n "$(find $HOME/miniconda -name cuda_runtime.h)" + echo "Successfully installed CUDA" + + # Ensure that the PyTorch nightly build is the GPU variant (i.e. contains cuDNN) + conda list -n build_binary pytorch | grep cudnn + echo "Successfully installed pytorch-nightly" - name: Install Other Dependencies shell: bash run: | @@ -130,7 +135,7 @@ jobs: strategy: matrix: os: [linux.g5.4xlarge.nvidia.gpu] - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] cuda-tag: ["cu11"] needs: build_on_cpu steps: @@ -217,7 +222,16 @@ jobs: - name: Install PyTorch using Conda shell: bash run: | - conda install -n build_binary -y pytorch pytorch-cuda=11.7 -c pytorch-nightly -c nvidia + conda install -n build_binary -y pytorch cuda -c pytorch-nightly -c "nvidia/label/cuda-11.7.1" + + # Ensure that CUDA is properly installed + conda run -n build_binary which nvcc + conda run -n build_binary test -n "$(find $HOME/miniconda -name cuda_runtime.h)" + echo "Successfully installed CUDA" + + # Ensure that the PyTorch nightly build is the GPU variant (i.e. contains cuDNN) + conda list -n build_binary pytorch | grep cudnn + echo "Successfully installed pytorch-nightly" # download wheel from GHA - name: Download wheel uses: actions/download-artifact@v2