Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance FBGEMM nightly CI #1407

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions .github/scripts/build_wheel.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/bin/bash

# Exit on failure
# (abort the script as soon as any command returns a non-zero status)
set -e

# Load the helper functions used further down (setup_miniconda,
# create_conda_environment, install_cudnn) from setup_env.bash, which is looked
# up in the same directory as this script.
# shellcheck source=/dev/null
. "$(dirname "$(realpath -s "$0")")/setup_env.bash"

# Defaults for the command-line options parsed below.
verbose=0
package_name=""
python_version=""
pytorch_channel_name=""
# "x" is a placeholder meaning "-c was not given". An empty string cannot be
# used for that purpose because it is a meaningful value (it selects the
# CPU-only build later in this script).
pytorch_cuda_version="x"
miniconda_prefix="${HOME}/miniconda"

# Print the command-line help for build_wheel.bash on stdout.
#
# The example invocations include every mandatory option (-o, -p, -P, -c) so
# that they can be copied and run as-is.
usage () {
  echo "Usage: bash build_wheel.bash -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c PYTORCH_CUDA_VERSION [-m MINICONDA_PREFIX] [-v] [-h]"
  echo "-v : verbose"
  echo "-h : help"
  echo "PACKAGE_NAME : output package name (e.g., fbgemm_gpu_nightly)"
  echo "PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)"
  echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
  echo "PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)"
  echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)"
  echo "Example 1: Python 3.10 + PyTorch nightly (CUDA 11.7), install miniconda at /home/user/tmp/miniconda"
  echo " bash build_wheel.bash -v -o fbgemm_gpu_nightly -P pytorch-nightly -p 3.10 -c 11.7 -m /home/user/tmp/miniconda"
  echo "Example 2: Python 3.10 + PyTorch stable (CPU), install miniconda at \$HOME/miniconda"
  echo " bash build_wheel.bash -v -o fbgemm_gpu -P pytorch -p 3.10 -c \"\""
}

# Parse the command-line options into the variables initialised above.
# A letter followed by ':' in the option string takes an argument, which
# getopts delivers in OPTARG. Only letters that have a matching case arm are
# listed, so the option string and the handled options cannot drift apart.
while getopts vho:p:P:c:m: flag; do
  case "$flag" in
    v) verbose="1" ;;
    o) package_name="${OPTARG}" ;;
    p) python_version="${OPTARG}" ;;
    P) pytorch_channel_name="${OPTARG}" ;;
    c) pytorch_cuda_version="${OPTARG}" ;;
    m) miniconda_prefix="${OPTARG}" ;;
    h)
      usage
      exit 0
      ;;
    *)
      # Unknown option or missing argument; getopts has already printed a
      # diagnostic of its own.
      usage
      exit 1
      ;;
  esac
done

# Every option except -m, -v and -h is mandatory. PYTORCH_CUDA_VERSION is
# compared against its "x" placeholder instead of the empty string because an
# empty value is a legitimate request for the CPU-only build.
if [[ -z "$python_version" || "$pytorch_cuda_version" == "x" || -z "$miniconda_prefix" || -z "$pytorch_channel_name" || -z "$package_name" ]]; then
  usage
  exit 1
fi

# Python tag for the wheel: the version with its dots removed ("3.10" -> "310").
python_tag="${python_version//./}"

if [[ "$verbose" == "1" ]]; then
  # Trace every command as it is executed (and keep exit-on-error enabled).
  set -x -e
fi

################################################################################
echo "## 0. Minimal check"
################################################################################

# The script changes into ./fbgemm_gpu later on, so it has to be started from
# the directory that contains it. GCC is looked up with the shell builtin
# `command -v` rather than the external `which`, which is not guaranteed to be
# installed and whose output/exit status differs between implementations.
if [ ! -d "fbgemm_gpu" ]; then
  echo "Error: this script must be executed in FBGEMM/"
  exit 1
elif ! command -v gcc >/dev/null 2>&1; then
  echo "Error: GCC is needed to compile FBGEMM"
  exit 1
fi

################################################################################
echo "## 1. Set up Miniconda"
################################################################################

setup_miniconda "$miniconda_prefix"

################################################################################
echo "## 2. Create build_binary environment"
################################################################################

create_conda_environment build_binary "$python_version" "$pytorch_channel_name" "$pytorch_cuda_version"

cd fbgemm_gpu

# cuDNN is only required at build time; it is unpacked under the Miniconda
# prefix and advertised to the build through the two CUDNN_* variables.
cudnn_root="$miniconda_prefix/build_only/cudnn"
install_cudnn "$cudnn_root"
export CUDNN_INCLUDE_DIR="$cudnn_root/include"
export CUDNN_LIBRARY="$cudnn_root/lib"

conda run -n build_binary python -m pip install -r requirements.txt

# TODO: Do we need these checks?
# Log the toolchain versions and make sure the key Python modules import.
ldd --version
conda info
conda run -n build_binary python --version
gcc --version
for module in torch.distributed skbuild numpy; do
  conda run -n build_binary python -c "import ${module}"
done
cd ../

################################################################################
echo "## 3. Build FBGEMM_GPU"
################################################################################

cd fbgemm_gpu
# Remove the output of any previous build so that only the wheel produced by
# this run is found below.
rm -rf dist _skbuild
if [ "$pytorch_cuda_version" == "" ]; then
  # CPU version
  build_arg="--cpu_only"
  package_name="${package_name}_cpu"
else
  # GPU version
  # We build only for architectures 7.0 and 8.0 (i.e., for v100 and a100)
  # because of the 100 MB binary size limit from the PYPI website.
  build_arg="-DTORCH_CUDA_ARCH_LIST=7.0;8.0"
fi

# manylinux1_x86_64 is specified for pypi upload: distribute python extensions as wheels on Linux
conda run -n build_binary python setup.py bdist_wheel --package_name="${package_name}" --python-tag="py${python_tag}" "${build_arg}" --plat-name=manylinux1_x86_64
cd ../

# Usage:
# pip install $(ls fbgemm_gpu/dist/${package_name}-*.whl)
# python -c "import fbgemm_gpu"

# Locate the wheel with a glob instead of parsing `ls` output. When nothing
# matches, the pattern stays unexpanded, so the first element is tested for
# existence to detect a build that produced no wheel.
built_wheels=(fbgemm_gpu/dist/"${package_name}"-*.whl)
if [ ! -e "${built_wheels[0]}" ]; then
  echo "Error: no wheel matching fbgemm_gpu/dist/${package_name}-*.whl was produced"
  exit 1
fi
# One path per line, matching what `ls` would have printed.
wheel_name="$(printf '%s\n' "${built_wheels[@]}")"
echo "Successfully built $wheel_name"
68 changes: 68 additions & 0 deletions .github/scripts/setup_env.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash

################################################################################
# Make Miniconda available at MINICONDA_PREFIX_PATH, installing it if needed.
#
# Arguments:
#   $1 - directory to install Miniconda into (absolute or relative)
# Exports:
#   PATH  - "<prefix>/bin" is prepended
#   CONDA - set to <prefix>
#   Both use the absolute form of the prefix, so they stay valid after the
#   caller changes directory.
# Exit status:
#   Exits the shell with 1 when called without an argument; returns 1 when the
#   prefix cannot be resolved or the download/installation fails.
################################################################################
setup_miniconda () {
  local prefix="$1"
  local download_dir
  if [ "$prefix" == "" ]; then
    echo "Usage: setup_miniconda MINICONDA_PREFIX_PATH"
    echo "Example:"
    echo " setup_miniconda /home/user/tmp/miniconda"
    exit 1
  fi

  # Create the directory up front so that a relative prefix can be resolved.
  mkdir -p "$prefix"
  prefix="$(CDPATH='' cd -- "$prefix" && pwd)" || return 1

  if [ ! -f "${prefix}/bin/conda" ]; then
    # Download miniconda if not exists. The installer goes into a private
    # temporary directory so that nothing is left behind in the caller's
    # working directory.
    download_dir="$(mktemp -d)" || return 1
    if ! { wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O "${download_dir}/miniconda.sh" \
      && bash "${download_dir}/miniconda.sh" -b -p "$prefix" -u; }; then
      rm -rf "$download_dir"
      return 1
    fi
    rm -rf "$download_dir"
  fi

  # These variables will be exported outside
  export PATH="${prefix}/bin:${PATH}"
  export CONDA="${prefix}"
}

################################################################################
# Create a conda environment and install PyTorch into it.
#
# Arguments:
#   $1 - ENV_NAME: name of the conda environment to create
#   $2 - PYTHON_VERSION: Python version for the environment (e.g., 3.10)
#   $3 - PYTORCH_CHANNEL_NAME: conda channel to install PyTorch from
#   $4 - PYTORCH_CUDA_VERSION: CUDA version (e.g., 11.7); an empty string
#        installs the CPU-only PyTorch build
# Exit status:
#   Exits the shell with 1 when ENV_NAME, PYTHON_VERSION or
#   PYTORCH_CHANNEL_NAME is empty.
#
# All variables are local so that the caller's variables of the same names are
# left untouched.
################################################################################
create_conda_environment () {
  local env_name="$1"
  local python_version="$2"
  local pytorch_channel_name="$3"
  local pytorch_cuda_version="$4"
  if [ "$env_name" == "" ] || [ "$python_version" == "" ] || [ "$pytorch_channel_name" == "" ]; then
    echo "Usage: create_conda_environment ENV_NAME PYTHON_VERSION PYTORCH_CHANNEL_NAME PYTORCH_CUDA_VERSION"
    echo "Example:"
    echo " create_conda_environment build_binary 3.10 pytorch-nightly 11.7"
    exit 1
  fi
  # -y removes existing environment
  conda create -y --name "$env_name" python="$python_version"
  if [ "$pytorch_cuda_version" == "" ]; then
    # CPU version
    conda install -n "$env_name" -y pytorch cpuonly -c "$pytorch_channel_name"
  else
    # GPU version
    conda install -n "$env_name" -y pytorch pytorch-cuda="$pytorch_cuda_version" -c "$pytorch_channel_name" -c nvidia
  fi
}

################################################################################
# Download the cuDNN 8.5.0.96 (CUDA 11) archive and place its include/ and
# lib/ directories under INSTALL_PATH.
#
# Arguments:
#   $1 - INSTALL_PATH: destination directory (absolute or relative). Any
#        existing content is removed first.
# Exit status:
#   Exits the shell with 1 when called without an argument; otherwise returns
#   0 on success and non-zero when the download, extraction or move fails.
#
# The work happens in a uniquely named scratch directory below the current
# directory, inside a subshell, so the caller's working directory is never
# changed and the scratch directory is removed on success and on failure.
################################################################################
install_cudnn () {
  local install_path="$1"
  local archive="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
  local work_dir
  local status=0
  if [ "$install_path" == "" ]; then
    echo "Usage: install_cudnn INSTALL_PATH"
    echo "Example:"
    echo " install_cudnn \$(pwd)/cudnn_install"
    exit 1
  fi

  rm -rf "$install_path"
  mkdir -p "$install_path"
  # Resolve to an absolute path: the move below is issued from inside the
  # scratch directory, where a relative INSTALL_PATH would point elsewhere.
  install_path="$(CDPATH='' cd -- "$install_path" && pwd)" || return 1

  # Install cuDNN manually
  # See https://github.com/pytorch/builder/blob/main/common/install_cuda.sh
  work_dir="$(mktemp -d "${PWD}/tmp_cudnn.XXXXXX")" || return 1
  (
    cd "$work_dir" \
      && wget -q "https://ossci-linux.s3.amazonaws.com/${archive}.tar.xz" -O "${archive}.tar.xz" \
      && tar xf "${archive}.tar.xz" \
      && mv "${archive}/include" "${archive}/lib" "$install_path"
  ) || status=$?
  rm -rf "$work_dir"
  return "$status"
}
102 changes: 102 additions & 0 deletions .github/scripts/test_torchrec.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/bin/bash

# Exit on failure
# (abort the script as soon as any command returns a non-zero status)
set -e

# Load the helper functions used further down (setup_miniconda,
# create_conda_environment) from setup_env.bash, which is looked up in the same
# directory as this script.
# shellcheck source=/dev/null
. "$(dirname "$(realpath -s "$0")")/setup_env.bash"

# Defaults for the command-line options parsed below.
verbose=0
torchrec_package_name=""
python_version=""
# Initialised explicitly so that a value inherited from the environment cannot
# stand in for a missing -P option when the required options are checked.
pytorch_channel_name=""
# "x" is a placeholder meaning "option not given yet".
pytorch_cuda_version="x"
fbgemm_wheel_path="x"
miniconda_prefix="${HOME}/miniconda"

# Write the help text of test_torchrec.bash to stdout.
# The here-document delimiter is quoted, so "$HOME" is printed literally.
usage () {
  cat <<'USAGE_TEXT'
Usage: bash test_torchrec.bash -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c PYTORCH_CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]
-v : verbose
-h : help
PACKAGE_NAME : output package name of TorchRec (e.g., torchrec_nightly)
 Note: TorchRec is sensitive to its package name
 e.g., torchrec needs fbgemm-gpu while torchrec_nightly needs fbgemm-gpu-nightly
PYTHON_VERSION : Python version (e.g., 3.7, 3.8, 3.10)
PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))
PYTORCH_CUDA_VERSION: PyTorch's CUDA version (e.g., 11.6, 11.7)
FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file
MINICONDA_PREFIX : path to install Miniconda (default: $HOME/miniconda)
Example: Python 3.10 + PyTorch nightly (CUDA 11.7), install miniconda at $HOME/miniconda, using dist/fbgemm_gpu_nightly.whl
 bash test_torchrec.bash -v -o torchrec_nightly -p 3.10 -P pytorch-nightly -c 11.7 -w dist/fbgemm_gpu_nightly.whl
USAGE_TEXT
}

# Walk over the command-line options. Letters followed by ':' in the option
# string carry an argument, which getopts hands over in OPTARG.
while getopts "vho:p:P:c:m:w:" flag; do
  case "${flag}" in
    v) verbose=1 ;;
    o) torchrec_package_name="$OPTARG" ;;
    p) python_version="$OPTARG" ;;
    P) pytorch_channel_name="$OPTARG" ;;
    c) pytorch_cuda_version="$OPTARG" ;;
    m) miniconda_prefix="$OPTARG" ;;
    w) fbgemm_wheel_path="$OPTARG" ;;
    h)
      # Help was asked for explicitly: not an error.
      usage
      exit 0
      ;;
    *)
      # Anything getopts rejected.
      usage
      exit 1
      ;;
  esac
done

# -o, -p, -P, -c and -w are mandatory; print the usage text and stop when any
# of them is missing.
# PYTORCH_CUDA_VERSION is compared against its "x" placeholder because an empty
# value is legitimate (it selects the CPU-only PyTorch install).
# FBGEMM_WHEEL_PATH also starts out as "x", so a missing -w is only detected by
# comparing against that placeholder; an explicitly empty path is rejected too.
if [ "$torchrec_package_name" == "" ] || [ "$python_version" == "" ] || [ "$pytorch_cuda_version" == "x" ] || [ "$miniconda_prefix" == "" ] || [ "$pytorch_channel_name" == "" ] || [ "$fbgemm_wheel_path" == "" ] || [ "$fbgemm_wheel_path" == "x" ]; then
  usage
  exit 1
fi
# Python tag for the wheel: the version with its dots removed ("3.10" -> "310").
python_tag="${python_version//\./}"

if [ "$verbose" == "1" ]; then
  # Print each line verbosely
  set -x -e
fi

################################################################################
echo "## 0. Minimal check"
################################################################################

# Refuse to run unless a torchrec/ directory is present in the current
# directory.
[[ -d "torchrec" ]] || {
  echo "Error: this script must be executed in torchrec/"
  exit 1
}

################################################################################
echo "## 1. Set up Miniconda"
################################################################################

setup_miniconda "$miniconda_prefix"

################################################################################
echo "## 2. Create test_binary environment"
################################################################################

create_conda_environment test_binary "$python_version" "$pytorch_channel_name" "$pytorch_cuda_version"

# Command prefix that runs Python inside the test_binary environment.
test_python=(conda run -n test_binary python)

# FBGEMM_GPU comes from the local wheel in "$fbgemm_wheel_path", so every
# occurrence of it in requirements.txt is commented out before installing the
# remaining requirements.
sed -i 's/fbgemm-gpu/#fbgemm-gpu/g' requirements.txt
"${test_python[@]}" -m pip install -r requirements.txt
# Now install FBGEMM_GPU from the wheel file and make sure it can be imported.
"${test_python[@]}" -m pip install "$fbgemm_wheel_path"
"${test_python[@]}" -c "import fbgemm_gpu"

################################################################################
echo "## 3. Build TorchRec"
################################################################################

# Command prefix that runs Python inside the test_binary environment.
torchrec_python=(conda run -n test_binary python)

# Drop wheels from earlier runs so the glob in step 4 only sees the new one.
rm -rf dist
"${torchrec_python[@]}" setup.py bdist_wheel --package_name "${torchrec_package_name}" --python-tag="py${python_tag}"

################################################################################
echo "## 4. Import TorchRec"
################################################################################

# Install the freshly built wheel, then check that the package imports.
"${torchrec_python[@]}" -m pip install dist/"${torchrec_package_name}"*.whl
"${torchrec_python[@]}" -c "import torchrec"

echo "Test succeeded"
Loading