Skip to content

Commit

Permalink
[ci] simplify CI configurations, parallelize compilation, test CUDA on Ubuntu 22.04 (#6458)
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored May 23, 2024
1 parent dd9da91 commit 4b5d549
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ install:
- set PYTHON_VERSION=%CONFIGURATION%
- set CONDA_ENV="test-env"
- ps: |
$env:CMAKE_BUILD_PARALLEL_LEVEL = 4
$env:MINICONDA = "C:\Miniconda3-x64"
$env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH"
$env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER"
$env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim()
build: false

Expand Down
2 changes: 2 additions & 0 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ SANITIZERS=${SANITIZERS:-""}

ARCH=$(uname -m)

LGB_VER=$(head -n 1 ${BUILD_DIRECTORY}/VERSION.txt)

if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then
export CXX=g++-11
export CC=gcc-11
Expand Down
2 changes: 2 additions & 0 deletions .ci/test_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ function Check-Output {
}
}

$env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim()

# unify environment variable for Azure DevOps and AppVeyor
if (Test-Path env:APPVEYOR) {
$env:APPVEYOR = "true"
Expand Down
139 changes: 80 additions & 59 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,53 +8,41 @@ on:
branches:
- master
- release/*
# Run manually by clicking a button in the UI
workflow_dispatch:
inputs:
restart_docker:
description: 'Restart nvidia-docker on the runner before building?'
required: true
type: boolean
default: false

# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
github_actions: 'true'
os_name: linux
conda_env: test-env

jobs:
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
# Optionally reinstall + restart docker on the runner before building.
# This is safe as long as only 1 of these jobs runs at a time.
restart-docker:
name: set up docker
runs-on: [self-hosted, linux]
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
task: cuda
timeout-minutes: 30
steps:
- name: Setup or update software on host machine
if: ${{ inputs.restart_docker }}
run: |
# install core packages
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
ca-certificates \
curl \
git \
gnupg-agent \
lsb-release \
software-properties-common
# set up nvidia-docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
Expand All @@ -67,43 +55,76 @@ jobs:
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: Remove old folder with repository
run: sudo rm -rf $GITHUB_WORKSPACE
- name: mark job successful
run: |
exit 0
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
needs: [restart-docker]
container:
image: ${{ matrix.image }}
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
COMPILER: ${{ matrix.compiler }}
CONDA: /tmp/miniforge
CONDA_ENV: test-env
DEBIAN_FRONTEND: noninteractive
METHOD: ${{ matrix.method }}
OS_NAME: linux
PYTHON_VERSION: ${{ matrix.python_version }}
TASK: ${{ matrix.task }}
options: --gpus all
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
image: nvcr.io/nvidia/cuda:12.2.0-devel-ubuntu22.04
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04
task: cuda
steps:
- name: Install latest git
run: |
apt-get update
apt-get install --no-install-recommends -y \
ca-certificates \
software-properties-common
add-apt-repository ppa:git-core/ppa -y
apt-get update
apt-get install --no-install-recommends -y \
git
- name: Checkout repository
uses: actions/checkout@v1
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
GITHUB_ACTIONS=${{ env.github_actions }}
OS_NAME=${{ env.os_name }}
COMPILER=${{ matrix.compiler }}
TASK=${{ matrix.task }}
METHOD=${{ matrix.method }}
CONDA_ENV=${{ env.conda_env }}
PYTHON_VERSION=${{ matrix.python_version }}
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:\$PATH
nvidia-smi
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
EOF
cuda_version="${{ matrix.cuda_version }}"
cuda_major=${cuda_version%%.*}
docker_img="nvcr.io/nvidia/cuda:${cuda_version}-devel"
if [[ ${cuda_major} -eq 11 ]]; then
docker_img="${docker_img}-ubuntu18.04"
elif [[ ${cuda_major} -ge 12 ]]; then
docker_img="${docker_img}-ubuntu20.04"
fi
docker run --env-file docker.env -v "$GITHUB_WORKSPACE":"$ROOT_DOCKER_FOLDER" --rm --gpus all "$docker_img" /bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export PATH=$CONDA/bin:$PATH
# check GPU usage
nvidia-smi
# build and test
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/linkchecker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ on:

env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
TASK: 'check-links'
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/python_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ concurrency:
cancel-in-progress: true

env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'

jobs:
test:
Expand Down Expand Up @@ -73,7 +73,6 @@ jobs:
export OS_NAME="linux"
fi
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export LGB_VER=$(head -n 1 VERSION.txt)
export CONDA=${HOME}/miniforge
export PATH=${CONDA}/bin:${PATH}
$GITHUB_WORKSPACE/.ci/setup.sh || exit 1
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/r_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ concurrency:
cancel-in-progress: true

env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
# hack to get around this:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
_R_CHECK_SYSTEM_CLOCK_: 0
Expand Down Expand Up @@ -189,7 +190,6 @@ jobs:
run: |
export TASK="${{ matrix.task }}"
export COMPILER="${{ matrix.compiler }}"
export GITHUB_ACTIONS="true"
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
Expand All @@ -216,7 +216,6 @@ jobs:
$env:R_VERSION = "${{ matrix.r_version }}"
$env:R_BUILD_TYPE = "${{ matrix.build_type }}"
$env:COMPILER = "${{ matrix.compiler }}"
$env:GITHUB_ACTIONS = "true"
$env:TASK = "${{ matrix.task }}"
& "$env:GITHUB_WORKSPACE/.ci/test_windows.ps1"
test-r-sanitizers:
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/static_analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ concurrency:
env:
COMPILER: 'gcc'
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'

Expand Down
5 changes: 1 addition & 4 deletions .vsts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pr:
variables:
AZURE: 'true'
PYTHON_VERSION: '3.11'
CMAKE_BUILD_PARALLEL_LEVEL: 4
CONDA_ENV: test-env
runCodesignValidationInjection: false
skipComponentGovernanceDetection: true
Expand Down Expand Up @@ -82,7 +83,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
echo "##vso[task.prependpath]/usr/lib64/openmpi/bin"
echo "##vso[task.prependpath]$CONDA/bin"
displayName: 'Set variables'
Expand Down Expand Up @@ -159,7 +159,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$HOME/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
Expand Down Expand Up @@ -225,7 +224,6 @@ jobs:
CONDA_ENV=$CONDA_ENV
PYTHON_VERSION=$PYTHON_VERSION
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS
BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY
EOF
Expand Down Expand Up @@ -283,7 +281,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$AGENT_HOMEDIRECTORY/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
Expand Down

0 comments on commit 4b5d549

Please sign in to comment.