Skip to content

Commit

Permalink
Merge branch 'main' into cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
leofang authored Dec 8, 2024
2 parents 6c35033 + 0723d62 commit 7d117f2
Show file tree
Hide file tree
Showing 15 changed files with 257 additions and 408 deletions.
43 changes: 43 additions & 0 deletions .github/actions/setup/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,22 @@ runs:
run: |
env
- name: Set up CTK cache variable
  # Export the cache key and the archive file name used by the cache
  # restore/save steps. Both derive from the CUDA version and host platform.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    ctk_tag="mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}"
    {
      echo "CTK_CACHE_KEY=${ctk_tag}"
      echo "CTK_CACHE_FILENAME=${ctk_tag}.tar.gz"
    } >> $GITHUB_ENV
- name: Download CTK cache
  # Try to fetch a previously saved mini CTK archive. Later steps branch on
  # this step's `cache-hit` output, so a failure here must not stop the job.
  uses: actions/cache/restore@v4
  id: ctk-get-cache
  continue-on-error: true
  with:
    path: ./${{ env.CTK_CACHE_FILENAME }}
    key: ${{ env.CTK_CACHE_KEY }}

- name: Get CUDA components
if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }}
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
CUDA_PATH="./cuda_toolkit"
Expand Down Expand Up @@ -90,15 +105,43 @@ runs:
}
# Get headers and shared libraries in place
# Note: the cache key does not include this list, so if the list is changed the
# existing cache entry would need to be manually deleted (ex: through web UI);
# otherwise the stale archive would keep being restored.
populate_cuda_path cuda_nvcc
populate_cuda_path cuda_cudart
populate_cuda_path cuda_nvrtc
populate_cuda_path cuda_profiler_api
populate_cuda_path libnvjitlink
ls -l $CUDA_PATH
# Prepare the cache
# Note: try to escape | and > ...
tar -czvf ${CTK_CACHE_FILENAME} ${CUDA_PATH}
# Note: the headers will be copied into the cibuildwheel manylinux container,
# so setting the CUDA_PATH env var here is meaningless.
- name: Upload CTK cache
  # Save the archive only when the restore step did not report a cache hit.
  # `!cancelled()` keeps the step running after an earlier failure (as
  # `always()` did) but lets a cancelled run stop instead of uploading.
  if: ${{ !cancelled() && steps.ctk-get-cache.outputs.cache-hit != 'true' }}
  uses: actions/cache/save@v4
  with:
    key: ${{ env.CTK_CACHE_KEY }}
    path: ./${{ env.CTK_CACHE_FILENAME }}

- name: Restore CTK cache
  # On a cache hit, unpack the archive and check that it produced the
  # expected toolkit tree.
  if: steps.ctk-get-cache.outputs.cache-hit == 'true'
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    ls -l
    CUDA_PATH="./cuda_toolkit"
    tar -xzvf ${CTK_CACHE_FILENAME}
    ls -l ${CUDA_PATH}
    # Fail the step if the extracted tree has no include/ directory.
    [ -d "${CUDA_PATH}/include" ] || exit 1
- name: Set environment variables
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
Expand Down
93 changes: 93 additions & 0 deletions .github/actions/test/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Composite action that checks the GPU with nvidia-smi, downloads the bindings
# and core build artifacts, restores the cached mini CTK archive, and runs the
# cuda_bindings and cuda_core pytest suites.
#
# The steps read these environment variables, which the caller must provide:
# CUDA_BINDINGS_ARTIFACT_NAME, CUDA_BINDINGS_ARTIFACTS_DIR,
# CUDA_CORE_ARTIFACT_NAME, CUDA_CORE_ARTIFACTS_DIR, PYTHON_VERSION,
# CTK_BUILD_VER, HOST_PLATFORM.
name: test

description: Run tests in specified project

inputs:
# NOTE(review): test-options is required, but no step in this action
# references it.
test-options:
required: true
# NOTE(review): `type` does not appear to be a documented key for action
# inputs (unlike workflow_call inputs) - confirm it is accepted here.
type: string

runs:
using: composite
steps:
- name: Run nvidia-smi to make sure GPU is working
  # Fails the job early when nvidia-smi exits with an error.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    nvidia-smi

- name: Download bindings build artifacts
  # Artifact name and destination directory come from the caller's environment.
  uses: actions/download-artifact@v4
  with:
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}

- name: Display structure of downloaded bindings artifacts
  # Debug aid: print the working directory and a recursive listing of the
  # download directory.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    pwd
    ls -lahR ${CUDA_BINDINGS_ARTIFACTS_DIR}
- name: Download core build artifacts
  # Artifact name and destination directory come from the caller's environment.
  uses: actions/download-artifact@v4
  with:
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}

- name: Display structure of downloaded core build artifacts
  # Debug aid: print the working directory and a recursive listing of the
  # download directory.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    pwd
    ls -lahR ${CUDA_CORE_ARTIFACTS_DIR}
- name: Set up Python ${{ env.PYTHON_VERSION }}
  # The interpreter version is taken from the caller's PYTHON_VERSION variable.
  uses: actions/setup-python@v5
  with:
    python-version: ${{ env.PYTHON_VERSION }}

- name: Set up CTK cache variable
  # Export the cache key and archive file name for the cache restore step,
  # built from the caller's CTK_BUILD_VER and HOST_PLATFORM variables.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    ctk_tag="mini-ctk-${CTK_BUILD_VER}-${HOST_PLATFORM}"
    {
      echo "CTK_CACHE_KEY=${ctk_tag}"
      echo "CTK_CACHE_FILENAME=${ctk_tag}.tar.gz"
    } >> $GITHUB_ENV
- name: Download CTK cache
  # Fetch the mini CTK archive from the cache. The next step unpacks it
  # unconditionally, so a miss must fail here. `continue-on-error` is
  # deliberately not set: it would swallow the failure requested by
  # `fail-on-cache-miss` and defer the error to the `tar` call.
  id: ctk-get-cache
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CTK_CACHE_KEY }}
    path: ./${{ env.CTK_CACHE_FILENAME }}
    fail-on-cache-miss: true

- name: Restore CTK cache
  # Unpack the mini CTK archive and expose it to the remaining steps through
  # CUDA_PATH, PATH and LD_LIBRARY_PATH.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    ls -l
    CUDA_PATH="$(pwd)/cuda_toolkit"
    tar -xzvf $CTK_CACHE_FILENAME
    ls -l $CUDA_PATH
    # Fail the step if the extracted tree has no include/ directory.
    if [ ! -d "$CUDA_PATH/include" ]; then
      exit 1
    fi
    echo "CUDA_PATH=$CUDA_PATH" >> $GITHUB_ENV
    echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV
    # This shell runs with `set -u`, so a bare $LD_LIBRARY_PATH aborts the
    # step when the variable is not defined. `${VAR:+...}` tolerates an unset
    # or empty value and avoids a leading ':' (an empty search-path entry).
    echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$CUDA_PATH/lib" >> $GITHUB_ENV
- name: Run test / analysis
  # Install the downloaded wheels, then run the cuda_bindings and cuda_core
  # test suites from the checked-out repository.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    repo_root="${PWD}"

    # Install the bindings wheel(s) first, then the core wheel(s).
    for wheel_dir in "${CUDA_BINDINGS_ARTIFACTS_DIR}" "${CUDA_CORE_ARTIFACTS_DIR}"; do
      cd "${wheel_dir}"
      pip install *.whl
    done

    # cuda_bindings: install requirements.txt, then run the tests.
    cd "${repo_root}/cuda_bindings"
    pip install -r requirements.txt
    pytest tests/
    #pytest tests/cython

    # cuda_core: run the tests.
    cd "${repo_root}/cuda_core"
    pytest -rxXs tests/
6 changes: 3 additions & 3 deletions .github/workflows/ci-gh.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Build and test
name: "CI"

concurrency:
group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }}
Expand All @@ -11,8 +11,7 @@ on:
- "main"

jobs:
build-and-test:
name: Build and test (${{ matrix.host-platform }}, ${{ matrix.target-device }}, ${{ matrix.build-mode }})
ci:
strategy:
fail-fast: false
matrix:
Expand All @@ -35,6 +34,7 @@ jobs:
# Note: this is for build-time only; the test-time matrix needs to be
# defined separately.
- "12.6.2"
name: "CI"
uses:
./.github/workflows/gh-build-and-test.yml
with:
Expand Down
113 changes: 96 additions & 17 deletions .github/workflows/gh-build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,100 @@ on:

jobs:
build:
name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}")
if: ${{ github.repository_owner == 'nvidia' }}
uses:
./.github/workflows/gh-build.yml
with:
client-repo: ${{ github.event.repository.name }}
target-device: ${{ inputs.target-device }}
runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') ||
(inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
(inputs.host-platform == 'win-x64' && 'windows-2019') }}
# (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }}
build-type: ${{ inputs.build-type }}
host-platform: ${{ inputs.host-platform }}
build-mode: ${{ inputs.build-mode }}
upload-enabled: ${{ inputs.upload-enabled }}
python-version: ${{ inputs.python-version }}
cuda-version: ${{ inputs.cuda-version }}
dependencies-file: ""
secrets: inherit
permissions:
id-token: write # This is required for configure-aws-credentials
contents: read # This is required for actions/checkout
runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') ||
(inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
(inputs.host-platform == 'win-x64' && 'windows-2019') }}
# (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }}
outputs:
CUDA_CORE_ARTIFACT_NAME: ${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }}
CUDA_CORE_ARTIFACTS_DIR: ${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }}
CUDA_BINDINGS_ARTIFACT_NAME: ${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}
CUDA_BINDINGS_ARTIFACTS_DIR: ${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}
steps:
- name: Checkout ${{ github.event.repository.name }}
  # fetch-depth 0 requests the full history rather than a shallow clone.
  uses: actions/checkout@v4
  with:
    fetch-depth: 0

- name: Set up build environment
  # Forward the workflow inputs to the local setup action.
  uses: ./.github/actions/setup
  with:
    build-mode: ${{ inputs.build-mode }}
    build-type: ${{ inputs.build-type }}
    client-repo: ${{ github.event.repository.name }}
    cuda-version: ${{ inputs.cuda-version }}
    host-platform: ${{ inputs.host-platform }}
    python-version: ${{ inputs.python-version }}
    target-device: "${{ inputs.target-device }}"
    upload-enabled: ${{ inputs.upload-enabled }}

- name: Call build action
  # Forward the relevant workflow inputs to the local build action.
  uses: ./.github/actions/build
  with:
    host-platform: ${{ inputs.host-platform }}
    target-device: "${{ inputs.target-device }}"
    build-type: ${{ inputs.build-type }}
    upload-enabled: ${{ inputs.upload-enabled }}

- name: Pass environment variables
  # Re-export the artifact names/directories from this job's environment as
  # step outputs so they can be surfaced as job outputs.
  id: pass_env
  # An explicit bash shell is required: this job can run on a Windows runner,
  # where the default `run` shell is PowerShell and the `${VAR}` /
  # `>> $GITHUB_OUTPUT` syntax below does not work. The flags match the other
  # run steps in this repository.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_NAME}" >> $GITHUB_OUTPUT
    echo "CUDA_CORE_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT
    echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_NAME}" >> $GITHUB_OUTPUT
    echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_BINDINGS_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT
test:
  # Runs the test action on a GPU runner against the artifacts produced by
  # the build job.
  # TODO: improve the name once a separate test matrix is defined
  name: Test (CUDA ${{ inputs.cuda-version }})
  needs: build
  # TODO: enable testing once linux-aarch64 & win-x64 GPU runners are up
  if: github.repository_owner == 'nvidia' && startsWith(inputs.host-platform, 'linux-x64')
  runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }}
  permissions:
    id-token: write  # This is required for configure-aws-credentials
    contents: read  # This is required for actions/checkout
  # TODO: use a different (nvidia?) container, or just run on bare image
  container:
    image: condaforge/miniforge3:latest
    options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g
    env:
      NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
  steps:
    - name: Checkout ${{ github.event.repository.name }}
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # TODO: we probably don't need this?
    # - name: Setup
    #   if: ${{ inputs.has-built != 'true' }}
    #   uses: ./.github/actions/setup
    #   with:
    #     client-repo: ${{ github.event.repository.name }}
    #     build-type: ${{ inputs.build-type }}
    #     target-device: "${{ inputs.target-device }}"
    #     host-platform: ${{ inputs.host-platform }}
    #     build-mode: ${{ inputs.build-mode }}
    #     upload-enabled: ${{ inputs.upload-enabled }}
    #     python-version: ${{ inputs.python-version }}

    - name: Call test action
      uses: ./.github/actions/test
      with:
        test-options: ${{ inputs.build-type }}
      # The test action reads its configuration from these variables.
      env:
        CUDA_BINDINGS_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}
        CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}
        CUDA_CORE_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }}
        CUDA_CORE_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }}
        PYTHON_VERSION: ${{ inputs.python-version }}
        CTK_BUILD_VER: ${{ inputs.cuda-version }}
        HOST_PLATFORM: ${{ inputs.host-platform }}
73 changes: 0 additions & 73 deletions .github/workflows/gh-build.yml

This file was deleted.

Loading

0 comments on commit 7d117f2

Please sign in to comment.