Skip to content

Commit

Permalink
Merge pull request #714 from rapidsai/branch-24.10
Browse files Browse the repository at this point in the history
  • Loading branch information
raydouglass committed Oct 10, 2024
2 parents faa595a + 75eae84 commit 4a97818
Show file tree
Hide file tree
Showing 15 changed files with 99 additions and 152 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ on:
RAFT_ANN_BENCH_CPU_TAG:
required: true
type: string
BUILD_RAFT_ANN_BENCH_CPU_IMAGE:
required: true
type: boolean

jobs:
build:
Expand Down Expand Up @@ -146,7 +149,7 @@ jobs:
RAPIDS_VER=${{ inputs.RAPIDS_VER }}
tags: ${{ inputs.RAFT_ANN_BENCH_DATASETS_TAG }}-${{ matrix.ARCH }}
- name: Build RAFT ANN Benchmarks CPU image
if: inputs.CUDA_VER == '12.5.1' # we don't need to build CPU packages for different CUDA versions.
if: inputs.BUILD_RAFT_ANN_BENCH_CPU_IMAGE
uses: docker/build-push-action@v6
with:
context: context
Expand Down
63 changes: 25 additions & 38 deletions .github/workflows/build-test-publish-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,25 @@ permissions:
statuses: none

jobs:
# Calls the shared RAPIDS `pr-builder` reusable workflow once the jobs listed
# under `needs` have finished. Only evaluated when this workflow is invoked with
# build_type == 'pull-request'; `!cancelled()` keeps the job eligible to run
# unless the workflow run itself was cancelled.
pr-builder:
  if: ${{ !cancelled() && inputs.build_type == 'pull-request' }}
  needs:
    - checks
    - compute-matrix
    - build
    - build-multiarch-manifest
    - test
  secrets: inherit
  # NOTE(review): the file name and ref were restored from an obfuscated value
  # ("[email protected]"); confirm the ref matches the release branch.
  uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
# Lint job: checks out the repository, installs pre-commit with pip, and runs
# every configured pre-commit hook against all files.
checks:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
# No explicit Python setup step here; relies on the `pip` already on the runner.
- name: Run pre-commit
run: |
pip install pre-commit
pre-commit run --all-files
compute-matrix:
runs-on: ubuntu-latest
container:
Expand Down Expand Up @@ -119,7 +138,7 @@ jobs:
echo "TEST_MATRIX=$(yq -n -o json 'env(TEST_MATRIX)' | jq -c '{include: .}')" | tee --append "${GITHUB_OUTPUT}"
build:
needs: compute-matrix
needs: [checks, compute-matrix]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
Expand All @@ -133,6 +152,7 @@ jobs:
LINUX_VER: ${{ matrix.LINUX_VER }}
PYTHON_VER: ${{ matrix.PYTHON_VER }}
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
BUILD_RAFT_ANN_BENCH_CPU_IMAGE: ${{ matrix.BUILD_RAFT_ANN_BENCH_CPU_IMAGE }}
BASE_TAG:
"rapidsai/${{ needs.compute-matrix.outputs.BASE_IMAGE_REPO }}:\
${{ needs.compute-matrix.outputs.BASE_TAG_PREFIX }}\
Expand Down Expand Up @@ -168,7 +188,6 @@ jobs:
${{ needs.compute-matrix.outputs.ALPHA_TAG }}-\
py${{ matrix.PYTHON_VER }}"
build-multiarch-manifest:
if: ${{ !cancelled() && inputs.build_type == 'branch' }}
needs: [build, compute-matrix]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
Expand All @@ -187,6 +206,7 @@ jobs:
- name: Create multiarch manifest
shell: bash
env:
RAFT_ANN_BENCH_CPU_IMAGE_BUILT: ${{ matrix.BUILD_RAFT_ANN_BENCH_CPU_IMAGE }}
BASE_IMAGE_REPO: ${{ needs.compute-matrix.outputs.BASE_IMAGE_REPO }}
BASE_TAG_PREFIX: ${{ needs.compute-matrix.outputs.BASE_TAG_PREFIX }}
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
Expand All @@ -206,7 +226,7 @@ jobs:
ARCHES: ${{ toJSON(matrix.ARCHES) }}
run: ci/create-multiarch-manifest.sh
test:
needs: [compute-matrix, build, build-multiarch-manifest]
needs: [compute-matrix, build]
if: inputs.run_tests
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.TEST_MATRIX) }}
Expand All @@ -226,38 +246,5 @@ jobs:
${{ needs.compute-matrix.outputs.RAPIDS_VER }}\
${{ needs.compute-matrix.outputs.ALPHA_TAG }}-\
cuda${{ matrix.CUDA_VER }}-\
py${{ matrix.PYTHON_VER }}"
delete-temp-images:
if: ${{ !cancelled() && needs.test.result == 'success' }}
needs: [compute-matrix, build-multiarch-manifest, test]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Remove temporary images
shell: bash
env:
RAFT_ANN_BENCH_CPU_IMAGE_BUILT: ${{ matrix.CUDA_TAG == '12.5.1' }}
BASE_IMAGE_REPO: ${{ needs.compute-matrix.outputs.BASE_IMAGE_REPO }}
BASE_TAG_PREFIX: ${{ needs.compute-matrix.outputs.BASE_TAG_PREFIX }}
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
ALPHA_TAG: ${{ needs.compute-matrix.outputs.ALPHA_TAG }}
CUDA_TAG: ${{ matrix.CUDA_TAG }}
PYTHON_VER: ${{ matrix.PYTHON_VER }}
NOTEBOOKS_IMAGE_REPO: ${{ needs.compute-matrix.outputs.NOTEBOOKS_IMAGE_REPO }}
NOTEBOOKS_TAG_PREFIX: ${{ needs.compute-matrix.outputs.NOTEBOOKS_TAG_PREFIX }}
RAFT_ANN_BENCH_IMAGE_REPO: ${{ needs.compute-matrix.outputs.RAFT_ANN_BENCH_IMAGE_REPO }}
RAFT_ANN_BENCH_TAG_PREFIX: ${{ needs.compute-matrix.outputs.RAFT_ANN_BENCH_TAG_PREFIX }}
RAFT_ANN_BENCH_DATASETS_IMAGE_REPO: ${{ needs.compute-matrix.outputs.RAFT_ANN_BENCH_DATASETS_IMAGE_REPO }}
RAFT_ANN_BENCH_DATASETS_TAG_PREFIX: ${{ needs.compute-matrix.outputs.RAFT_ANN_BENCH_DATASETS_TAG_PREFIX }}
RAFT_ANN_BENCH_CPU_IMAGE_REPO: ${{ needs.compute-matrix.outputs.RAFT_ANN_BENCH_CPU_IMAGE_REPO }}
RAFT_ANN_BENCH_CPU_TAG_PREFIX: ${{ needs.compute-matrix.outputs.RAFT_ANN_BENCH_CPU_TAG_PREFIX }}
GPUCIBOT_DOCKERHUB_USER: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
GPUCIBOT_DOCKERHUB_TOKEN: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
ARCHES: ${{ toJSON(matrix.ARCHES) }}
run: ci/delete-temp-images.sh
py${{ matrix.PYTHON_VER }}-\
${{ matrix.ARCH }}"
19 changes: 0 additions & 19 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,7 @@ concurrency:
cancel-in-progress: true

jobs:
# Calls the shared RAPIDS `pr-builder` reusable workflow after the `checks`
# and `docker` jobs.
pr-builder:
  needs:
    - checks
    - docker
  secrets: inherit
  # NOTE(review): the file name and ref were restored from an obfuscated value
  # ("[email protected]"); confirm the ref matches the release branch.
  uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
checks:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Run pre-commit
run: |
pip install pre-commit
pre-commit run --all-files
docker:
needs: [checks]
uses: ./.github/workflows/build-test-publish-images.yml
with:
build_type: pull-request
Expand Down
11 changes: 11 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,14 @@ To build just the `base` image with default arguments: `docker buildx build --pu
- `CUDA_VER` - Version of CUDA to use. Should be `major.minor.patch`
- `PYTHON_VER` - Version of Python to use. Should be `major.minor`
- `RAPIDS_VER` - Version of RAPIDS to use. Should be `YY.MM`

## Cleaning Up

Every build first writes images to the https://hub.docker.com/r/rapidsai/staging repo on DockerHub,
then pushes them on to the individual repos like `rapidsai/base`, `rapidsai/notebooks`, etc.

A scheduled job regularly deletes old images from that `rapidsai/staging` repo.
See https://github.com/rapidsai/workflows/blob/main/.github/workflows/cleanup_staging.yaml for details.

If you come back to a pull request here after more than a few days and find that jobs are failing with errors
that suggest that some necessary images don't exist, re-run all of CI on that pull request to produce new images.
10 changes: 8 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG LINUX_DISTRO=ubuntu
ARG LINUX_DISTRO_VER=22.04
ARG LINUX_VER=${LINUX_DISTRO}${LINUX_DISTRO_VER}

ARG RAPIDS_VER=24.08
ARG RAPIDS_VER=24.10

# Gather dependency information
FROM rapidsai/ci-conda:latest AS dependencies
Expand Down Expand Up @@ -51,6 +51,12 @@ WORKDIR /home/rapids
COPY condarc /opt/conda/.condarc

RUN <<EOF
# Include common diagnostic info
conda info
conda config --show-sources
conda list --show-channel-urls

# Install RAPIDS
mamba install -y -n base \
"rapids=${RAPIDS_VER}.*" \
"python=${PYTHON_VER}.*" \
Expand Down Expand Up @@ -122,7 +128,7 @@ LABEL com.nvidia.workbench.application.jupyterlab.webapp.url-cmd="jupyter lab li
LABEL com.nvidia.workbench.cuda-version="$CUDA_VER"
LABEL com.nvidia.workbench.description="RAPIDS with CUDA ${CUDA_VER}"
LABEL com.nvidia.workbench.entrypoint-script="/home/rapids/entrypoint.sh"
LABEL com.nvidia.workbench.image-version="24.08.01"
LABEL com.nvidia.workbench.image-version="24.10.00"
LABEL com.nvidia.workbench.labels="cuda${CUDA_VER}"
LABEL com.nvidia.workbench.name="RAPIDS with CUDA ${CUDA_VER}"
LABEL com.nvidia.workbench.os-distro-release="$LINUX_DISTRO_VER"
Expand Down
8 changes: 8 additions & 0 deletions ci/compute-matrix.jq
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ def compute_ubuntu_version($x):
def compute_cuda_tag($x):
$x + {CUDA_TAG: $x.CUDA_VER | split(".") | [.[0], .[1]] | join(".") };

# Returns the highest version string in the array $cuda_versions.
# Each version is split on "." and its components converted to numbers, so
# candidates are ranked by their numeric components rather than as plain text.
def latest_cuda_version($cuda_versions):
$cuda_versions | max_by(. | split(".") | map(tonumber));

# Returns matrix entry $x with an added boolean key BUILD_RAFT_ANN_BENCH_CPU_IMAGE,
# which is true only when the entry's CUDA_VER equals $latest_cuda_version.
def compute_build_raft_ann_bench_cpu_image($x; $latest_cuda_version):
$x + {BUILD_RAFT_ANN_BENCH_CPU_IMAGE: ($x.CUDA_VER == $latest_cuda_version)}; # we don't need to build CPU packages for different CUDA versions

# Checks the current entry to see if it matches the given exclude
def matches($entry; $exclude):
all($exclude | to_entries | .[]; $entry[.key] == .value);
Expand All @@ -32,11 +38,13 @@ def compute_matrix($input):
keys_unsorted as $matrix_keys |
to_entries |
map(.value) |
latest_cuda_version($input.CUDA_VER) as $latest_cuda_version |
[
combinations |
lists2dict($matrix_keys; .) |
compute_ubuntu_version(.) |
compute_cuda_tag(.) |
compute_build_raft_ann_bench_cpu_image(.; $latest_cuda_version) |
filter_excludes(.; $excludes) |
compute_arch(.)
] |
Expand Down
12 changes: 8 additions & 4 deletions ci/create-multiarch-manifest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,10 @@ for arch in $(echo "${ARCHES}" | jq .[] -r); do
check_tag_exists "$RAFT_ANN_BENCH_DATASETS_IMAGE_REPO" "$full_raft_ann_bench_datasets_tag"
raft_ann_bench_datasets_source_tags+=("${org}/${RAFT_ANN_BENCH_DATASETS_IMAGE_REPO}:$full_raft_ann_bench_datasets_tag")

check_tag_exists "$RAFT_ANN_BENCH_CPU_IMAGE_REPO" "$full_raft_ann_bench_cpu_tag"
raft_ann_bench_cpu_source_tags+=("${org}/${RAFT_ANN_BENCH_CPU_IMAGE_REPO}:$full_raft_ann_bench_cpu_tag")
if [ "$RAFT_ANN_BENCH_CPU_IMAGE_BUILT" = "true" ]; then
check_tag_exists "$RAFT_ANN_BENCH_CPU_IMAGE_REPO" "$full_raft_ann_bench_cpu_tag"
raft_ann_bench_cpu_source_tags+=("${org}/${RAFT_ANN_BENCH_CPU_IMAGE_REPO}:$full_raft_ann_bench_cpu_tag")
fi
done

# Create and push Docker multi-arch manifests
Expand All @@ -78,5 +80,7 @@ docker manifest push "${org}/${RAFT_ANN_BENCH_IMAGE_REPO}:${raft_ann_bench_tag}"
docker manifest create "${org}/${RAFT_ANN_BENCH_DATASETS_IMAGE_REPO}:${raft_ann_bench_datasets_tag}" "${raft_ann_bench_datasets_source_tags[@]}"
docker manifest push "${org}/${RAFT_ANN_BENCH_DATASETS_IMAGE_REPO}:${raft_ann_bench_datasets_tag}"

docker manifest create "${org}/${RAFT_ANN_BENCH_CPU_IMAGE_REPO}:${raft_ann_bench_cpu_tag}" "${raft_ann_bench_cpu_source_tags[@]}"
docker manifest push "${org}/${RAFT_ANN_BENCH_CPU_IMAGE_REPO}:${raft_ann_bench_cpu_tag}"
if [ "$RAFT_ANN_BENCH_CPU_IMAGE_BUILT" = "true" ]; then
docker manifest create "${org}/${RAFT_ANN_BENCH_CPU_IMAGE_REPO}:${raft_ann_bench_cpu_tag}" "${raft_ann_bench_cpu_source_tags[@]}"
docker manifest push "${org}/${RAFT_ANN_BENCH_CPU_IMAGE_REPO}:${raft_ann_bench_cpu_tag}"
fi
50 changes: 0 additions & 50 deletions ci/delete-temp-images.sh

This file was deleted.

2 changes: 1 addition & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}')
CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}')
CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}

#Get <major>.<minor> for next version
# Get <major>.<minor> for next version
NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
Expand Down
10 changes: 5 additions & 5 deletions dockerhub-readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The RAPIDS suite of software libraries gives you the freedom to execute end-to-e
**NOTE:** Review our [system requirements](https://docs.rapids.ai/install#system-req) to ensure you have a compatible system!


### Current Version - RAPIDS v24.08
### Current Version - RAPIDS v24.10

RAPIDS Libraries included in the images:
- `cuDF`
Expand Down Expand Up @@ -39,7 +39,7 @@ There are two types:

The tag naming scheme for RAPIDS images incorporates key platform details into the tag as shown below:
```
24.08-cuda11.8-py3.11
24.10-cuda12.5-py3.12
^ ^ ^
| | Python version
| |
Expand All @@ -48,7 +48,7 @@ The tag naming scheme for RAPIDS images incorporates key platform details into t
RAPIDS version
```

**Note: Nightly builds of the images have the RAPIDS version appended with an `a` (ie `24.08a-cuda11.8-py3.11`)**
**Note: Nightly builds of the images have the RAPIDS version appended with an `a` (i.e. `24.10a-cuda12.5-py3.12`)**

## Usage

Expand Down Expand Up @@ -81,7 +81,7 @@ $ docker run \
-e EXTRA_CONDA_PACKAGES="jq" \
-e EXTRA_PIP_PACKAGES="beautifulsoup4" \
-p 8888:8888 \
rapidsai/notebooks:24.08-cuda11.8-py3.11
rapidsai/notebooks:24.10-cuda12.5-py3.12
```

### Bind Mounts
Expand All @@ -106,7 +106,7 @@ $ docker run \
--gpus all \
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-v $(pwd)/environment.yml:/home/rapids/environment.yml \
rapidsai/base:24.08-cuda11.8-py3.11
rapidsai/base:24.10-cuda12.5-py3.12
```

### Use JupyterLab to Explore the Notebooks
Expand Down
19 changes: 9 additions & 10 deletions matrix-test.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# CUDA_VER is `<major>.<minor>` (e.g. `12.0`)

pull-request:
- { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.9', GPU: 'v100', DRIVER: 'earliest' }
- { CUDA_VER: '12.0', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.2', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
- { CUDA_VER: '12.5', ARCH: 'amd64', PYTHON_VER: '3.11', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'earliest' }
- { CUDA_VER: '12.0', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
- { CUDA_VER: '12.5', ARCH: 'amd64', PYTHON_VER: '3.12', GPU: 'v100', DRIVER: 'latest' }
branch:
- { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.9', GPU: 'v100', DRIVER: 'earliest' }
- { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.9', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.0', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.0', ARCH: 'arm64', PYTHON_VER: '3.10', GPU: 'a100', DRIVER: 'latest' }
- { CUDA_VER: '12.2', ARCH: 'amd64', PYTHON_VER: '3.11', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.5', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
- { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'earliest' }
- { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.0', ARCH: 'amd64', PYTHON_VER: '3.11', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.0', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
- { CUDA_VER: '12.5', ARCH: 'amd64', PYTHON_VER: '3.12', GPU: 'v100', DRIVER: 'latest' }
- { CUDA_VER: '12.5', ARCH: 'arm64', PYTHON_VER: '3.12', GPU: 'a100', DRIVER: 'latest' }
5 changes: 2 additions & 3 deletions matrix.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
CUDA_VER: # Should be `<major>.<minor>.<patch>` (e.g. `11.2.2`)
CUDA_VER: # Should be `<major>.<minor>.<patch>` (e.g. `12.5.1`)
- "11.8.0"
- "12.0.1"
- "12.2.2"
- "12.5.1"
PYTHON_VER:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
Loading

0 comments on commit 4a97818

Please sign in to comment.