From 7c371bf6c9adaff9b1e82f70fea86bb8bb3f20d1 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Tue, 19 Mar 2024 10:39:15 +0100 Subject: [PATCH 01/12] Use new runners for GPU jobs Signed-off-by: Jordan Jacobelli --- .github/workflows/build.yaml | 14 +++++++------- .github/workflows/pr.yaml | 22 +++++++++++----------- .github/workflows/test.yaml | 6 +++--- ci/build_cpp.sh | 3 +++ 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f22d60673..3dd85587b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@use-new-runners with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@use-new-runners with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@use-new-runners with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,19 +56,19 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@use-new-runners with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} - node_type: "gpu-v100-latest-1" + node_type: "gpu-l4-latest-1-testing" arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@use-new-runners with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -78,7 +78,7 @@ jobs: wheel-publish: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@use-new-runners with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2abbfd078..deb46ca97 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,63 +22,63 @@ jobs: - wheel-tests - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@use-new-runners checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@use-new-runners with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@use-new-runners with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@use-new-runners with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@use-new-runners with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@use-new-runners with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@use-new-runners with: build_type: pull-request - node_type: "gpu-v100-latest-1" + node_type: "gpu-l4-latest-1-testing" arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@use-new-runners with: build_type: pull-request script: ci/build_wheel.sh wheel-tests: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@use-new-runners with: build_type: pull-request script: ci/test_wheel.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@use-new-runners with: arch: '["amd64"]' cuda: '["12.2"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 747f9f544..71fbe1ec0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@use-new-runners with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@use-new-runners with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@use-new-runners with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 7329d4a34..2291cfde3 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -15,6 +15,9 @@ rapids-print-env version=$(rapids-generate-version) +PARALLEL_LEVEL="$(nproc)" +export PARALLEL_LEVEL + rapids-logger "Begin cpp build" # This calls mambabuild when boa is installed (as is the case in the CI images) From 86fae3e0e917900f0b227c160cb86462af681ddf Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Mon, 25 Mar 2024 14:57:21 +0100 Subject: [PATCH 02/12] Block job after cpp tests to debug Signed-off-by: Jordan Jacobelli --- ci/test_cpp.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index e3ff28d9f..f82f1aa0c 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -37,8 +37,13 @@ nvidia-smi # Run librmm gtests from librmm-tests package rapids-logger "Run gtests" +set +e + export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/ ./ci/run_ctests.sh -j20 && EXITCODE=$? || EXITCODE=$?; + +sleep 10h + rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} From 3eb9908a92242310a90d5bb761a019e4bbc1a518 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Mon, 25 Mar 2024 16:27:29 +0100 Subject: [PATCH 03/12] Remove debug Signed-off-by: Jordan Jacobelli --- ci/test_cpp.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index f82f1aa0c..e3ff28d9f 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -37,13 +37,8 @@ nvidia-smi # Run librmm gtests from librmm-tests package rapids-logger "Run gtests" -set +e - export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/ ./ci/run_ctests.sh -j20 && EXITCODE=$? || EXITCODE=$?; - -sleep 10h - rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} From 048f6918e15e4576534826fe1514e62eb45f07bd Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Mon, 8 Apr 2024 15:03:10 +0200 Subject: [PATCH 04/12] Get latest shared-workflows From 7991297eacac10e4710b216c9610aa6c1bdf5a26 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 9 Apr 2024 08:59:51 -0400 Subject: [PATCH 05/12] print network info --- ci/test_cpp.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index e3ff28d9f..0579f4183 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,6 +8,9 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +rapids-logger "Print network info" +ip a + rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ From e759e67be3e8851513b3b85725255df2663ac2ca Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 9 Apr 2024 09:14:58 -0400 Subject: [PATCH 06/12] install `iproute2` tool --- ci/test_cpp.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 0579f4183..6d7983408 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -9,6 +9,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh rapids-logger "Print network info" +sudo apt install -y iproute2 ip a rapids-logger "Generate C++ testing dependencies" From f64df81fa7a91e27ea865648161782e7608d47f4 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 9 Apr 2024 09:26:30 -0400 Subject: [PATCH 07/12] rm `sudo` --- ci/test_cpp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 6d7983408..70da66ee4 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -9,7 +9,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh rapids-logger "Print network info" -sudo apt install -y iproute2 +apt install -y iproute2 ip a rapids-logger "Generate C++ testing dependencies" From 7a896cffd21dfe37a8313c97402116512aaa7101 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 9 Apr 2024 09:26:46 -0400 Subject: [PATCH 08/12] add `apt update` --- ci/test_cpp.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 70da66ee4..055fa9c2a 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -9,6 +9,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh rapids-logger "Print network info" +apt update apt install -y iproute2 ip a From c86b831beff11e0552653ac70b06ef8df8a392db Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 9 Apr 2024 10:03:39 -0400 Subject: [PATCH 09/12] revert `ip a` commits --- ci/test_cpp.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 055fa9c2a..e3ff28d9f 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,11 +8,6 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh -rapids-logger "Print network info" -apt update -apt install -y iproute2 -ip a - rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ From 0acf8decf34bde58226e86732c47de4d75dee28a Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Tue, 16 Apr 2024 15:47:14 +0200 Subject: [PATCH 10/12] Get latest shared-workflows changes From 2671604bc620e53444b438dbbd740f322e4c07d4 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Wed, 17 Apr 2024 13:19:11 +0200 Subject: [PATCH 11/12] Remove usage of l4 nodes Signed-off-by: Jordan Jacobelli --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3dd85587b..723b7ad94 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,7 +62,7 @@ jobs: branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} - node_type: "gpu-l4-latest-1-testing" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index deb46ca97..82d6de125 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -58,7 +58,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@use-new-runners with: build_type: pull-request - node_type: "gpu-l4-latest-1-testing" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" From e11625314b4aaa0aac4fc7566833f4b32c382b0e Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Wed, 17 Apr 2024 13:19:39 +0200 Subject: [PATCH 12/12] Remove changes on PARALLEL_LEVEL Signed-off-by: Jordan Jacobelli --- ci/build_cpp.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 2291cfde3..7329d4a34 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -15,9 +15,6 @@ rapids-print-env version=$(rapids-generate-version) -PARALLEL_LEVEL="$(nproc)" -export PARALLEL_LEVEL - rapids-logger "Begin cpp build" # This calls mambabuild when boa is installed (as is the case in the CI images)