From cf8bda423c0bf0d3a7826f76555cfb098594e0aa Mon Sep 17 00:00:00 2001
From: Hyunsu Cho
Date: Mon, 9 Dec 2024 17:11:58 -0800
Subject: [PATCH] Replace stash-artifacts.{sh,py} -> manage-artifacts.py

Also:

* Remove publish-artifact.sh
* Upload artifacts to /{commit_id}/ prefix
---
 .github/workflows/jvm_tests.yml         |  54 +++++---
 .github/workflows/main.yml              |  63 ++++-----
 .github/workflows/windows.yml           |  16 ++-
 ops/pipeline/build-cpu-arm64.sh         |   9 ++
 ops/pipeline/build-cuda.sh              |   8 +-
 ops/pipeline/build-gpu-rpkg-impl.sh     |   2 +-
 ops/pipeline/build-gpu-rpkg.sh          |   9 ++
 ops/pipeline/build-jvm-manylinux2014.sh |  11 ++
 ops/pipeline/build-manylinux2014.sh     |   9 ++
 ops/pipeline/build-win64-gpu.ps1        |   6 +-
 ops/pipeline/manage-artifacts.py        | 163 ++++++++++++++++++++++++
 ops/pipeline/publish-artifact.sh        |  23 ----
 ops/pipeline/stash-artifacts.ps1        |  49 -------
 ops/pipeline/stash-artifacts.py         | 144 ---------------------
 ops/pipeline/stash-artifacts.sh         |  36 ------
 ops/pipeline/test-python-wheel-impl.sh  |   2 +-
 16 files changed, 283 insertions(+), 321 deletions(-)
 create mode 100644 ops/pipeline/manage-artifacts.py
 delete mode 100755 ops/pipeline/publish-artifact.sh
 delete mode 100644 ops/pipeline/stash-artifacts.ps1
 delete mode 100644 ops/pipeline/stash-artifacts.py
 delete mode 100755 ops/pipeline/stash-artifacts.sh

diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
index 965ea49ccad7..b059c530b01a 100644
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@@ -40,12 +40,6 @@ jobs:
       - name: Log into Docker registry (AWS ECR)
         run: bash ops/pipeline/login-docker-registry.sh
       - run: bash ops/pipeline/build-jvm-manylinux2014.sh ${{ matrix.arch }}
-      - name: Upload libxgboost4j.so
-        run: |
-          libname=lib/libxgboost4j_linux_${{ matrix.arch }}_${{ github.sha }}.so
-          mv -v lib/libxgboost4j.so ${libname}
-          bash ops/pipeline/publish-artifact.sh ${libname} \
-            s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/libxgboost4j/
 
   build-jvm-gpu:
     name: Build libxgboost4j.so with CUDA
@@ -64,7 +58,10 @@
       - run: bash ops/pipeline/build-jvm-gpu.sh
       - name: Stash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh stash build-jvm-gpu lib/libxgboost4j.so
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-jvm-gpu \
+            lib/libxgboost4j.so
 
   build-jvm-mac:
     name: "Build libxgboost4j.dylib for ${{ matrix.description }}"
@@ -75,11 +72,11 @@
         include:
           - description: "MacOS (Apple Silicon)"
            script: ops/pipeline/build-jvm-macos-apple-silicon.sh
-            libname: libxgboost4j_m1_${{ github.sha }}.dylib
+            libname: libxgboost4j_m1.dylib
             runner: macos-14
           - description: "MacOS (Intel)"
            script: ops/pipeline/build-jvm-macos-intel.sh
-            libname: libxgboost4j_intel_${{ github.sha }}.dylib
+            libname: libxgboost4j_intel.dylib
             runner: macos-13
     steps:
       - uses: actions/checkout@v4
@@ -89,8 +86,10 @@
       - name: Upload libxgboost4j.dylib
         run: |
           mv -v lib/libxgboost4j.dylib ${{ matrix.libname }}
-          bash ops/pipeline/publish-artifact.sh ${{ matrix.libname }} \
-            s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/libxgboost4j/
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket xgboost-nightly-builds \
+            --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public \
+            ${{ matrix.libname }}
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
@@ -112,13 +111,18 @@
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh unstash build-jvm-gpu lib/libxgboost4j.so
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-jvm-gpu \
+            --dest-dir lib \
+            libxgboost4j.so
       - run: bash ops/pipeline/build-jvm-doc.sh
       - name: Upload JVM doc
         run: |
-          bash ops/pipeline/publish-artifact.sh \
-            jvm-packages/${{ env.BRANCH_NAME }}.tar.bz2 \
-            s3://xgboost-docs/
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket xgboost-docs \
+            --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
+            jvm-packages/${{ env.BRANCH_NAME }}.tar.bz2
 
   build-test-jvm-packages:
     name: Build and test JVM packages (Linux, Scala ${{ matrix.scala_version }})
@@ -144,8 +148,10 @@
           SCALA_VERSION: ${{ matrix.scala_version }}
       - name: Stash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh stash \
-            build-test-jvm-packages lib/libxgboost4j.so
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-test-jvm-packages \
+            lib/libxgboost4j.so
         if: matrix.scala_version == '2.13'
 
   build-test-jvm-packages-other-os:
@@ -213,7 +219,11 @@
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh unstash build-jvm-gpu lib/libxgboost4j.so
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-jvm-gpu \
+            --dest-dir lib \
+            libxgboost4j.so
       - run: bash ops/pipeline/test-jvm-gpu.sh
         env:
           SCALA_VERSION: ${{ matrix.scala_version }}
@@ -247,9 +257,11 @@
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh \
-            unstash ${{ matrix.variant.artifact_from }} \
-            lib/libxgboost4j.so
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/${{ matrix.variant.artifact_from }} \
+            --dest-dir lib \
+            libxgboost4j.so
           ls -lh lib/libxgboost4j.so
       - name: Deploy JVM packages to S3
         run: |
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d259105ce877..fd1b94c7af4c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -30,7 +30,11 @@
         run: bash ops/pipeline/login-docker-registry.sh
       - run: bash ops/pipeline/build-cpu.sh
       - name: Stash CLI executable
-        run: bash ops/pipeline/stash-artifacts.sh stash build-cpu ./xgboost
+        run: |
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-cpu \
+            ./xgboost
 
   build-cpu-arm64:
     name: Build CPU ARM64 + manylinux_2_28_aarch64 wheel
@@ -49,12 +53,10 @@
       - run: bash ops/pipeline/build-cpu-arm64.sh
       - name: Stash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh stash build-cpu-arm64 \
-            ./xgboost python-package/dist/*.whl
-      - name: Upload Python wheel
-        run: |
-          bash ops/pipeline/publish-artifact.sh python-package/dist/*.whl \
-            s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-cpu-arm64 \
+            ./xgboost python-package/dist/*.whl
 
   build-cuda:
     name: Build CUDA + manylinux_2_28_x86_64 wheel
@@ -74,15 +76,10 @@
           bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 disable-rmm
       - name: Stash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh stash build-cuda \
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-cuda \
             build/testxgboost ./xgboost python-package/dist/*.whl
-      - name: Upload Python wheel
-        run: |
-          for file in python-package/dist/*.whl python-package/dist/meta.json
-          do
-            bash ops/pipeline/publish-artifact.sh "${file}" \
-              s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/
-          done
 
   build-cuda-with-rmm:
     name: Build CUDA with RMM
@@ -102,12 +99,10 @@
           bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 enable-rmm
       - name: Stash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh \
-            stash build-cuda-with-rmm build/testxgboost
-      - name: Upload Python wheel
-        run: |
-          bash ops/pipeline/publish-artifact.sh python-package/dist/*.whl \
-            s3://xgboost-nightly-builds/experimental_build_with_rmm/
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-cuda-with-rmm \
+            build/testxgboost
 
   build-cuda-with-rmm-dev:
     name: Build CUDA with RMM (dev)
@@ -151,13 +146,6 @@
       - name: Log into Docker registry (AWS ECR)
         run: bash ops/pipeline/login-docker-registry.sh
       - run: bash ops/pipeline/build-manylinux2014.sh ${{ matrix.arch }}
-      - name: Upload Python wheel
-        run: |
-          for wheel in python-package/dist/*.whl
-          do
-            bash ops/pipeline/publish-artifact.sh "${wheel}" \
-              s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/
-          done
 
   build-gpu-rpkg:
     name: Build GPU-enabled R package
@@ -174,10 +162,6 @@
       - name: Log into Docker registry (AWS ECR)
         run: bash ops/pipeline/login-docker-registry.sh
       - run: bash ops/pipeline/build-gpu-rpkg.sh
-      - name: Upload R tarball
-        run: |
-          bash ops/pipeline/publish-artifact.sh xgboost_r_gpu_linux_*.tar.gz \
-            s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/
 
 
   test-cpp-gpu:
@@ -213,8 +197,11 @@
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash gtest
         run: |
-          bash ops/pipeline/stash-artifacts.sh unstash ${{ matrix.artifact_from }} \
-            build/testxgboost
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \
+            --dest-dir build \
+            testxgboost
           chmod +x build/testxgboost
       - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.suite }}
 
@@ -260,8 +247,12 @@
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash Python wheel
         run: |
-          bash ops/pipeline/stash-artifacts.sh unstash ${{ matrix.artifact_from }} \
-            python-package/dist/*.whl ./xgboost
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \
+            --dest-dir wheelhouse \
+            *.whl xgboost
+          mv -v wheelhouse/xgboost .
           chmod +x ./xgboost
       - name: Run Python tests, ${{ matrix.description }}
         run: bash ops/pipeline/test-python-wheel.sh ${{ matrix.suite }} ${{ matrix.container }}
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index f97daf761abf..41f3d5be53f7 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -30,9 +30,12 @@
           submodules: "true"
       - run: powershell ops/pipeline/build-win64-gpu.ps1
       - name: Stash files
+        shell: powershell
         run: |
-          powershell ops/pipeline/stash-artifacts.ps1 stash build-win64-gpu `
-            build/testxgboost.exe xgboost.exe `
+          conda activate
+          python ops/pipeline/manage-artifacts.py upload `
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} `
+            --prefix cache/${{ github.run_id }}/build-win64-gpu `
             (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)
 
   test-win64-gpu:
@@ -47,7 +50,12 @@
         with:
           submodules: "true"
       - name: Unstash files
+        shell: powershell
         run: |
-          powershell ops/pipeline/stash-artifacts.ps1 unstash build-win64-gpu `
-            build/testxgboost.exe xgboost.exe python-package/dist/*.whl
+          conda activate
+          python ops/pipeline/manage-artifacts.py download `
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} `
+            --prefix cache/${{ github.run_id }}/build-win64-gpu `
+            --dest-dir python-package/dist `
+            *.whl
       - run: powershell ops/pipeline/test-win64-gpu.ps1
diff --git a/ops/pipeline/build-cpu-arm64.sh b/ops/pipeline/build-cpu-arm64.sh
index 2e0f0ea9ef4d..1c23d4dfe348 100755
--- a/ops/pipeline/build-cpu-arm64.sh
+++ b/ops/pipeline/build-cpu-arm64.sh
@@ -39,3 +39,12 @@ if ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then
   echo "error: libgomp.so was not vendored in the wheel"
   exit -1
 fi
+
+echo "--- Upload Python wheel"
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+  python3 ops/pipeline/manage-artifacts.py upload \
+    --s3-bucket xgboost-nightly-builds \
+    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
+    python-package/dist/*.whl
+fi
diff --git a/ops/pipeline/build-cuda.sh b/ops/pipeline/build-cuda.sh
index 1965c50563ed..172fa9f85f16 100755
--- a/ops/pipeline/build-cuda.sh
+++ b/ops/pipeline/build-cuda.sh
@@ -93,9 +93,9 @@ then
   echo "--- Upload Python wheel"
   if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
   then
-    aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
-      --acl public-read --no-progress
-    aws s3 cp python-package/dist/meta.json s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
-      --acl public-read --no-progress
+    python3 ops/pipeline/manage-artifacts.py upload \
+      --s3-bucket xgboost-nightly-builds \
+      --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
+      python-package/dist/*.whl python-package/dist/meta.json
   fi
 fi
diff --git a/ops/pipeline/build-gpu-rpkg-impl.sh b/ops/pipeline/build-gpu-rpkg-impl.sh
index 2815b8f448f1..2b803b926271 100755
--- a/ops/pipeline/build-gpu-rpkg-impl.sh
+++ b/ops/pipeline/build-gpu-rpkg-impl.sh
@@ -33,4 +33,4 @@ cp -v lib/xgboost.so xgboost_rpack/src/
 echo 'all:' > xgboost_rpack/src/Makefile
 echo 'all:' > xgboost_rpack/src/Makefile.win
 mv xgboost_rpack/ xgboost/
-tar cvzf xgboost_r_gpu_linux_${commit_hash}.tar.gz xgboost/
+tar cvzf xgboost_r_gpu_linux.tar.gz xgboost/
diff --git a/ops/pipeline/build-gpu-rpkg.sh b/ops/pipeline/build-gpu-rpkg.sh
index a96a2a4a0247..07a08ff15385 100755
--- a/ops/pipeline/build-gpu-rpkg.sh
+++ b/ops/pipeline/build-gpu-rpkg.sh
@@ -8,6 +8,7 @@ then
   exit 1
 fi
 
+source ops/pipeline/classify-git-branch.sh
 source ops/pipeline/get-docker-registry-details.sh
 
 CONTAINER_TAG=${DOCKER_REGISTRY_URL}/xgb-ci.gpu_build_r_rockylinux8:main
@@ -18,3 +19,11 @@ python3 ops/docker_run.py \
   --container-tag ${CONTAINER_TAG} \
   -- ops/pipeline/build-gpu-rpkg-impl.sh \
   ${GITHUB_SHA}
+
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+  python3 ops/pipeline/manage-artifacts.py upload \
+    --s3-bucket xgboost-nightly-builds \
+    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
+    xgboost_r_gpu_linux.tar.gz
+fi
diff --git a/ops/pipeline/build-jvm-manylinux2014.sh b/ops/pipeline/build-jvm-manylinux2014.sh
index 4eaae23bf7bc..068fb5fb0c44 100755
--- a/ops/pipeline/build-jvm-manylinux2014.sh
+++ b/ops/pipeline/build-jvm-manylinux2014.sh
@@ -12,6 +12,7 @@ fi
 arch=$1
 container_id="xgb-ci.manylinux2014_${arch}"
 
+source ops/pipeline/classify-git-branch.sh
 source ops/pipeline/get-docker-registry-details.sh
 
 CONTAINER_TAG="${DOCKER_REGISTRY_URL}/${container_id}:main"
@@ -26,3 +27,13 @@ python3 ops/docker_run.py \
   "cd build && cmake .. -DJVM_BINDINGS=ON -DUSE_OPENMP=ON && make -j$(nproc)"
 ldd lib/libxgboost4j.so
 objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu
+
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+  libname=lib/libxgboost4j_linux_${arch}.so
+  mv -v lib/libxgboost4j.so ${libname}
+  python3 ops/pipeline/manage-artifacts.py upload \
+    --s3-bucket xgboost-nightly-builds \
+    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
+    ${libname}
+fi
diff --git a/ops/pipeline/build-manylinux2014.sh b/ops/pipeline/build-manylinux2014.sh
index b572fed0186a..ae2b7598bf8b 100755
--- a/ops/pipeline/build-manylinux2014.sh
+++ b/ops/pipeline/build-manylinux2014.sh
@@ -16,6 +16,7 @@ fi
 
 arch="$1"
 
+source ops/pipeline/classify-git-branch.sh
 source ops/pipeline/get-docker-registry-details.sh
 
 WHEEL_TAG="manylinux2014_${arch}"
@@ -65,3 +66,11 @@ python3 ops/script/rename_whl.py \
   --platform-tag ${WHEEL_TAG}
 rm -v python-package/dist/xgboost_cpu-*.whl
 mv -v wheelhouse/xgboost_cpu-*.whl python-package/dist/
+
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+  python3 ops/pipeline/manage-artifacts.py upload \
+    --s3-bucket xgboost-nightly-builds \
+    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
+    python-package/dist/*.whl
+fi
diff --git a/ops/pipeline/build-win64-gpu.ps1 b/ops/pipeline/build-win64-gpu.ps1
index 76cc955059b8..26c9c0cfcbd1 100644
--- a/ops/pipeline/build-win64-gpu.ps1
+++ b/ops/pipeline/build-win64-gpu.ps1
@@ -40,7 +40,9 @@ if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
 Write-Host "--- Upload Python wheel"
 cd ..
 if ( $is_release_branch -eq 1 ) {
-  aws s3 cp (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) `
-    s3://xgboost-nightly-builds/$Env:BRANCH_NAME/ --acl public-read --no-progress
+  python ops/pipeline/manage-artifacts.py upload `
+    --s3-bucket 'xgboost-nightly-builds' `
+    --prefix "$Env:BRANCH_NAME/$Env:GITHUB_SHA" --make-public `
+    (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)
   if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
 }
diff --git a/ops/pipeline/manage-artifacts.py b/ops/pipeline/manage-artifacts.py
new file mode 100644
index 000000000000..e847fd8c8824
--- /dev/null
+++ b/ops/pipeline/manage-artifacts.py
@@ -0,0 +1,163 @@
+"""
+Upload or download artifacts in an S3 bucket for later use
+
+Note. This script takes in all inputs via command-line arguments.
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+from pathlib import Path
+from urllib.parse import SplitResult, urlsplit, urlunsplit
+
+
+def resolve(x: Path) -> Path:
+    return x.expanduser().resolve()
+
+
+def path_equals(a: Path, b: Path) -> bool:
+    return resolve(a) == resolve(b)
+
+
+def compute_s3_url(*, s3_bucket: str, prefix: str, artifact: str) -> str:
+    if prefix == "":
+        return f"s3://{s3_bucket}/{artifact}"
+    return f"s3://{s3_bucket}/{prefix}/{artifact}"
+
+
+def aws_s3_upload(*, src: Path, dest: str, make_public: bool) -> None:
+    cli_args = ["aws", "s3", "cp", "--no-progress", str(src), dest]
+    if make_public:
+        cli_args.extend(["--acl", "public-read"])
+    print(" ".join(cli_args))
+    subprocess.run(
+        cli_args,
+        check=True,
+        encoding="utf-8",
+    )
+
+
+def aws_s3_download(*, src: str, dest_dir: Path) -> None:
+    cli_args = ["aws", "s3", "cp", "--no-progress", src, str(dest_dir)]
+    print(" ".join(cli_args))
+    subprocess.run(
+        cli_args,
+        check=True,
+        encoding="utf-8",
+    )
+
+
+def aws_s3_download_with_wildcard(*, src: str, dest_dir: Path) -> None:
+    parsed_src = urlsplit(src)
+    src_dir = urlunsplit(
+        SplitResult(
+            scheme="s3",
+            netloc=parsed_src.netloc,
+            path=os.path.dirname(parsed_src.path),
+            query="",
+            fragment="",
+        )
+    )
+    src_glob = os.path.basename(parsed_src.path)
+    cli_args = [
+        "aws",
+        "s3",
+        "cp",
+        "--recursive",
+        "--no-progress",
+        "--exclude",
+        "'*'",
+        "--include",
+        src_glob,
+        src_dir,
+        str(dest_dir),
+    ]
+    print(" ".join(cli_args))
+    subprocess.run(
+        cli_args,
+        check=True,
+        encoding="utf-8",
+    )
+
+
+def upload(*, args: argparse.Namespace) -> None:
+    print(f"Uploading artifacts to prefix {args.prefix}...")
+    for artifact in args.artifacts:
+        artifact_path = Path(artifact)
+        s3_url = compute_s3_url(
+            s3_bucket=args.s3_bucket, prefix=args.prefix, artifact=artifact_path.name
+        )
+        aws_s3_upload(src=artifact_path, dest=s3_url, make_public=args.make_public)
+
+
+def download(*, args: argparse.Namespace) -> None:
+    print(f"Downloading artifacts from prefix {args.prefix}...")
+    dest_dir = Path(args.dest_dir)
+    print(f"mkdir -p {str(dest_dir)}")
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    for artifact in args.artifacts:
+        s3_url = compute_s3_url(
+            s3_bucket=args.s3_bucket, prefix=args.prefix, artifact=artifact
+        )
+        if "*" in artifact:
+            aws_s3_download_with_wildcard(src=s3_url, dest_dir=dest_dir)
+        else:
+            aws_s3_download(src=s3_url, dest_dir=dest_dir)
+
+
+if __name__ == "__main__":
+    # Ensure that the current working directory is the project root
+    if not (Path.cwd() / "ops").is_dir() or not path_equals(
+        Path(__file__).parent.parent, Path.cwd() / "ops"
+    ):
+        x = Path(__file__).name
+        raise RuntimeError(f"Script {x} must be run at the project's root directory")
+
+    root_parser = argparse.ArgumentParser()
+    subparser_factory = root_parser.add_subparsers(required=True, dest="command")
+    parsers = {}
+    for command in ["upload", "download"]:
+        parsers[command] = subparser_factory.add_parser(command)
+        parsers[command].add_argument(
+            "--s3-bucket",
+            type=str,
+            required=True,
+            help="Name of the S3 bucket to store the artifact",
+        )
+        parsers[command].add_argument(
+            "--prefix",
+            type=str,
+            required=True,
+            help=(
+                "Where the artifact(s) would be stored. The artifact(s) will be stored at "
+                "s3://[s3-bucket]/[prefix]/[filename]."
+ ), + ) + parsers[command].add_argument( + "artifacts", + type=str, + nargs="+", + metavar="artifact", + help=f"Artifact(s) to {command}", + ) + + parsers["upload"].add_argument( + "--make-public", action="store_true", help="Make artifact publicly accessible" + ) + parsers["download"].add_argument( + "--dest-dir", type=str, required=True, help="Where to download artifact(s)" + ) + + if len(sys.argv) == 1: + print("1. Upload artifact(s)") + parsers["upload"].print_help() + print("\n2. Download artifact(s)") + parsers["download"].print_help() + sys.exit(1) + + parsed_args = root_parser.parse_args() + if parsed_args.command == "upload": + upload(args=parsed_args) + elif parsed_args.command == "download": + download(args=parsed_args) diff --git a/ops/pipeline/publish-artifact.sh b/ops/pipeline/publish-artifact.sh deleted file mode 100755 index adcb3c521d2a..000000000000 --- a/ops/pipeline/publish-artifact.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -## Publish artifacts in an S3 bucket -## Meant to be used inside GitHub Actions - -set -euo pipefail - -source ops/pipeline/enforce-ci.sh - -if [[ $# -ne 2 ]] -then - echo "Usage: $0 [artifact] [s3_url]" - exit 1 -fi - -artifact="$1" -s3_url="$2" - -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - echo "aws s3 cp ${artifact} ${s3_url} --acl public-read --no-progress" - aws s3 cp "${artifact}" "${s3_url}" --acl public-read --no-progress -fi diff --git a/ops/pipeline/stash-artifacts.ps1 b/ops/pipeline/stash-artifacts.ps1 deleted file mode 100644 index 9b9989bf376d..000000000000 --- a/ops/pipeline/stash-artifacts.ps1 +++ /dev/null @@ -1,49 +0,0 @@ -[CmdletBinding()] -Param( - [Parameter( - Mandatory=$true, - Position=0 - )][string]$command, - [Parameter( - Mandatory=$true, - Position=1 - )][string]$remote_prefix, - [Parameter( - Mandatory=$true, - Position=2, - ValueFromRemainingArguments=$true - )][string[]]$artifacts -) - -## Convenience wrapper for ops/pipeline/stash-artifacts.py -## Meant to be used inside GitHub Actions - -$ErrorActionPreference = "Stop" - -. ops/pipeline/enforce-ci.ps1 - -foreach ($env in "GITHUB_REPOSITORY", "GITHUB_RUN_ID", "RUNS_ON_S3_BUCKET_CACHE") { - $val = [Environment]::GetEnvironmentVariable($env) - if ($val -eq $null) { - Write-Host "Error: $env must be set." - exit 1 - } -} - -$artifact_stash_prefix = "cache/${Env:GITHUB_REPOSITORY}/stash/${Env:GITHUB_RUN_ID}" - -conda activate - -Write-Host @" -python ops/pipeline/stash-artifacts.py ` - --command "${command}" ` - --s3-bucket "${Env:RUNS_ON_S3_BUCKET_CACHE}" ` - --prefix "${artifact_stash_prefix}/${remote_prefix}" ` - -- $artifacts -"@ -python ops/pipeline/stash-artifacts.py ` - --command "${command}" ` - --s3-bucket "${Env:RUNS_ON_S3_BUCKET_CACHE}" ` - --prefix "${artifact_stash_prefix}/${remote_prefix}" ` - -- $artifacts -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } diff --git a/ops/pipeline/stash-artifacts.py b/ops/pipeline/stash-artifacts.py deleted file mode 100644 index 151e187513da..000000000000 --- a/ops/pipeline/stash-artifacts.py +++ /dev/null @@ -1,144 +0,0 @@ -""" -Stash an artifact in an S3 bucket for later use - -Note. This script takes in all inputs via environment variables - except the path to the artifact(s). 
-""" - -import argparse -import os -import subprocess -from pathlib import Path -from urllib.parse import SplitResult, urlsplit, urlunsplit - - -def resolve(x: Path) -> Path: - return x.expanduser().resolve() - - -def path_equals(a: Path, b: Path) -> bool: - return resolve(a) == resolve(b) - - -def compute_s3_url(s3_bucket: str, prefix: str, artifact: Path) -> str: - filename = artifact.name - relative_path = resolve(artifact).relative_to(Path.cwd()) - if resolve(artifact.parent) == resolve(Path.cwd()): - full_prefix = prefix - else: - full_prefix = f"{prefix}/{str(relative_path.parent)}" - return f"s3://{s3_bucket}/{full_prefix}/{filename}" - - -def aws_s3_upload(src: Path, dest: str) -> None: - cli_args = ["aws", "s3", "cp", "--no-progress", str(src), dest] - print(" ".join(cli_args)) - subprocess.run( - cli_args, - check=True, - encoding="utf-8", - ) - - -def aws_s3_download(src: str, dest: Path) -> None: - cli_args = ["aws", "s3", "cp", "--no-progress", src, str(dest)] - print(" ".join(cli_args)) - subprocess.run( - cli_args, - check=True, - encoding="utf-8", - ) - - -def aws_s3_download_with_wildcard(src: str, dest: Path) -> None: - parsed_src = urlsplit(src) - src_dir = urlunsplit( - SplitResult( - scheme="s3", - netloc=parsed_src.netloc, - path=os.path.dirname(parsed_src.path), - query="", - fragment="", - ) - ) - dest_dir = dest.parent - src_glob = os.path.basename(parsed_src.path) - cli_args = [ - "aws", - "s3", - "cp", - "--recursive", - "--no-progress", - "--exclude", - "'*'", - "--include", - src_glob, - src_dir, - str(dest_dir), - ] - print(" ".join(cli_args)) - subprocess.run( - cli_args, - check=True, - encoding="utf-8", - ) - - -def upload(args: argparse.Namespace) -> None: - print(f"Stashing artifacts to prefix {args.prefix}...") - for artifact in args.artifacts: - artifact_path = Path(artifact) - s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path) - aws_s3_upload(artifact_path, s3_url) - - -def download(args: argparse.Namespace) -> None: - print(f"Unstashing artifacts from prefix {args.prefix}...") - for artifact in args.artifacts: - artifact_path = Path(artifact) - print(f"mkdir -p {str(artifact_path.parent)}") - artifact_path.parent.mkdir(parents=True, exist_ok=True) - s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path) - if "*" in artifact: - aws_s3_download_with_wildcard(s3_url, artifact_path) - else: - aws_s3_download(s3_url, artifact_path) - - -if __name__ == "__main__": - # Ensure that the current working directory is the project root - if not (Path.cwd() / "ops").is_dir() or not path_equals( - Path(__file__).parent.parent, Path.cwd() / "ops" - ): - x = Path(__file__).name - raise RuntimeError(f"Script {x} must be run at the project's root directory") - - parser = argparse.ArgumentParser() - parser.add_argument( - "--command", - type=str, - choices=["stash", "unstash"], - required=True, - help="Whether to stash or unstash the artifact", - ) - parser.add_argument( - "--s3-bucket", - type=str, - required=True, - help="Name of the S3 bucket to store the artifact", - ) - parser.add_argument( - "--prefix", - type=str, - required=True, - help=( - "Where the artifact would be stored. The artifact will be stored in " - "s3://[s3-bucket]/[prefix]." 
- ), - ) - parser.add_argument("artifacts", type=str, nargs="+", metavar="artifact") - parsed_args = parser.parse_args() - if parsed_args.command == "stash": - upload(parsed_args) - elif parsed_args.command == "unstash": - download(parsed_args) diff --git a/ops/pipeline/stash-artifacts.sh b/ops/pipeline/stash-artifacts.sh deleted file mode 100755 index 98c9695c4227..000000000000 --- a/ops/pipeline/stash-artifacts.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -## Convenience wrapper for ops/pipeline/stash-artifacts.py -## Meant to be used inside GitHub Actions - -set -euo pipefail - -source ops/pipeline/enforce-ci.sh - -if [[ "$#" -lt 3 ]] -then - echo "Usage: $0 {stash,unstash} [remote_prefix] [artifact] [artifact ...]" - exit 1 -fi - -command="$1" -remote_prefix="$2" -shift 2 - -for arg in "GITHUB_REPOSITORY" "GITHUB_RUN_ID" "RUNS_ON_S3_BUCKET_CACHE" -do - if [[ -z "${!arg:-}" ]] - then - echo "Error: $arg must be set." - exit 2 - fi -done - -artifact_stash_prefix="cache/${GITHUB_REPOSITORY}/stash/${GITHUB_RUN_ID}" - -set -x -python3 ops/pipeline/stash-artifacts.py \ - --command "${command}" \ - --s3-bucket "${RUNS_ON_S3_BUCKET_CACHE}" \ - --prefix "${artifact_stash_prefix}/${remote_prefix}" \ - -- "$@" diff --git a/ops/pipeline/test-python-wheel-impl.sh b/ops/pipeline/test-python-wheel-impl.sh index 75bfa5fbaffb..837ff03b24d7 100755 --- a/ops/pipeline/test-python-wheel-impl.sh +++ b/ops/pipeline/test-python-wheel-impl.sh @@ -34,7 +34,7 @@ export PYSPARK_DRIVER_PYTHON=$(which python) export PYSPARK_PYTHON=$(which python) export SPARK_TESTING=1 -pip install -v ./python-package/dist/*.whl +pip install -v ./wheelhouse/*.whl case "$suite" in gpu)