From 8f5b261cadd70b40a28d01b61745fb64a3ba62c1 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho
Date: Sun, 8 Dec 2024 01:32:24 -0800
Subject: [PATCH] Replace stash-artifacts.{sh,py} -> manage-artifacts.py

---
 .github/workflows/main.yml       |  14 ++-
 ops/pipeline/manage-artifacts.py | 166 +++++++++++++++++++++++++++++++
 ops/pipeline/stash-artifacts.py  | 144 ----------------------------
 ops/pipeline/stash-artifacts.sh  |  36 ------
 4 files changed, 177 insertions(+), 183 deletions(-)
 create mode 100644 ops/pipeline/manage-artifacts.py
 delete mode 100644 ops/pipeline/stash-artifacts.py
 delete mode 100755 ops/pipeline/stash-artifacts.sh

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 81602602b517..2c5238f6016d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -32,7 +32,9 @@ jobs:
       - run: bash ops/pipeline/build-cuda.sh
       - name: Stash files
         run: |
-          bash ops/pipeline/stash-artifacts.sh stash build-cuda \
+          python3 ops/pipeline/manage-artifacts.py upload \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/build-cuda \
             build/testxgboost ./xgboost python-package/dist/*.whl
 
   test-cpp-gpu:
@@ -65,7 +67,10 @@ jobs:
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash gtest
         run: |
-          bash ops/pipeline/stash-artifacts.sh unstash ${{ matrix.artifact_from }} \
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \
+            --dest-dir . \
             build/testxgboost
           chmod +x build/testxgboost
       - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.suite }}
@@ -102,7 +107,10 @@ jobs:
         run: bash ops/pipeline/login-docker-registry.sh
       - name: Unstash Python wheel
         run: |
-          bash ops/pipeline/stash-artifacts.sh unstash ${{ matrix.artifact_from }} \
+          python3 ops/pipeline/manage-artifacts.py download \
+            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
+            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \
+            --dest-dir . \
             python-package/dist/*.whl ./xgboost
           chmod +x ./xgboost
       - name: Run Python tests, ${{ matrix.description }}
diff --git a/ops/pipeline/manage-artifacts.py b/ops/pipeline/manage-artifacts.py
new file mode 100644
index 000000000000..0b0d237cc7fb
--- /dev/null
+++ b/ops/pipeline/manage-artifacts.py
@@ -0,0 +1,166 @@
+"""
+Upload artifact(s) to an S3 bucket, or download them again, for later use
+
+Note. This script takes in all inputs via command-line arguments.
+      Run it without any argument to see the usage of each subcommand.
+"""
+
+import argparse
+import os
+import shlex
+import subprocess
+import sys
+from pathlib import Path
+from urllib.parse import SplitResult, urlsplit, urlunsplit
+
+
+def resolve(x: Path) -> Path:
+    """Expand ~ and return the absolute, symlink-free form of a path"""
+    return x.expanduser().resolve()
+
+
+def path_equals(a: Path, b: Path) -> bool:
+    """Check whether two paths point to the same location once resolved"""
+    return resolve(a) == resolve(b)
+
+
+def compute_s3_url(*, s3_bucket: str, prefix: str, artifact: str) -> str:
+    """Build the S3 URL of an artifact; the artifact path is used as given"""
+    if prefix == "":
+        return f"s3://{s3_bucket}/{artifact}"
+    return f"s3://{s3_bucket}/{prefix}/{artifact}"
+
+
+def run_aws_cli(cli_args: list) -> None:
+    """Log a command in shell-quoted form, then run it; raise on failure"""
+    print(shlex.join(cli_args))
+    subprocess.run(
+        cli_args,
+        check=True,
+        encoding="utf-8",
+    )
+
+
+def aws_s3_upload(*, src: Path, dest: str) -> None:
+    """Copy a local file to the given S3 URL"""
+    run_aws_cli(["aws", "s3", "cp", "--no-progress", str(src), dest])
+
+
+def aws_s3_download(*, src: str, dest_dir: Path) -> None:
+    """Copy a single S3 object into a local directory"""
+    run_aws_cli(["aws", "s3", "cp", "--no-progress", src, str(dest_dir)])
+
+
+def aws_s3_download_with_wildcard(*, src: str, dest_dir: Path) -> None:
+    """Copy the S3 objects matching the glob in the last component of src"""
+    parsed_src = urlsplit(src)
+    src_dir = urlunsplit(
+        SplitResult(
+            scheme="s3",
+            netloc=parsed_src.netloc,
+            path=os.path.dirname(parsed_src.path),
+            query="",
+            fragment="",
+        )
+    )
+    src_glob = os.path.basename(parsed_src.path)
+    # No shell is involved, so the filters must be given without quote
+    # characters: a literal "'*'" excludes nothing and copies every object.
+    run_aws_cli(
+        [
+            "aws",
+            "s3",
+            "cp",
+            "--recursive",
+            "--no-progress",
+            "--exclude",
+            "*",
+            "--include",
+            src_glob,
+            src_dir,
+            str(dest_dir),
+        ]
+    )
+
+
+def upload(*, args: argparse.Namespace) -> None:
+    """Upload every artifact to s3://[s3-bucket]/[prefix]/[artifact]"""
+    print(f"Uploading artifacts to prefix {args.prefix}...")
+    for artifact in args.artifacts:
+        s3_url = compute_s3_url(
+            s3_bucket=args.s3_bucket, prefix=args.prefix, artifact=artifact
+        )
+        aws_s3_upload(src=Path(artifact), dest=s3_url)
+
+
+def download(*, args: argparse.Namespace) -> None:
+    """Download every artifact to [dest-dir]/[artifact]"""
+    print(f"Downloading artifacts from prefix {args.prefix}...")
+    dest_root = Path(args.dest_dir)
+    for artifact in args.artifacts:
+        # Keep the directory part of the artifact, so that build/testxgboost
+        # ends up at [dest-dir]/build/testxgboost, where the CI steps expect it
+        dest_dir = dest_root / Path(artifact).parent
+        print(f"mkdir -p {str(dest_dir)}")
+        dest_dir.mkdir(parents=True, exist_ok=True)
+        s3_url = compute_s3_url(
+            s3_bucket=args.s3_bucket, prefix=args.prefix, artifact=artifact
+        )
+        if "*" in artifact:
+            aws_s3_download_with_wildcard(src=s3_url, dest_dir=dest_dir)
+        else:
+            aws_s3_download(src=s3_url, dest_dir=dest_dir)
+
+
+if __name__ == "__main__":
+    # Ensure that the current working directory is the project root
+    if not (Path.cwd() / "ops").is_dir() or not path_equals(
+        Path(__file__).parent.parent, Path.cwd() / "ops"
+    ):
+        x = Path(__file__).name
+        raise RuntimeError(f"Script {x} must be run at the project's root directory")
+
+    root_parser = argparse.ArgumentParser()
+    subparser_factory = root_parser.add_subparsers(required=True, dest="command")
+    parsers = {}
+    for command in ["upload", "download"]:
+        parsers[command] = subparser_factory.add_parser(command)
+        parsers[command].add_argument(
+            "--s3-bucket",
+            type=str,
+            required=True,
+            help="Name of the S3 bucket to store the artifact",
+        )
+        parsers[command].add_argument(
+            "--prefix",
+            type=str,
+            required=True,
+            help=(
+                "Where the artifact(s) would be stored. The artifact(s) will be stored at "
+                "s3://[s3-bucket]/[prefix]/[artifact]."
+            ),
+        )
+        parsers[command].add_argument(
+            "artifacts",
+            type=str,
+            nargs="+",
+            metavar="artifact",
+            help=f"Artifact(s) to {command}",
+        )
+
+    parsers["download"].add_argument(
+        "--dest-dir", type=str, required=True, help="Where to download artifact(s)"
+    )
+
+    if len(sys.argv) == 1:
+        print("1. Upload artifact(s)")
+        parsers["upload"].print_help()
+        print("\n2. Download artifact(s)")
+        parsers["download"].print_help()
+        sys.exit(1)
+
+    parsed_args = root_parser.parse_args()
+    if parsed_args.command == "upload":
+        upload(args=parsed_args)
+    elif parsed_args.command == "download":
+        download(args=parsed_args)
diff --git a/ops/pipeline/stash-artifacts.py b/ops/pipeline/stash-artifacts.py
deleted file mode 100644
index 151e187513da..000000000000
--- a/ops/pipeline/stash-artifacts.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""
-Stash an artifact in an S3 bucket for later use
-
-Note. This script takes in all inputs via environment variables
-      except the path to the artifact(s).
-"""
-
-import argparse
-import os
-import subprocess
-from pathlib import Path
-from urllib.parse import SplitResult, urlsplit, urlunsplit
-
-
-def resolve(x: Path) -> Path:
-    return x.expanduser().resolve()
-
-
-def path_equals(a: Path, b: Path) -> bool:
-    return resolve(a) == resolve(b)
-
-
-def compute_s3_url(s3_bucket: str, prefix: str, artifact: Path) -> str:
-    filename = artifact.name
-    relative_path = resolve(artifact).relative_to(Path.cwd())
-    if resolve(artifact.parent) == resolve(Path.cwd()):
-        full_prefix = prefix
-    else:
-        full_prefix = f"{prefix}/{str(relative_path.parent)}"
-    return f"s3://{s3_bucket}/{full_prefix}/{filename}"
-
-
-def aws_s3_upload(src: Path, dest: str) -> None:
-    cli_args = ["aws", "s3", "cp", "--no-progress", str(src), dest]
-    print(" ".join(cli_args))
-    subprocess.run(
-        cli_args,
-        check=True,
-        encoding="utf-8",
-    )
-
-
-def aws_s3_download(src: str, dest: Path) -> None:
-    cli_args = ["aws", "s3", "cp", "--no-progress", src, str(dest)]
-    print(" ".join(cli_args))
-    subprocess.run(
-        cli_args,
-        check=True,
-        encoding="utf-8",
-    )
-
-
-def aws_s3_download_with_wildcard(src: str, dest: Path) -> None:
-    parsed_src = urlsplit(src)
-    src_dir = urlunsplit(
-        SplitResult(
-            scheme="s3",
-            netloc=parsed_src.netloc,
-            path=os.path.dirname(parsed_src.path),
-            query="",
-            fragment="",
-        )
-    )
-    dest_dir = dest.parent
-    src_glob = os.path.basename(parsed_src.path)
-    cli_args = [
-        "aws",
-        "s3",
-        "cp",
-        "--recursive",
-        "--no-progress",
-        "--exclude",
-        "'*'",
-        "--include",
-        src_glob,
-        src_dir,
-        str(dest_dir),
-    ]
-    print(" ".join(cli_args))
-    subprocess.run(
-        cli_args,
-        check=True,
-        encoding="utf-8",
-    )
-
-
-def upload(args: argparse.Namespace) -> None:
-    print(f"Stashing artifacts to prefix {args.prefix}...")
-    for artifact in args.artifacts:
-        artifact_path = Path(artifact)
-        s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path)
-        aws_s3_upload(artifact_path, s3_url)
-
-
-def download(args: argparse.Namespace) -> None:
-    print(f"Unstashing artifacts from prefix {args.prefix}...")
-    for artifact in args.artifacts:
-        artifact_path = Path(artifact)
-        print(f"mkdir -p {str(artifact_path.parent)}")
-        artifact_path.parent.mkdir(parents=True, exist_ok=True)
-        s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path)
-        if "*" in artifact:
-            aws_s3_download_with_wildcard(s3_url, artifact_path)
-        else:
-            aws_s3_download(s3_url, artifact_path)
-
-
-if __name__ == "__main__":
-    # Ensure that the current working directory is the project root
-    if not (Path.cwd() / "ops").is_dir() or not path_equals(
-        Path(__file__).parent.parent, Path.cwd() / "ops"
-    ):
-        x = Path(__file__).name
-        raise RuntimeError(f"Script {x} must be run at the project's root directory")
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--command",
-        type=str,
-        choices=["stash", "unstash"],
-        required=True,
-        help="Whether to stash or unstash the artifact",
-    )
-    parser.add_argument(
-        "--s3-bucket",
-        type=str,
-        required=True,
-        help="Name of the S3 bucket to store the artifact",
-    )
-    parser.add_argument(
-        "--prefix",
-        type=str,
-        required=True,
-        help=(
-            "Where the artifact would be stored. The artifact will be stored in "
-            "s3://[s3-bucket]/[prefix]."
-        ),
-    )
-    parser.add_argument("artifacts", type=str, nargs="+", metavar="artifact")
-    parsed_args = parser.parse_args()
-    if parsed_args.command == "stash":
-        upload(parsed_args)
-    elif parsed_args.command == "unstash":
-        download(parsed_args)
diff --git a/ops/pipeline/stash-artifacts.sh b/ops/pipeline/stash-artifacts.sh
deleted file mode 100755
index 98c9695c4227..000000000000
--- a/ops/pipeline/stash-artifacts.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-## Convenience wrapper for ops/pipeline/stash-artifacts.py
-## Meant to be used inside GitHub Actions
-
-set -euo pipefail
-
-source ops/pipeline/enforce-ci.sh
-
-if [[ "$#" -lt 3 ]]
-then
-  echo "Usage: $0 {stash,unstash} [remote_prefix] [artifact] [artifact ...]"
-  exit 1
-fi
-
-command="$1"
-remote_prefix="$2"
-shift 2
-
-for arg in "GITHUB_REPOSITORY" "GITHUB_RUN_ID" "RUNS_ON_S3_BUCKET_CACHE"
-do
-  if [[ -z "${!arg:-}" ]]
-  then
-    echo "Error: $arg must be set."
-    exit 2
-  fi
-done
-
-artifact_stash_prefix="cache/${GITHUB_REPOSITORY}/stash/${GITHUB_RUN_ID}"
-
-set -x
-python3 ops/pipeline/stash-artifacts.py \
-  --command "${command}" \
-  --s3-bucket "${RUNS_ON_S3_BUCKET_CACHE}" \
-  --prefix "${artifact_stash_prefix}/${remote_prefix}" \
-  -- "$@"