# Native JNI S3 PyTorch #1906
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds the PyTorch JNI native libraries on several platforms and syncs
# them to S3.
name: Native JNI S3 PyTorch

on:
  # Manual runs can override the PyTorch and CUDA versions used by the jobs.
  workflow_dispatch:
    inputs:
      pt_version:
        description: "pytorch version"
        required: false
      cuda:
        description: "CUDA version"
        required: true
        default: "cu124"
  # Daily at 05:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 5 * * *"

# `id-token: write` allows jobs to request an OIDC token, which the jobs in
# this workflow exchange for AWS credentials; repository contents are only
# read.
permissions:
  contents: read
  id-token: write
jobs:
  # Linux build inside a CUDA devel container: compiles the CPU JNI library,
  # runs the integration tests against it, compiles the CUDA variants
  # (regular and precxx11), then syncs the jnilib directory to S3.
  build-pytorch-jni-linux:
    if: github.repository == 'deepjavalibrary/djl'
    runs-on: ubuntu-latest
    container: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
    steps:
      # checkout/cache are pinned to v4, matching the other jobs in this file.
      - uses: actions/checkout@v4
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: 'corretto'
          java-version: 17
      - uses: actions/cache@v4
        with:
          path: ~/.gradle/caches
          key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
          restore-keys: |
            ${{ runner.os }}-gradle-
      - name: Install Environment
        run: |
          apt-get update
          DEBIAN_FRONTEND=noninteractive apt-get install -y locales curl unzip software-properties-common
          apt-get install -y python3-pip python3-distutils
          pip3 install awscli cmake
          ln -s /usr/local/bin/cmake /usr/bin/cmake
      - name: Release JNI prep
        shell: bash
        env:
          # Dispatch inputs are handed to the script as environment variables
          # instead of being interpolated into it, so their contents are never
          # parsed as shell code. Both are empty on scheduled runs.
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
          CUDA_INPUT: ${{ github.event.inputs.cuda }}
        run: |
          # Without an explicit input, read the pytorch entry from gradle/libs.versions.toml.
          export PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          echo $PYTORCH_VERSION
          ./gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=$PYTORCH_VERSION
          ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch"
          ./gradlew :engines:pytorch:pytorch-native:cleanJNI
          export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0"
          # Derived from the input only (not from any pre-existing CUDA_VERSION), defaulting to cu124.
          export CUDA_VERSION=${CUDA_INPUT:-cu124}
          ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcuda=$CUDA_VERSION -Ppt_version=$PYTORCH_VERSION
          ./gradlew :engines:pytorch:pytorch-native:cleanJNI
          ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pprecxx11 -Pcuda=$CUDA_VERSION -Ppt_version=$PYTORCH_VERSION
          ./gradlew :engines:pytorch:pytorch-native:cleanJNI
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::425969335547:role/djl-ci-publish-role
          aws-region: us-east-2
      - name: Copy files to S3 with the AWS CLI
        env:
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib
          aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*"
# Job: build-pytorch-jni-precxx11
  # Builds the precxx11 JNI library in an Amazon Linux 2 container with
  # gcc10, runs the integration tests with PYTORCH_PRECXX11=true, and syncs
  # the jnilib directory to S3.
  build-pytorch-jni-precxx11:
    if: github.repository == 'deepjavalibrary/djl'
    runs-on: ubuntu-latest
    container:
      image: amazonlinux:2
      env:
        JAVA_HOME: /usr/lib/jvm/java-17-amazon-corretto
    timeout-minutes: 30
    # NOTE(review): this job runs on ubuntu-latest, yet waits for the Graviton
    # runner to be created — looks copied from the aarch64 job; confirm the
    # dependency is intended.
    needs: create-aarch64-runner
    steps:
      - name: Install compiler environment
        run: |
          yum -y update
          yum -y install patch git cmake3 python3-devel java-17-amazon-corretto-devel jq
          yum -y install gcc10 gcc10-c++
          ln -sf /usr/bin/gcc10-gcc /usr/bin/gcc
          ln -sf /usr/bin/gcc10-cc /usr/bin/cc
          ln -sf /usr/bin/gcc10-g++ /usr/bin/g++
          ln -sf /usr/bin/gcc10-c++ /usr/bin/c++
          ln -sf /usr/bin/gcc10-ar /usr/bin/ar
          ln -sf /usr/bin/cmake3 /usr/bin/cmake
          pip3 install awscli --upgrade
      - uses: taiki-e/checkout-action@v1
      - name: Release JNI prep
        env:
          # The dispatch input is handed to the script as an environment
          # variable instead of being interpolated into it, so its contents
          # are never parsed as shell code. Empty on scheduled runs.
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          export PATH=$PATH:$JAVA_HOME/bin
          # Without an explicit input, read the pytorch entry from gradle/libs.versions.toml.
          export PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          echo $PYTORCH_VERSION
          ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pprecxx11 -Ppt_version=$PYTORCH_VERSION
          export PYTORCH_PRECXX11=true
          ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch"
      # Requests an OIDC token from the Actions token endpoint, exchanges it
      # through `aws sts assume-role-with-web-identity`, masks the resulting
      # values in the log and exports them to later steps via GITHUB_ENV.
      - name: Configure AWS Credentials
        run: |
          oidc_token=$(curl -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
            "$ACTIONS_ID_TOKEN_REQUEST_URL&audience=sts.amazonaws.com" | jq -r ".value")
          echo "::add-mask::$oidc_token"
          read -r AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN <<<"$(aws sts assume-role-with-web-identity \
            --region "us-east-2" \
            --role-arn "arn:aws:iam::425969335547:role/djl-ci-publish-role" \
            --role-session-name "native-jni-s3-pytorch-jni-precxx11" \
            --web-identity-token "$oidc_token" \
            --query "[Credentials.AccessKeyId, Credentials.SecretAccessKey, Credentials.SessionToken]" \
            --output text)"
          echo "::add-mask::$AWS_ACCESS_KEY_ID"
          echo "::add-mask::$AWS_SECRET_ACCESS_KEY"
          echo "::add-mask::$AWS_SESSION_TOKEN"
          echo "AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" >> "$GITHUB_ENV"
          echo "AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" >> "$GITHUB_ENV"
          echo "AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN" >> "$GITHUB_ENV"
      - name: Copy files to S3 with the AWS CLI
        env:
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib
          aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*"
# Job: build-pytorch-jni-windows
  # Windows build: compiles the CPU JNI library, installs the CUDA toolkit
  # and cuDNN, compiles the CUDA JNI library, then syncs the jnilib directory
  # to S3.
  build-pytorch-jni-windows:
    if: github.repository == 'deepjavalibrary/djl'
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: 'corretto'
          java-version: 17
      - uses: actions/cache@v4
        with:
          path: ~/.gradle/caches
          key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
          restore-keys: |
            ${{ runner.os }}-gradle-
      - name: Release CPU JNI
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
          gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=${{ github.event.inputs.pt_version }}
      # NOTE(review): the toolkit installed here is CUDA 11.7 while the
      # default -Pcuda value used below is cu124 — confirm this pairing is
      # intended.
      - name: Install CUDA 11.7
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
          curl.exe -L https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_516.94_windows.exe -o cuda.exe
          curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.5.0/local_installers/11.7/cudnn-windows-x86_64-8.5.0.96_cuda11-archive.zip -o cudnn.zip
          cuda.exe -s
          unzip.exe cudnn.zip
          cp.exe -a cudnn*/include cudnn*/lib cudnn*/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7/"
          rm.exe -Rf cuda.exe cudnn.zip cudnn*
      - name: Release cuda11 JNI
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
          set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
          set "PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%"
          set CUDA_VERSION=${{ github.event.inputs.cuda }}
          if "%CUDA_VERSION%" == "" set CUDA_VERSION=cu124
          gradlew :engines:pytorch:pytorch-native:cleanJNI :engines:pytorch:pytorch-native:compileJNI -Pcuda=%CUDA_VERSION% -Ppt_version=${{ github.event.inputs.pt_version }}
      - name: Configure Deployment AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::425969335547:role/djl-ci-publish-role
          aws-region: us-east-2
      - name: Copy files to S3 with the AWS CLI
        shell: bash
        env:
          # The dispatch input is handed to the script as an environment
          # variable instead of being interpolated into it, so its contents
          # are never parsed as shell code. Empty on scheduled runs.
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          # Without an explicit input, read the pytorch entry from gradle/libs.versions.toml.
          PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib
          aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*"
# Job: build-pytorch-jni-arm64-macos
  # macOS arm64 build: compiles the JNI library with an aarch64 JDK, runs the
  # integration tests against it, then syncs the jnilib directory to S3.
  build-pytorch-jni-arm64-macos:
    if: github.repository == 'deepjavalibrary/djl'
    runs-on: macos-latest-xlarge
    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: 17
          distribution: corretto
          architecture: aarch64
      - uses: actions/cache@v4
        with:
          path: ~/.gradle/caches
          key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
          restore-keys: |
            ${{ runner.os }}-gradle-
      - name: Release JNI prep
        shell: bash
        env:
          # The dispatch input is handed to the script as an environment
          # variable instead of being interpolated into it, so its contents
          # are never parsed as shell code. Empty on scheduled runs.
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          # Without an explicit input, read the pytorch entry from gradle/libs.versions.toml.
          export PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          echo $PYTORCH_VERSION
          ./gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=$PYTORCH_VERSION
          ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch"
      - name: Configure Deployment AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::425969335547:role/djl-ci-publish-role
          aws-region: us-east-2
      - name: Copy files to S3 with the AWS CLI
        shell: bash
        env:
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib
          aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*"
# Job: create-aarch64-runner
  # Runs on the self-hosted scheduler machine: requests a runner registration
  # token from the GitHub API and passes it to start_instance.sh. The job
  # exposes the step output `action_graviton_instance_id` as
  # `aarch64_instance_id` (presumably written by start_instance.sh — verify
  # against that script).
  create-aarch64-runner:
    if: github.repository == 'deepjavalibrary/djl'
    runs-on: [ self-hosted, scheduler ]
    outputs:
      aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
    steps:
      - name: Create new Graviton instance
        id: create_aarch64
        # An explicit `shell: bash` makes the runner invoke bash with
        # `-eo pipefail`, so a failing `curl --fail` aborts the step instead
        # of being hidden by the pipe into jq.
        shell: bash
        env:
          # The secret is read from the environment rather than interpolated
          # into the script text.
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$(curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
            https://api.github.com/repos/deepjavalibrary/djl/actions/runners/registration-token \
            --fail \
            | jq -r '.token')
          # Quoted so an empty token cannot shift the positional arguments.
          ./start_instance.sh action_graviton "$token" djl
# Job: build-pytorch-jni-aarch64
  # Runs on the self-hosted aarch64 runner once create-aarch64-runner has
  # finished: builds the JNI library (with -Pprecxx11) in an Amazon Linux 2
  # container with gcc10, runs the integration tests, and syncs the jnilib
  # directory to S3.
  build-pytorch-jni-aarch64:
    if: github.repository == 'deepjavalibrary/djl'
    runs-on: [ self-hosted, aarch64 ]
    container:
      image: amazonlinux:2
      env:
        JAVA_HOME: /usr/lib/jvm/java-17-amazon-corretto.aarch64
    timeout-minutes: 30
    needs: create-aarch64-runner
    steps:
      - name: Install compiler environment
        run: |
          yum -y update
          yum -y install patch git cmake3 python3-devel java-17-amazon-corretto-devel jq
          yum -y install gcc10 gcc10-c++
          ln -sf /usr/bin/gcc10-gcc /usr/bin/gcc
          ln -sf /usr/bin/gcc10-cc /usr/bin/cc
          ln -sf /usr/bin/gcc10-g++ /usr/bin/g++
          ln -sf /usr/bin/gcc10-c++ /usr/bin/c++
          ln -sf /usr/bin/gcc10-ar /usr/bin/ar
          ln -sf /usr/bin/cmake3 /usr/bin/cmake
          pip3 install awscli --upgrade
      - uses: taiki-e/checkout-action@v1
      - name: Release JNI prep
        env:
          # The dispatch input is handed to the script as an environment
          # variable instead of being interpolated into it, so its contents
          # are never parsed as shell code. Empty on scheduled runs.
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          export PATH=$PATH:$JAVA_HOME/bin
          # Without an explicit input, read the pytorch entry from gradle/libs.versions.toml.
          export PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          echo $PYTORCH_VERSION
          ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pprecxx11 -Ppt_version=$PYTORCH_VERSION
          ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch"
      # Requests an OIDC token from the Actions token endpoint, exchanges it
      # through `aws sts assume-role-with-web-identity`, masks the resulting
      # values in the log and exports them to later steps via GITHUB_ENV.
      - name: Configure AWS Credentials
        run: |
          oidc_token=$(curl -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
            "$ACTIONS_ID_TOKEN_REQUEST_URL&audience=sts.amazonaws.com" | jq -r ".value")
          echo "::add-mask::$oidc_token"
          read -r AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN <<<"$(aws sts assume-role-with-web-identity \
            --region "us-east-2" \
            --role-arn "arn:aws:iam::425969335547:role/djl-ci-publish-role" \
            --role-session-name "native-jni-s3-pytorch-jni-aarch64" \
            --web-identity-token "$oidc_token" \
            --query "[Credentials.AccessKeyId, Credentials.SecretAccessKey, Credentials.SessionToken]" \
            --output text)"
          echo "::add-mask::$AWS_ACCESS_KEY_ID"
          echo "::add-mask::$AWS_SECRET_ACCESS_KEY"
          echo "::add-mask::$AWS_SESSION_TOKEN"
          echo "AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" >> "$GITHUB_ENV"
          echo "AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" >> "$GITHUB_ENV"
          echo "AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN" >> "$GITHUB_ENV"
      - name: Copy files to S3 with the AWS CLI
        env:
          PT_VERSION_INPUT: ${{ github.event.inputs.pt_version }}
        run: |
          PYTORCH_VERSION=${PT_VERSION_INPUT:-$(awk -F '=' '/pytorch/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)}
          aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib
          aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*"
# Job: stop-runners
  # Cleanup on the scheduler machine: passes the instance id published by
  # create-aarch64-runner to stop_instance.sh. `always()` keeps this job
  # running even when the jobs it depends on failed or were cancelled.
  stop-runners:
    needs:
      - create-aarch64-runner
      - build-pytorch-jni-aarch64
    if: ${{ github.repository == 'deepjavalibrary/djl' && always() }}
    runs-on:
      - self-hosted
      - scheduler
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }}
          ./stop_instance.sh $instance_id