Native S3 Huggingface tokenizers #101
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: the display name, the trigger, and the start of the job map.
name: Native S3 Huggingface tokenizers

# Only runs when started manually (workflow_dispatch); no push/PR triggers.
on:
  workflow_dispatch:

jobs:
# Compiles the tokenizers JNI library on a GitHub-hosted macOS runner, runs the
# tokenizers test task with the `jni` project property set, then uploads
# extensions/tokenizers/jnilib to S3 and invalidates the matching CloudFront paths.
# NOTE(review): `macos-latest` is a moving label and a separate arm64 macOS job
# exists in this workflow; confirm this label still resolves to the intended
# CPU architecture.
build-tokenizers-jni-osx:
  runs-on: macos-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up JDK 17
      uses: actions/setup-java@v4
      with:
        java-version: 17
        distribution: corretto

    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable

    # Gradle cache keyed on the runner OS plus a hash of all *.gradle* files;
    # falls back to any cache with the same OS prefix.
    - uses: actions/cache@v4
      with:
        path: ~/.gradle/caches
        key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
        restore-keys: |
          ${{ runner.os }}-gradle-

    - name: Release JNI prep
      run: |
        ./gradlew :extensions:tokenizers:compileJNI
        ./gradlew -Pjni :extensions:tokenizers:test

    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-east-2
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    # The version is the second '='-separated field of the gradle.properties
    # line matching `tokenizers_version`; it selects the S3 / CloudFront prefix.
    - name: Copy files to S3 with the AWS CLI
      run: |
        TOKENIZERS_VERSION="$(cat gradle.properties | awk -F '=' '/tokenizers_version/ {print $2}')"
        aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
        aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
# Compiles the tokenizers JNI library inside a centos:centos7 container on an
# ubuntu-latest host, runs the tokenizers test task with the `jni` project
# property set, then uploads extensions/tokenizers/jnilib to S3 and invalidates
# the matching CloudFront paths.
# NOTE(review): this job uses older action majors (checkout/setup-java/cache v3,
# configure-aws-credentials v2) than the non-container jobs; presumably this is
# deliberate for the CentOS 7 userland -- confirm before bumping them.
build-tokenizers-jni-linux:
  runs-on: ubuntu-latest
  container: centos:centos7
  steps:
    # Installs the build toolchain (devtoolset-7, cmake3 exposed as `cmake`),
    # rustup, and the AWS CLI inside the container.
    - name: Install Environment
      run: |
        yum -y update
        yum -y install centos-release-scl-rh epel-release perl-core
        yum -y install devtoolset-7 git patch cmake3 libstdc++-static
        ln -s /usr/bin/cmake3 /usr/bin/cmake
        curl https://sh.rustup.rs -sSf | sh -s -- -y
        pip3 install awscli --upgrade

    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable

    - uses: actions/checkout@v3

    - name: Set up JDK 17
      uses: actions/setup-java@v3
      with:
        java-version: 17
        distribution: corretto

    # Gradle cache keyed on the runner OS plus a hash of all *.gradle* files;
    # falls back to any cache with the same OS prefix.
    - uses: actions/cache@v3
      with:
        path: ~/.gradle/caches
        key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
        restore-keys: |
          ${{ runner.os }}-gradle-

    # Loads the cargo environment and appends the devtoolset-7 bin directory to
    # PATH before building; the test task runs with PYTORCH_PRECXX11=true set.
    - name: Release JNI prep
      run: |
        source "$HOME/.cargo/env"
        export PATH=$PATH:/opt/rh/devtoolset-7/root/usr/bin
        ./gradlew :extensions:tokenizers:compileJNI
        PYTORCH_PRECXX11=true ./gradlew -Pjni :extensions:tokenizers:test

    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-region: us-east-2
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    # The version is the second '='-separated field of the gradle.properties
    # line matching `tokenizers_version`; it selects the S3 / CloudFront prefix.
    - name: Copy files to S3 with the AWS CLI
      run: |
        TOKENIZERS_VERSION="$(cat gradle.properties | awk -F '=' '/tokenizers_version/ {print $2}')"
        aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
        aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
# Compiles the tokenizers JNI library on a GitHub-hosted Windows runner, then
# uploads extensions/tokenizers/jnilib to S3 and invalidates the matching
# CloudFront paths. Unlike the other build jobs, no test task is run here.
build-tokenizer-jni-windows:
  runs-on: windows-latest
  steps:
    - name: Install Environment
      run: |
        choco install -y mingw

    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable

    - uses: actions/checkout@v4

    - name: Set up JDK 17
      uses: actions/setup-java@v4
      with:
        distribution: 'corretto'
        java-version: 17

    # Gradle cache keyed on the runner OS plus a hash of all *.gradle* files;
    # falls back to any cache with the same OS prefix.
    - uses: actions/cache@v4
      with:
        path: ~/.gradle/caches
        key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
        restore-keys: |
          ${{ runner.os }}-gradle-

    # Runs under cmd so vcvarsall.bat can set up the MSVC amd64 environment in
    # the same shell that then invokes Gradle.
    # The Gradle project path is `:extensions:tokenizers` (plural): it must
    # match the extensions/tokenizers directory uploaded below and the path
    # used by every other build job in this workflow.
    # NOTE(review): the vcvarsall.bat path is hard-coded to a Visual Studio 2019
    # Enterprise install; confirm `windows-latest` still provides it.
    - name: Release CPU JNI
      shell: cmd
      run: |
        call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
        gradlew :extensions:tokenizers:compileJNI

    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: us-east-2

    # Uses bash explicitly because the script relies on POSIX tools (cat, awk).
    # The version is the second '='-separated field of the gradle.properties
    # line matching `tokenizers_version`; it selects the S3 / CloudFront prefix.
    - name: Copy files to S3 with the AWS CLI
      shell: bash
      run: |
        TOKENIZERS_VERSION="$(cat gradle.properties | awk -F '=' '/tokenizers_version/ {print $2}')"
        aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
        aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
# Compiles the tokenizers JNI library on a macos-latest-xlarge runner with an
# aarch64 JDK, runs the tokenizers test task with the `jni` project property
# set, then uploads extensions/tokenizers/jnilib to S3 and invalidates the
# matching CloudFront paths.
# Runs only in the deepjavalibrary/djl repository; `always()` keeps the job
# from being skipped on account of other jobs' results.
build-tokenizers-jni-arm64-osx:
  if: github.repository == 'deepjavalibrary/djl' && always()
  runs-on: macos-latest-xlarge
  steps:
    - uses: actions/checkout@v4

    - name: Set up JDK 17
      uses: actions/setup-java@v4
      with:
        distribution: corretto
        architecture: aarch64
        java-version: 17

    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable

    # Gradle cache keyed on the runner OS plus a hash of all *.gradle* files;
    # falls back to any cache with the same OS prefix.
    - uses: actions/cache@v4
      with:
        path: ~/.gradle/caches
        key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
        restore-keys: |
          ${{ runner.os }}-gradle-

    - name: Release JNI prep
      run: |
        ./gradlew :extensions:tokenizers:compileJNI
        ./gradlew -Pjni :extensions:tokenizers:test

    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-east-2
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    # The version is the second '='-separated field of the gradle.properties
    # line matching `tokenizers_version`; it selects the S3 / CloudFront prefix.
    - name: Copy files to S3 with the AWS CLI
      shell: bash
      run: |
        TOKENIZERS_VERSION="$(cat gradle.properties | awk -F '=' '/tokenizers_version/ {print $2}')"
        aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
        aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
# Requests a GitHub Actions runner registration token and hands it to
# start_instance.sh on the self-hosted `scheduler` runner. The step's
# `action_graviton_instance_id` output is re-exported as the job output
# `aarch64_instance_id` for downstream jobs.
# Runs only in the deepjavalibrary/djl repository.
create-aarch64-runner:
  if: github.repository == 'deepjavalibrary/djl'
  runs-on: [ self-hosted, scheduler ]
  steps:
    - name: Create new Graviton instance
      id: create_aarch64
      # The secret is passed through the environment rather than being
      # interpolated into the script text.
      env:
        RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
      run: |
        # Without pipefail a failed curl is masked by the exit status of the
        # last command in the pipeline and the script would continue with an
        # empty token.
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token="$( curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
          https://api.github.com/repos/deepjavalibrary/djl/actions/runners/registration-token \
          --fail \
          | jq -r '.token' )"
        if [ -z "$token" ] || [ "$token" = "null" ]; then
          echo "Failed to obtain a runner registration token" >&2
          exit 1
        fi
        ./start_instance.sh action_graviton "$token" djl
  outputs:
    aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
# Compiles the tokenizers JNI library inside an amazonlinux:2 container on a
# self-hosted aarch64 runner, runs the tokenizers test task with the `jni`
# project property set, then uploads extensions/tokenizers/jnilib to S3 and
# invalidates the matching CloudFront paths.
# Ordered after create-aarch64-runner; because of `always()` it is still
# attempted when that job did not succeed. Capped at 30 minutes.
# NOTE(review): this job uses older action majors (checkout/setup-java/cache v3,
# configure-aws-credentials v2) than the non-container jobs; presumably this is
# deliberate for the Amazon Linux 2 userland -- confirm before bumping them.
build-tokenizer-jni-aarch64:
  if: github.repository == 'deepjavalibrary/djl' && always()
  needs: create-aarch64-runner
  runs-on:
    - self-hosted
    - aarch64
  container: amazonlinux:2
  timeout-minutes: 30
  steps:
    # Installs the build toolchain (cmake3 exposed as `cmake`) and the AWS CLI
    # inside the container.
    - name: Install Environment
      run: |
        yum -y update
        yum -y groupinstall "Development Tools"
        yum -y install patch perl-IPC-Cmd cmake3
        ln -s /usr/bin/cmake3 /usr/bin/cmake
        pip3 install awscli --upgrade

    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable

    - uses: actions/checkout@v3

    - name: Set up JDK 17
      uses: actions/setup-java@v3
      with:
        distribution: corretto
        architecture: aarch64
        java-version: 17

    # Gradle cache keyed on the runner OS plus a hash of all *.gradle* files;
    # falls back to any cache with the same OS prefix.
    - uses: actions/cache@v3
      with:
        path: ~/.gradle/caches
        key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
        restore-keys: |
          ${{ runner.os }}-gradle-

    # The test task runs with PYTORCH_PRECXX11=true set in its environment.
    - name: Release JNI prep
      run: |
        ./gradlew :extensions:tokenizers:compileJNI
        PYTORCH_PRECXX11=true ./gradlew -Pjni :extensions:tokenizers:test

    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-region: us-east-2
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    # The version is the second '='-separated field of the gradle.properties
    # line matching `tokenizers_version`; it selects the S3 / CloudFront prefix.
    - name: Copy files to S3 with the AWS CLI
      run: |
        TOKENIZERS_VERSION="$(cat gradle.properties | awk -F '=' '/tokenizers_version/ {print $2}')"
        aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
        aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
# Passes the instance id published by create-aarch64-runner to stop_instance.sh
# on the self-hosted `scheduler` runner.
# Ordered after both the runner-creation job and the aarch64 build; because of
# `always()` it still runs when either of them did not succeed.
stop-runners:
  if: github.repository == 'deepjavalibrary/djl' && always()
  needs:
    - create-aarch64-runner
    - build-tokenizer-jni-aarch64
  runs-on:
    - self-hosted
    - scheduler
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }}
        ./stop_instance.sh $instance_id