Skip to content

Native S3 Huggingface tokenizers #125

Native S3 Huggingface tokenizers

Native S3 Huggingface tokenizers #125

name: Native S3 Huggingface tokenizers
on:
workflow_dispatch:
jobs:
build-tokenizers-jni-osx:
runs-on: macos-13
steps:
- uses: actions/checkout@v4
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: 'corretto'
java-version: 17
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
- uses: actions/cache@v4
with:
path: ~/.gradle/caches
key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
restore-keys: |
${{ runner.os }}-gradle-
- name: Release JNI prep
run: |
./gradlew :extensions:tokenizers:compileJNI
./gradlew -Pjni :extensions:tokenizers:test
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Copy files to S3 with the AWS CLI
run: |
TOKENIZERS_VERSION="$(awk -F '=' '/tokenizers/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)"
aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
build-tokenizers-jni-linux:
runs-on: ubuntu-latest
container: centos:centos7
steps:
- name: Install Environment
run: |
yum -y update
yum -y install centos-release-scl-rh epel-release perl-core
yum -y install devtoolset-7 git patch cmake3 libstdc++-static
ln -s /usr/bin/cmake3 /usr/bin/cmake
curl https://sh.rustup.rs -sSf | sh -s -- -y
pip3 install awscli wheel setuptools --upgrade
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
- uses: actions/checkout@v3
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
distribution: 'corretto'
java-version: 17
- uses: actions/cache@v3
with:
path: ~/.gradle/caches
key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
restore-keys: |
${{ runner.os }}-gradle-
- name: Release JNI prep
run: |
source "$HOME/.cargo/env"
export PATH=$PATH:/opt/rh/devtoolset-7/root/usr/bin
./gradlew :extensions:tokenizers:compileJNI
PYTORCH_PRECXX11=true ./gradlew -Pjni :extensions:tokenizers:test
- name: Build djl-converter wheel
working-directory: extensions/tokenizers/src/main/python/
run: ./setup.py bdist_wheel
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Copy files to S3 with the AWS CLI
run: |
TOKENIZERS_VERSION="$(awk -F '=' '/tokenizers/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)"
aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
aws s3 cp extensions/tokenizers/src/main/python/dist/djl_converter-*.whl s3://djl-ai/publish/djl_converter/
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/djl_converter/*"
build-tokenizer-jni-windows:
runs-on: windows-latest
steps:
- name: Install Environment
run: |
choco install -y mingw
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
- uses: actions/checkout@v4
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: 'corretto'
java-version: 17
- uses: actions/cache@v4
with:
path: ~/.gradle/caches
key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
restore-keys: |
${{ runner.os }}-gradle-
- name: Release CPU JNI
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
gradlew :extensions:tokenizer:compileJNI
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Copy files to S3 with the AWS CLI
shell: bash
run: |
TOKENIZERS_VERSION="$(awk -F '=' '/tokenizers/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)"
aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
build-tokenizers-jni-arm64-osx:
if: ${{ github.repository == 'deepjavalibrary/djl' && always() }}
runs-on: macos-latest-xlarge
steps:
- uses: actions/checkout@v4
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
java-version: 17
distribution: corretto
architecture: aarch64
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
- uses: actions/cache@v4
with:
path: ~/.gradle/caches
key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
restore-keys: |
${{ runner.os }}-gradle-
- name: Release JNI prep
run: |
./gradlew :extensions:tokenizers:compileJNI
./gradlew -Pjni :extensions:tokenizers:test
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Copy files to S3 with the AWS CLI
shell: bash
run: |
TOKENIZERS_VERSION="$(awk -F '=' '/tokenizers/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)"
aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
create-aarch64-runner:
if: github.repository == 'deepjavalibrary/djl'
runs-on: [ self-hosted, scheduler ]
steps:
- name: Create new Graviton instance
id: create_aarch64
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_graviton $token djl
outputs:
aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
build-tokenizer-jni-aarch64:
if: ${{ github.repository == 'deepjavalibrary/djl' && always() }}
runs-on: [ self-hosted, aarch64 ]
timeout-minutes: 30
needs: create-aarch64-runner
container: amazonlinux:2
steps:
- name: Install Environment
run: |
yum -y update
yum -y groupinstall "Development Tools"
yum -y install patch perl-IPC-Cmd cmake3
ln -s /usr/bin/cmake3 /usr/bin/cmake
pip3 install awscli --upgrade
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
- uses: actions/checkout@v3
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
java-version: 17
distribution: corretto
architecture: aarch64
- uses: actions/cache@v3
with:
path: ~/.gradle/caches
key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }}
restore-keys: |
${{ runner.os }}-gradle-
- name: Release JNI prep
run: |
./gradlew :extensions:tokenizers:compileJNI
PYTORCH_PRECXX11=true ./gradlew -Pjni :extensions:tokenizers:test
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Copy files to S3 with the AWS CLI
run: |
TOKENIZERS_VERSION="$(awk -F '=' '/tokenizers/ {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)"
aws s3 sync extensions/tokenizers/jnilib s3://djl-ai/publish/tokenizers/${TOKENIZERS_VERSION}/jnilib/
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/tokenizers/${TOKENIZERS_VERSION}/jnilib/*"
stop-runners:
if: ${{ github.repository == 'deepjavalibrary/djl' && always() }}
runs-on: [ self-hosted, scheduler ]
needs: [ create-aarch64-runner, build-tokenizer-jni-aarch64 ]
steps:
- name: Stop all instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }}
./stop_instance.sh $instance_id