feat: support nomic models #145

Workflow file for this run

name: Build and push Cuda A10 docker image to registry

on:
  workflow_dispatch:
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  pull_request:
    paths:
      - ".github/workflows/build.yaml"
      # - "integration-tests/**"
      - "backends/**"
      - "core/**"
      - "router/**"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - "Dockerfile"
    branches:
      - 'main'
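
# Single job: builds the CUDA sm_86 (A10) image; tags, cache keys and build args all carry the "86" suffix.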
jobs:
  build-and-push-image:
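    # Only one build per branch / PR at a time; a newer commit cancels the in-flight run.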
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-86-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    runs-on: [self-hosted, intel-cpu, 32-cpu, tgi-ci]
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true
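      # Export the Actions cache endpoint and runtime token; they are forwarded as build args
      # below so sccache inside the Docker build can reuse compilation artifacts.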
      - name: Configure sccache
        uses: actions/github-script@v6
        with:
          script: |
            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
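      # Provides GITHUB_SHA_SHORT and friends, used for the sha-based image tags and labels below.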
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Tailscale
        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
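      # Authenticate to GHCR (skipped on pull requests) and to the internal Hugging Face registry,
      # which is reached through the Tailscale connection set up above.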
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to internal Container Registry
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
          registry: registry.internal.huggingface.tech
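      # Derive image tags and labels: semver tags on releases, "86-latest" on the default branch,
      # and an immutable "86-sha-<short sha>" tag for every build.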
      - name: Extract metadata (tags, labels) for Docker
        id: meta-86
        uses: docker/metadata-action@v4.3.0
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/text-embeddings-inference
            ghcr.io/huggingface/text-embeddings-inference
          flavor: |
            latest=false
          tags: |
            type=semver,pattern=86-{{version}}
            type=semver,pattern=86-{{major}}.{{minor}}
            type=raw,value=86-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}
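      # Build the sm_86 image and push it (except on pull requests), reusing a registry-backed layer cache.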
      - name: Build and push Docker image
        id: build-and-push-86
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile-cuda
          push: ${{ github.event_name != 'pull_request' }}
          platforms: 'linux/amd64'
          build-args: |
            SCCACHE_GHA_ENABLED=on
            ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
            ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
            CUDA_COMPUTE_CAP=86
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
          tags: ${{ steps.meta-86.outputs.tags }}
          labels: ${{ steps.meta-86.outputs.labels }}
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=max
          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=max
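      # Same build again with the gRPC target; its images are tagged with a "-grpc" suffix.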
      - name: Extract metadata (tags, labels) for Docker
        id: meta-86-grpc
        uses: docker/metadata-action@v4.3.0
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/text-embeddings-inference
            ghcr.io/huggingface/text-embeddings-inference
          flavor: |
            latest=false
          tags: |
            type=semver,pattern=86-{{version}}-grpc
            type=semver,pattern=86-{{major}}.{{minor}}-grpc
            type=raw,value=86-latest-grpc
            type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}-grpc
      - name: Build and push Docker image
        id: build-and-push-86-grpc
        uses: docker/build-push-action@v4
        with:
          context: .
          target: grpc
          file: Dockerfile-cuda
          push: ${{ github.event_name != 'pull_request' }}
          platforms: 'linux/amd64'
          build-args: |
            SCCACHE_GHA_ENABLED=on
            ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
            ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
            CUDA_COMPUTE_CAP=86
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
          tags: ${{ steps.meta-86-grpc.outputs.tags }}
          labels: ${{ steps.meta-86-grpc.outputs.labels }}
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=max