forked from lm-sys/RouteLLM
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
210 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
docker/hf/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
name: Build and Publish Docker Images

# Declaring any permission sets every scope that is not listed to "none".
# actions/checkout needs read access to the repository contents, and pushing
# images to GHCR needs write access to packages.
permissions:
  contents: read
  packages: write

on:
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
  push:
    tags:
      - v[0-9]+.*
    branches:
      - main
  release:
    types:
      - published

# At most one run per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
|
||
jobs:
  # Builds one image per matrix entry and pushes it to GHCR
  # (the push is skipped for pull_request events).
  build-and-publish:
    runs-on: ubuntu-latest

    strategy:
      # Prevent a failure in one image from stopping the other builds
      fail-fast: false

      # One entry per torch flavour:
      #   device - passed to the Dockerfile as the TARGET build argument
      #   suffix - appended to every generated tag; the cpu entry defines none,
      #            so its tags carry no suffix
      # NOTE(review): `file: Dockerfile` resolves to the repository root, while
      # the compose file added in the same change builds from docker/Dockerfile.
      # Confirm which path is correct.
      # NOTE(review): confirm that the CUDA and ROCm wheel indexes used by the
      # Dockerfile actually provide linux/arm64 wheels.
      matrix:
        include:
          - image: routellm
            context: .
            file: Dockerfile
            platforms: linux/amd64,linux/arm64
            device: cpu

          - image: routellm
            context: .
            file: Dockerfile
            platforms: linux/amd64,linux/arm64
            device: cuda
            suffix: -cuda

          - image: routellm
            context: .
            file: Dockerfile
            platforms: linux/amd64,linux/arm64
            device: rocm
            suffix: -rocm

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          # list of Docker images to use as base name for tags
          images: |
            ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}
          # generate Docker tags based on the following events/attributes
          tags: |
            # Tag with branch name
            type=ref,event=branch,suffix=${{ matrix.suffix }}
            # Tag with pr-number
            type=ref,event=pr,suffix=${{ matrix.suffix }}
            # Tag with git tag on release
            type=ref,event=tag,suffix=${{ matrix.suffix }}
            type=raw,value=release,enable=${{ github.event_name == 'release' }},suffix=${{ matrix.suffix }}

      # QEMU provides emulation for the non-native platform of the
      # multi-platform build.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GHCR
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.file }}
          platforms: ${{ matrix.platforms }}
          build-args: |
            TARGET=${{ matrix.device }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# syntax = docker/dockerfile:1.4
FROM python:3.10

# Debian-based official images ship an apt hook (docker-clean) that deletes
# downloaded packages after every install, which would leave the cache mounts
# below empty. Remove it (a no-op if absent) and tell apt to keep its downloads.
RUN rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache

# Install system dependencies.
# sharing=locked: apt needs exclusive access to its cache directories, and a
# multi-platform build runs this step once per platform against the same mounts.
# Mounting /var/lib/apt also keeps the package lists out of the image layer.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && \
    apt-get install -y --no-install-recommends net-tools nano

ENV PYTHONDONTWRITEBYTECODE=1

# Create the routllm virtual environment and upgrade its pip
RUN python -m venv /opt/routllm && \
    /opt/routllm/bin/pip install --upgrade pip

# Set the PATH to prioritize the virtualenv
ENV PATH="/opt/routllm/bin:$PATH"

# Overlay the files under docker/rootfs onto the image root
COPY docker/rootfs /

WORKDIR /app

COPY . .

# Install torch for the requested TARGET, then the project with its serve and
# eval extras. pip downloads are kept in a build cache mount.
#   TARGET=cuda      -> CUDA 12.1 wheel index
#   TARGET=rocm      -> ROCm 5.2 wheel index
#   anything else    -> pip's default index
# A failed torch install does not abort the build (same as before), but it is
# now reported instead of passing silently.
ARG TARGET=cuda
RUN --mount=type=cache,target=/root/.cache/pip \
    case "$TARGET" in \
        cuda) torch_index="https://download.pytorch.org/whl/cu121" ;; \
        rocm) torch_index="https://download.pytorch.org/whl/rocm5.2" ;; \
        *)    torch_index="" ;; \
    esac; \
    pip install torch ${torch_index:+--index-url "$torch_index"} || \
        echo "WARNING: installing torch for TARGET=$TARGET failed; continuing with the project install" >&2; \
    pip install -e ".[serve,eval]"

EXPOSE 6060
VOLUME [ "/root/.cache/huggingface" ]
# NOTE(review): entrypoint.sh is not created in this file; presumably it is
# provided by the docker/rootfs overlay copied above. Confirm.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
services:
  routllm:
    restart: unless-stopped
    # edit below to run on a container runtime other than nvidia-container-runtime.
    # not yet tested with rocm/AMD GPUs
    # Comment out the "deploy" section to run on CPU only
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu, utility, compute]
              count: 1 # Currently routllm can only handle one gpu device!!!
    # Uncomment the image line to pull the image from the container registry.
    # Also, comment out the build section.
    # The publish workflow names the image "routellm" and tags it with the
    # branch name, the git tag or "release" (it does not create a "latest" tag),
    # adding a -cuda or -rocm suffix for the GPU builds; the CPU build has no suffix.
    #image: ghcr.io/lm-sys/routellm:main-cuda
    build:
      context: ..
      dockerfile: docker/Dockerfile
    ports:
      - "6060:6060"
    volumes:
      - ./hf:/root/.cache/huggingface
    environment:
      #- OPENAI_API_KEY_FILE=/run/secrets/openai_api_key
      #- OPENAI_API_KEY=sk-...
      #- ANYSCALE_API_KEY_FILE=/run/secrets/anyscale_api_key
      #- ANYSCALE_API_KEY=esecret_XXXXXX
      #- ANTHROPIC_API_KEY_FILE=/run/secrets/anthropic_api_key
      #- ANTHROPIC_API_KEY=api-key
      #- GEMINI_API_KEY_FILE=/run/secrets/gemini_api_key
      #- GEMINI_API_KEY=api-key
      #- TOGETHERAI_API_KEY_FILE=/run/secrets/togetherai_api_key
      #- TOGETHERAI_API_KEY=api-key

      # bedrock (_FILE is supported)
      #- AWS_ACCESS_KEY_ID=...
      #- AWS_SECRET_ACCESS_KEY=...
      #- AWS_REGION_NAME=...
      - SERVER_ARGS=--routers mf --strong-model gpt-4o --weak-model anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1

    # Secrets made available to this service.
    # Only required if you are using a secret file
    #secrets:
    #  - openai_api_key
    #  - anyscale_api_key
    #  - anthropic_api_key
    #  - gemini_api_key
    #  - togetherai_api_key

# Top-level definitions of the secrets referenced by the service.
# Only required if you are using a secret file
#secrets:
#  openai_api_key:
#    file: <path to your openai api key file>
#  anyscale_api_key:
#    file: <path to your anyscale api key file>
#  anthropic_api_key:
#    file: <path to your anthropic api key file>
#  gemini_api_key:
#    file: <path to your gemini api key file>
#  togetherai_api_key:
#    file: <path to your togetherai api key file>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# Container entrypoint: resolves *_FILE secrets into plain environment
# variables, then starts the RouteLLM OpenAI-compatible server.

# Support loading secrets from files using the _FILE suffix convention:
# for every exported variable NAME_FILE whose value is the path of an existing
# regular file, export NAME with that file's contents.
# Iterating over variable names (instead of parsing `env` output) avoids false
# matches on values that merely contain "_FILE=" and on multi-line values.
for _env_file in $(compgen -e); do
    # Only variables whose name ends in _FILE are candidates.
    [[ "${_env_file}" == *_FILE ]] || continue

    _env_var="${_env_file%_FILE}"
    # A variable named exactly "_FILE" has no target name; skip it.
    [ -n "${_env_var}" ] || continue

    if [ -f "${!_env_file}" ]; then
        export "${_env_var}"="$(cat "${!_env_file}")"
    fi
done

# SERVER_ARGS is deliberately left unquoted so it splits into separate arguments.
# exec replaces this shell with the server process, so stop signals sent to the
# container reach Python directly instead of stopping at bash.
exec python -m routellm.openai_server $SERVER_ARGS