diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..f8f07e5
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+docker/hf/
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..22134fb
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,97 @@
+name: Build and Publish Docker Images
+
+permissions:
+  contents: read
+  packages: write
+
+on:
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+  push:
+    tags:
+      - v[0-9]+.*
+    branches:
+      - main
+  release:
+    types:
+      - published
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+
+    strategy:
+      # Prevent a failure in one image from stopping the other builds
+      fail-fast: false
+
+      matrix:
+        include:
+          - image: routellm
+            context: .
+            file: docker/Dockerfile
+            platforms: linux/amd64,linux/arm64
+            device: cpu
+
+          - image: routellm
+            context: .
+            file: docker/Dockerfile
+            platforms: linux/amd64,linux/arm64
+            device: cuda
+            suffix: -cuda
+
+          - image: routellm
+            context: .
+            file: docker/Dockerfile
+            platforms: linux/amd64,linux/arm64
+            device: rocm
+            suffix: -rocm
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          # list of Docker images to use as base name for tags
+          images: |
+            ghcr.io/${{ github.repository_owner }}/routellm
+          # generate Docker tags based on the following events/attributes
+          tags: |
+            # Tag with branch name
+            type=ref,event=branch,suffix=${{ matrix.suffix }}
+            # Tag with pr-number
+            type=ref,event=pr,suffix=${{ matrix.suffix }}
+            # Tag with git tag on release
+            type=ref,event=tag,suffix=${{ matrix.suffix }}
+            type=raw,value=release,enable=${{ github.event_name == 'release' }},suffix=${{ matrix.suffix }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3.3.0
+
+      - name: Login to GHCR
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        uses: docker/build-push-action@v5.3.0
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.file }}
+          platforms: ${{ matrix.platforms }}
+          build-args: |
+            TARGET=${{ matrix.device }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.gitignore b/.gitignore
index a5053fe..24be079 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,4 +162,7 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+#.idea/
+
+# docker compose
+docker/hf
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..f4332d0
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,38 @@
+# syntax = docker/dockerfile:1.4
+FROM python:3.10
+
+# Install system dependencies
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install -y --no-install-recommends net-tools nano
+
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Create the routellm virtual environment
+RUN python -m venv /opt/routellm && \
+    /opt/routellm/bin/pip install --upgrade pip
+
+# Set the PATH to prioritize the virtualenv
+ENV PATH="/opt/routellm/bin:$PATH"
+
+COPY docker/rootfs /
+
+WORKDIR /app
+
+COPY . .
+
+# Install additional requirements based on the target platform, with pip package caching
+ARG TARGET=cuda
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if [ "$TARGET" = "cuda" ]; then \
+        pip install torch --index-url https://download.pytorch.org/whl/cu121; \
+    elif [ "$TARGET" = "rocm" ]; then \
+        pip install torch --index-url https://download.pytorch.org/whl/rocm5.2; \
+    else \
+        pip install torch; \
+    fi; \
+    pip install -e .[serve,eval]
+
+EXPOSE 6060
+VOLUME [ "/root/.cache/huggingface" ]
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..7fc8c05
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,60 @@
+services:
+  routellm:
+    restart: unless-stopped
+    # Edit below to run on a container runtime other than nvidia-container-runtime.
+    # Not yet tested with rocm/AMD GPUs.
+    # Comment out the "deploy" section to run on CPU only.
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu, utility, compute]
+              count: 1 # Currently RouteLLM can only handle one GPU device!
+    # Uncomment the image line to pull the image from the container registry.
+    # Also, comment out the build section.
+    #image: ghcr.io/lm-sys/routellm:latest
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile
+    ports:
+      - "6060:6060"
+    volumes:
+      - ./hf:/root/.cache/huggingface
+    environment:
+      #- OPENAI_API_KEY_FILE=/run/secrets/openai_api_key
+      #- OPENAI_API_KEY=sk-...
+      #- ANYSCALE_API_KEY_FILE=/run/secrets/anyscale_api_key
+      #- ANYSCALE_API_KEY=esecret_XXXXXX
+      #- ANTHROPIC_API_KEY_FILE=/run/secrets/anthropic_api_key
+      #- ANTHROPIC_API_KEY=api-key
+      #- GEMINI_API_KEY_FILE=/run/secrets/gemini_api_key
+      #- GEMINI_API_KEY=api-key
+      #- TOGETHERAI_API_KEY_FILE=/run/secrets/togetherai_api_key
+      #- TOGETHERAI_API_KEY=api-key
+
+      # bedrock (_FILE is supported)
+      #- AWS_ACCESS_KEY_ID=...
+      #- AWS_SECRET_ACCESS_KEY=...
+      #- AWS_REGION_NAME=...
+      - SERVER_ARGS=--routers mf --strong-model gpt-4o --weak-model anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1
+
+    # Only required if you are using a secret file
+    #secrets:
+    #  - openai_api_key
+    #  - anyscale_api_key
+    #  - anthropic_api_key
+    #  - gemini_api_key
+    #  - togetherai_api_key
+# Only required if you are using a secret file
+#secrets:
+#  openai_api_key:
+#    file:
+#  anyscale_api_key:
+#    file:
+#  anthropic_api_key:
+#    file:
+#  gemini_api_key:
+#    file:
+#  togetherai_api_key:
+#    file:
diff --git a/docker/rootfs/usr/local/bin/entrypoint.sh b/docker/rootfs/usr/local/bin/entrypoint.sh
new file mode 100755
index 0000000..12a18ab
--- /dev/null
+++ b/docker/rootfs/usr/local/bin/entrypoint.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Support loading secrets from files using the _FILE suffix convention
+for _env_file in $(env | grep '_FILE=' | awk -F '=' '{print $1}'); do
+    _env_var=$(echo "${_env_file}" | sed -r 's/(.*)_FILE/\1/')
+    if [ -f "${!_env_file}" ]; then
+        export "${_env_var}"="$(cat "${!_env_file}")"
+    fi
+done
+
+# exec so the Python server replaces the shell and receives container signals directly
+exec python -m routellm.openai_server $SERVER_ARGS
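For reference, a typical workflow with these files would look like the sketch below (commands assumed to run from the repository root; provider keys come from the environment block in docker-compose.yml):

    # Build and start the composed service (GPU-enabled by default; see the deploy section)
    docker compose -f docker/docker-compose.yml up --build -d

    # Or build a standalone image for one device target; TARGET is the
    # build-arg consumed by docker/Dockerfile (cpu, cuda, or rocm)
    docker build -f docker/Dockerfile --build-arg TARGET=cpu -t routellm:cpu .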
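The entrypoint's _FILE convention means any provider key can be supplied as a path instead of a literal value: for every variable ending in _FILE that points at an existing file, the script exports the unsuffixed variable with the file's contents before launching the server. A minimal sketch with plain docker run (the host path here is hypothetical):

    # entrypoint.sh reads /run/secrets/openai_api_key and exports OPENAI_API_KEY
    docker run --rm -p 6060:6060 \
        -v /path/to/openai_api_key:/run/secrets/openai_api_key:ro \
        -e OPENAI_API_KEY_FILE=/run/secrets/openai_api_key \
        -e SERVER_ARGS="--routers mf --strong-model gpt-4o --weak-model anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1" \
        ghcr.io/lm-sys/routellm:latest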
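Once the container is up, routellm.openai_server exposes an OpenAI-compatible chat-completions endpoint on port 6060. Assuming the model-name convention from the RouteLLM README (router-<router>-<threshold>; the threshold below is only an example), a smoke test could be:

    curl http://localhost:6060/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{
              "model": "router-mf-0.11593",
              "messages": [{"role": "user", "content": "Hello!"}]
            }'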