diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..f8f07e5
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+docker/hf/
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..22134fb
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,97 @@
+name: Build and Publish Docker Images
+
+permissions:
+  contents: read
+  packages: write
+
+on:
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+  push:
+    tags:
+      - v[0-9]+.*
+    branches:
+      - main
+  release:
+    types:
+      - published
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+
+    strategy:
+      # Prevent a failure in one image from stopping the other builds
+      fail-fast: false
+
+      matrix:
+        include:
+          - image: routellm
+            context: .
+            file: docker/Dockerfile
+            platforms: linux/amd64,linux/arm64
+            device: cpu
+
+          - image: routellm
+            context: .
+            file: docker/Dockerfile
+            platforms: linux/amd64,linux/arm64
+            device: cuda
+            suffix: -cuda
+
+          - image: routellm
+            context: .
+            file: docker/Dockerfile
+            platforms: linux/amd64,linux/arm64
+            device: rocm
+            suffix: -rocm
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          # list of Docker images to use as base name for tags
+          images: |
+            ghcr.io/${{ github.repository_owner }}/routellm
+          # generate Docker tags based on the following events/attributes
+          tags: |
+            # Tag with branch name
+            type=ref,event=branch,suffix=${{ matrix.suffix }}
+            # Tag with pr-number
+            type=ref,event=pr,suffix=${{ matrix.suffix }}
+            # Tag with git tag on release
+            type=ref,event=tag,suffix=${{ matrix.suffix }}
+            type=raw,value=release,enable=${{ github.event_name == 'release' }},suffix=${{ matrix.suffix }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3.3.0
+
+      - name: Login to GHCR
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        uses: docker/build-push-action@v5.3.0
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.file }}
+          platforms: ${{ matrix.platforms }}
+          build-args: |
+            TARGET=${{ matrix.device }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.gitignore b/.gitignore
index a5053fe..24be079 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,4 +162,7 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+#.idea/
+
+# docker compose
+docker/hf
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..f4332d0
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,38 @@
+# syntax = docker/dockerfile:1.4
+FROM python:3.10
+
+# Install system dependencies
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install -y --no-install-recommends net-tools nano
+
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Create the routellm virtual environment
+RUN python -m venv /opt/routellm && \
+    /opt/routellm/bin/pip install --upgrade pip
+
+# Set the PATH to prioritize the virtualenv
+ENV PATH="/opt/routellm/bin:$PATH"
+
+COPY docker/rootfs /
+
+WORKDIR /app
+
+COPY . .
+
+# Install additional requirements based on the target platform, with pip package caching
+ARG TARGET=cuda
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if [ "$TARGET" = "cuda" ]; then \
+        pip install torch --index-url https://download.pytorch.org/whl/cu121; \
+    elif [ "$TARGET" = "rocm" ]; then \
+        pip install torch --index-url https://download.pytorch.org/whl/rocm5.2; \
+    else \
+        pip install torch; \
+    fi; \
+    pip install -e .[serve,eval]
+
+EXPOSE 6060
+VOLUME [ "/root/.cache/huggingface" ]
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..7fc8c05
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,60 @@
+services:
+  routellm:
+    restart: unless-stopped
+    # Edit below to run on a container runtime other than nvidia-container-runtime.
+    # Not yet tested with rocm/AMD GPUs.
+    # Comment out the "deploy" section to run on CPU only.
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu, utility, compute]
+              count: 1 # Currently RouteLLM can only handle one GPU device!
+    # Uncomment the image line to pull the image from the container registry.
+    # Also, comment out the build section.
+    #image: ghcr.io/lm-sys/routellm:latest
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile
+    ports:
+      - "6060:6060"
+    volumes:
+      - ./hf:/root/.cache/huggingface
+    environment:
+      #- OPENAI_API_KEY_FILE=/run/secrets/openai_api_key
+      #- OPENAI_API_KEY=sk-...
+      #- ANYSCALE_API_KEY_FILE=/run/secrets/anyscale_api_key
+      #- ANYSCALE_API_KEY=esecret_XXXXXX
+      #- ANTHROPIC_API_KEY_FILE=/run/secrets/anthropic_api_key
+      #- ANTHROPIC_API_KEY=api-key
+      #- GEMINI_API_KEY_FILE=/run/secrets/gemini_api_key
+      #- GEMINI_API_KEY=api-key
+      #- TOGETHERAI_API_KEY_FILE=/run/secrets/togetherai_api_key
+      #- TOGETHERAI_API_KEY=api-key
+
+      # bedrock (_FILE is supported)
+      #- AWS_ACCESS_KEY_ID=...
+      #- AWS_SECRET_ACCESS_KEY=...
+      #- AWS_REGION_NAME=...
+      - SERVER_ARGS=--routers mf --strong-model gpt-4o --weak-model anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1
+
+    # Only required if you are using a secret file
+    #secrets:
+    #  - openai_api_key
+    #  - anyscale_api_key
+    #  - anthropic_api_key
+    #  - gemini_api_key
+    #  - togetherai_api_key
+# Only required if you are using a secret file
+#secrets:
+#  openai_api_key:
+#    file:
+#  anyscale_api_key:
+#    file:
+#  anthropic_api_key:
+#    file:
+#  gemini_api_key:
+#    file:
+#  togetherai_api_key:
+#    file:
diff --git a/docker/rootfs/usr/local/bin/entrypoint.sh b/docker/rootfs/usr/local/bin/entrypoint.sh
new file mode 100755
index 0000000..12a18ab
--- /dev/null
+++ b/docker/rootfs/usr/local/bin/entrypoint.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Support loading secrets from files using the _FILE suffix convention
+for _env_file in $(env | grep '_FILE=' | awk -F '=' '{print $1}'); do
+    _env_var=$(echo "${_env_file}" | sed -r 's/(.*)_FILE/\1/')
+    if [ -f "${!_env_file}" ]; then
+        export "${_env_var}"="$(cat "${!_env_file}")"
+    fi
+done
+
+# exec so the Python server replaces the shell and receives container signals directly
+exec python -m routellm.openai_server $SERVER_ARGS
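For reference, a typical workflow with these files would look like the sketch below (commands assumed to run from the repository root; provider keys come from the environment block in docker-compose.yml):

    # Build and start the composed service (GPU-enabled by default; see the deploy section)
    docker compose -f docker/docker-compose.yml up --build -d

    # Or build a standalone image for one device target; TARGET is the
    # build-arg consumed by docker/Dockerfile (cpu, cuda, or rocm)
    docker build -f docker/Dockerfile --build-arg TARGET=cpu -t routellm:cpu .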
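The entrypoint's _FILE convention means any provider key can be supplied as a path instead of a literal value: for every variable ending in _FILE that points at an existing file, the script exports the unsuffixed variable with the file's contents before launching the server. A minimal sketch with plain docker run (the host path here is hypothetical):

    # entrypoint.sh reads /run/secrets/openai_api_key and exports OPENAI_API_KEY
    docker run --rm -p 6060:6060 \
        -v /path/to/openai_api_key:/run/secrets/openai_api_key:ro \
        -e OPENAI_API_KEY_FILE=/run/secrets/openai_api_key \
        -e SERVER_ARGS="--routers mf --strong-model gpt-4o --weak-model anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1" \
        ghcr.io/lm-sys/routellm:latest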
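Once the container is up, routellm.openai_server exposes an OpenAI-compatible chat-completions endpoint on port 6060. Assuming the model-name convention from the RouteLLM README (router-<router>-<threshold>; the threshold below is only an example), a smoke test could be:

    curl http://localhost:6060/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{
              "model": "router-mf-0.11593",
              "messages": [{"role": "user", "content": "Hello!"}]
            }'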