From 5b0816c71838e169cf0d487802beb00b6a20ea4d Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 15:23:56 +0100
Subject: [PATCH 01/40] Added rocm builds and documentation

---
 .dockerignore                                 |   6 +
 .../workflows/{docker.yml => docker-cuda.yml} |  17 ++-
 .github/workflows/docker-rocm.yml             | 119 ++++++++++++++++++
 .github/workflows/release.yml                 |   7 +-
 README.md                                     |  15 ++-
 Dockerfile => cuda.Dockerfile                 |   3 +-
 rocm.Dockerfile                               |  61 +++++++++
 website/docs/extensions/troubleshooting.md    |   6 +-
 website/docs/faq.mdx                          |  22 +++-
 website/docs/installation/apple.md            |   2 +-
 website/docs/installation/docker-compose.mdx  |  27 +++-
 website/docs/installation/docker.mdx          |  12 +-
 website/docs/installation/modal/index.md      |  10 +-
 13 files changed, 280 insertions(+), 27 deletions(-)
 rename .github/workflows/{docker.yml => docker-cuda.yml} (88%)
 create mode 100644 .github/workflows/docker-rocm.yml
 rename Dockerfile => cuda.Dockerfile (99%)
 create mode 100644 rocm.Dockerfile

diff --git a/.dockerignore b/.dockerignore
index de70e0d16772..bfbb41f4fe53 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,8 @@
+.idea
+ci
+clients
+.github
+python
 **/target
 **/node_modules
+website
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker-cuda.yml
similarity index 88%
rename from .github/workflows/docker.yml
rename to .github/workflows/docker-cuda.yml
index 1e7482119d18..387a6bdcd9fd 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker-cuda.yml
@@ -1,4 +1,4 @@
-name: Create and publish docker image
+name: Create and publish CUDA docker image
 
 on:
   workflow_dispatch:
@@ -50,7 +50,10 @@ jobs:
 
       # Workaround: https://github.com/docker/build-push-action/issues/461
       - name: Setup Docker buildx
-        uses: docker/setup-buildx-action@v2.0.0
+        uses: docker/setup-buildx-action@v3.0.0
+        with:
+          # Needed to support OCI annotations
+          version: v0.12.0
 
       # Login against a Docker registry except on PR
       # https://github.com/docker/login-action
@@ -78,12 +81,14 @@ jobs:
 
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5.0.0
         with:
           # list of Docker images to use as base name for tags
           images: |
             ghcr.io/${{ env.IMAGE_NAME }}
+            ghcr.io/${{ env.IMAGE_NAME }}/cuda
             ${{ env.IMAGE_NAME }}
+            ${{ env.IMAGE_NAME }}-cuda
           # generate Docker tags based on the following events/attributes
           tags: |
             type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
@@ -95,13 +100,14 @@ jobs:
       # https://github.com/docker/build-push-action
       - name: Build and push Docker image
         id: build-and-push
-        uses: docker/build-push-action@v3.1.1
+        uses: docker/build-push-action@v5.1.0
         with:
-          file: Dockerfile
+          file: cuda.Dockerfile
           push: true
           context: .
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+          annotations: ${{ steps.meta.outputs.labels }}
           cache-from: ${{ steps.cache.outputs.cache-from }}
           cache-to: ${{ steps.cache.outputs.cache-to }}
           build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
@@ -112,4 +118,3 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
           repository: tabbyml/tabby
-
diff --git a/.github/workflows/docker-rocm.yml b/.github/workflows/docker-rocm.yml
new file mode 100644
index 000000000000..ca4963777f15
--- /dev/null
+++ b/.github/workflows/docker-rocm.yml
@@ -0,0 +1,119 @@
+name: Create and publish ROCm docker image
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 20 */1 * *'
+  push:
+    tags:
+      - 'v*'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} 
+  
+  # If this is enabled it will cancel current running and start latest
+  cancel-in-progress: true
+
+env:
+  RUST_TOOLCHAIN: 1.73.0
+
+jobs:
+  release-docker:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      # This is used to complete the identity challenge
+      # with sigstore/fulcio when running outside of PRs.
+      id-token: write
+
+    steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          swap-storage: true
+
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      # Workaround: https://github.com/docker/build-push-action/issues/461
+      - name: Setup Docker buildx
+        uses: docker/setup-buildx-action@v3.0.0
+        with:
+          # Needed to support OCI annotations
+          version: v0.12.0
+
+      # Login against a Docker registry except on PR
+      # https://github.com/docker/login-action
+      - name: Log into GitHub Container registry
+        uses: docker/login-action@v2.0.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Log into Docker Hub
+        uses: docker/login-action@v2.0.0
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Generate image name
+        run: |
+          echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV}
+
+      - uses: int128/docker-build-cache-config-action@v1
+        id: cache
+        with:
+          image: ghcr.io/${{ env.IMAGE_NAME }}/cache
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5.0.0
+        with:
+          # list of Docker images to use as base name for tags
+          images: |
+            ghcr.io/${{ env.IMAGE_NAME }}/rocm
+            ${{ env.IMAGE_NAME }}-rocm
+          # generate Docker tags based on the following events/attributes
+          variant: rocm
+          tags: |
+            type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
+            type=schedule,pattern=nightly
+            type=schedule,pattern={{date 'YYYYMMDD'}}
+            type=semver,pattern={{version}}
+
+      # Build and push Docker image with Buildx (don't push on PR)
+      # https://github.com/docker/build-push-action
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v5.1.0
+        with:
+          file: rocm.Dockerfile
+          push: true
+          context: .
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          annotations: ${{ steps.meta.outputs.labels }}
+          cache-from: ${{ steps.cache.outputs.cache-from }}
+          cache-to: ${{ steps.cache.outputs.cache-to }}
+          build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
+
+      - name: Docker Hub Description
+        uses: peter-evans/dockerhub-description@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          repository: tabbyml/tabby
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b22676562baf..8f0ba9bc75a0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -26,7 +26,7 @@ jobs:
     container: ${{ matrix.container }}
     strategy:
       matrix:
-        binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117]
+        binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-rocm5.7]
         include:
           - os: macos-latest
             target: aarch64-apple-darwin
@@ -40,6 +40,11 @@ jobs:
             binary: x86_64-manylinux2014-cuda117
             container: sameli/manylinux2014_x86_64_cuda_11.7
             build_args: --features cuda
+          - os: ubuntu-latest
+            target: x86_64-unknown-linux-gnu
+            binary: x86_64-manylinux2014-rocm5.7
+            container: rocm/dev-ubuntu-22.04:rocm5.7
+            build_args: --features rocm
 
     env:
       SCCACHE_GHA_ENABLED: true
diff --git a/README.md b/README.md
index c606301e2a09..892e3b50cd09 100644
--- a/README.md
+++ b/README.md
@@ -50,14 +50,25 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta
 - ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration)
 
 ### Run Tabby in 1 Minute
-The easiest way to start a Tabby server is by using the following Docker command:
+The easiest way to start a Tabby server is by using the following Docker command...
 
+...with CUDA:
 ```bash
 docker run -it \
   --gpus all -p 8080:8080 -v $HOME/.tabby:/data \
-  tabbyml/tabby \
+  tabbyml/tabby-cuda \
   serve --model TabbyML/StarCoder-1B --device cuda
 ```
+
+...with ROCm (Linux only):
+```bash
+docker run -it \
+  --device /dev/dri --device /dev/kfd \
+  -p 8080:8080 -v $HOME/.tabby:/data \
+  tabbyml/tabby-rocm \
+  serve --model TabbyML/StarCoder-1B --device rocm
+```
+
 For additional options (e.g inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby).
 
 ## 🤝 Contributing
diff --git a/Dockerfile b/cuda.Dockerfile
similarity index 99%
rename from Dockerfile
rename to cuda.Dockerfile
index f939d143ec2f..947368d4a68d 100644
--- a/Dockerfile
+++ b/cuda.Dockerfile
@@ -29,12 +29,13 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLC
 ENV PATH="/root/.cargo/bin:${PATH}"
 
 WORKDIR /root/workspace
-COPY . .
 
 RUN mkdir -p /opt/tabby/bin
 RUN mkdir -p /opt/tabby/lib
 RUN mkdir -p target
 
+COPY . .
+
 RUN --mount=type=cache,target=/usr/local/cargo/registry \
     --mount=type=cache,target=/root/workspace/target \
     cargo build --features cuda --release --package tabby && \
diff --git a/rocm.Dockerfile b/rocm.Dockerfile
new file mode 100644
index 000000000000..87ccd51acf11
--- /dev/null
+++ b/rocm.Dockerfile
@@ -0,0 +1,61 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG ROCM_VERSION=5.7
+# Target the ROCm build image
+ARG BASE_ROCM_DEV_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete"
+# Target the ROCm runtime image
+ARG BASE_ROCM_RUN_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete"
+
+FROM ${BASE_ROCM_DEV_CONTAINER} as build
+
+# Rust toolchain version
+ARG RUST_TOOLCHAIN=stable
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        curl \
+        pkg-config \
+        libssl-dev \
+        protobuf-compiler \
+        git \
+        cmake \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# setup rust.
+RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLCHAIN} -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+WORKDIR /root/workspace
+
+RUN mkdir -p /opt/tabby/bin
+RUN mkdir -p /opt/tabby/lib
+RUN mkdir -p target
+
+COPY . .
+
+RUN --mount=type=cache,target=/usr/local/cargo/registry \
+    --mount=type=cache,target=/root/workspace/target \
+    cargo build --features rocm --release --package tabby && \
+    cp target/release/tabby /opt/tabby/bin/
+
+FROM ${BASE_ROCM_RUN_CONTAINER} as runtime
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        git \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Disable safe directory in docker
+# Context: https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9
+RUN git config --system --add safe.directory "*"
+
+COPY --from=build /opt/tabby /opt/tabby
+
+ENV TABBY_ROOT=/data
+
+ENTRYPOINT ["/opt/tabby/bin/tabby"]
diff --git a/website/docs/extensions/troubleshooting.md b/website/docs/extensions/troubleshooting.md
index 94f42a667f5c..bf4ee1cc9be4 100644
--- a/website/docs/extensions/troubleshooting.md
+++ b/website/docs/extensions/troubleshooting.md
@@ -112,9 +112,9 @@ for the current code context.
 If your completion requests are timing out, Tabby may display a warning message. 
 This could be due to network issues or poor server performance, especially when 
 running a large model on a CPU. To improve performance, consider running the model 
-on a GPU with CUDA support or on Apple M1/M2 with Metal support. When running 
-the server, make sure to specify the device in the arguments using  `--device cuda` 
-or `--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/). 
+on a GPU with CUDA or ROCm support or on Apple M1/M2 with Metal support. When running 
+the server, make sure to specify the device in the arguments using `--device cuda`, `--device rocm` or
+`--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/). 
 
 By default, the timeout for automatically triggered completion requests is set to 4 seconds. 
 You can adjust this timeout value in the `~/.tabby-client/agent/config.toml` configuration file.
diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx
index 3dc6ecb632fc..f50093f9c309 100644
--- a/website/docs/faq.mdx
+++ b/website/docs/faq.mdx
@@ -1,10 +1,11 @@
-import CodeBlock from '@theme/CodeBlock';
-
 # ⁉️ Frequently Asked Questions
 
 <details>
   <summary>How much VRAM a LLM model consumes?</summary>
-  <div>By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.</div>
+    <div>
+        <p>By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.</p>
+        <p>For ROCm, the actual limits are currently largely untested, but the same CodeLlama-7B seems to use 8GB of VRAM as well on an AMD Radeon™ RX 7900 XTX according to the ROCm monitoring tools.</p>
+    </div>
 </details>
 
 <details>
@@ -18,13 +19,26 @@ import CodeBlock from '@theme/CodeBlock';
     <p>
       To determine the mapping between the GPU card type and its compute capability, please visit <a href="https://developer.nvidia.com/cuda-gpus">this page</a>
     </p>
+    <p>
+      This also seems to be available on AMD Radeon™ GPUs, but it's unclear which cards besides RDNA3 support this.
+    </p>
   </div>
 </details>
 
 <details>
   <summary>How to utilize multiple NVIDIA GPUs?</summary>
   <div>
-    <p>Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES accordingly.</p>
+    <p>Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES accordingly.</p>
+  </div>
+</details>
+
+<details>
+  <summary>My AMD GPU isn't supported by ROCm</summary>
+  <div>
+    <p>
+      If a similar GPU is supported by ROCm, you can set the HSA_OVERRIDE_GFX_VERSION environment variable to that GPU's GFX version.
+      For example, set it to 10.3.0 for RDNA2 and to 11.0.0 for RDNA3.
+    </p>
   </div>
 </details>
 
diff --git a/website/docs/installation/apple.md b/website/docs/installation/apple.md
index 90bd2f6de1b5..8fed35ce4edb 100644
--- a/website/docs/installation/apple.md
+++ b/website/docs/installation/apple.md
@@ -14,4 +14,4 @@ brew install tabbyml/tabby/tabby
 tabby serve --device metal --model TabbyML/StarCoder-1B
 ```
 
-The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA. You can find more information about Docker [here](./docker).
+The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA or ROCm. You can find more information about Docker [here](./docker).
diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx
index 8dab5c47b985..54e9266bcc2b 100644
--- a/website/docs/installation/docker-compose.mdx
+++ b/website/docs/installation/docker-compose.mdx
@@ -5,6 +5,8 @@ sidebar_position: 1
 # Docker Compose
 This guide explains how to launch Tabby using docker-compose.
 
+
+
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
@@ -16,8 +18,8 @@ version: '3.5'
 
 services:
   tabby:
-    restart: always
-    image: tabbyml/tabby
+    restart: unless-stopped
+    image: tabbyml/tabby-cuda
     command: serve --model TabbyML/StarCoder-1B --device cuda
     volumes:
       - "$HOME/.tabby:/data"
@@ -33,6 +35,25 @@ services:
 ```
 
   </TabItem>
+  <TabItem value="rocm" label="ROCm">
+
+```yaml title="docker-compose.yml"
+version: '3.5'
+services:
+  tabby:
+    restart: unless-stopped
+    image: tabbyml/tabby-rocm
+    command: serve --model TabbyML/StarCoder-1B --device rocm
+    volumes:
+      - "$HOME/.tabby:/data"
+    ports:
+      - 8080:8080
+    devices:
+      - /dev/dri
+      - /dev/kfd
+```
+
+  </TabItem>
   <TabItem value="cpu" label="CPU">
 
 ```yaml title="docker-compose.yml"
@@ -40,7 +61,7 @@ version: '3.5'
 
 services:
   tabby:
-    restart: always
+    restart: unless-stopped
     image: tabbyml/tabby
     command: serve --model TabbyML/StarCoder-1B
     volumes:
diff --git a/website/docs/installation/docker.mdx b/website/docs/installation/docker.mdx
index 7f26f87c0c13..bd1c48d32632 100644
--- a/website/docs/installation/docker.mdx
+++ b/website/docs/installation/docker.mdx
@@ -6,6 +6,9 @@ sidebar_position: 0
 
 This guide explains how to launch Tabby using docker.
 
+
+
+
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
@@ -13,7 +16,14 @@ import TabItem from '@theme/TabItem';
   <TabItem value="cuda" label="CUDA (requires NVIDIA Container Toolkit)" default>
 
   ```bash title="run.sh"
-  docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby serve --model TabbyML/StarCoder-1B --device cuda
+  docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-cuda serve --model TabbyML/StarCoder-1B --device cuda
+  ```
+
+  </TabItem>
+  <TabItem value="rocm" label="ROCm">
+
+  ```bash title="run.sh"
+  docker run -it --device /dev/dri --device /dev/kfd -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-rocm serve --model TabbyML/StarCoder-1B --device rocm
   ```
 
   </TabItem>
diff --git a/website/docs/installation/modal/index.md b/website/docs/installation/modal/index.md
index 86638d7d31b4..10777833330d 100644
--- a/website/docs/installation/modal/index.md
+++ b/website/docs/installation/modal/index.md
@@ -20,11 +20,11 @@ GPU_CONFIG = gpu.T4()
 
 Currently supported GPUs in Modal:
 
-- `T4`: Low-cost GPU option, providing 16GiB of GPU memory.
-- `L4`: Mid-tier GPU option, providing 24GiB of GPU memory.
-- `A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations.
-- `A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs.
-- `Any`: Selects any one of the GPU classes available within Modal, according to availability.
+- `NVIDIA T4`: Low-cost GPU option, providing 16GiB of GPU memory.
+- `NVIDIA L4`: Mid-tier GPU option, providing 24GiB of GPU memory.
+- `NVIDIA A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations.
+- `NVIDIA A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs.
+- `Any`: Selects any one of the GPU classes available within Modal, according to availability.
 
 For detailed usage, please check official [Modal GPU reference](https://modal.com/docs/reference/modal.gpu).
 

From 1be9c1ef4ac79264158d7dddf8261ecf5b866044 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 15:27:23 +0100
Subject: [PATCH 02/40] Pulled build improvements from #902

---
 crates/llama-cpp-bindings/build.rs     | 10 ++++++----
 crates/llama-cpp-bindings/src/llama.rs |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs
index b4e09f6c8506..48520af374e5 100644
--- a/crates/llama-cpp-bindings/build.rs
+++ b/crates/llama-cpp-bindings/build.rs
@@ -33,7 +33,7 @@ fn main() {
         println!("cargo:rustc-link-lib=cublasLt");
     }
     if cfg!(feature = "rocm") {
-        let amd_gpu_targets: Vec<&str> = vec![
+        let amd_gpu_default_targets: Vec<&str> = vec![
             "gfx803",
             "gfx900",
             "gfx906:xnack-",
@@ -51,6 +51,8 @@ fn main() {
             "gfx1102",
             "gfx1103",
         ];
+        let amd_gpu_targets =
+            env::var("AMDGPU_TARGETS").unwrap_or(amd_gpu_default_targets.join(";"));
 
         let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string());
         config.define("LLAMA_HIPBLAS", "ON");
@@ -59,7 +61,7 @@ fn main() {
             "CMAKE_CXX_COMPILER",
             format!("{}/llvm/bin/clang++", rocm_root),
         );
-        config.define("AMDGPU_TARGETS", amd_gpu_targets.join(";"));
+        config.define("AMDGPU_TARGETS", amd_gpu_targets);
         println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries");
         println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root);
         println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root);
@@ -74,8 +76,8 @@ fn main() {
 
     cxx_build::bridge("src/lib.rs")
         .file("src/engine.cc")
-        .flag_if_supported("-Iinclude")
-        .flag_if_supported("-Illama.cpp")
+        .include("include")
+        .include("llama.cpp")
         .flag_if_supported("-std=c++14")
         .compile("cxxbridge");
 }
diff --git a/crates/llama-cpp-bindings/src/llama.rs b/crates/llama-cpp-bindings/src/llama.rs
index 15db1358dc55..b288a3adc263 100644
--- a/crates/llama-cpp-bindings/src/llama.rs
+++ b/crates/llama-cpp-bindings/src/llama.rs
@@ -83,7 +83,7 @@ impl LlamaServiceImpl {
         };
 
         for ffi::StepOutput { request_id, text } in result {
-            let mut stopped = false;
+            let mut stopped;
             let LlamaRunningRequest { tx, stop_condition } =
                 self.requests.get_mut(&request_id).unwrap();
 

From 8a217609bd9c0357bc5f612562b31facee40293b Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 15:30:21 +0100
Subject: [PATCH 03/40] Fixed build container for rocm build

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8f0ba9bc75a0..66f932635117 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -43,7 +43,7 @@ jobs:
           - os: ubuntu-latest
             target: x86_64-unknown-linux-gnu
             binary: x86_64-manylinux2014-rocm5.7
-            container: rocm/dev-ubuntu-22.04:rocm5.7
+            container: rocm/dev-ubuntu-22.04:5.7-complete
             build_args: --features rocm
 
     env:

From 1e3fe328bd5b1d60f7372e679b77b0a65f325b8c Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 15:38:56 +0100
Subject: [PATCH 04/40] Install git in rocm container

---
 .github/workflows/release.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 66f932635117..b1d4a5bdb348 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -44,6 +44,7 @@ jobs:
             target: x86_64-unknown-linux-gnu
             binary: x86_64-manylinux2014-rocm5.7
             container: rocm/dev-ubuntu-22.04:5.7-complete
+            install_git: true
             build_args: --features rocm
 
     env:
@@ -52,6 +53,14 @@ jobs:
       CARGO_INCREMENTAL: 0
 
     steps:
+      - name: Update Git
+        uses: actions/shell@v3
+        if: matrix.install_git == 'true'
+        with:
+          run: |
+            apt-get update
+            apt-get install -y git
+
       - name: Checkout
         uses: actions/checkout@v3
         with:

From 1a1d4da8dacfc3156db540d2a844fc8a8ae20da2 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 15:40:50 +0100
Subject: [PATCH 05/40] Fixed github step

---
 .github/workflows/release.yml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b1d4a5bdb348..be64260c4afb 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -54,12 +54,10 @@ jobs:
 
     steps:
       - name: Update Git
-        uses: actions/shell@v3
         if: matrix.install_git == 'true'
-        with:
-          run: |
-            apt-get update
-            apt-get install -y git
+        run: |
+          apt-get update
+          apt-get install -y git
 
       - name: Checkout
         uses: actions/checkout@v3

From b701a63bba894507e91157d438abdd12fb5565d6 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 15:45:31 +0100
Subject: [PATCH 06/40] Try to fix if statement

---
 .github/workflows/release.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index be64260c4afb..1919cc4512a4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -53,8 +53,8 @@ jobs:
       CARGO_INCREMENTAL: 0
 
     steps:
-      - name: Update Git
-        if: matrix.install_git == 'true'
+      - name: Install Git
+        if: ${{ matrix.install_git }}
         run: |
           apt-get update
           apt-get install -y git

From 82bbf8d11b014d1248535114f4fbd52b30592adb Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 16:01:40 +0100
Subject: [PATCH 07/40] Added more generic dependency installation

---
 .github/workflows/release.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1919cc4512a4..4bc6e360dcda 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -44,7 +44,7 @@ jobs:
             target: x86_64-unknown-linux-gnu
             binary: x86_64-manylinux2014-rocm5.7
             container: rocm/dev-ubuntu-22.04:5.7-complete
-            install_git: true
+            apt_dependencies: git pkg-config libssl-dev cmake protobuf-compiler
             build_args: --features rocm
 
     env:
@@ -53,11 +53,11 @@ jobs:
       CARGO_INCREMENTAL: 0
 
     steps:
-      - name: Install Git
-        if: ${{ matrix.install_git }}
+      - name: Install dependencies
+        if: ${{ matrix.apt_dependencies }}
         run: |
           apt-get update
-          apt-get install -y git
+          apt-get install -y ${{ matrix.apt_dependencies }}
 
       - name: Checkout
         uses: actions/checkout@v3

From 081a9f3cec6a504cdac72ac77e17634308e5bb9a Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 16:24:46 +0100
Subject: [PATCH 08/40] upgraded rustup action

---
 .github/workflows/release.yml | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 4bc6e360dcda..2c336de64c7a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -18,7 +18,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  RUST_TOOLCHAIN: 1.73.0
+  RUST_TOOLCHAIN: 1.80.0
 
 jobs:
   release-binary:
@@ -44,7 +44,7 @@ jobs:
             target: x86_64-unknown-linux-gnu
             binary: x86_64-manylinux2014-rocm5.7
             container: rocm/dev-ubuntu-22.04:5.7-complete
-            apt_dependencies: git pkg-config libssl-dev cmake protobuf-compiler
+            apt_dependencies: git pkg-config libssl-dev cmake
             build_args: --features rocm
 
     env:
@@ -65,14 +65,11 @@ jobs:
           submodules: recursive
 
       - name: Install Rust
-        uses: actions-rs/toolchain@v1
+        uses: dtolnay/rust-toolchain@${{ env.RUST_TOOLCHAIN }}
         with:
-          toolchain: ${{ env.RUST_TOOLCHAIN }}
-          target: ${{ matrix.target }}
+          targets: ${{ matrix.target }}
           components: clippy
 
-      - run: rustup default ${{ env.RUST_TOOLCHAIN }}
-
       - name: Sccache cache
         uses: mozilla-actions/sccache-action@v0.0.3
         with:

From 5308d34d94ed7adc32f4cbbbf3f76beee8a3a591 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 16:30:34 +0100
Subject: [PATCH 09/40] Update sccache

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 2c336de64c7a..aff6a4b2f687 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -73,7 +73,7 @@ jobs:
       - name: Sccache cache
         uses: mozilla-actions/sccache-action@v0.0.3
         with:
-          version: "v0.4.0"
+          version: "v0.7.4"
 
       - name: Cargo registry cache
         uses: actions/cache@v3

From f50b5afa49a26c66fcfb27207ae90024a11c5367 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 16:44:18 +0100
Subject: [PATCH 10/40] Try pytorch manylinux image

---
 .github/workflows/release.yml | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index aff6a4b2f687..bb6005469874 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -43,8 +43,7 @@ jobs:
           - os: ubuntu-latest
             target: x86_64-unknown-linux-gnu
             binary: x86_64-manylinux2014-rocm5.7
-            container: rocm/dev-ubuntu-22.04:5.7-complete
-            apt_dependencies: git pkg-config libssl-dev cmake
+            container: pytorch/manylinux-rocm:5.7
             build_args: --features rocm
 
     env:
@@ -53,12 +52,6 @@ jobs:
       CARGO_INCREMENTAL: 0
 
     steps:
-      - name: Install dependencies
-        if: ${{ matrix.apt_dependencies }}
-        run: |
-          apt-get update
-          apt-get install -y ${{ matrix.apt_dependencies }}
-
       - name: Checkout
         uses: actions/checkout@v3
         with:

From 980f3ed8645c47937d30214718e34d777522995d Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 16:57:19 +0100
Subject: [PATCH 11/40] Switched location for toolchain parameter

---
 .github/workflows/release.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bb6005469874..f55e725ac1db 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -18,7 +18,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  RUST_TOOLCHAIN: 1.80.0
+  RUST_TOOLCHAIN: 1.73.0
 
 jobs:
   release-binary:
@@ -58,8 +58,9 @@ jobs:
           submodules: recursive
 
       - name: Install Rust
-        uses: dtolnay/rust-toolchain@${{ env.RUST_TOOLCHAIN }}
+        uses: dtolnay/rust-toolchain@master
         with:
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
           targets: ${{ matrix.target }}
           components: clippy
 

From 42ad4794258f45f7e46d9b77ff32d84ce4a1d55f Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 16:59:32 +0100
Subject: [PATCH 12/40] Downgraded to deprecated action again

---
 .github/workflows/release.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index f55e725ac1db..8ffc1edca87a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -58,10 +58,10 @@ jobs:
           submodules: recursive
 
       - name: Install Rust
-        uses: dtolnay/rust-toolchain@master
+        uses: actions-rs/toolchain@v1
         with:
           toolchain: ${{ env.RUST_TOOLCHAIN }}
-          targets: ${{ matrix.target }}
+          target: ${{ matrix.target }}
           components: clippy
 
       - name: Sccache cache

From a5e69ca548383c0ff382e94924781fa5e1a936d8 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Sun, 10 Dec 2023 17:02:38 +0100
Subject: [PATCH 13/40] Re-added set default step

---
 .github/workflows/release.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8ffc1edca87a..94990b63dc6e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -64,6 +64,9 @@ jobs:
           target: ${{ matrix.target }}
           components: clippy
 
+      - name: Set default rust version
+        run: rustup default ${{ env.RUST_TOOLCHAIN }}
+
       - name: Sccache cache
         uses: mozilla-actions/sccache-action@v0.0.3
         with:

From 5be320ab8e1dac18cd07c8c99aef3bbffd63c7f1 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Tue, 12 Dec 2023 22:07:28 +0100
Subject: [PATCH 14/40] Install minimal rocm on the fly

---
 .github/workflows/release.yml   |  9 ++++++---
 ci/prepare_build_environment.sh | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index deecd78810c8..9129b9048fdf 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -55,9 +55,10 @@ jobs:
             windows_cuda: '12.2.0'
           - os: ubuntu-latest
             target: x86_64-unknown-linux-gnu
-            binary: x86_64-manylinux2014-rocm5.7
-            container: pytorch/manylinux-rocm:5.7
+            binary: x86_64-manylinux2014-rocm57
+            container: quay.io/pypa/manylinux2014_x86_64
             build_args: --features rocm
+            linux_rocm: "5.7.2"
 
     env:
       SCCACHE_GHA_ENABLED: true
@@ -66,7 +67,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           submodules: recursive
 
@@ -97,6 +98,8 @@ jobs:
             ~/.cargo/git
 
       - name: Prepare build environment for macOS & Linux
+        env:
+          ROCM: ${{ matrix.linux_rocm }}
         run: bash ./ci/prepare_build_environment.sh
         if: runner.os != 'Windows'
 
diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh
index cdcf2f637eeb..5d820b7166ee 100755
--- a/ci/prepare_build_environment.sh
+++ b/ci/prepare_build_environment.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+set -e
+
 if [[ "$OSTYPE" == "darwin"* ]]; then
   brew install protobuf
 fi
@@ -11,6 +13,14 @@ install_protobuf_centos() {
   rm protoc-3.15.8-linux-x86_64.zip
 }
 
+install_hipblas_5_7_2_centos() {
+  wget -O /tmp/amdgpu-install.rpm https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm
+  yum install /tmp/amdgpu-install.rpm
+  rm /tmp/amdgpu-install.rpm
+
+  yum install hipblas-devel hipblaslt-devel
+}
+
 if [[ "$OSTYPE" == "linux"* ]]; then
   if command -v apt-get ; then
     sudo apt-get -y install protobuf-compiler libopenblas-dev
@@ -18,6 +28,10 @@ if [[ "$OSTYPE" == "linux"* ]]; then
     # Build from manylinux2014 container
     yum -y install openblas-devel perl-IPC-Cmd unzip curl openssl-devel
 
+    if [[ "$ROCM" == "5.7.2" ]]; then
+      install_hipblas_5_7_2_centos
+    fi
+
     # Disable safe directory in docker
     git config --system --add safe.directory "*"
 

From e89ca16e1844f985acccc4be3355b10ef0543a84 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Tue, 12 Dec 2023 22:11:33 +0100
Subject: [PATCH 15/40] Fixed typo in binary name

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9129b9048fdf..51912f83d32f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -27,7 +27,7 @@ jobs:
     strategy:
       matrix:
         binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117,
-                 x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm5.7]
+                 x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
         include:
           - os: macos-latest
             target: aarch64-apple-darwin

From c96853ae28ec226806515a82a1338cc68a352d97 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Tue, 12 Dec 2023 22:13:02 +0100
Subject: [PATCH 16/40] Downgraded checkout action

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 51912f83d32f..514bf3be7526 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -67,7 +67,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
         with:
           submodules: recursive
 

From 97fef46610300aa886a041215d98c71ed47e85b2 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Tue, 12 Dec 2023 22:25:21 +0100
Subject: [PATCH 17/40] Use curl to download

---
 ci/prepare_build_environment.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh
index 5d820b7166ee..6222d66b8667 100755
--- a/ci/prepare_build_environment.sh
+++ b/ci/prepare_build_environment.sh
@@ -8,13 +8,13 @@ fi
 
 install_protobuf_centos() {
   PB_REL="https://github.com/protocolbuffers/protobuf/releases"
-  curl -LO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
+  curl -SLO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
   unzip protoc-3.15.8-linux-x86_64.zip -d /usr
   rm protoc-3.15.8-linux-x86_64.zip
 }
 
 install_hipblas_5_7_2_centos() {
-  wget -O /tmp/amdgpu-install.rpm https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm
+  curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm
   yum install /tmp/amdgpu-install.rpm
   rm /tmp/amdgpu-install.rpm
 

From 022548c8b5a5236cd00baaed242035bc616e4ca6 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Tue, 12 Dec 2023 22:36:07 +0100
Subject: [PATCH 18/40] Add -y flag to yum

---
 ci/prepare_build_environment.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh
index 6222d66b8667..08051c52c198 100755
--- a/ci/prepare_build_environment.sh
+++ b/ci/prepare_build_environment.sh
@@ -15,10 +15,10 @@ install_protobuf_centos() {
 
 install_hipblas_5_7_2_centos() {
   curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm
-  yum install /tmp/amdgpu-install.rpm
+  yum -y install /tmp/amdgpu-install.rpm
   rm /tmp/amdgpu-install.rpm
 
-  yum install hipblas-devel hipblaslt-devel
+  yum -y install hipblas-devel hipblaslt-devel
 }
 
 if [[ "$OSTYPE" == "linux"* ]]; then

From f4e99e758241ee6be0ec582f6a8331d391771ad1 Mon Sep 17 00:00:00 2001
From: Cromefire_ <cromefire+git@pm.me>
Date: Tue, 12 Dec 2023 22:47:08 +0100
Subject: [PATCH 19/40] Also install rocblas

---
 ci/prepare_build_environment.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh
index 08051c52c198..0309c100f1e9 100755
--- a/ci/prepare_build_environment.sh
+++ b/ci/prepare_build_environment.sh
@@ -17,8 +17,7 @@ install_hipblas_5_7_2_centos() {
   curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm
   yum -y install /tmp/amdgpu-install.rpm
   rm /tmp/amdgpu-install.rpm
-
-  yum -y install hipblas-devel hipblaslt-devel
+  yum -y install hipblas-devel hipblaslt-devel rocblas-devel
 }
 
 if [[ "$OSTYPE" == "linux"* ]]; then

From 7ae9d1df715294b986cd730aa6ad0bb4614a3a79 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 14:56:14 +0800
Subject: [PATCH 20/40] Update release.yml

---
 .github/workflows/release.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 514bf3be7526..82a34be05f93 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -26,8 +26,7 @@ jobs:
     container: ${{ matrix.container }}
     strategy:
       matrix:
-        binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117,
-                 x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
+        binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
         include:
           - os: macos-latest
             target: aarch64-apple-darwin
@@ -56,9 +55,8 @@ jobs:
           - os: ubuntu-latest
             target: x86_64-unknown-linux-gnu
             binary: x86_64-manylinux2014-rocm57
-            container: quay.io/pypa/manylinux2014_x86_64
+            container: ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest
             build_args: --features rocm
-            linux_rocm: "5.7.2"
 
     env:
       SCCACHE_GHA_ENABLED: true
@@ -98,8 +96,6 @@ jobs:
             ~/.cargo/git
 
       - name: Prepare build environment for macOS & Linux
-        env:
-          ROCM: ${{ matrix.linux_rocm }}
         run: bash ./ci/prepare_build_environment.sh
         if: runner.os != 'Windows'
 

From 17cfd189dd00af9ba0fa29509c20626156f90d14 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 14:56:51 +0800
Subject: [PATCH 21/40] Revert sccache version to v0.4.0 in release.yml

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 82a34be05f93..14171731eb8a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -82,7 +82,7 @@ jobs:
       - name: Sccache cache
         uses: mozilla-actions/sccache-action@v0.0.3
         with:
-          version: "v0.7.4"
+          version: "v0.4.0"
 
       - name: Cargo registry cache
         uses: actions/cache@v3

From 895a2a2cba461dc79eca88660944fd0fd1fd18fe Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:04:39 +0800
Subject: [PATCH 22/40] Update prepare_build_environment.sh

---
 ci/prepare_build_environment.sh | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh
index 0309c100f1e9..44370733261a 100755
--- a/ci/prepare_build_environment.sh
+++ b/ci/prepare_build_environment.sh
@@ -8,18 +8,11 @@ fi
 
 install_protobuf_centos() {
   PB_REL="https://github.com/protocolbuffers/protobuf/releases"
-  curl -SLO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
+  curl -LO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
   unzip protoc-3.15.8-linux-x86_64.zip -d /usr
   rm protoc-3.15.8-linux-x86_64.zip
 }
 
-install_hipblas_5_7_2_centos() {
-  curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm
-  yum -y install /tmp/amdgpu-install.rpm
-  rm /tmp/amdgpu-install.rpm
-  yum -y install hipblas-devel hipblaslt-devel rocblas-devel
-}
-
 if [[ "$OSTYPE" == "linux"* ]]; then
   if command -v apt-get ; then
     sudo apt-get -y install protobuf-compiler libopenblas-dev
@@ -27,10 +20,6 @@ if [[ "$OSTYPE" == "linux"* ]]; then
     # Build from manylinux2014 container
     yum -y install openblas-devel perl-IPC-Cmd unzip curl openssl-devel
 
-    if [[ "$ROCM" == "5.7.2" ]]; then
-      install_hipblas_5_7_2_centos
-    fi
-
     # Disable safe directory in docker
     git config --system --add safe.directory "*"
 

From 5c1ea2fb5e9827f15660ff6001714695dabfb3d4 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:05:41 +0800
Subject: [PATCH 23/40] Remove set -e from prepare_build_environment.sh

---
 ci/prepare_build_environment.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh
index 44370733261a..cdcf2f637eeb 100755
--- a/ci/prepare_build_environment.sh
+++ b/ci/prepare_build_environment.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 
-set -e
-
 if [[ "$OSTYPE" == "darwin"* ]]; then
   brew install protobuf
 fi

From 18df1ba27e985bf511a2cf4c08f49edc9b86271c Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:07:51 +0800
Subject: [PATCH 24/40] Update build.rs

---
 crates/llama-cpp-bindings/build.rs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs
index b22253688215..0f4dff59b8d2 100644
--- a/crates/llama-cpp-bindings/build.rs
+++ b/crates/llama-cpp-bindings/build.rs
@@ -45,7 +45,7 @@ fn build_llama_cpp() {
         println!("cargo:rustc-link-lib=cublasLt");
     }
     if cfg!(feature = "rocm") {
-        let amd_gpu_default_targets: Vec<&str> = vec![
+        let amd_gpu_targets: Vec<&str> = vec![
             "gfx803",
             "gfx900",
             "gfx906:xnack-",
@@ -63,9 +63,7 @@ fn build_llama_cpp() {
             "gfx1102",
             "gfx1103",
         ];
-        let amd_gpu_targets =
-            env::var("AMDGPU_TARGETS").unwrap_or(amd_gpu_default_targets.join(";"));
-
+        
         let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string());
         config.define("LLAMA_HIPBLAS", "ON");
         config.define("CMAKE_C_COMPILER", format!("{}/llvm/bin/clang", rocm_root));
@@ -73,7 +71,7 @@ fn build_llama_cpp() {
             "CMAKE_CXX_COMPILER",
             format!("{}/llvm/bin/clang++", rocm_root),
         );
-        config.define("AMDGPU_TARGETS", amd_gpu_targets);
+        config.define("AMDGPU_TARGETS", amd_gpu_targets.join(";"));
         println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries");
         println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root);
         println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root);
@@ -105,8 +103,8 @@ fn build_llama_cpp() {
 fn build_cxx_binding() {
     cxx_build::bridge("src/lib.rs")
         .file("src/engine.cc")
-        .include("include")
-        .include("llama.cpp")
+        .flag_if_supported("-Iinclude")
+        .flag_if_supported("-Illama.cpp")
         .flag_if_supported("-std=c++14")
         .compile("cxxbridge");
 }

From d17bee08a3227f8908b256fb964fc4b77dbadaf6 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:08:12 +0800
Subject: [PATCH 25/40] Remove trailing whitespace in build.rs

---
 crates/llama-cpp-bindings/build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs
index 0f4dff59b8d2..a768e361faf2 100644
--- a/crates/llama-cpp-bindings/build.rs
+++ b/crates/llama-cpp-bindings/build.rs
@@ -63,7 +63,7 @@ fn build_llama_cpp() {
             "gfx1102",
             "gfx1103",
         ];
-        
+
         let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string());
         config.define("LLAMA_HIPBLAS", "ON");
         config.define("CMAKE_C_COMPILER", format!("{}/llvm/bin/clang", rocm_root));

From 3113962b368152e04b00f61c6d261827c61b3dd6 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:09:23 +0800
Subject: [PATCH 26/40] Update README.md

---
 README.md | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 892e3b50cd09..c606301e2a09 100644
--- a/README.md
+++ b/README.md
@@ -50,25 +50,14 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta
 - ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration)
 
 ### Run Tabby in 1 Minute
-The easiest way to start a Tabby server is by using the following Docker command...
+The easiest way to start a Tabby server is by using the following Docker command:
 
-...with cuda:
 ```bash
 docker run -it \
   --gpus all -p 8080:8080 -v $HOME/.tabby:/data \
-  tabbyml/tabby-cuda \
+  tabbyml/tabby \
   serve --model TabbyML/StarCoder-1B --device cuda
 ```
-
-...with ROCm (Linux only):
-```bash
-docker run -it \
-  --device /dev/dri --device /dev/kfd \
-  -p 8080:8080 -v $HOME/.tabby:/data \
-  tabbyml/tabby-rocm \
-  serve --model TabbyML/StarCoder-1B --device rocm
-```
-
 For additional options (e.g inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby).
 
 ## 🤝 Contributing

From 81f138a33cee7143fccf75cfb478b424f047bc7a Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:13:34 +0800
Subject: [PATCH 27/40] Update website/docs/faq.mdx

---
 website/docs/faq.mdx | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx
index f50093f9c309..e17f5ec8704e 100644
--- a/website/docs/faq.mdx
+++ b/website/docs/faq.mdx
@@ -19,9 +19,6 @@
     <p>
       To determine the mapping between the GPU card type and its compute capability, please visit <a href="https://developer.nvidia.com/cuda-gpus">this page</a>
     </p>
-    <p>
-      This also seems to be available on AMD Radeon™ GPUs, but it's unclear which cards besides RDNA3 support this.
-    </p>
   </div>
 </details>
 

From ab80cda8bfefea7eb62870b1bfd6c2597f9bb1f5 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:14:56 +0800
Subject: [PATCH 28/40] Update index.md

---
 website/docs/installation/modal/index.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/website/docs/installation/modal/index.md b/website/docs/installation/modal/index.md
index 10777833330d..86638d7d31b4 100644
--- a/website/docs/installation/modal/index.md
+++ b/website/docs/installation/modal/index.md
@@ -20,11 +20,11 @@ GPU_CONFIG = gpu.T4()
 
 Currently supported GPUs in Modal:
 
-- `NVIDIA T4`: Low-cost GPU option, providing 16GiB of GPU memory.
-- `NVIDIA L4`: Mid-tier GPU option, providing 24GiB of GPU memory.
-- `NVIDIA A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations.
-- `NVIDIA A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs.
-- `NVIDIA Any`: Selects any one of the GPU classes available within Modal, according to availability.
+- `T4`: Low-cost GPU option, providing 16GiB of GPU memory.
+- `L4`: Mid-tier GPU option, providing 24GiB of GPU memory.
+- `A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations.
+- `A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs.
+- `Any`: Selects any one of the GPU classes available within Modal, according to availability.
 
 For detailed usage, please check official [Modal GPU reference](https://modal.com/docs/reference/modal.gpu).
 

From 23f20543b98ad9b74a6c82cad6e52ef32be81807 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:17:11 +0800
Subject: [PATCH 29/40] Update and rename docker-cuda.yml to docker.yml

---
 .github/workflows/{docker-cuda.yml => docker.yml} | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
 rename .github/workflows/{docker-cuda.yml => docker.yml} (96%)

diff --git a/.github/workflows/docker-cuda.yml b/.github/workflows/docker.yml
similarity index 96%
rename from .github/workflows/docker-cuda.yml
rename to .github/workflows/docker.yml
index 387a6bdcd9fd..78b751781183 100644
--- a/.github/workflows/docker-cuda.yml
+++ b/.github/workflows/docker.yml
@@ -86,9 +86,7 @@ jobs:
           # list of Docker images to use as base name for tags
           images: |
             ghcr.io/${{ env.IMAGE_NAME }}
-            ghcr.io/${{ env.IMAGE_NAME }}/cuda
             ${{ env.IMAGE_NAME }}
-            ${{ env.IMAGE_NAME }}-cuda
           # generate Docker tags based on the following events/attributes
           tags: |
             type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
@@ -102,7 +100,7 @@ jobs:
         id: build-and-push
         uses: docker/build-push-action@v5.1.0
         with:
-          file: cuda.Dockerfile
+          file: Dockerfile
           push: true
           context: .
           tags: ${{ steps.meta.outputs.tags }}

From 5202dfe5362900cce1cbe0c4cc65586050bde574 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:17:39 +0800
Subject: [PATCH 30/40] Delete .github/workflows/docker-rocm.yml

---
 .github/workflows/docker-rocm.yml | 119 ------------------------------
 1 file changed, 119 deletions(-)
 delete mode 100644 .github/workflows/docker-rocm.yml

diff --git a/.github/workflows/docker-rocm.yml b/.github/workflows/docker-rocm.yml
deleted file mode 100644
index ca4963777f15..000000000000
--- a/.github/workflows/docker-rocm.yml
+++ /dev/null
@@ -1,119 +0,0 @@
-name: Create and publish ROCm docker image
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: '0 20 */1 * *'
-  push:
-    tags:
-      - 'v*'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} 
-  
-  # If this is enabled it will cancel current running and start latest
-  cancel-in-progress: true
-
-env:
-  RUST_TOOLCHAIN: 1.73.0
-
-jobs:
-  release-docker:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-      # This is used to complete the identity challenge
-      # with sigstore/fulcio when running outside of PRs.
-      id-token: write
-
-    steps:
-      - name: Free Disk Space (Ubuntu)
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: false
-          swap-storage: true
-
-      - name: Checkout repository
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      # Workaround: https://github.com/docker/build-push-action/issues/461
-      - name: Setup Docker buildx
-        uses: docker/setup-buildx-action@v3.0.0
-        with:
-          # Needed to support OCI annotations
-          version: v0.12.0
-
-      # Login against a Docker registry except on PR
-      # https://github.com/docker/login-action
-      - name: Log into GitHub Container registry
-        uses: docker/login-action@v2.0.0
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Log into Docker Hub
-        uses: docker/login-action@v2.0.0
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
-      - name: Generate image name
-        run: |
-          echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV}
-
-      - uses: int128/docker-build-cache-config-action@v1
-        id: cache
-        with:
-          image: ghcr.io/${{ env.IMAGE_NAME }}/cache
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5.0.0
-        with:
-          # list of Docker images to use as base name for tags
-          images: |
-            ghcr.io/${{ env.IMAGE_NAME }}/rocm
-            ${{ env.IMAGE_NAME }}-rocm
-          # generate Docker tags based on the following events/attributes
-          variant: rocm
-          tags: |
-            type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
-            type=schedule,pattern=nightly
-            type=schedule,pattern={{date 'YYYYMMDD'}}
-            type=semver,pattern={{version}}
-
-      # Build and push Docker image with Buildx (don't push on PR)
-      # https://github.com/docker/build-push-action
-      - name: Build and push Docker image
-        id: build-and-push
-        uses: docker/build-push-action@v5.1.0
-        with:
-          file: rocm.Dockerfile
-          push: true
-          context: .
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          annotations: ${{ steps.meta.outputs.labels }}
-          cache-from: ${{ steps.cache.outputs.cache-from }}
-          cache-to: ${{ steps.cache.outputs.cache-to }}
-          build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
-
-      - name: Docker Hub Description
-        uses: peter-evans/dockerhub-description@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-          repository: tabbyml/tabby

From f3d793f9a131b1f388629a6fefe2c4dcc5d887b3 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:17:57 +0800
Subject: [PATCH 31/40] Delete rocm.Dockerfile

---
 rocm.Dockerfile | 61 -------------------------------------------------
 1 file changed, 61 deletions(-)
 delete mode 100644 rocm.Dockerfile

diff --git a/rocm.Dockerfile b/rocm.Dockerfile
deleted file mode 100644
index 87ccd51acf11..000000000000
--- a/rocm.Dockerfile
+++ /dev/null
@@ -1,61 +0,0 @@
-ARG UBUNTU_VERSION=22.04
-# This needs to generally match the container host's environment.
-ARG ROCM_VERSION=5.7
-# Target the CUDA build image
-ARG BASE_ROCM_DEV_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete"
-# Target the CUDA runtime image
-ARG BASE_ROCM_RUN_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete"
-
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
-
-# Rust toolchain version
-ARG RUST_TOOLCHAIN=stable
-
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        curl \
-        pkg-config \
-        libssl-dev \
-        protobuf-compiler \
-        git \
-        cmake \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# setup rust.
-RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLCHAIN} -y
-ENV PATH="/root/.cargo/bin:${PATH}"
-
-WORKDIR /root/workspace
-
-RUN mkdir -p /opt/tabby/bin
-RUN mkdir -p /opt/tabby/lib
-RUN mkdir -p target
-
-COPY . .
-
-RUN --mount=type=cache,target=/usr/local/cargo/registry \
-    --mount=type=cache,target=/root/workspace/target \
-    cargo build --features rocm --release --package tabby && \
-    cp target/release/tabby /opt/tabby/bin/
-
-FROM ${BASE_ROCM_RUN_CONTAINER} as runtime
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        git \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Disable safe directory in docker
-# Context: https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9
-RUN git config --system --add safe.directory "*"
-
-COPY --from=build /opt/tabby /opt/tabby
-
-ENV TABBY_ROOT=/data
-
-ENTRYPOINT ["/opt/tabby/bin/tabby"]

From 15767a83dbebbcc6dcbd3a05a97372a0becf69d1 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:18:13 +0800
Subject: [PATCH 32/40] Rename cuda.Dockerfile to Dockerfile

---
 cuda.Dockerfile => Dockerfile | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename cuda.Dockerfile => Dockerfile (100%)

diff --git a/cuda.Dockerfile b/Dockerfile
similarity index 100%
rename from cuda.Dockerfile
rename to Dockerfile

From 1ae3822dc2ddf14c74da1d119e1b52b312cbf20a Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:19:37 +0800
Subject: [PATCH 33/40] Update docker.yml

---
 .github/workflows/docker.yml | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 78b751781183..1e7482119d18 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -1,4 +1,4 @@
-name: Create and publish CUDA docker image
+name: Create and publish docker image
 
 on:
   workflow_dispatch:
@@ -50,10 +50,7 @@ jobs:
 
       # Workaround: https://github.com/docker/build-push-action/issues/461
       - name: Setup Docker buildx
-        uses: docker/setup-buildx-action@v3.0.0
-        with:
-          # Needed to support OCI annotations
-          version: v0.12.0
+        uses: docker/setup-buildx-action@v2.0.0
 
       # Login against a Docker registry except on PR
       # https://github.com/docker/login-action
@@ -81,7 +78,7 @@ jobs:
 
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v5.0.0
+        uses: docker/metadata-action@v4
         with:
           # list of Docker images to use as base name for tags
           images: |
@@ -98,14 +95,13 @@ jobs:
       # https://github.com/docker/build-push-action
       - name: Build and push Docker image
         id: build-and-push
-        uses: docker/build-push-action@v5.1.0
+        uses: docker/build-push-action@v3.1.1
         with:
           file: Dockerfile
           push: true
           context: .
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          annotations: ${{ steps.meta.outputs.labels }}
           cache-from: ${{ steps.cache.outputs.cache-from }}
           cache-to: ${{ steps.cache.outputs.cache-to }}
           build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
@@ -116,3 +112,4 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
           repository: tabbyml/tabby
+

From e44f48d87b5ec2c4802796f7c27671699dac5d0c Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:20:59 +0800
Subject: [PATCH 34/40] Update website/docs/installation/docker.mdx

---
 website/docs/installation/docker.mdx | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/website/docs/installation/docker.mdx b/website/docs/installation/docker.mdx
index bd1c48d32632..8ecfc78617ec 100644
--- a/website/docs/installation/docker.mdx
+++ b/website/docs/installation/docker.mdx
@@ -6,9 +6,6 @@ sidebar_position: 0
 
 This guide explains how to launch Tabby using docker.
 
-
-
-
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 

From 6fc195e56c6e09b0f9274d3a07bb9b76238a0c46 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:21:51 +0800
Subject: [PATCH 35/40] Update website/docs/installation/docker-compose.mdx

---
 website/docs/installation/docker-compose.mdx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx
index 54e9266bcc2b..9b76850ed336 100644
--- a/website/docs/installation/docker-compose.mdx
+++ b/website/docs/installation/docker-compose.mdx
@@ -5,8 +5,6 @@ sidebar_position: 1
 # Docker Compose
 This guide explains how to launch Tabby using docker-compose.
 
-
-
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 

From 2c55ee4ff9bc914c1bd64fbdeede48e767aa67f1 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:22:55 +0800
Subject: [PATCH 36/40] Update docker-compose.mdx

---
 website/docs/installation/docker-compose.mdx | 23 ++------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx
index 9b76850ed336..d97764c9a804 100644
--- a/website/docs/installation/docker-compose.mdx
+++ b/website/docs/installation/docker-compose.mdx
@@ -16,8 +16,8 @@ version: '3.5'
 
 services:
   tabby:
-    restart: unless-stopped
-    image: tabbyml/tabby-cuda
+    restart: always
+    image: tabbyml/tabby
     command: serve --model TabbyML/StarCoder-1B --device cuda
     volumes:
       - "$HOME/.tabby:/data"
@@ -33,25 +33,6 @@ services:
 ```
 
   </TabItem>
-  <TabItem value="rocm" label="ROCm">
-
-```yaml title="docker-compose.yml"
-version: '3.5'
-services:
-  tabby:
-    restart: unless-stopped
-    image: tabbyml/tabby-rocm
-    command: serve --model TabbyML/StarCoder-1B --device rocm
-    volumes:
-      - "$HOME/.tabby:/data"
-    ports:
-      - 8080:8080
-    devices:
-      - /dev/dri
-      - /dev/kfd
-```
-
-    </TabItem>
   <TabItem value="cpu" label="CPU">
 
 ```yaml title="docker-compose.yml"

From a1f95893e34d8fa0a08854557829187ea5104b2b Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:23:44 +0800
Subject: [PATCH 37/40] Update docker-compose.mdx

---
 website/docs/installation/docker-compose.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx
index d97764c9a804..8dab5c47b985 100644
--- a/website/docs/installation/docker-compose.mdx
+++ b/website/docs/installation/docker-compose.mdx
@@ -40,7 +40,7 @@ version: '3.5'
 
 services:
   tabby:
-    restart: unless-stopped
+    restart: always
     image: tabbyml/tabby
     command: serve --model TabbyML/StarCoder-1B
     volumes:

From 1ed492bc8d7d321c8eda38810b04b6f94387a499 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:24:21 +0800
Subject: [PATCH 38/40] Update docker.mdx


From f7fe0021499be294c248e59c72655cd56b4c72ed Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:25:12 +0800
Subject: [PATCH 39/40] Update docker.mdx

---
 website/docs/installation/docker.mdx | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/website/docs/installation/docker.mdx b/website/docs/installation/docker.mdx
index 8ecfc78617ec..7f26f87c0c13 100644
--- a/website/docs/installation/docker.mdx
+++ b/website/docs/installation/docker.mdx
@@ -13,14 +13,7 @@ import TabItem from '@theme/TabItem';
   <TabItem value="cuda" label="CUDA (requires NVIDIA Container Toolkit)" default>
 
   ```bash title="run.sh"
-  docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-cuda serve --model TabbyML/StarCoder-1B --device cuda
-  ```
-
-  </TabItem>
-  <TabItem value="rocm" label="ROCm" default>
-
-  ```bash title="run.sh"
-  docker run -it --device /dev/dri --device /dev/kfd -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-rocm serve --model TabbyML/StarCoder-1B --device rocm
+  docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby serve --model TabbyML/StarCoder-1B --device cuda
   ```
 
   </TabItem>

From 39a640b7fbf578371f646418ccd36aa5e86dc356 Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Wed, 13 Dec 2023 15:26:06 +0800
Subject: [PATCH 40/40] Update website/docs/faq.mdx

---
 website/docs/faq.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx
index e17f5ec8704e..031f8555fe12 100644
--- a/website/docs/faq.mdx
+++ b/website/docs/faq.mdx
@@ -25,7 +25,7 @@
 <details>
   <summary>How to utilize multiple NVIDIA GPUs?</summary>
   <div>
-    <p>Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES accordingly.</p>
+    <p>Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES (for CUDA) or HIP_VISIBLE_DEVICES (for ROCm) accordingly.</p>
   </div>
 </details>