From 5b0816c71838e169cf0d487802beb00b6a20ea4d Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 15:23:56 +0100 Subject: [PATCH 01/40] Added rocm builds and documentation --- .dockerignore | 6 + .../workflows/{docker.yml => docker-cuda.yml} | 17 ++- .github/workflows/docker-rocm.yml | 119 ++++++++++++++++++ .github/workflows/release.yml | 7 +- README.md | 15 ++- Dockerfile => cuda.Dockerfile | 3 +- rocm.Dockerfile | 61 +++++++++ website/docs/extensions/troubleshooting.md | 6 +- website/docs/faq.mdx | 22 +++- website/docs/installation/apple.md | 2 +- website/docs/installation/docker-compose.mdx | 27 +++- website/docs/installation/docker.mdx | 12 +- website/docs/installation/modal/index.md | 10 +- 13 files changed, 280 insertions(+), 27 deletions(-) rename .github/workflows/{docker.yml => docker-cuda.yml} (88%) create mode 100644 .github/workflows/docker-rocm.yml rename Dockerfile => cuda.Dockerfile (99%) create mode 100644 rocm.Dockerfile diff --git a/.dockerignore b/.dockerignore index de70e0d16772..bfbb41f4fe53 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,8 @@ +.idea +ci +clients +.github +python **/target **/node_modules +website diff --git a/.github/workflows/docker.yml b/.github/workflows/docker-cuda.yml similarity index 88% rename from .github/workflows/docker.yml rename to .github/workflows/docker-cuda.yml index 1e7482119d18..387a6bdcd9fd 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker-cuda.yml @@ -1,4 +1,4 @@ -name: Create and publish docker image +name: Create and publish CUDA docker image on: workflow_dispatch: @@ -50,7 +50,10 @@ jobs: # Workaround: https://github.com/docker/build-push-action/issues/461 - name: Setup Docker buildx - uses: docker/setup-buildx-action@v2.0.0 + uses: docker/setup-buildx-action@v3.0.0 + with: + # Needed to support OCI annotations + version: v0.12.0 # Login against a Docker registry except on PR # https://github.com/docker/login-action @@ -78,12 +81,14 @@ jobs: - name: Docker 
meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5.0.0 with: # list of Docker images to use as base name for tags images: | ghcr.io/${{ env.IMAGE_NAME }} + ghcr.io/${{ env.IMAGE_NAME }}/cuda ${{ env.IMAGE_NAME }} + ${{ env.IMAGE_NAME }}-cuda # generate Docker tags based on the following events/attributes tags: | type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }} @@ -95,13 +100,14 @@ jobs: # https://github.com/docker/build-push-action - name: Build and push Docker image id: build-and-push - uses: docker/build-push-action@v3.1.1 + uses: docker/build-push-action@v5.1.0 with: - file: Dockerfile + file: cuda.Dockerfile push: true context: . tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + annotations: ${{ steps.meta.outputs.labels }} cache-from: ${{ steps.cache.outputs.cache-from }} cache-to: ${{ steps.cache.outputs.cache-to }} build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }} @@ -112,4 +118,3 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} repository: tabbyml/tabby - diff --git a/.github/workflows/docker-rocm.yml b/.github/workflows/docker-rocm.yml new file mode 100644 index 000000000000..ca4963777f15 --- /dev/null +++ b/.github/workflows/docker-rocm.yml @@ -0,0 +1,119 @@ +name: Create and publish ROCm docker image + +on: + workflow_dispatch: + schedule: + - cron: '0 20 */1 * *' + push: + tags: + - 'v*' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} + + # If this is enabled it will cancel current running and start latest + cancel-in-progress: true + +env: + RUST_TOOLCHAIN: 1.73.0 + +jobs: + release-docker: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. 
+ id-token: write + + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + + - name: Checkout repository + uses: actions/checkout@v3 + with: + submodules: recursive + + # Workaround: https://github.com/docker/build-push-action/issues/461 + - name: Setup Docker buildx + uses: docker/setup-buildx-action@v3.0.0 + with: + # Needed to support OCI annotations + version: v0.12.0 + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into GitHub Container registry + uses: docker/login-action@v2.0.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Log into Docker Hub + uses: docker/login-action@v2.0.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Generate image name + run: | + echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} + + - uses: int128/docker-build-cache-config-action@v1 + id: cache + with: + image: ghcr.io/${{ env.IMAGE_NAME }}/cache + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5.0.0 + with: + # list of Docker images to use as base name for tags + images: | + ghcr.io/${{ env.IMAGE_NAME }}/rocm + ${{ env.IMAGE_NAME }}-rocm + # generate Docker tags based on the following events/attributes + variant: rocm + tags: | + type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }} + type=schedule,pattern=nightly + type=schedule,pattern={{date 'YYYYMMDD'}} + type=semver,pattern={{version}} + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + 
- name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@v5.1.0 + with: + file: rocm.Dockerfile + push: true + context: . + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + annotations: ${{ steps.meta.outputs.labels }} + cache-from: ${{ steps.cache.outputs.cache-from }} + cache-to: ${{ steps.cache.outputs.cache-to }} + build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }} + + - name: Docker Hub Description + uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: tabbyml/tabby diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b22676562baf..8f0ba9bc75a0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,7 +26,7 @@ jobs: container: ${{ matrix.container }} strategy: matrix: - binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117] + binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-rocm5.7] include: - os: macos-latest target: aarch64-apple-darwin @@ -40,6 +40,11 @@ jobs: binary: x86_64-manylinux2014-cuda117 container: sameli/manylinux2014_x86_64_cuda_11.7 build_args: --features cuda + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + binary: x86_64-manylinux2014-rocm5.7 + container: rocm/dev-ubuntu-22.04:rocm5.7 + build_args: --features rocm env: SCCACHE_GHA_ENABLED: true diff --git a/README.md b/README.md index c606301e2a09..892e3b50cd09 100644 --- a/README.md +++ b/README.md @@ -50,14 +50,25 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta - ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration) ### Run Tabby in 1 Minute -The easiest way to start a Tabby server is by using the following Docker command: +The easiest way to start a Tabby server is by using the following Docker command... 
+...with cuda: ```bash docker run -it \ --gpus all -p 8080:8080 -v $HOME/.tabby:/data \ - tabbyml/tabby \ + tabbyml/tabby-cuda \ serve --model TabbyML/StarCoder-1B --device cuda ``` + +...with ROCm (Linux only): +```bash +docker run -it \ + --device /dev/dri --device /dev/kfd \ + -p 8080:8080 -v $HOME/.tabby:/data \ + tabbyml/tabby-rocm \ + serve --model TabbyML/StarCoder-1B --device rocm +``` + For additional options (e.g inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby). ## 🤝 Contributing diff --git a/Dockerfile b/cuda.Dockerfile similarity index 99% rename from Dockerfile rename to cuda.Dockerfile index f939d143ec2f..947368d4a68d 100644 --- a/Dockerfile +++ b/cuda.Dockerfile @@ -29,12 +29,13 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLC ENV PATH="/root/.cargo/bin:${PATH}" WORKDIR /root/workspace -COPY . . RUN mkdir -p /opt/tabby/bin RUN mkdir -p /opt/tabby/lib RUN mkdir -p target +COPY . . + RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/root/workspace/target \ cargo build --features cuda --release --package tabby && \ diff --git a/rocm.Dockerfile b/rocm.Dockerfile new file mode 100644 index 000000000000..87ccd51acf11 --- /dev/null +++ b/rocm.Dockerfile @@ -0,0 +1,61 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. 
+ARG ROCM_VERSION=5.7 +# Target the ROCm build image +ARG BASE_ROCM_DEV_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete" +# Target the ROCm runtime image +ARG BASE_ROCM_RUN_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete" + +FROM ${BASE_ROCM_DEV_CONTAINER} as build + +# Rust toolchain version +ARG RUST_TOOLCHAIN=stable + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + pkg-config \ + libssl-dev \ + protobuf-compiler \ + git \ + cmake \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# setup rust. +RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLCHAIN} -y +ENV PATH="/root/.cargo/bin:${PATH}" + +WORKDIR /root/workspace + +RUN mkdir -p /opt/tabby/bin +RUN mkdir -p /opt/tabby/lib +RUN mkdir -p target + +COPY . . + +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/root/workspace/target \ + cargo build --features rocm --release --package tabby && \ + cp target/release/tabby /opt/tabby/bin/ + +FROM ${BASE_ROCM_RUN_CONTAINER} as runtime + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Disable safe directory in docker +# Context: https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9 +RUN git config --system --add safe.directory "*" + +COPY --from=build /opt/tabby /opt/tabby + +ENV TABBY_ROOT=/data + +ENTRYPOINT ["/opt/tabby/bin/tabby"] diff --git a/website/docs/extensions/troubleshooting.md b/website/docs/extensions/troubleshooting.md index 94f42a667f5c..bf4ee1cc9be4 100644 --- a/website/docs/extensions/troubleshooting.md +++ b/website/docs/extensions/troubleshooting.md @@ -112,9 +112,9 @@ for the current code context. If your completion requests are timing out, Tabby may display a warning message.
This could be due to network issues or poor server performance, especially when running a large model on a CPU. To improve performance, consider running the model -on a GPU with CUDA support or on Apple M1/M2 with Metal support. When running -the server, make sure to specify the device in the arguments using `--device cuda` -or `--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/). +on a GPU with CUDA or ROCm support or on Apple M1/M2 with Metal support. When running +the server, make sure to specify the device in the arguments using `--device cuda`, `--device rocm` or +`--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/). By default, the timeout for automatically triggered completion requests is set to 4 seconds. You can adjust this timeout value in the `~/.tabby-client/agent/config.toml` configuration file. diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx index 3dc6ecb632fc..f50093f9c309 100644 --- a/website/docs/faq.mdx +++ b/website/docs/faq.mdx @@ -1,10 +1,11 @@ -import CodeBlock from '@theme/CodeBlock'; - # ⁉️ Frequently Asked Questions
How much VRAM a LLM model consumes? -
By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.
+
+

By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.

+

For ROCm, the actual limits are currently largely untested, but the same CodeLlama-7B seems to use 8GB of VRAM as well on an AMD Radeon™ RX 7900 XTX according to the ROCm monitoring tools.

+
@@ -18,13 +19,26 @@ import CodeBlock from '@theme/CodeBlock';

To determine the mapping between the GPU card type and its compute capability, please visit this page

+

+ This also seems to be available on AMD Radeon™ GPUs, but it's unclear which cards besides RDNA3 support this. +

How to utilize multiple NVIDIA GPUs?
-

Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES accordingly.

+

Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES accordingly.

+
+
+ +
+ My AMD GPU isn't supported by ROCm +
+

+ If your GPU is not supported by ROCm but a similar GPU is, you can set the HSA_OVERRIDE_GFX_VERSION environment variable to the version of that supported GPU. + For example, set it to 10.3.0 for RDNA2 and to 11.0.0 for RDNA3. +

diff --git a/website/docs/installation/apple.md b/website/docs/installation/apple.md index 90bd2f6de1b5..8fed35ce4edb 100644 --- a/website/docs/installation/apple.md +++ b/website/docs/installation/apple.md @@ -14,4 +14,4 @@ brew install tabbyml/tabby/tabby tabby serve --device metal --model TabbyML/StarCoder-1B ``` -The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA. You can find more information about Docker [here](./docker). +The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA or ROCm. You can find more information about Docker [here](./docker). diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx index 8dab5c47b985..54e9266bcc2b 100644 --- a/website/docs/installation/docker-compose.mdx +++ b/website/docs/installation/docker-compose.mdx @@ -5,6 +5,8 @@ sidebar_position: 1 # Docker Compose This guide explains how to launch Tabby using docker-compose. 
+ + import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; @@ -16,8 +18,8 @@ version: '3.5' services: tabby: - restart: always - image: tabbyml/tabby + restart: unless-stopped + image: tabbyml/tabby-cuda command: serve --model TabbyML/StarCoder-1B --device cuda volumes: - "$HOME/.tabby:/data" @@ -33,6 +35,25 @@ services: ``` + + +```yaml title="docker-compose.yml" +version: '3.5' +services: + tabby: + restart: unless-stopped + image: tabbyml/tabby-rocm + command: serve --model TabbyML/StarCoder-1B --device rocm + volumes: + - "$HOME/.tabby:/data" + ports: + - 8080:8080 + devices: + - /dev/dri + - /dev/kfd +``` + + ```yaml title="docker-compose.yml" @@ -40,7 +61,7 @@ version: '3.5' services: tabby: - restart: always + restart: unless-stopped image: tabbyml/tabby command: serve --model TabbyML/StarCoder-1B volumes: diff --git a/website/docs/installation/docker.mdx b/website/docs/installation/docker.mdx index 7f26f87c0c13..bd1c48d32632 100644 --- a/website/docs/installation/docker.mdx +++ b/website/docs/installation/docker.mdx @@ -6,6 +6,9 @@ sidebar_position: 0 This guide explains how to launch Tabby using docker. 
+ + + import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; @@ -13,7 +16,14 @@ import TabItem from '@theme/TabItem'; ```bash title="run.sh" - docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby serve --model TabbyML/StarCoder-1B --device cuda + docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-cuda serve --model TabbyML/StarCoder-1B --device cuda + ``` + + + + + ```bash title="run.sh" + docker run -it --device /dev/dri --device /dev/kfd -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-rocm serve --model TabbyML/StarCoder-1B --device rocm ``` diff --git a/website/docs/installation/modal/index.md b/website/docs/installation/modal/index.md index 86638d7d31b4..10777833330d 100644 --- a/website/docs/installation/modal/index.md +++ b/website/docs/installation/modal/index.md @@ -20,11 +20,11 @@ GPU_CONFIG = gpu.T4() Currently supported GPUs in Modal: -- `T4`: Low-cost GPU option, providing 16GiB of GPU memory. -- `L4`: Mid-tier GPU option, providing 24GiB of GPU memory. -- `A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations. -- `A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs. -- `Any`: Selects any one of the GPU classes available within Modal, according to availability. +- `NVIDIA T4`: Low-cost GPU option, providing 16GiB of GPU memory. +- `NVIDIA L4`: Mid-tier GPU option, providing 24GiB of GPU memory. +- `NVIDIA A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations. +- `NVIDIA A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs. +- `NVIDIA Any`: Selects any one of the GPU classes available within Modal, according to availability. 
For detailed usage, please check official [Modal GPU reference](https://modal.com/docs/reference/modal.gpu). From 1be9c1ef4ac79264158d7dddf8261ecf5b866044 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 15:27:23 +0100 Subject: [PATCH 02/40] Pulled build improvements from #902 --- crates/llama-cpp-bindings/build.rs | 10 ++++++---- crates/llama-cpp-bindings/src/llama.rs | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs index b4e09f6c8506..48520af374e5 100644 --- a/crates/llama-cpp-bindings/build.rs +++ b/crates/llama-cpp-bindings/build.rs @@ -33,7 +33,7 @@ fn main() { println!("cargo:rustc-link-lib=cublasLt"); } if cfg!(feature = "rocm") { - let amd_gpu_targets: Vec<&str> = vec![ + let amd_gpu_default_targets: Vec<&str> = vec![ "gfx803", "gfx900", "gfx906:xnack-", @@ -51,6 +51,8 @@ fn main() { "gfx1102", "gfx1103", ]; + let amd_gpu_targets = + env::var("AMDGPU_TARGETS").unwrap_or(amd_gpu_default_targets.join(";")); let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string()); config.define("LLAMA_HIPBLAS", "ON"); @@ -59,7 +61,7 @@ fn main() { "CMAKE_CXX_COMPILER", format!("{}/llvm/bin/clang++", rocm_root), ); - config.define("AMDGPU_TARGETS", amd_gpu_targets.join(";")); + config.define("AMDGPU_TARGETS", amd_gpu_targets); println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries"); println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root); println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root); @@ -74,8 +76,8 @@ fn main() { cxx_build::bridge("src/lib.rs") .file("src/engine.cc") - .flag_if_supported("-Iinclude") - .flag_if_supported("-Illama.cpp") + .include("include") + .include("llama.cpp") .flag_if_supported("-std=c++14") .compile("cxxbridge"); } diff --git a/crates/llama-cpp-bindings/src/llama.rs b/crates/llama-cpp-bindings/src/llama.rs index 15db1358dc55..b288a3adc263 100644 --- a/crates/llama-cpp-bindings/src/llama.rs +++ 
b/crates/llama-cpp-bindings/src/llama.rs @@ -83,7 +83,7 @@ impl LlamaServiceImpl { }; for ffi::StepOutput { request_id, text } in result { - let mut stopped = false; + let mut stopped; let LlamaRunningRequest { tx, stop_condition } = self.requests.get_mut(&request_id).unwrap(); From 8a217609bd9c0357bc5f612562b31facee40293b Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 15:30:21 +0100 Subject: [PATCH 03/40] Fixed build container for rocm build --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8f0ba9bc75a0..66f932635117 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -43,7 +43,7 @@ jobs: - os: ubuntu-latest target: x86_64-unknown-linux-gnu binary: x86_64-manylinux2014-rocm5.7 - container: rocm/dev-ubuntu-22.04:rocm5.7 + container: rocm/dev-ubuntu-22.04:5.7-complete build_args: --features rocm env: From 1e3fe328bd5b1d60f7372e679b77b0a65f325b8c Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 15:38:56 +0100 Subject: [PATCH 04/40] Install git in rocm container --- .github/workflows/release.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 66f932635117..b1d4a5bdb348 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -44,6 +44,7 @@ jobs: target: x86_64-unknown-linux-gnu binary: x86_64-manylinux2014-rocm5.7 container: rocm/dev-ubuntu-22.04:5.7-complete + install_git: true build_args: --features rocm env: @@ -52,6 +53,14 @@ jobs: CARGO_INCREMENTAL: 0 steps: + - name: Update Git + uses: actions/shell@v3 + if: matrix.install_git == 'true' + with: + run: | + apt-get update + apt-get install -y git + - name: Checkout uses: actions/checkout@v3 with: From 1a1d4da8dacfc3156db540d2a844fc8a8ae20da2 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 15:40:50 +0100 Subject: 
[PATCH 05/40] Fixed github step --- .github/workflows/release.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b1d4a5bdb348..be64260c4afb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -54,12 +54,10 @@ jobs: steps: - name: Update Git - uses: actions/shell@v3 if: matrix.install_git == 'true' - with: - run: | - apt-get update - apt-get install -y git + run: | + apt-get update + apt-get install -y git - name: Checkout uses: actions/checkout@v3 From b701a63bba894507e91157d438abdd12fb5565d6 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 15:45:31 +0100 Subject: [PATCH 06/40] Try to fix if statement --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index be64260c4afb..1919cc4512a4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -53,8 +53,8 @@ jobs: CARGO_INCREMENTAL: 0 steps: - - name: Update Git - if: matrix.install_git == 'true' + - name: Install Git + if: ${{ matrix.install_git }} run: | apt-get update apt-get install -y git From 82bbf8d11b014d1248535114f4fbd52b30592adb Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 16:01:40 +0100 Subject: [PATCH 07/40] Added more generic dependency installation --- .github/workflows/release.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1919cc4512a4..4bc6e360dcda 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -44,7 +44,7 @@ jobs: target: x86_64-unknown-linux-gnu binary: x86_64-manylinux2014-rocm5.7 container: rocm/dev-ubuntu-22.04:5.7-complete - install_git: true + apt_dependencies: git pkg-config libssl-dev cmake protobuf-compiler build_args: --features rocm env: @@ -53,11 +53,11 @@ jobs: 
CARGO_INCREMENTAL: 0 steps: - - name: Install Git - if: ${{ matrix.install_git }} + - name: Install dependencies + if: ${{ matrix.apt_dependencies }} run: | apt-get update - apt-get install -y git + apt-get install -y ${{ matrix.apt_dependencies }} - name: Checkout uses: actions/checkout@v3 From 081a9f3cec6a504cdac72ac77e17634308e5bb9a Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 16:24:46 +0100 Subject: [PATCH 08/40] upgraded rustup action --- .github/workflows/release.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4bc6e360dcda..2c336de64c7a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,7 @@ concurrency: cancel-in-progress: true env: - RUST_TOOLCHAIN: 1.73.0 + RUST_TOOLCHAIN: 1.80.0 jobs: release-binary: @@ -44,7 +44,7 @@ jobs: target: x86_64-unknown-linux-gnu binary: x86_64-manylinux2014-rocm5.7 container: rocm/dev-ubuntu-22.04:5.7-complete - apt_dependencies: git pkg-config libssl-dev cmake protobuf-compiler + apt_dependencies: git pkg-config libssl-dev cmake build_args: --features rocm env: @@ -65,14 +65,11 @@ jobs: submodules: recursive - name: Install Rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@${{ env.RUST_TOOLCHAIN }} with: - toolchain: ${{ env.RUST_TOOLCHAIN }} - target: ${{ matrix.target }} + targets: ${{ matrix.target }} components: clippy - - run: rustup default ${{ env.RUST_TOOLCHAIN }} - - name: Sccache cache uses: mozilla-actions/sccache-action@v0.0.3 with: From 5308d34d94ed7adc32f4cbbbf3f76beee8a3a591 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 16:30:34 +0100 Subject: [PATCH 09/40] Update sccache --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2c336de64c7a..aff6a4b2f687 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -73,7 +73,7 @@ jobs: - name: Sccache cache uses: mozilla-actions/sccache-action@v0.0.3 with: - version: "v0.4.0" + version: "v0.7.4" - name: Cargo registry cache uses: actions/cache@v3 From f50b5afa49a26c66fcfb27207ae90024a11c5367 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 16:44:18 +0100 Subject: [PATCH 10/40] Try pytorch manylinux image --- .github/workflows/release.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aff6a4b2f687..bb6005469874 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -43,8 +43,7 @@ jobs: - os: ubuntu-latest target: x86_64-unknown-linux-gnu binary: x86_64-manylinux2014-rocm5.7 - container: rocm/dev-ubuntu-22.04:5.7-complete - apt_dependencies: git pkg-config libssl-dev cmake + container: pytorch/manylinux-rocm:5.7 build_args: --features rocm env: @@ -53,12 +52,6 @@ jobs: CARGO_INCREMENTAL: 0 steps: - - name: Install dependencies - if: ${{ matrix.apt_dependencies }} - run: | - apt-get update - apt-get install -y ${{ matrix.apt_dependencies }} - - name: Checkout uses: actions/checkout@v3 with: From 980f3ed8645c47937d30214718e34d777522995d Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 16:57:19 +0100 Subject: [PATCH 11/40] Switched location for toolchain parameter --- .github/workflows/release.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bb6005469874..f55e725ac1db 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,7 @@ concurrency: cancel-in-progress: true env: - RUST_TOOLCHAIN: 1.80.0 + RUST_TOOLCHAIN: 1.73.0 jobs: release-binary: @@ -58,8 +58,9 @@ jobs: submodules: recursive - name: Install Rust - uses: dtolnay/rust-toolchain@${{ env.RUST_TOOLCHAIN }} + uses: dtolnay/rust-toolchain@master with: + toolchain: 
${{ env.RUST_TOOLCHAIN }} targets: ${{ matrix.target }} components: clippy From 42ad4794258f45f7e46d9b77ff32d84ce4a1d55f Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 16:59:32 +0100 Subject: [PATCH 12/40] Downgraded to deprecated action again --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f55e725ac1db..8ffc1edca87a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -58,10 +58,10 @@ jobs: submodules: recursive - name: Install Rust - uses: dtolnay/rust-toolchain@master + uses: actions-rs/toolchain@v1 with: toolchain: ${{ env.RUST_TOOLCHAIN }} - targets: ${{ matrix.target }} + target: ${{ matrix.target }} components: clippy - name: Sccache cache From a5e69ca548383c0ff382e94924781fa5e1a936d8 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Sun, 10 Dec 2023 17:02:38 +0100 Subject: [PATCH 13/40] Readded set default step --- .github/workflows/release.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8ffc1edca87a..94990b63dc6e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -64,6 +64,9 @@ jobs: target: ${{ matrix.target }} components: clippy + - name: Set default rust version + run: rustup default ${{ env.RUST_TOOLCHAIN }} + - name: Sccache cache uses: mozilla-actions/sccache-action@v0.0.3 with: From 5be320ab8e1dac18cd07c8c99aef3bbffd63c7f1 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Tue, 12 Dec 2023 22:07:28 +0100 Subject: [PATCH 14/40] Install minimal rocm on the fly --- .github/workflows/release.yml | 9 ++++++--- ci/prepare_build_environment.sh | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index deecd78810c8..9129b9048fdf 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -55,9 +55,10 @@ jobs: windows_cuda: '12.2.0' - os: ubuntu-latest target: x86_64-unknown-linux-gnu - binary: x86_64-manylinux2014-rocm5.7 - container: pytorch/manylinux-rocm:5.7 + binary: x86_64-manylinux2014-rocm57 + container: quay.io/pypa/manylinux2014_x86_64 build_args: --features rocm + linux_rocm: "5.7.2" env: SCCACHE_GHA_ENABLED: true @@ -66,7 +67,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -97,6 +98,8 @@ jobs: ~/.cargo/git - name: Prepare build environment for macOS & Linux + env: + ROCM: ${{ matrix.linux_rocm }} run: bash ./ci/prepare_build_environment.sh if: runner.os != 'Windows' diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh index cdcf2f637eeb..5d820b7166ee 100755 --- a/ci/prepare_build_environment.sh +++ b/ci/prepare_build_environment.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + if [[ "$OSTYPE" == "darwin"* ]]; then brew install protobuf fi @@ -11,6 +13,14 @@ install_protobuf_centos() { rm protoc-3.15.8-linux-x86_64.zip } +install_hipblas_5_7_2_centos() { + wget -O /tmp/amdgpu-install.rpm https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm + yum install /tmp/amdgpu-install.rpm + rm /tmp/amdgpu-install.rpm + + yum install hipblas-devel hipblaslt-devel +} + if [[ "$OSTYPE" == "linux"* ]]; then if command -v apt-get ; then sudo apt-get -y install protobuf-compiler libopenblas-dev @@ -18,6 +28,10 @@ if [[ "$OSTYPE" == "linux"* ]]; then # Build from manylinux2014 container yum -y install openblas-devel perl-IPC-Cmd unzip curl openssl-devel + if [[ "$ROCM" == "5.7.2" ]]; then + install_hipblas_5_7_2_centos + fi + # Disable safe directory in docker git config --system --add safe.directory "*" From e89ca16e1844f985acccc4be3355b10ef0543a84 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Tue, 12 Dec 2023 22:11:33 +0100 Subject: [PATCH 15/40] fixed typo in binary name --- 
.github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9129b9048fdf..51912f83d32f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,7 +27,7 @@ jobs: strategy: matrix: binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, - x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm5.7] + x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57] include: - os: macos-latest target: aarch64-apple-darwin From c96853ae28ec226806515a82a1338cc68a352d97 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Tue, 12 Dec 2023 22:13:02 +0100 Subject: [PATCH 16/40] Downgraded checkout action --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 51912f83d32f..514bf3be7526 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,7 +67,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v3 with: submodules: recursive From 97fef46610300aa886a041215d98c71ed47e85b2 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Tue, 12 Dec 2023 22:25:21 +0100 Subject: [PATCH 17/40] Use curl to download --- ci/prepare_build_environment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh index 5d820b7166ee..6222d66b8667 100755 --- a/ci/prepare_build_environment.sh +++ b/ci/prepare_build_environment.sh @@ -8,13 +8,13 @@ fi install_protobuf_centos() { PB_REL="https://github.com/protocolbuffers/protobuf/releases" - curl -LO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip + curl -SLO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip unzip protoc-3.15.8-linux-x86_64.zip -d /usr rm protoc-3.15.8-linux-x86_64.zip } 
install_hipblas_5_7_2_centos() { - wget -O /tmp/amdgpu-install.rpm https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm + curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm yum install /tmp/amdgpu-install.rpm rm /tmp/amdgpu-install.rpm From 022548c8b5a5236cd00baaed242035bc616e4ca6 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Tue, 12 Dec 2023 22:36:07 +0100 Subject: [PATCH 18/40] Add -y flag to yum --- ci/prepare_build_environment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh index 6222d66b8667..08051c52c198 100755 --- a/ci/prepare_build_environment.sh +++ b/ci/prepare_build_environment.sh @@ -15,10 +15,10 @@ install_protobuf_centos() { install_hipblas_5_7_2_centos() { curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm - yum install /tmp/amdgpu-install.rpm + yum -y install /tmp/amdgpu-install.rpm rm /tmp/amdgpu-install.rpm - yum install hipblas-devel hipblaslt-devel + yum -y install hipblas-devel hipblaslt-devel } if [[ "$OSTYPE" == "linux"* ]]; then From f4e99e758241ee6be0ec582f6a8331d391771ad1 Mon Sep 17 00:00:00 2001 From: Cromefire_ Date: Tue, 12 Dec 2023 22:47:08 +0100 Subject: [PATCH 19/40] Also install rocblas --- ci/prepare_build_environment.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh index 08051c52c198..0309c100f1e9 100755 --- a/ci/prepare_build_environment.sh +++ b/ci/prepare_build_environment.sh @@ -17,8 +17,7 @@ install_hipblas_5_7_2_centos() { curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm yum -y install /tmp/amdgpu-install.rpm rm /tmp/amdgpu-install.rpm - - yum -y 
install hipblas-devel hipblaslt-devel + yum -y install hipblas-devel hipblaslt-devel rocblas-devel } if [[ "$OSTYPE" == "linux"* ]]; then From 7ae9d1df715294b986cd730aa6ad0bb4614a3a79 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 14:56:14 +0800 Subject: [PATCH 20/40] Update release.yml --- .github/workflows/release.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 514bf3be7526..82a34be05f93 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,8 +26,7 @@ jobs: container: ${{ matrix.container }} strategy: matrix: - binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, - x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57] + binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57] include: - os: macos-latest target: aarch64-apple-darwin @@ -56,9 +55,8 @@ jobs: - os: ubuntu-latest target: x86_64-unknown-linux-gnu binary: x86_64-manylinux2014-rocm57 - container: quay.io/pypa/manylinux2014_x86_64 + container: ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest build_args: --features rocm - linux_rocm: "5.7.2" env: SCCACHE_GHA_ENABLED: true @@ -98,8 +96,6 @@ jobs: ~/.cargo/git - name: Prepare build environment for macOS & Linux - env: - ROCM: ${{ matrix.linux_rocm }} run: bash ./ci/prepare_build_environment.sh if: runner.os != 'Windows' From 17cfd189dd00af9ba0fa29509c20626156f90d14 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 14:56:51 +0800 Subject: [PATCH 21/40] Update release.yml --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 82a34be05f93..14171731eb8a 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -82,7 +82,7 @@ jobs: - name: Sccache cache uses: mozilla-actions/sccache-action@v0.0.3 with: - version: "v0.7.4" + version: "v0.4.0" - name: Cargo registry cache uses: actions/cache@v3 From 895a2a2cba461dc79eca88660944fd0fd1fd18fe Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:04:39 +0800 Subject: [PATCH 22/40] Update prepare_build_environment.sh --- ci/prepare_build_environment.sh | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh index 0309c100f1e9..44370733261a 100755 --- a/ci/prepare_build_environment.sh +++ b/ci/prepare_build_environment.sh @@ -8,18 +8,11 @@ fi install_protobuf_centos() { PB_REL="https://github.com/protocolbuffers/protobuf/releases" - curl -SLO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip + curl -LO $PB_REL/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip unzip protoc-3.15.8-linux-x86_64.zip -d /usr rm protoc-3.15.8-linux-x86_64.zip } -install_hipblas_5_7_2_centos() { - curl -SL https://repo.radeon.com/amdgpu-install/5.7.2/rhel/7.9/amdgpu-install-5.7.50702-1.el7.noarch.rpm --output /tmp/amdgpu-install.rpm - yum -y install /tmp/amdgpu-install.rpm - rm /tmp/amdgpu-install.rpm - yum -y install hipblas-devel hipblaslt-devel rocblas-devel -} - if [[ "$OSTYPE" == "linux"* ]]; then if command -v apt-get ; then sudo apt-get -y install protobuf-compiler libopenblas-dev @@ -27,10 +20,6 @@ if [[ "$OSTYPE" == "linux"* ]]; then # Build from manylinux2014 container yum -y install openblas-devel perl-IPC-Cmd unzip curl openssl-devel - if [[ "$ROCM" == "5.7.2" ]]; then - install_hipblas_5_7_2_centos - fi - # Disable safe directory in docker git config --system --add safe.directory "*" From 5c1ea2fb5e9827f15660ff6001714695dabfb3d4 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:05:41 +0800 Subject: [PATCH 23/40] Update prepare_build_environment.sh --- ci/prepare_build_environment.sh 
| 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/prepare_build_environment.sh b/ci/prepare_build_environment.sh index 44370733261a..cdcf2f637eeb 100755 --- a/ci/prepare_build_environment.sh +++ b/ci/prepare_build_environment.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -e - if [[ "$OSTYPE" == "darwin"* ]]; then brew install protobuf fi From 18df1ba27e985bf511a2cf4c08f49edc9b86271c Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:07:51 +0800 Subject: [PATCH 24/40] Update build.rs --- crates/llama-cpp-bindings/build.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs index b22253688215..0f4dff59b8d2 100644 --- a/crates/llama-cpp-bindings/build.rs +++ b/crates/llama-cpp-bindings/build.rs @@ -45,7 +45,7 @@ fn build_llama_cpp() { println!("cargo:rustc-link-lib=cublasLt"); } if cfg!(feature = "rocm") { - let amd_gpu_default_targets: Vec<&str> = vec![ + let amd_gpu_targets: Vec<&str> = vec![ "gfx803", "gfx900", "gfx906:xnack-", @@ -63,9 +63,7 @@ fn build_llama_cpp() { "gfx1102", "gfx1103", ]; - let amd_gpu_targets = - env::var("AMDGPU_TARGETS").unwrap_or(amd_gpu_default_targets.join(";")); - + let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string()); config.define("LLAMA_HIPBLAS", "ON"); config.define("CMAKE_C_COMPILER", format!("{}/llvm/bin/clang", rocm_root)); @@ -73,7 +71,7 @@ fn build_llama_cpp() { "CMAKE_CXX_COMPILER", format!("{}/llvm/bin/clang++", rocm_root), ); - config.define("AMDGPU_TARGETS", amd_gpu_targets); + config.define("AMDGPU_TARGETS", amd_gpu_targets.join(";")); println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries"); println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root); println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root); @@ -105,8 +103,8 @@ fn build_llama_cpp() { fn build_cxx_binding() { cxx_build::bridge("src/lib.rs") .file("src/engine.cc") - .include("include") - .include("llama.cpp") 
+ .flag_if_supported("-Iinclude") + .flag_if_supported("-Illama.cpp") .flag_if_supported("-std=c++14") .compile("cxxbridge"); } From d17bee08a3227f8908b256fb964fc4b77dbadaf6 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:08:12 +0800 Subject: [PATCH 25/40] Update build.rs --- crates/llama-cpp-bindings/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs index 0f4dff59b8d2..a768e361faf2 100644 --- a/crates/llama-cpp-bindings/build.rs +++ b/crates/llama-cpp-bindings/build.rs @@ -63,7 +63,7 @@ fn build_llama_cpp() { "gfx1102", "gfx1103", ]; - + let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string()); config.define("LLAMA_HIPBLAS", "ON"); config.define("CMAKE_C_COMPILER", format!("{}/llvm/bin/clang", rocm_root)); From 3113962b368152e04b00f61c6d261827c61b3dd6 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:09:23 +0800 Subject: [PATCH 26/40] Update README.md --- README.md | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 892e3b50cd09..c606301e2a09 100644 --- a/README.md +++ b/README.md @@ -50,25 +50,14 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta - ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration) ### Run Tabby in 1 Minute -The easiest way to start a Tabby server is by using the following Docker command... 
+The easiest way to start a Tabby server is by using the following Docker command: -...with cuda: ```bash docker run -it \ --gpus all -p 8080:8080 -v $HOME/.tabby:/data \ - tabbyml/tabby-cuda \ + tabbyml/tabby \ serve --model TabbyML/StarCoder-1B --device cuda ``` - -...with ROCm (Linux only): -```bash -docker run -it \ - --device /dev/dri --device /dev/kfd \ - -p 8080:8080 -v $HOME/.tabby:/data \ - tabbyml/tabby-rocm \ - serve --model TabbyML/StarCoder-1B --device rocm -``` - For additional options (e.g inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby). ## 🤝 Contributing From 81f138a33cee7143fccf75cfb478b424f047bc7a Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:13:34 +0800 Subject: [PATCH 27/40] Update website/docs/faq.mdx --- website/docs/faq.mdx | 3 --- 1 file changed, 3 deletions(-) diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx index f50093f9c309..e17f5ec8704e 100644 --- a/website/docs/faq.mdx +++ b/website/docs/faq.mdx @@ -19,9 +19,6 @@

To determine the mapping between the GPU card type and its compute capability, please visit this page

-

- This also seems to be available on AMD Radeon™ GPUs, but it's unclear which cards besides RDNA3 support this. -

From ab80cda8bfefea7eb62870b1bfd6c2597f9bb1f5 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:14:56 +0800 Subject: [PATCH 28/40] Update index.md --- website/docs/installation/modal/index.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/docs/installation/modal/index.md b/website/docs/installation/modal/index.md index 10777833330d..86638d7d31b4 100644 --- a/website/docs/installation/modal/index.md +++ b/website/docs/installation/modal/index.md @@ -20,11 +20,11 @@ GPU_CONFIG = gpu.T4() Currently supported GPUs in Modal: -- `NVIDIA T4`: Low-cost GPU option, providing 16GiB of GPU memory. -- `NVIDIA L4`: Mid-tier GPU option, providing 24GiB of GPU memory. -- `NVIDIA A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations. -- `NVIDIA A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs. -- `NVIDIA Any`: Selects any one of the GPU classes available within Modal, according to availability. +- `T4`: Low-cost GPU option, providing 16GiB of GPU memory. +- `L4`: Mid-tier GPU option, providing 24GiB of GPU memory. +- `A100`: The most powerful GPU available in the cloud. Available in 40GiB and 80GiB GPU memory configurations. +- `A10G`: A10G GPUs deliver up to 3.3x better ML training performance, 3x better ML inference performance, and 3x better graphics performance, in comparison to NVIDIA T4 GPUs. +- `Any`: Selects any one of the GPU classes available within Modal, according to availability. For detailed usage, please check official [Modal GPU reference](https://modal.com/docs/reference/modal.gpu). 
From 23f20543b98ad9b74a6c82cad6e52ef32be81807 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:17:11 +0800 Subject: [PATCH 29/40] Update and rename docker-cuda.yml to docker.yml --- .github/workflows/{docker-cuda.yml => docker.yml} | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) rename .github/workflows/{docker-cuda.yml => docker.yml} (96%) diff --git a/.github/workflows/docker-cuda.yml b/.github/workflows/docker.yml similarity index 96% rename from .github/workflows/docker-cuda.yml rename to .github/workflows/docker.yml index 387a6bdcd9fd..78b751781183 100644 --- a/.github/workflows/docker-cuda.yml +++ b/.github/workflows/docker.yml @@ -86,9 +86,7 @@ jobs: # list of Docker images to use as base name for tags images: | ghcr.io/${{ env.IMAGE_NAME }} - ghcr.io/${{ env.IMAGE_NAME }}/cuda ${{ env.IMAGE_NAME }} - ${{ env.IMAGE_NAME }}-cuda # generate Docker tags based on the following events/attributes tags: | type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }} @@ -102,7 +100,7 @@ jobs: id: build-and-push uses: docker/build-push-action@v5.1.0 with: - file: cuda.Dockerfile + file: Dockerfile push: true context: . 
tags: ${{ steps.meta.outputs.tags }} From 5202dfe5362900cce1cbe0c4cc65586050bde574 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:17:39 +0800 Subject: [PATCH 30/40] Delete .github/workflows/docker-rocm.yml --- .github/workflows/docker-rocm.yml | 119 ------------------------------ 1 file changed, 119 deletions(-) delete mode 100644 .github/workflows/docker-rocm.yml diff --git a/.github/workflows/docker-rocm.yml b/.github/workflows/docker-rocm.yml deleted file mode 100644 index ca4963777f15..000000000000 --- a/.github/workflows/docker-rocm.yml +++ /dev/null @@ -1,119 +0,0 @@ -name: Create and publish ROCm docker image - -on: - workflow_dispatch: - schedule: - - cron: '0 20 */1 * *' - push: - tags: - - 'v*' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} - - # If this is enabled it will cancel current running and start latest - cancel-in-progress: true - -env: - RUST_TOOLCHAIN: 1.73.0 - -jobs: - release-docker: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - - steps: - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: true - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - - name: Checkout repository - uses: actions/checkout@v3 - with: - submodules: recursive - - # Workaround: https://github.com/docker/build-push-action/issues/461 - - name: Setup Docker buildx - uses: docker/setup-buildx-action@v3.0.0 - with: - # Needed to support OCI annotations - version: v0.12.0 - - # Login against a Docker registry except on PR - # https://github.com/docker/login-action - - name: Log into GitHub Container registry - uses: docker/login-action@v2.0.0 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Log into Docker Hub - uses: docker/login-action@v2.0.0 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Generate image name - run: | - echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} - - - uses: int128/docker-build-cache-config-action@v1 - id: cache - with: - image: ghcr.io/${{ env.IMAGE_NAME }}/cache - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5.0.0 - with: - # list of Docker images to use as base name for tags - images: | - ghcr.io/${{ env.IMAGE_NAME }}/rocm - ${{ env.IMAGE_NAME }}-rocm - # generate Docker tags based on the following events/attributes - variant: rocm - tags: | - type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }} - type=schedule,pattern=nightly - type=schedule,pattern={{date 'YYYYMMDD'}} - type=semver,pattern={{version}} - - # Build and push Docker image with Buildx (don't push on PR) - # https://github.com/docker/build-push-action - 
- name: Build and push Docker image - id: build-and-push - uses: docker/build-push-action@v5.1.0 - with: - file: rocm.Dockerfile - push: true - context: . - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - annotations: ${{ steps.meta.outputs.labels }} - cache-from: ${{ steps.cache.outputs.cache-from }} - cache-to: ${{ steps.cache.outputs.cache-to }} - build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }} - - - name: Docker Hub Description - uses: peter-evans/dockerhub-description@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - repository: tabbyml/tabby From f3d793f9a131b1f388629a6fefe2c4dcc5d887b3 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:17:57 +0800 Subject: [PATCH 31/40] Delete rocm.Dockerfile --- rocm.Dockerfile | 61 ------------------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 rocm.Dockerfile diff --git a/rocm.Dockerfile b/rocm.Dockerfile deleted file mode 100644 index 87ccd51acf11..000000000000 --- a/rocm.Dockerfile +++ /dev/null @@ -1,61 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG ROCM_VERSION=5.7 -# Target the CUDA build image -ARG BASE_ROCM_DEV_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete" -# Target the CUDA runtime image -ARG BASE_ROCM_RUN_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete" - -FROM ${BASE_ROCM_DEV_CONTAINER} as build - -# Rust toolchain version -ARG RUST_TOOLCHAIN=stable - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - curl \ - pkg-config \ - libssl-dev \ - protobuf-compiler \ - git \ - cmake \ - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# setup rust. 
-RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLCHAIN} -y -ENV PATH="/root/.cargo/bin:${PATH}" - -WORKDIR /root/workspace - -RUN mkdir -p /opt/tabby/bin -RUN mkdir -p /opt/tabby/lib -RUN mkdir -p target - -COPY . . - -RUN --mount=type=cache,target=/usr/local/cargo/registry \ - --mount=type=cache,target=/root/workspace/target \ - cargo build --features rocm --release --package tabby && \ - cp target/release/tabby /opt/tabby/bin/ - -FROM ${BASE_ROCM_RUN_CONTAINER} as runtime - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - git \ - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Disable safe directory in docker -# Context: https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9 -RUN git config --system --add safe.directory "*" - -COPY --from=build /opt/tabby /opt/tabby - -ENV TABBY_ROOT=/data - -ENTRYPOINT ["/opt/tabby/bin/tabby"] From 15767a83dbebbcc6dcbd3a05a97372a0becf69d1 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:18:13 +0800 Subject: [PATCH 32/40] Rename cuda.Dockerfile to Dockerfile --- cuda.Dockerfile => Dockerfile | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cuda.Dockerfile => Dockerfile (100%) diff --git a/cuda.Dockerfile b/Dockerfile similarity index 100% rename from cuda.Dockerfile rename to Dockerfile From 1ae3822dc2ddf14c74da1d119e1b52b312cbf20a Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:19:37 +0800 Subject: [PATCH 33/40] Update docker.yml --- .github/workflows/docker.yml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 78b751781183..1e7482119d18 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,4 +1,4 @@ -name: Create and publish CUDA docker image +name: Create and publish docker image on: workflow_dispatch: @@ -50,10 +50,7 @@ jobs: # Workaround: 
https://github.com/docker/build-push-action/issues/461 - name: Setup Docker buildx - uses: docker/setup-buildx-action@v3.0.0 - with: - # Needed to support OCI annotations - version: v0.12.0 + uses: docker/setup-buildx-action@v2.0.0 # Login against a Docker registry except on PR # https://github.com/docker/login-action @@ -81,7 +78,7 @@ jobs: - name: Docker meta id: meta - uses: docker/metadata-action@v5.0.0 + uses: docker/metadata-action@v4 with: # list of Docker images to use as base name for tags images: | @@ -98,14 +95,13 @@ jobs: # https://github.com/docker/build-push-action - name: Build and push Docker image id: build-and-push - uses: docker/build-push-action@v5.1.0 + uses: docker/build-push-action@v3.1.1 with: file: Dockerfile push: true context: . tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - annotations: ${{ steps.meta.outputs.labels }} cache-from: ${{ steps.cache.outputs.cache-from }} cache-to: ${{ steps.cache.outputs.cache-to }} build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }} @@ -116,3 +112,4 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} repository: tabbyml/tabby + From e44f48d87b5ec2c4802796f7c27671699dac5d0c Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:20:59 +0800 Subject: [PATCH 34/40] Update website/docs/installation/docker.mdx --- website/docs/installation/docker.mdx | 3 --- 1 file changed, 3 deletions(-) diff --git a/website/docs/installation/docker.mdx b/website/docs/installation/docker.mdx index bd1c48d32632..8ecfc78617ec 100644 --- a/website/docs/installation/docker.mdx +++ b/website/docs/installation/docker.mdx @@ -6,9 +6,6 @@ sidebar_position: 0 This guide explains how to launch Tabby using docker. 
- - - import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; From 6fc195e56c6e09b0f9274d3a07bb9b76238a0c46 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:21:51 +0800 Subject: [PATCH 35/40] Update website/docs/installation/docker-compose.mdx --- website/docs/installation/docker-compose.mdx | 2 -- 1 file changed, 2 deletions(-) diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx index 54e9266bcc2b..9b76850ed336 100644 --- a/website/docs/installation/docker-compose.mdx +++ b/website/docs/installation/docker-compose.mdx @@ -5,8 +5,6 @@ sidebar_position: 1 # Docker Compose This guide explains how to launch Tabby using docker-compose. - - import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; From 2c55ee4ff9bc914c1bd64fbdeede48e767aa67f1 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:22:55 +0800 Subject: [PATCH 36/40] Update docker-compose.mdx --- website/docs/installation/docker-compose.mdx | 23 ++------------------ 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx index 9b76850ed336..d97764c9a804 100644 --- a/website/docs/installation/docker-compose.mdx +++ b/website/docs/installation/docker-compose.mdx @@ -16,8 +16,8 @@ version: '3.5' services: tabby: - restart: unless-stopped - image: tabbyml/tabby-cuda + restart: always + image: tabbyml/tabby command: serve --model TabbyML/StarCoder-1B --device cuda volumes: - "$HOME/.tabby:/data" @@ -33,25 +33,6 @@ services: ```
- - -```yaml title="docker-compose.yml" -version: '3.5' -services: - tabby: - restart: unless-stopped - image: tabbyml/tabby-rocm - command: serve --model TabbyML/StarCoder-1B --device rocm - volumes: - - "$HOME/.tabby:/data" - ports: - - 8080:8080 - devices: - - /dev/dri - - /dev/kfd -``` - - ```yaml title="docker-compose.yml" From a1f95893e34d8fa0a08854557829187ea5104b2b Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:23:44 +0800 Subject: [PATCH 37/40] Update docker-compose.mdx --- website/docs/installation/docker-compose.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/installation/docker-compose.mdx b/website/docs/installation/docker-compose.mdx index d97764c9a804..8dab5c47b985 100644 --- a/website/docs/installation/docker-compose.mdx +++ b/website/docs/installation/docker-compose.mdx @@ -40,7 +40,7 @@ version: '3.5' services: tabby: - restart: unless-stopped + restart: always image: tabbyml/tabby command: serve --model TabbyML/StarCoder-1B volumes: From 1ed492bc8d7d321c8eda38810b04b6f94387a499 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:24:21 +0800 Subject: [PATCH 38/40] Update docker.mdx From f7fe0021499be294c248e59c72655cd56b4c72ed Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:25:12 +0800 Subject: [PATCH 39/40] Update docker.mdx --- website/docs/installation/docker.mdx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/website/docs/installation/docker.mdx b/website/docs/installation/docker.mdx index 8ecfc78617ec..7f26f87c0c13 100644 --- a/website/docs/installation/docker.mdx +++ b/website/docs/installation/docker.mdx @@ -13,14 +13,7 @@ import TabItem from '@theme/TabItem'; ```bash title="run.sh" - docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-cuda serve --model TabbyML/StarCoder-1B --device cuda - ``` - - - - - ```bash title="run.sh" - docker run -it --device /dev/dri --device /dev/kfd -p 8080:8080 -v 
$HOME/.tabby:/data tabbyml/tabby-rocm serve --model TabbyML/StarCoder-1B --device rocm + docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby serve --model TabbyML/StarCoder-1B --device cuda ``` From 39a640b7fbf578371f646418ccd36aa5e86dc356 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Wed, 13 Dec 2023 15:26:06 +0800 Subject: [PATCH 40/40] Update website/docs/faq.mdx --- website/docs/faq.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx index e17f5ec8704e..031f8555fe12 100644 --- a/website/docs/faq.mdx +++ b/website/docs/faq.mdx @@ -25,7 +25,7 @@
How to utilize multiple NVIDIA GPUs?
-

Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES accordingly.

+

Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES (for cuda) or HIP_VISIBLE_DEVICES (for rocm) accordingly.