From 3ee3d1ac606c95f6ba118777dede747d386db7eb Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Thu, 15 Feb 2024 18:18:45 +0100
Subject: [PATCH 01/23] [CI] Quantization workflow

---
 .github/workflows/self-scheduled.yml          | 51 +++++++++++++++++
 docker/transformers-all-latest-gpu/Dockerfile | 14 +----
 .../transformers-quantization-gpu/Dockerfile  | 56 +++++++++++++++++++
 3 files changed, 108 insertions(+), 13 deletions(-)
 create mode 100644 docker/transformers-quantization-gpu/Dockerfile

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index d44e9a29ecf0da..199451da6a3725 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -297,6 +297,57 @@ jobs:
           name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
           path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
 
+  run_tests_quantization_torch_gpu:
+    name: Quantization tests
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    container:
+      image: huggingface/transformers-quantization-latest-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run quantization tests on GPU
+        working-directory: /transformers
+        run: |
+          pip install -r examples/pytorch/_tests_requirements.txt
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
+
   run_extract_warnings:
     name: Extract warnings in CI artifacts
     runs-on: ubuntu-22.04
diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
index e96eb9539c8bd2..7d46c225704116 100644
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -46,22 +46,10 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/acc
 
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft
 
-# Add bitsandbytes for mixed int8 testing
-RUN python3 -m pip install --no-cache-dir bitsandbytes
-
-# Add auto-gptq for gtpq quantization testing
-RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
-
 # Add einops for additional model testing
 RUN python3 -m pip install --no-cache-dir einops
 
-# Add aqlm for quantization testing
-RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.1
-
-# Add autoawq for quantization testing
-RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl
-
-# For bettertransformer + gptq
+# For bettertransformer
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
 
 # For video model testing
diff --git a/docker/transformers-quantization-gpu/Dockerfile b/docker/transformers-quantization-gpu/Dockerfile
new file mode 100644
index 00000000000000..5ab1ee4ba6c429
--- /dev/null
+++ b/docker/transformers-quantization-gpu/Dockerfile
@@ -0,0 +1,56 @@
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+LABEL maintainer="Hugging Face"
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Use login shell to read variables from `~/.profile` (to pass dynamic created variables between RUN commands)
+SHELL ["sh", "-lc"]
+
+# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
+# to be used as arguments for docker build (so far).
+
+ARG PYTORCH='2.1.1'
+# (not always a valid torch version)
+ARG INTEL_TORCH_EXT='2.1.100'
+# Example: `cu102`, `cu113`, etc.
+ARG CUDA='cu118'
+
+RUN apt update
+RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+
+ARG REF=main
+RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
+
+# TODO: Handle these in a python utility script
+RUN [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
+RUN echo torch=$VERSION
+# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
+# Currently, let's just use their latest releases (when `torch` is installed with a release version)
+# TODO: We might need to specify proper versions that work with a specific torch version (especially for past CI).
+RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
+
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev]
+
+RUN python3 -m pip uninstall -y flax jax
+
+RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+# Add bitsandbytes for mixed int8 testing
+RUN python3 -m pip install --no-cache-dir bitsandbytes
+
+# Add auto-gptq for gtpq quantization testing
+RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
+
+# Add aqlm for quantization testing
+RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.1
+
+# Add autoawq for quantization testing
+RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl
+
+# For bettertransformer + gptq
+RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
+
+# When installing in editable mode, `transformers` is not recognized as a package.
+# this line must be added in order for python to be aware of transformers.
+RUN cd transformers && python3 setup.py develop
\ No newline at end of file

From 3df06c1d1312a39d8a502fdbf018360c69c0ba5d Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Thu, 15 Feb 2024 19:26:39 +0100
Subject: [PATCH 02/23] build dockerfile

---
 .github/workflows/build-docker-images.yml     | 38 +++++++++++++++++++
 .../Dockerfile                                |  0
 docs/source/en/hf_quantizer.md                |  2 +-
 3 files changed, 39 insertions(+), 1 deletion(-)
 rename docker/{transformers-quantization-gpu => transformers-quantization-latest-gpu}/Dockerfile (100%)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index be070a95d3a94f..a22dd10bea474d 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -307,3 +307,41 @@ jobs:
   #           REF=main
   #         push: true
   #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+
+  latest-quantization-torch-docker:
+    name: "Latest Pytorch + Quantization [dev]"
+     # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-quantization-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
\ No newline at end of file
diff --git a/docker/transformers-quantization-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
similarity index 100%
rename from docker/transformers-quantization-gpu/Dockerfile
rename to docker/transformers-quantization-latest-gpu/Dockerfile
diff --git a/docs/source/en/hf_quantizer.md b/docs/source/en/hf_quantizer.md
index 154cfb54b9ebc8..8261a6bc4585e1 100644
--- a/docs/source/en/hf_quantizer.md
+++ b/docs/source/en/hf_quantizer.md
@@ -66,4 +66,4 @@ For some quantization methods, they may require "pre-quantizing" the models thro
 
 7. Document everything! Make sure your quantization method is documented in the [`docs/source/en/quantization.md`](https://github.com/huggingface/transformers/blob/abbffc4525566a48a9733639797c812301218b83/docs/source/en/quantization.md) file.
 
-8. Add tests! You should add tests by first adding the package in our nightly Dockerfile inside `docker/transformers-all-latest-gpu` and then adding a new test file in `tests/quantization/xxx`. Feel free to check out how it is implemented for other quantization methods.
+8. Add tests! You should add tests by first adding the package in our nightly Dockerfile inside `docker/transformers-quantization-latest-gpu` and then adding a new test file in `tests/quantization/xxx`. Feel free to check out how it is implemented for other quantization methods.

From 69a3ac57e71f7fc3f5cc911a26a7cc6ed9c4fc07 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Thu, 15 Feb 2024 19:35:37 +0100
Subject: [PATCH 03/23] fix dockerfile

---
 .../Dockerfile                                | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index 5ab1ee4ba6c429..7af84440cc6902 100644
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -10,8 +10,8 @@ SHELL ["sh", "-lc"]
 # to be used as arguments for docker build (so far).
 
 ARG PYTORCH='2.1.1'
-# (not always a valid torch version)
-ARG INTEL_TORCH_EXT='2.1.100'
+ARG TORCH_VISION=''
+ARG TORCH_AUDIO=''
 # Example: `cu102`, `cu113`, etc.
 ARG CUDA='cu118'
 
@@ -22,13 +22,9 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
 ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
 
-# TODO: Handle these in a python utility script
-RUN [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
-RUN echo torch=$VERSION
-# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
-# Currently, let's just use their latest releases (when `torch` is installed with a release version)
-# TODO: We might need to specify proper versions that work with a specific torch version (especially for past CI).
-RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
+RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
+RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' ||  VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
+RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' ||  VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
 
 RUN python3 -m pip install --no-cache-dir -e ./transformers[dev]
 
@@ -42,15 +38,15 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
 # Add auto-gptq for gtpq quantization testing
 RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
 
+# Add optimum for gptq quantization testing
+RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
+
 # Add aqlm for quantization testing
 RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.1
 
 # Add autoawq for quantization testing
 RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl
 
-# For bettertransformer + gptq
-RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
\ No newline at end of file

From f36265f1e125c2c05c85e5d95e8fd6fe9af63176 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Thu, 15 Feb 2024 19:46:17 +0100
Subject: [PATCH 04/23] update self-scheduled.yml

---
 .github/workflows/self-scheduled.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 199451da6a3725..3b63b7a688b001 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -358,7 +358,8 @@ jobs:
       run_examples_gpu,
       run_pipelines_tf_gpu,
       run_pipelines_torch_gpu,
-      run_all_tests_torch_cuda_extensions_gpu
+      run_all_tests_torch_cuda_extensions_gpu,
+      run_tests_quantization_torch_gpu,
     ]
     steps:
       - name: Checkout transformers
@@ -406,6 +407,7 @@ jobs:
       run_pipelines_tf_gpu,
       run_pipelines_torch_gpu,
       run_all_tests_torch_cuda_extensions_gpu,
+      run_tests_quantization_torch_gpu,
       run_extract_warnings
     ]
     steps:

From 745435530222156291259862f365530a62012510 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Fri, 16 Feb 2024 22:16:18 +0100
Subject: [PATCH 05/23] test build dockerfile on push

---
 .github/workflows/build-docker-images.yml | 427 +++++++++++-----------
 1 file changed, 214 insertions(+), 213 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index a22dd10bea474d..c52e1f49c1da86 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -3,7 +3,8 @@ name: Build docker images (scheduled)
 on:
   push:
     branches:
-      - build_ci_docker_image*
+      # - build_ci_docker_image*
+      - add-quantization-workflow
   repository_dispatch:
   workflow_call:
     inputs:
@@ -18,195 +19,195 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  latest-docker:
-    name: "Latest PyTorch + TensorFlow [dev]"
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-all-latest-gpu-push-ci
+  # latest-docker:
+  #   name: "Latest PyTorch + TensorFlow [dev]"
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-all-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
+  #     # Push CI images still need to be re-built daily
+  #     -
+  #       name: Build and push (for Push CI) in a daily basis
+  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+  #       if: inputs.image_postfix != '-push-ci'
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-all-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-all-latest-gpu-push-ci
 
-  latest-torch-deepspeed-docker:
-    name: "Latest PyTorch + DeepSpeed"
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
+  # latest-torch-deepspeed-docker:
+  #   name: "Latest PyTorch + DeepSpeed"
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
 
-  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
-  latest-torch-deepspeed-docker-for-push-ci-daily-build:
-    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
+  # # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
+  # latest-torch-deepspeed-docker-for-push-ci-daily-build:
+  #   name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     # Push CI images still need to be re-built daily
+  #     -
+  #       name: Build and push (for Push CI) in a daily basis
+  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+  #       if: inputs.image_postfix != '-push-ci'
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
 
-  doc-builder:
-    name: "Doc builder"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-doc-builder
-          push: true
-          tags: huggingface/transformers-doc-builder
+  # doc-builder:
+  #   name: "Doc builder"
+  #   # Push CI doesn't need this image
+  #   if: inputs.image_postfix != '-push-ci'
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-doc-builder
+  #         push: true
+  #         tags: huggingface/transformers-doc-builder
 
-  latest-pytorch:
-    name: "Latest PyTorch [dev]"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-gpu
+  # latest-pytorch:
+  #   name: "Latest PyTorch [dev]"
+  #   # Push CI doesn't need this image
+  #   if: inputs.image_postfix != '-push-ci'
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-gpu
 
 # Need to be fixed with the help from Guillaume.
 #  latest-pytorch-amd:
@@ -244,33 +245,33 @@ jobs:
 #          push: true
 #          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
 
-  latest-tensorflow:
-    name: "Latest TensorFlow [dev]"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-tensorflow-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-tensorflow-gpu
+  # latest-tensorflow:
+  #   name: "Latest TensorFlow [dev]"
+  #   # Push CI doesn't need this image
+  #   if: inputs.image_postfix != '-push-ci'
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-tensorflow-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-tensorflow-gpu
 
   # latest-pytorch-deepspeed-amd:
   #   name: "PyTorch + DeepSpeed (AMD) [dev]"

From c745704dd2b0e1872fdc2d7bc05cd05f5497286d Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Fri, 16 Feb 2024 22:45:52 +0100
Subject: [PATCH 06/23] fix torch install

---
 docker/transformers-quantization-latest-gpu/Dockerfile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index 7af84440cc6902..c8c3c3405a836f 100644
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -10,8 +10,6 @@ SHELL ["sh", "-lc"]
 # to be used as arguments for docker build (so far).
 
 ARG PYTORCH='2.1.1'
-ARG TORCH_VISION=''
-ARG TORCH_AUDIO=''
 # Example: `cu102`, `cu113`, etc.
 ARG CUDA='cu118'
 
@@ -22,9 +20,11 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
 ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
 
-RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
-RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' ||  VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
-RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' ||  VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
+RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
+RUN echo torch=$VERSION
+# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
+# Currently, let's just use their latest releases (when `torch` is installed with a release version)
+RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
 
 RUN python3 -m pip install --no-cache-dir -e ./transformers[dev]
 

From 8c34b969a88460b4e665b78883dc8560f802e1cb Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Fri, 16 Feb 2024 23:58:40 +0100
Subject: [PATCH 07/23] update to python 3.10

---
 docker/transformers-quantization-latest-gpu/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index c8c3c3405a836f..21de9a1cce0588 100644
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -14,7 +14,7 @@ ARG PYTORCH='2.1.1'
 ARG CUDA='cu118'
 
 RUN apt update
-RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
+RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3.10 python3-pip ffmpeg
 RUN python3 -m pip install --no-cache-dir --upgrade pip
 
 ARG REF=main

From 7fc1a730d797c859bc1bdb3bc453fdacbc0960df Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Tue, 20 Feb 2024 20:50:00 +0100
Subject: [PATCH 08/23] update aqlm version

---
 docker/transformers-quantization-latest-gpu/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index 21de9a1cce0588..e7df09bf415b48 100644
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -14,7 +14,7 @@ ARG PYTORCH='2.1.1'
 ARG CUDA='cu118'
 
 RUN apt update
-RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3.10 python3-pip ffmpeg
+RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python python3-pip ffmpeg
 RUN python3 -m pip install --no-cache-dir --upgrade pip
 
 ARG REF=main
@@ -42,7 +42,7 @@ RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://hu
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
 
 # Add aqlm for quantization testing
-RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.1
+RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
 
 # Add autoawq for quantization testing
 RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl

From 471cb7b85382f72e6dbd7bf03670dbfec3b6a8fb Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 15:43:44 +0100
Subject: [PATCH 09/23] uncomment build dockerfile

---
 .github/workflows/build-docker-images.yml | 495 +++++++++++-----------
 1 file changed, 247 insertions(+), 248 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index c52e1f49c1da86..079b6e10f1f973 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -3,8 +3,7 @@ name: Build docker images (scheduled)
 on:
   push:
     branches:
-      # - build_ci_docker_image*
-      - add-quantization-workflow
+      - build_ci_docker_image*
   repository_dispatch:
   workflow_call:
     inputs:
@@ -19,195 +18,195 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  # latest-docker:
-  #   name: "Latest PyTorch + TensorFlow [dev]"
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-all-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-all-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-all-latest-gpu-push-ci
+  latest-docker:
+    name: "Latest PyTorch + TensorFlow [dev]"
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu-push-ci
 
-  # latest-torch-deepspeed-docker:
-  #   name: "Latest PyTorch + DeepSpeed"
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
+  latest-torch-deepspeed-docker:
+    name: "Latest PyTorch + DeepSpeed"
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
 
-  # # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
-  # latest-torch-deepspeed-docker-for-push-ci-daily-build:
-  #   name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
+  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
+  latest-torch-deepspeed-docker-for-push-ci-daily-build:
+    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
 
-  # doc-builder:
-  #   name: "Doc builder"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-doc-builder
-  #         push: true
-  #         tags: huggingface/transformers-doc-builder
+  doc-builder:
+    name: "Doc builder"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-doc-builder
+          push: true
+          tags: huggingface/transformers-doc-builder
 
-  # latest-pytorch:
-  #   name: "Latest PyTorch [dev]"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-gpu
+  latest-pytorch:
+    name: "Latest PyTorch [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-gpu
 
 # Need to be fixed with the help from Guillaume.
 #  latest-pytorch-amd:
@@ -245,69 +244,69 @@ jobs:
 #          push: true
 #          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
 
-  # latest-tensorflow:
-  #   name: "Latest TensorFlow [dev]"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-tensorflow-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-tensorflow-gpu
+  latest-tensorflow:
+    name: "Latest TensorFlow [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-tensorflow-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-tensorflow-gpu
 
-  # latest-pytorch-deepspeed-amd:
-  #   name: "PyTorch + DeepSpeed (AMD) [dev]"
+  latest-pytorch-deepspeed-amd:
+    name: "PyTorch + DeepSpeed (AMD) [dev]"
 
-  #   runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
-  #   steps:
-  #     - name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     - name: Check out code
-  #       uses: actions/checkout@v3
-  #     - name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     - name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+    runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Check out code
+        uses: actions/checkout@v3
+      - name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
 
   latest-quantization-torch-docker:
     name: "Latest Pytorch + Quantization [dev]"

From 2f45fdace277a152738d7afdc4a59908f2329ea2 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 15:46:27 +0100
Subject: [PATCH 10/23] test if the scheduler works

---
 .github/workflows/self-scheduled.yml | 475 ++++++++++++++-------------
 1 file changed, 238 insertions(+), 237 deletions(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 3b63b7a688b001..626ca8f0698987 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -12,7 +12,8 @@ on:
     - cron: "17 2 * * *"
   push:
     branches:
-      - run_scheduled_ci*
+      # - run_scheduled_ci*
+      - add-quantization-workflow
 
 env:
   HF_HOME: /mnt/cache
@@ -70,232 +71,232 @@ jobs:
         run: |
           nvidia-smi
 
-  run_tests_gpu:
-    name: " "
-    needs: setup
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
-    uses: ./.github/workflows/model_jobs.yml
-    with:
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      machine_type: ${{ matrix.machine_type }}
-      slice_id: ${{ matrix.slice_id }}
-    secrets: inherit
-
-  run_examples_gpu:
-    name: Examples directory
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [single-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    needs: setup
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run examples tests on GPU
-        working-directory: /transformers
-        run: |
-          pip install -r examples/pytorch/_tests_requirements.txt
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ matrix.machine_type }}_run_examples_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
-
-  run_pipelines_torch_gpu:
-    name: PyTorch pipelines
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-    container:
-      image: huggingface/transformers-pytorch-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    needs: setup
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all pipeline tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
-
-  run_pipelines_tf_gpu:
-    name: TensorFlow pipelines
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-    container:
-      image: huggingface/transformers-tensorflow-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    needs: setup
-    steps:
-      - name: Update clone
-        working-directory: /transformers
-        run: |
-          git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all pipeline tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
-
-      - name: Failure short reports
-        if: ${{ always() }}
-        run: |
-          cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
-
-  run_all_tests_torch_cuda_extensions_gpu:
-    name: Torch CUDA extension tests
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-    needs: setup
-    container:
-      image: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Update clone
-        working-directory: /workspace/transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /workspace/transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Remove cached torch extensions
-        run: rm -rf /github/home/.cache/torch_extensions/
-
-      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again*
-        working-directory: /workspace
-        run: |
-          python3 -m pip uninstall -y deepspeed
-          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Environment
-        working-directory: /workspace/transformers
-        run: |
-          python utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /workspace/transformers
-        run: pip freeze
-
-      - name: Run all tests on GPU
-        working-directory: /workspace/transformers
-        run: |
-          python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
-          path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+#   run_tests_gpu:
+#     name: " "
+#     needs: setup
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         machine_type: [single-gpu, multi-gpu]
+#         slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
+#     uses: ./.github/workflows/model_jobs.yml
+#     with:
+#       folder_slices: ${{ needs.setup.outputs.folder_slices }}
+#       machine_type: ${{ matrix.machine_type }}
+#       slice_id: ${{ matrix.slice_id }}
+#     secrets: inherit
+
+#   run_examples_gpu:
+#     name: Examples directory
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         machine_type: [single-gpu]
+#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+#     container:
+#       image: huggingface/transformers-all-latest-gpu
+#       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+#     needs: setup
+#     steps:
+#       - name: Update clone
+#         working-directory: /transformers
+#         run: git fetch && git checkout ${{ github.sha }}
+
+#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+#         working-directory: /transformers
+#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+#       - name: NVIDIA-SMI
+#         run: |
+#           nvidia-smi
+
+#       - name: Environment
+#         working-directory: /transformers
+#         run: |
+#           python3 utils/print_env.py
+
+#       - name: Show installed libraries and their versions
+#         working-directory: /transformers
+#         run: pip freeze
+
+#       - name: Run examples tests on GPU
+#         working-directory: /transformers
+#         run: |
+#           pip install -r examples/pytorch/_tests_requirements.txt
+#           python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
+
+#       - name: Failure short reports
+#         if: ${{ failure() }}
+#         continue-on-error: true
+#         run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
+
+#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
+#         if: ${{ always() }}
+#         uses: actions/upload-artifact@v3
+#         with:
+#           name: ${{ matrix.machine_type }}_run_examples_gpu
+#           path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
+
+#   run_pipelines_torch_gpu:
+#     name: PyTorch pipelines
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         machine_type: [single-gpu, multi-gpu]
+#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+#     container:
+#       image: huggingface/transformers-pytorch-gpu
+#       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+#     needs: setup
+#     steps:
+#       - name: Update clone
+#         working-directory: /transformers
+#         run: git fetch && git checkout ${{ github.sha }}
+
+#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+#         working-directory: /transformers
+#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+#       - name: NVIDIA-SMI
+#         run: |
+#           nvidia-smi
+
+#       - name: Environment
+#         working-directory: /transformers
+#         run: |
+#           python3 utils/print_env.py
+
+#       - name: Show installed libraries and their versions
+#         working-directory: /transformers
+#         run: pip freeze
+
+#       - name: Run all pipeline tests on GPU
+#         working-directory: /transformers
+#         run: |
+#           python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
+
+#       - name: Failure short reports
+#         if: ${{ failure() }}
+#         continue-on-error: true
+#         run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
+
+#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
+#         if: ${{ always() }}
+#         uses: actions/upload-artifact@v3
+#         with:
+#           name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
+#           path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
+
+#   run_pipelines_tf_gpu:
+#     name: TensorFlow pipelines
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         machine_type: [single-gpu, multi-gpu]
+#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+#     container:
+#       image: huggingface/transformers-tensorflow-gpu
+#       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+#     needs: setup
+#     steps:
+#       - name: Update clone
+#         working-directory: /transformers
+#         run: |
+#           git fetch && git checkout ${{ github.sha }}
+
+#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+#         working-directory: /transformers
+#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+#       - name: NVIDIA-SMI
+#         run: |
+#           nvidia-smi
+
+#       - name: Environment
+#         working-directory: /transformers
+#         run: |
+#           python3 utils/print_env.py
+
+#       - name: Show installed libraries and their versions
+#         working-directory: /transformers
+#         run: pip freeze
+
+#       - name: Run all pipeline tests on GPU
+#         working-directory: /transformers
+#         run: |
+#           python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
+
+#       - name: Failure short reports
+#         if: ${{ always() }}
+#         run: |
+#           cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
+
+#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
+#         if: ${{ always() }}
+#         uses: actions/upload-artifact@v3
+#         with:
+#           name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
+#           path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
+
+#   run_all_tests_torch_cuda_extensions_gpu:
+#     name: Torch CUDA extension tests
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         machine_type: [single-gpu, multi-gpu]
+#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+#     needs: setup
+#     container:
+#       image: huggingface/transformers-pytorch-deepspeed-latest-gpu
+#       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+#     steps:
+#       - name: Update clone
+#         working-directory: /workspace/transformers
+#         run: git fetch && git checkout ${{ github.sha }}
+
+#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+#         working-directory: /workspace/transformers
+#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+#       - name: Remove cached torch extensions
+#         run: rm -rf /github/home/.cache/torch_extensions/
+
+#       # To avoid unknown test failures
+#       - name: Pre build DeepSpeed *again*
+#         working-directory: /workspace
+#         run: |
+#           python3 -m pip uninstall -y deepspeed
+#           DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
+
+#       - name: NVIDIA-SMI
+#         run: |
+#           nvidia-smi
+
+#       - name: Environment
+#         working-directory: /workspace/transformers
+#         run: |
+#           python utils/print_env.py
+
+#       - name: Show installed libraries and their versions
+#         working-directory: /workspace/transformers
+#         run: pip freeze
+
+#       - name: Run all tests on GPU
+#         working-directory: /workspace/transformers
+#         run: |
+#           python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+
+#       - name: Failure short reports
+#         if: ${{ failure() }}
+#         continue-on-error: true
+#         run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+
+#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
+#         if: ${{ always() }}
+#         uses: actions/upload-artifact@v3
+#         with:
+#           name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
+#           path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
 
   run_tests_quantization_torch_gpu:
     name: Quantization tests
@@ -354,11 +355,11 @@ jobs:
     if: always()
     needs: [
       setup,
-      run_tests_gpu,
-      run_examples_gpu,
-      run_pipelines_tf_gpu,
-      run_pipelines_torch_gpu,
-      run_all_tests_torch_cuda_extensions_gpu,
+      # run_tests_gpu,
+      # run_examples_gpu,
+      # run_pipelines_tf_gpu,
+      # run_pipelines_torch_gpu,
+      # run_all_tests_torch_cuda_extensions_gpu,
       run_tests_quantization_torch_gpu,
     ]
     steps:
@@ -402,11 +403,11 @@ jobs:
     if: always()
     needs: [
       setup,
-      run_tests_gpu,
-      run_examples_gpu,
-      run_pipelines_tf_gpu,
-      run_pipelines_torch_gpu,
-      run_all_tests_torch_cuda_extensions_gpu,
+      # run_tests_gpu,
+      # run_examples_gpu,
+      # run_pipelines_tf_gpu,
+      # run_pipelines_torch_gpu,
+      # run_all_tests_torch_cuda_extensions_gpu,
       run_tests_quantization_torch_gpu,
       run_extract_warnings
     ]

From 67cd706c474408ecd031772e20512bc4525e7e60 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 15:48:51 +0100
Subject: [PATCH 11/23] fix docker

---
 .github/workflows/build-docker-images.yml | 68 +++++++++++------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index 079b6e10f1f973..a22dd10bea474d 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -272,41 +272,41 @@ jobs:
           push: true
           tags: huggingface/transformers-tensorflow-gpu
 
-  latest-pytorch-deepspeed-amd:
-    name: "PyTorch + DeepSpeed (AMD) [dev]"
+  # latest-pytorch-deepspeed-amd:
+  #   name: "PyTorch + DeepSpeed (AMD) [dev]"
 
-    runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Check out code
-        uses: actions/checkout@v3
-      - name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+  #   runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
+  #   steps:
+  #     - name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     - name: Check out code
+  #       uses: actions/checkout@v3
+  #     - name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     - name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
+  #     # Push CI images still need to be re-built daily
+  #     -
+  #       name: Build and push (for Push CI) in a daily basis
+  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+  #       if: inputs.image_postfix != '-push-ci'
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
 
   latest-quantization-torch-docker:
     name: "Latest Pytorch + Quantization [dev]"

From 99d0456dca02738967987423e2758709aeb08ac2 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 15:49:16 +0100
Subject: [PATCH 12/23] do not trigger on push again

---
 .github/workflows/self-scheduled.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 626ca8f0698987..d9fc7a8617a456 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -12,8 +12,8 @@ on:
     - cron: "17 2 * * *"
   push:
     branches:
-      # - run_scheduled_ci*
-      - add-quantization-workflow
+      - run_scheduled_ci*
+      # - add-quantization-workflow
 
 env:
   HF_HOME: /mnt/cache

From aca17cf3cfe1225ad4c9f385e619fcaf9a273400 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 17:03:39 +0100
Subject: [PATCH 13/23] add additional runs

---
 utils/notification_service.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/notification_service.py b/utils/notification_service.py
index 39a0fb840cf5ad..ac97f988c97298 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -1043,6 +1043,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
         "PyTorch pipelines": "run_tests_torch_pipeline_gpu",
         "TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
         "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
+        "Quantization tests": "run_tests_quantization_torch_gpu"
     }
 
     if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"):

From a796a5eea8cd7a979821f5b8989fecfaa3a20653 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 17:04:04 +0100
Subject: [PATCH 14/23] test again

---
 .github/workflows/self-scheduled.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index d9fc7a8617a456..626ca8f0698987 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -12,8 +12,8 @@ on:
     - cron: "17 2 * * *"
   push:
     branches:
-      - run_scheduled_ci*
-      # - add-quantization-workflow
+      # - run_scheduled_ci*
+      - add-quantization-workflow
 
 env:
   HF_HOME: /mnt/cache

From e60712d09f5a8a6feb6e22f168f5371fe0b5f24b Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 17:36:03 +0100
Subject: [PATCH 15/23] all good

---
 .github/workflows/self-scheduled.yml | 475 +++++++++++++--------------
 1 file changed, 237 insertions(+), 238 deletions(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 626ca8f0698987..3b63b7a688b001 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -12,8 +12,7 @@ on:
     - cron: "17 2 * * *"
   push:
     branches:
-      # - run_scheduled_ci*
-      - add-quantization-workflow
+      - run_scheduled_ci*
 
 env:
   HF_HOME: /mnt/cache
@@ -71,232 +70,232 @@ jobs:
         run: |
           nvidia-smi
 
-#   run_tests_gpu:
-#     name: " "
-#     needs: setup
-#     strategy:
-#       fail-fast: false
-#       matrix:
-#         machine_type: [single-gpu, multi-gpu]
-#         slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
-#     uses: ./.github/workflows/model_jobs.yml
-#     with:
-#       folder_slices: ${{ needs.setup.outputs.folder_slices }}
-#       machine_type: ${{ matrix.machine_type }}
-#       slice_id: ${{ matrix.slice_id }}
-#     secrets: inherit
-
-#   run_examples_gpu:
-#     name: Examples directory
-#     strategy:
-#       fail-fast: false
-#       matrix:
-#         machine_type: [single-gpu]
-#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-#     container:
-#       image: huggingface/transformers-all-latest-gpu
-#       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-#     needs: setup
-#     steps:
-#       - name: Update clone
-#         working-directory: /transformers
-#         run: git fetch && git checkout ${{ github.sha }}
-
-#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-#         working-directory: /transformers
-#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-#       - name: NVIDIA-SMI
-#         run: |
-#           nvidia-smi
-
-#       - name: Environment
-#         working-directory: /transformers
-#         run: |
-#           python3 utils/print_env.py
-
-#       - name: Show installed libraries and their versions
-#         working-directory: /transformers
-#         run: pip freeze
-
-#       - name: Run examples tests on GPU
-#         working-directory: /transformers
-#         run: |
-#           pip install -r examples/pytorch/_tests_requirements.txt
-#           python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
-
-#       - name: Failure short reports
-#         if: ${{ failure() }}
-#         continue-on-error: true
-#         run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
-
-#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
-#         if: ${{ always() }}
-#         uses: actions/upload-artifact@v3
-#         with:
-#           name: ${{ matrix.machine_type }}_run_examples_gpu
-#           path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
-
-#   run_pipelines_torch_gpu:
-#     name: PyTorch pipelines
-#     strategy:
-#       fail-fast: false
-#       matrix:
-#         machine_type: [single-gpu, multi-gpu]
-#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-#     container:
-#       image: huggingface/transformers-pytorch-gpu
-#       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-#     needs: setup
-#     steps:
-#       - name: Update clone
-#         working-directory: /transformers
-#         run: git fetch && git checkout ${{ github.sha }}
-
-#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-#         working-directory: /transformers
-#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-#       - name: NVIDIA-SMI
-#         run: |
-#           nvidia-smi
-
-#       - name: Environment
-#         working-directory: /transformers
-#         run: |
-#           python3 utils/print_env.py
-
-#       - name: Show installed libraries and their versions
-#         working-directory: /transformers
-#         run: pip freeze
-
-#       - name: Run all pipeline tests on GPU
-#         working-directory: /transformers
-#         run: |
-#           python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
-
-#       - name: Failure short reports
-#         if: ${{ failure() }}
-#         continue-on-error: true
-#         run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
-
-#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
-#         if: ${{ always() }}
-#         uses: actions/upload-artifact@v3
-#         with:
-#           name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
-#           path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
-
-#   run_pipelines_tf_gpu:
-#     name: TensorFlow pipelines
-#     strategy:
-#       fail-fast: false
-#       matrix:
-#         machine_type: [single-gpu, multi-gpu]
-#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-#     container:
-#       image: huggingface/transformers-tensorflow-gpu
-#       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-#     needs: setup
-#     steps:
-#       - name: Update clone
-#         working-directory: /transformers
-#         run: |
-#           git fetch && git checkout ${{ github.sha }}
-
-#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-#         working-directory: /transformers
-#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-#       - name: NVIDIA-SMI
-#         run: |
-#           nvidia-smi
-
-#       - name: Environment
-#         working-directory: /transformers
-#         run: |
-#           python3 utils/print_env.py
-
-#       - name: Show installed libraries and their versions
-#         working-directory: /transformers
-#         run: pip freeze
-
-#       - name: Run all pipeline tests on GPU
-#         working-directory: /transformers
-#         run: |
-#           python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
-
-#       - name: Failure short reports
-#         if: ${{ always() }}
-#         run: |
-#           cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
-
-#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
-#         if: ${{ always() }}
-#         uses: actions/upload-artifact@v3
-#         with:
-#           name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
-#           path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
-
-#   run_all_tests_torch_cuda_extensions_gpu:
-#     name: Torch CUDA extension tests
-#     strategy:
-#       fail-fast: false
-#       matrix:
-#         machine_type: [single-gpu, multi-gpu]
-#     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
-#     needs: setup
-#     container:
-#       image: huggingface/transformers-pytorch-deepspeed-latest-gpu
-#       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-#     steps:
-#       - name: Update clone
-#         working-directory: /workspace/transformers
-#         run: git fetch && git checkout ${{ github.sha }}
-
-#       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-#         working-directory: /workspace/transformers
-#         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-#       - name: Remove cached torch extensions
-#         run: rm -rf /github/home/.cache/torch_extensions/
-
-#       # To avoid unknown test failures
-#       - name: Pre build DeepSpeed *again*
-#         working-directory: /workspace
-#         run: |
-#           python3 -m pip uninstall -y deepspeed
-#           DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
-#       - name: NVIDIA-SMI
-#         run: |
-#           nvidia-smi
-
-#       - name: Environment
-#         working-directory: /workspace/transformers
-#         run: |
-#           python utils/print_env.py
-
-#       - name: Show installed libraries and their versions
-#         working-directory: /workspace/transformers
-#         run: pip freeze
-
-#       - name: Run all tests on GPU
-#         working-directory: /workspace/transformers
-#         run: |
-#           python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
-
-#       - name: Failure short reports
-#         if: ${{ failure() }}
-#         continue-on-error: true
-#         run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
-
-#       - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
-#         if: ${{ always() }}
-#         uses: actions/upload-artifact@v3
-#         with:
-#           name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
-#           path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
+  run_tests_gpu:
+    name: " "
+    needs: setup
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
+    uses: ./.github/workflows/model_jobs.yml
+    with:
+      folder_slices: ${{ needs.setup.outputs.folder_slices }}
+      machine_type: ${{ matrix.machine_type }}
+      slice_id: ${{ matrix.slice_id }}
+    secrets: inherit
+
+  run_examples_gpu:
+    name: Examples directory
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    container:
+      image: huggingface/transformers-all-latest-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run examples tests on GPU
+        working-directory: /transformers
+        run: |
+          pip install -r examples/pytorch/_tests_requirements.txt
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_examples_gpu
+          path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
+
+  run_pipelines_torch_gpu:
+    name: PyTorch pipelines
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    container:
+      image: huggingface/transformers-pytorch-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
+
+  run_pipelines_tf_gpu:
+    name: TensorFlow pipelines
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    container:
+      image: huggingface/transformers-tensorflow-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: |
+          git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
+
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: |
+          cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
+
+  run_all_tests_torch_cuda_extensions_gpu:
+    name: Torch CUDA extension tests
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    needs: setup
+    container:
+      image: huggingface/transformers-pytorch-deepspeed-latest-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Update clone
+        working-directory: /workspace/transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /workspace/transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: Remove cached torch extensions
+        run: rm -rf /github/home/.cache/torch_extensions/
+
+      # To avoid unknown test failures
+      - name: Pre build DeepSpeed *again*
+        working-directory: /workspace
+        run: |
+          python3 -m pip uninstall -y deepspeed
+          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /workspace/transformers
+        run: |
+          python utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /workspace/transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /workspace/transformers
+        run: |
+          python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
 
   run_tests_quantization_torch_gpu:
     name: Quantization tests
@@ -355,11 +354,11 @@ jobs:
     if: always()
     needs: [
       setup,
-      # run_tests_gpu,
-      # run_examples_gpu,
-      # run_pipelines_tf_gpu,
-      # run_pipelines_torch_gpu,
-      # run_all_tests_torch_cuda_extensions_gpu,
+      run_tests_gpu,
+      run_examples_gpu,
+      run_pipelines_tf_gpu,
+      run_pipelines_torch_gpu,
+      run_all_tests_torch_cuda_extensions_gpu,
       run_tests_quantization_torch_gpu,
     ]
     steps:
@@ -403,11 +402,11 @@ jobs:
     if: always()
     needs: [
       setup,
-      # run_tests_gpu,
-      # run_examples_gpu,
-      # run_pipelines_tf_gpu,
-      # run_pipelines_torch_gpu,
-      # run_all_tests_torch_cuda_extensions_gpu,
+      run_tests_gpu,
+      run_examples_gpu,
+      run_pipelines_tf_gpu,
+      run_pipelines_torch_gpu,
+      run_all_tests_torch_cuda_extensions_gpu,
       run_tests_quantization_torch_gpu,
       run_extract_warnings
     ]

From 3e82d7ba5cbb5b0ad027e7181e37b75cb810aa4c Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 21 Feb 2024 17:41:10 +0100
Subject: [PATCH 16/23] style

---
 utils/notification_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/notification_service.py b/utils/notification_service.py
index ac97f988c97298..d29e6994a232b2 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -1043,7 +1043,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
         "PyTorch pipelines": "run_tests_torch_pipeline_gpu",
         "TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
         "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
-        "Quantization tests": "run_tests_quantization_torch_gpu"
+        "Quantization tests": "run_tests_quantization_torch_gpu",
     }
 
     if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"):

From 34e6048d4eb0f27422a781f90191f6153661834c Mon Sep 17 00:00:00 2001
From: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Date: Thu, 22 Feb 2024 10:14:55 -0500
Subject: [PATCH 17/23] Update .github/workflows/self-scheduled.yml

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
---
 .github/workflows/self-scheduled.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 3b63b7a688b001..b0e1717993a37f 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -333,7 +333,6 @@ jobs:
       - name: Run quantization tests on GPU
         working-directory: /transformers
         run: |
-          pip install -r examples/pytorch/_tests_requirements.txt
           python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
 
       - name: Failure short reports

From c5c567089f41bf928211bdd5f2e119331671f5b8 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Fri, 23 Feb 2024 17:17:28 +0100
Subject: [PATCH 18/23] test build dockerfile with torch 2.2.0

---
 .github/workflows/build-docker-images.yml     | 419 +++++++++---------
 .../Dockerfile                                |   6 +-
 2 files changed, 207 insertions(+), 218 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index 8cae54cffa5d87..00781635ca6c3b 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -3,7 +3,8 @@ name: Build docker images (scheduled)
 on:
   push:
     branches:
-      - build_ci_docker_image*
+      # - build_ci_docker_image*
+      - add-quantization-workflow
   repository_dispatch:
   workflow_call:
     inputs:
@@ -18,185 +19,185 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  latest-docker:
-    name: "Latest PyTorch + TensorFlow [dev]"
-    runs-on: [intel-cpu, 8-cpu, ci]
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-all-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-all-latest-gpu-push-ci
+  # latest-docker:
+  #   name: "Latest PyTorch + TensorFlow [dev]"
+  #   runs-on: [intel-cpu, 8-cpu, ci]
+  #   steps:
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-all-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
+  #     # Push CI images still need to be re-built daily
+  #     -
+  #       name: Build and push (for Push CI) in a daily basis
+  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+  #       if: inputs.image_postfix != '-push-ci'
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-all-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-all-latest-gpu-push-ci
 
-  latest-torch-deepspeed-docker:
-    name: "Latest PyTorch + DeepSpeed"
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
+  # latest-torch-deepspeed-docker:
+  #   name: "Latest PyTorch + DeepSpeed"
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
 
-  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
-  latest-torch-deepspeed-docker-for-push-ci-daily-build:
-    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
+  # # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
+  # latest-torch-deepspeed-docker-for-push-ci-daily-build:
+  #   name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     # Push CI images still need to be re-built daily
+  #     -
+  #       name: Build and push (for Push CI) in a daily basis
+  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+  #       if: inputs.image_postfix != '-push-ci'
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
 
-  doc-builder:
-    name: "Doc builder"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-doc-builder
-          push: true
-          tags: huggingface/transformers-doc-builder
+  # doc-builder:
+  #   name: "Doc builder"
+  #   # Push CI doesn't need this image
+  #   if: inputs.image_postfix != '-push-ci'
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-doc-builder
+  #         push: true
+  #         tags: huggingface/transformers-doc-builder
 
-  latest-pytorch:
-    name: "Latest PyTorch [dev]"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-gpu
+  # latest-pytorch:
+  #   name: "Latest PyTorch [dev]"
+  #   # Push CI doesn't need this image
+  #   if: inputs.image_postfix != '-push-ci'
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     - name: Cleanup disk
+  #       run: |
+  #         sudo ls -l /usr/local/lib/
+  #         sudo ls -l /usr/share/
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #         sudo rm -rf /usr/local/lib/android
+  #         sudo rm -rf /usr/share/dotnet
+  #         sudo du -sh /usr/local/lib/
+  #         sudo du -sh /usr/share/
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-gpu
 
 # Need to be fixed with the help from Guillaume.
 #  latest-pytorch-amd:
@@ -234,33 +235,33 @@ jobs:
 #          push: true
 #          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
 
-  latest-tensorflow:
-    name: "Latest TensorFlow [dev]"
-    # Push CI doesn't need this image
-    if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
-    steps:
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v3
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-tensorflow-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-tensorflow-gpu
+  # latest-tensorflow:
+  #   name: "Latest TensorFlow [dev]"
+  #   # Push CI doesn't need this image
+  #   if: inputs.image_postfix != '-push-ci'
+  #   runs-on: ubuntu-22.04
+  #   steps:
+  #     -
+  #       name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     -
+  #       name: Check out code
+  #       uses: actions/checkout@v3
+  #     -
+  #       name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     -
+  #       name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-tensorflow-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-tensorflow-gpu
 
   # latest-pytorch-deepspeed-amd:
   #   name: "PyTorch + DeepSpeed (AMD) [dev]"
@@ -302,18 +303,8 @@ jobs:
     name: "Latest Pytorch + Quantization [dev]"
      # Push CI doesn't need this image
     if: inputs.image_postfix != '-push-ci'
-    runs-on: ubuntu-22.04
+    runs-on: [intel-cpu, 8-cpu, ci]
     steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
       -
         name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index e7df09bf415b48..12c97f5a46152c 100644
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
 # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
 # to be used as arguments for docker build (so far).
 
-ARG PYTORCH='2.1.1'
+ARG PYTORCH='2.2.0'
 # Example: `cu102`, `cu113`, etc.
 ARG CUDA='cu118'
 
@@ -26,9 +26,7 @@ RUN echo torch=$VERSION
 # Currently, let's just use their latest releases (when `torch` is installed with a release version)
 RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
 
-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev]
-
-RUN python3 -m pip uninstall -y flax jax
+RUN python3 -m pip install --no-cache-dir -e ./transformers[torch-dev]
 
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
 

From 4c757b88f9617fa3b5a9fa437fdacc0b1dfcbea2 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Mon, 26 Feb 2024 15:59:22 +0100
Subject: [PATCH 19/23] fix extra

---
 docker/transformers-quantization-latest-gpu/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index 12c97f5a46152c..66bdcc42bae9fd 100644
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -26,7 +26,7 @@ RUN echo torch=$VERSION
 # Currently, let's just use their latest releases (when `torch` is installed with a release version)
 RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
 
-RUN python3 -m pip install --no-cache-dir -e ./transformers[torch-dev]
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
 
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
 

From ce94146f5437ffc0c64833d6e14a932fc49f9869 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Mon, 26 Feb 2024 16:48:04 +0100
Subject: [PATCH 20/23] clean

---
 .github/workflows/build-docker-images.yml | 475 +++++++++++-----------
 1 file changed, 237 insertions(+), 238 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index 00781635ca6c3b..d9796490e5e75b 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -3,8 +3,7 @@ name: Build docker images (scheduled)
 on:
   push:
     branches:
-      # - build_ci_docker_image*
-      - add-quantization-workflow
+      - build_ci_docker_image*
   repository_dispatch:
   workflow_call:
     inputs:
@@ -19,185 +18,185 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  # latest-docker:
-  #   name: "Latest PyTorch + TensorFlow [dev]"
-  #   runs-on: [intel-cpu, 8-cpu, ci]
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-all-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-all-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-all-latest-gpu-push-ci
+  latest-docker:
+    name: "Latest PyTorch + TensorFlow [dev]"
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu-push-ci
 
-  # latest-torch-deepspeed-docker:
-  #   name: "Latest PyTorch + DeepSpeed"
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
+  latest-torch-deepspeed-docker:
+    name: "Latest PyTorch + DeepSpeed"
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
 
-  # # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
-  # latest-torch-deepspeed-docker-for-push-ci-daily-build:
-  #   name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-latest-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
+  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
+  latest-torch-deepspeed-docker-for-push-ci-daily-build:
+    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
 
-  # doc-builder:
-  #   name: "Doc builder"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-doc-builder
-  #         push: true
-  #         tags: huggingface/transformers-doc-builder
+  doc-builder:
+    name: "Doc builder"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-doc-builder
+          push: true
+          tags: huggingface/transformers-doc-builder
 
-  # latest-pytorch:
-  #   name: "Latest PyTorch [dev]"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     - name: Cleanup disk
-  #       run: |
-  #         sudo ls -l /usr/local/lib/
-  #         sudo ls -l /usr/share/
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo du -sh /usr/local/lib/
-  #         sudo du -sh /usr/share/
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-gpu
+  latest-pytorch:
+    name: "Latest PyTorch [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-gpu
 
 # Need to be fixed with the help from Guillaume.
 #  latest-pytorch-amd:
@@ -235,69 +234,69 @@ jobs:
 #          push: true
 #          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
 
-  # latest-tensorflow:
-  #   name: "Latest TensorFlow [dev]"
-  #   # Push CI doesn't need this image
-  #   if: inputs.image_postfix != '-push-ci'
-  #   runs-on: ubuntu-22.04
-  #   steps:
-  #     -
-  #       name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     -
-  #       name: Check out code
-  #       uses: actions/checkout@v3
-  #     -
-  #       name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     -
-  #       name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-tensorflow-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-tensorflow-gpu
+  latest-tensorflow:
+    name: "Latest TensorFlow [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-tensorflow-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-tensorflow-gpu
 
-  # latest-pytorch-deepspeed-amd:
-  #   name: "PyTorch + DeepSpeed (AMD) [dev]"
+  latest-pytorch-deepspeed-amd:
+    name: "PyTorch + DeepSpeed (AMD) [dev]"
 
-  #   runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
-  #   steps:
-  #     - name: Set up Docker Buildx
-  #       uses: docker/setup-buildx-action@v3
-  #     - name: Check out code
-  #       uses: actions/checkout@v3
-  #     - name: Login to DockerHub
-  #       uses: docker/login-action@v3
-  #       with:
-  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
-  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     - name: Build and push
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
-  #     # Push CI images still need to be re-built daily
-  #     -
-  #       name: Build and push (for Push CI) in a daily basis
-  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-  #       if: inputs.image_postfix != '-push-ci'
-  #       uses: docker/build-push-action@v5
-  #       with:
-  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-  #         build-args: |
-  #           REF=main
-  #         push: true
-  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+    runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Check out code
+        uses: actions/checkout@v3
+      - name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
 
   latest-quantization-torch-docker:
     name: "Latest Pytorch + Quantization [dev]"

From 4cb52b8822da9d1786a821a33e867e4fcc00d8fd Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 28 Feb 2024 15:26:37 +0100
Subject: [PATCH 21/23] revert changes

---
 src/transformers/modeling_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index b3102a37d37f31..4871110f5b6ffb 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3552,6 +3552,7 @@ def from_pretrained(
                 "device_map": device_map,
                 "offload_dir": offload_folder,
                 "offload_index": offload_index,
+                "force_hooks": True
             }
             if "skip_keys" in inspect.signature(dispatch_model).parameters:
                 device_map_kwargs["skip_keys"] = model._skip_keys_device_placement

From 750693276b98d42f3db7ee358286841e50ac9134 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 28 Feb 2024 15:42:45 +0100
Subject: [PATCH 22/23] Revert "revert changes"

This reverts commit 4cb52b8822da9d1786a821a33e867e4fcc00d8fd.
---
 src/transformers/modeling_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 4871110f5b6ffb..b3102a37d37f31 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3552,7 +3552,6 @@ def from_pretrained(
                 "device_map": device_map,
                 "offload_dir": offload_folder,
                 "offload_index": offload_index,
-                "force_hooks": True
             }
             if "skip_keys" in inspect.signature(dispatch_model).parameters:
                 device_map_kwargs["skip_keys"] = model._skip_keys_device_placement

From 9209b46138335e1834a558223bb4d8de3dc1f337 Mon Sep 17 00:00:00 2001
From: Marc Sun <marc@huggingface.co>
Date: Wed, 28 Feb 2024 15:43:08 +0100
Subject: [PATCH 23/23] revert correct change

---
 .github/workflows/build-docker-images.yml | 68 +++++++++++------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index d9796490e5e75b..6144f8036f96c9 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -262,41 +262,41 @@ jobs:
           push: true
           tags: huggingface/transformers-tensorflow-gpu
 
-  latest-pytorch-deepspeed-amd:
-    name: "PyTorch + DeepSpeed (AMD) [dev]"
+  # latest-pytorch-deepspeed-amd:
+  #   name: "PyTorch + DeepSpeed (AMD) [dev]"
 
-    runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Check out code
-        uses: actions/checkout@v3
-      - name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
-      # Push CI images still need to be re-built daily
-      -
-        name: Build and push (for Push CI) in a daily basis
-        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
-        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
-        if: inputs.image_postfix != '-push-ci'
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
-          build-args: |
-            REF=main
-          push: true
-          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+  #   runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
+  #   steps:
+  #     - name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
+  #     - name: Check out code
+  #       uses: actions/checkout@v3
+  #     - name: Login to DockerHub
+  #       uses: docker/login-action@v3
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_PASSWORD }}
+  #     - name: Build and push
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
+  #     # Push CI images still need to be re-built daily
+  #     -
+  #       name: Build and push (for Push CI) in a daily basis
+  #       # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+  #       # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+  #       if: inputs.image_postfix != '-push-ci'
+  #       uses: docker/build-push-action@v5
+  #       with:
+  #         context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+  #         build-args: |
+  #           REF=main
+  #         push: true
+  #         tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
 
   latest-quantization-torch-docker:
     name: "Latest Pytorch + Quantization [dev]"