diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index 2b198bd4af56c5..6144f8036f96c9 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -297,3 +297,31 @@ jobs:
 #            REF=main
 #          push: true
 #          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+
+  latest-quantization-torch-docker:
+    name: "Latest Pytorch + Quantization [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on: [intel-cpu, 8-cpu, ci]
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v3
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-quantization-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
\ No newline at end of file
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index d44e9a29ecf0da..b0e1717993a37f 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -297,6 +297,56 @@ jobs:
           name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
           path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
 
+  run_tests_quantization_torch_gpu:
+    name: Quantization tests
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    container:
+      image: huggingface/transformers-quantization-latest-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run quantization tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
+
   run_extract_warnings:
     name: Extract warnings in CI artifacts
     runs-on: ubuntu-22.04
@@ -307,7 +357,8 @@ jobs:
       run_examples_gpu,
       run_pipelines_tf_gpu,
       run_pipelines_torch_gpu,
-      run_all_tests_torch_cuda_extensions_gpu
+      run_all_tests_torch_cuda_extensions_gpu,
+      run_tests_quantization_torch_gpu,
     ]
     steps:
       - name: Checkout transformers
@@ -355,6 +406,7 @@ jobs:
       run_pipelines_tf_gpu,
       run_pipelines_torch_gpu,
       run_all_tests_torch_cuda_extensions_gpu,
+      run_tests_quantization_torch_gpu,
       run_extract_warnings
     ]
     steps:
diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
new file mode 100644
index 00000000000000..66bdcc42bae9fd
--- /dev/null
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -0,0 +1,50 @@
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+LABEL maintainer="Hugging Face"
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Use login shell to read variables from `~/.profile` (to pass dynamically created variables between RUN commands)
+SHELL ["sh", "-lc"]
+
+# The following `ARG`s are mainly used to specify the versions explicitly & directly in this Dockerfile, and are not meant
+# to be used as arguments for docker build (so far).
+
+ARG PYTORCH='2.2.0'
+# Example: `cu102`, `cu113`, etc.
+ARG CUDA='cu118'
+
+RUN apt update
+RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python python3-pip ffmpeg
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+
+ARG REF=main
+RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
+
+RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
+RUN echo torch=$VERSION
+# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
+# Currently, let's just use their latest releases (when `torch` is installed with a release version)
+RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
+
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
+
+RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+# Add bitsandbytes for mixed int8 testing
+RUN python3 -m pip install --no-cache-dir bitsandbytes
+
+# Add auto-gptq for gptq quantization testing
+RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
+
+# Add optimum for gptq quantization testing
+RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
+
+# Add aqlm for quantization testing
+RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
+
+# Add autoawq for quantization testing
+RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl
+
+# When installing in editable mode, `transformers` is not recognized as a package.
+# This line must be added in order for Python to be aware of transformers.
+RUN cd transformers && python3 setup.py develop
\ No newline at end of file
diff --git a/docs/source/en/hf_quantizer.md b/docs/source/en/hf_quantizer.md
index 154cfb54b9ebc8..8261a6bc4585e1 100644
--- a/docs/source/en/hf_quantizer.md
+++ b/docs/source/en/hf_quantizer.md
@@ -66,4 +66,4 @@ For some quantization methods, they may require "pre-quantizing" the models thro
 
 7. Document everything! Make sure your quantization method is documented in the [`docs/source/en/quantization.md`](https://github.com/huggingface/transformers/blob/abbffc4525566a48a9733639797c812301218b83/docs/source/en/quantization.md) file.
 
-8. Add tests! You should add tests by first adding the package in our nightly Dockerfile inside `docker/transformers-all-latest-gpu` and then adding a new test file in `tests/quantization/xxx`. Feel free to check out how it is implemented for other quantization methods.
+8. Add tests! You should add tests by first adding the package in our nightly Dockerfile inside `docker/transformers-quantization-latest-gpu` and then adding a new test file in `tests/quantization/xxx`. Feel free to check out how it is implemented for other quantization methods.
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 39a0fb840cf5ad..d29e6994a232b2 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -1043,6 +1043,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
         "PyTorch pipelines": "run_tests_torch_pipeline_gpu",
         "TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
         "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
+        "Quantization tests": "run_tests_quantization_torch_gpu",
     }
 
     if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"):
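
For reference, the `run_tests_quantization_torch_gpu` job added above runs `python3 -m pytest -v --make-reports=... tests/quantization` inside the `huggingface/transformers-quantization-latest-gpu` image, so a new quantization backend only needs its package added to this Dockerfile and a test file under `tests/quantization/` (step 8 of `hf_quantizer.md`). Below is a minimal sketch of what such a test file could look like; the bitsandbytes 8-bit path, the placeholder checkpoint, and the file/class names are illustrative assumptions, not tests that exist in the repository.

```python
# tests/quantization/example_backend/test_example_backend.py  (hypothetical path)
# Minimal sketch of a quantization test that the scheduled CI job above would collect.
import unittest

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from transformers.testing_utils import require_bitsandbytes, require_torch_gpu


@require_torch_gpu
@require_bitsandbytes
class ExampleQuantizationTest(unittest.TestCase):
    model_id = "facebook/opt-125m"  # placeholder checkpoint, kept small for CI

    def test_quantized_model_loads(self):
        # Exercise the quantized loading path; bitsandbytes 8-bit is used purely as an
        # illustration of the pattern -- a new backend would swap in its own config here.
        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
        model = AutoModelForCausalLM.from_pretrained(
            self.model_id, quantization_config=quantization_config, device_map="auto"
        )
        # A real test would also check generation output and memory footprint;
        # here we only assert that loading succeeded.
        self.assertIsNotNone(model)
```

Locally, the same run can be reproduced inside the image with `python3 -m pytest -v tests/quantization`, which is what the workflow step does apart from the report flags.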