Skip to content

Commit

Permalink
Multi Pin Bumps across PT/AO/tune/ET: pt dev20241213 (#1367)
Browse files Browse the repository at this point in the history
* Bump PyTorch pin to 20241111

* bump to 1112

* Update install_requirements.sh

* Update install_requirements.sh

* Update checkpoint.py typo

* Update install_requirements.sh

* Update install_requirements.sh

* Update install_requirements.sh

* Bump pins, waiting for nvjit fix

* Update install_requirements.sh

* bump tune

* fix tune major version

* Bump AO pin to pick up import fix

* misc

* Update linux_job CI to v2

* Update install_requirements.sh PT pin to 1202

* Vision nightly is delayed

* Bump CUDA version; drop PT version to one with vision nightly

* Bump to 1205 vision nightly

* Vision nightly 1205 needs 1204 torch(?)

* Drop PT version to 1126 (friendly vision version), update devtoolset to 11 for almalinux

* Test download toolchain instead of binutils

* Test removing devtoolset

* Remove dep on devtoolset 11 that doesn't exist on the new machine

* Bump ET pin

* Test nightly with updated vision

* Attempt to account for int4wo packing pt#139611

* Naive gguf int4wo attempt

* Update install_requirements.sh to 1210

* Update install_requirements.sh to 20241213

Should fix the MacOS wheel regression

* Update torchvision minor version to 22
  • Loading branch information
Jack-Khuu authored Dec 14, 2024
1 parent 570aebc commit bb72b09
Show file tree
Hide file tree
Showing 11 changed files with 74 additions and 166 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/more-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,17 @@ on:

jobs:
test-cuda:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Download checkpoints"
# Install requirements
./install/install_requirements.sh cuda
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ jobs:
set -eux
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic" --backend "gpu"
test-gpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
secrets: inherit
Expand All @@ -119,7 +119,7 @@ jobs:
secrets-env: "HF_TOKEN_PERIODIC"
runner: ${{ matrix.runner }}
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
script: |
echo "::group::Print machine info"
nvidia-smi
Expand Down
42 changes: 11 additions & 31 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ jobs:
set -eux
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu"
test-gpu-compile:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
Expand All @@ -224,7 +224,7 @@ jobs:
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
script: |
echo "::group::Print machine info"
nvidia-smi
Expand All @@ -250,7 +250,7 @@ jobs:
echo "::endgroup::"
test-gpu-aoti-bfloat16:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
Expand All @@ -259,18 +259,13 @@ jobs:
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
timeout: 60
script: |
echo "::group::Print machine info"
nvidia-smi
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Install required packages"
./install/install_requirements.sh cuda
pip3 list
Expand All @@ -291,7 +286,7 @@ jobs:
echo "::endgroup::"
test-gpu-aoti-float32:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
Expand All @@ -300,17 +295,12 @@ jobs:
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
script: |
echo "::group::Print machine info"
nvidia-smi
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Install required packages"
./install/install_requirements.sh cuda
pip list
Expand All @@ -337,7 +327,7 @@ jobs:
echo "::endgroup::"
test-gpu-aoti-float16:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
Expand All @@ -346,17 +336,12 @@ jobs:
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
script: |
echo "::group::Print machine info"
nvidia-smi
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Install required packages"
./install/install_requirements.sh cuda
pip list
Expand Down Expand Up @@ -384,7 +369,7 @@ jobs:
echo "::endgroup::"
test-gpu-eval-sanity-check:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
Expand All @@ -393,17 +378,12 @@ jobs:
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
script: |
echo "::group::Print machine info"
nvidia-smi
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Install required packages"
./install/install_requirements.sh cuda
pip3 list
Expand Down Expand Up @@ -1031,7 +1011,7 @@ jobs:
echo "Tests complete."
test-build-runner-et-android:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.4xlarge
script: |
Expand Down
27 changes: 6 additions & 21 deletions .github/workflows/run-readme-periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,19 @@ on:

jobs:
test-readme:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Create script to run README"
python3 torchchat/utils/scripts/updown.py --create-sections --file README.md > ./run-readme.sh
# for good measure, if something happened to updown processor,
Expand All @@ -44,23 +39,18 @@ jobs:
test-quantization-any:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets: inherit
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Create script to run quantization"
python3 torchchat/utils/scripts/updown.py --create-sections --file docs/quantization.md > ./run-quantization.sh
# for good measure, if something happened to updown processor,
Expand All @@ -76,24 +66,19 @@ jobs:
echo "::endgroup::"
test-gguf-any:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Create script to run gguf"
python3 torchchat/utils/scripts/updown.py --file docs/GGUF.md > ./run-gguf.sh
# for good measure, if something happened to updown processor,
Expand Down
Loading

0 comments on commit bb72b09

Please sign in to comment.