Add in-tree special_models test suite using reworked iree-tooling. (#…

…17883) With this, we move away from using all the specialized json config files and complex workflows. Instead, we use python scripts which allow us to use custom flags, tolerances, and configurations based on the backend/model. Related PR in TestSuite: nod-ai/SHARK-TestSuite#271 This PR also removes all dependencies on SHARK-TestSuite tooling. Reworked the tools here so that downloading, caching, testing, and benchmarking occur as intended with tools solely from this repo for iree_special_models. Whenever we are adding test files here, the goal is for an IREE user to be able to clone the repo and run the tests knowing nothing about the SHARK-TestSuite. Also, I didn't realize that ireers already has a stamping process here to check whether a file has already been produced. I think we have to remove this because it will skip the download even if there is a newer version of the file available, and there's really no point when downloading to a cache because once a file is there, it is never removed, so it is not a valuable signal. (Third time's the charm. Had to close the last two versions of this PR because I couldn't get past a pre-commit check that led me to rebase and add a bunch of commits that weren't mine 🤦 ) ci-exactly: build_all, test_amd_mi300, build_packages, regression_test --------- Signed-off-by: saienduri <[email protected]>
iree-org · Jul 12, 2024 · 44808e1 · 44808e1
1 parent 4035603
commit 44808e1
Show file tree

Hide file tree

Showing 29 changed files with 1,824 additions and 552 deletions.
diff --git a/.github/workflows/pkgci_regression_test.yml b/.github/workflows/pkgci_regression_test.yml
@@ -90,14 +90,15 @@ jobs:
         uses: actions/[email protected]
         with:
           repository: nod-ai/SHARK-TestSuite
-          ref: 3603a453b3777fac9af4506a3dc0b3d87587fd47
+          ref: a06e730ce325c12db40bb89b43e8e6e897052e96
           path: SHARK-TestSuite
           submodules: false
           lfs: false
       - name: Install external TestSuite Python requirements
         run: |
           source ${VENV_DIR}/bin/activate
           python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
+          pip install --no-compile --pre --upgrade -e SHARK-TestSuite/common_tools
 
       - name: Run external tests - ONNX test suite
         run: |
@@ -138,25 +139,14 @@ jobs:
           # CPU
           - name: cpu_llvm_task
             models-config-file: models_cpu_llvm_task.json
-            sdxl-unet-config-file: sdxl_scheduled_unet_cpu_llvm_task.json
-            sdxl-vae-config-file: sdxl_vae_decode_cpu_llvm_task.json
-            sdxl-clip-config-file: sdxl_prompt_encoder_cpu_llvm_task.json
             runs-on: nodai-amdgpu-w7900-x86-64
 
           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             models-config-file: models_gpu_rocm_gfx90a.json
-            models-extra-flags-config-file: models_gpu_rocm_gfx90a_additional_flags.json
-            sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx90a.json
-            sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx90a.json
-            sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx90a.json
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             models-config-file: models_gpu_rocm_gfx942.json
-            models-extra-flags-config-file: models_gpu_rocm_gfx942_additional_flags.json
-            sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx942.json
-            sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx942.json
-            sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx942.json
             runs-on: nodai-amdgpu-mi300-x86-64
           - name: amdgpu_vulkan
             models-config-file: models_gpu_vulkan.json
@@ -176,10 +166,6 @@ jobs:
       IREE_TEST_FILES: ~/iree_tests_cache
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }}
-      MODELS_EXTRA_FLAGS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-extra-flags-config-file }}
-      SDXL_UNET_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-unet-config-file }}
-      SDXL_CLIP_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-clip-config-file }}
-      SDXL_VAE_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-vae-config-file }}
       VENV_DIR: ${{ github.workspace }}/venv
       LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
     steps:
@@ -207,33 +193,25 @@ jobs:
             --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
             --fetch-gh-workflow=${{ inputs.artifact_run_id }}
 
-      # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
-      # # In-tree tests
-      # - name: Run experimental/regression_suite tests
-      #   run: |
-      #     source ${VENV_DIR}/bin/activate
-      #     pytest \
-      #       -rA -s -m "plat_host_cpu and presubmit" \
-      #       experimental/regression_suite
-
       # Out of tree tests
       - name: Check out external TestSuite repository
         uses: actions/[email protected]
         with:
           repository: nod-ai/SHARK-TestSuite
-          ref: 3603a453b3777fac9af4506a3dc0b3d87587fd47
+          ref: a06e730ce325c12db40bb89b43e8e6e897052e96
           path: SHARK-TestSuite
           submodules: false
           lfs: true
       - name: Install external TestSuite Python requirements
         run: |
           source ${VENV_DIR}/bin/activate
-          python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
+          python3 -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
+          pip install --no-compile --pre --upgrade -e SHARK-TestSuite/common_tools
       - name: Download remote files for real weight model tests
         run: |
           source ${VENV_DIR}/bin/activate
-          python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir pytorch/models
-          python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir sharktank
+          python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/pytorch/models
+          python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/sharktank
 
       - name: Run external tests - models with real weights
         if: "matrix.models-config-file != '' && !cancelled()"
@@ -251,61 +229,99 @@ jobs:
             --durations=0 \
             --config-files=${MODELS_CONFIG_FILE_PATH}
 
-      - name: Run external tests - models with real weights and additional flags
-        if: "matrix.models-extra-flags-config-file != '' && !cancelled()"
-        run: |
-          source ${VENV_DIR}/bin/activate
-          pytest SHARK-TestSuite/iree_tests/pytorch/models \
-            -rpfE \
-            -k real_weights \
-            --no-skip-tests-missing-files \
-            --capture=no \
-            --log-cli-level=info \
-            --timeout=1200 \
-            --durations=0 \
-            --config-files=${MODELS_EXTRA_FLAGS_CONFIG_FILE_PATH}
+  test_regression_suite:
+    name: "test_regression_suite :: ${{ matrix.name }}"
+    runs-on: ${{ matrix.runs-on }}
+    strategy:
+      fail-fast: false
 
-      - name: "Run external tests - SDXL scheduled unet"
-        if: "matrix.sdxl-unet-config-file != '' && !cancelled()"
+      # Note: these jobs should use persistent runners with local caches.
+      # Downloading test files (50GB+) without a cache can take 20+ minutes.
+      matrix:
+        include:
+          # CPU
+          - name: cpu_llvm_task
+            models-config-file: models_cpu_llvm_task.json
+            backend: cpu
+            runs-on: nodai-amdgpu-w7900-x86-64
+
+          # AMD GPU
+          - name: amdgpu_rocm_mi250_gfx90a
+            rocm-chip: gfx90a
+            backend: rocm
+            runs-on: nodai-amdgpu-mi250-x86-64
+          - name: amdgpu_rocm_mi300_gfx942
+            rocm-chip: gfx942
+            backend: rocm
+            runs-on: nodai-amdgpu-mi300-x86-64
+    env:
+      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
+      IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts
+      IREE_TEST_FILES: ~/iree_tests_cache
+      IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
+      VENV_DIR: ${{ github.workspace }}/venv
+      LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
+    steps:
+      # TODO(saienduri): Find alternative to this temporary step that manipulates permission of github actions
+      # directory to be able to clean after every PR
+      - name: Pre Checkout MI300 Step
+        if: contains(matrix.name, 'gfx942')
         run: |
-          source ${VENV_DIR}/bin/activate
-          pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-scheduled-unet-3-tank \
-            -rpfE \
-            -k real_weights \
-            --no-skip-tests-missing-files \
-            --capture=no \
-            --log-cli-level=info \
-            --timeout=1200 \
-            --durations=0 \
-            --config-files=${SDXL_UNET_CONFIG_FILE_PATH}
+          sudo chmod -R 777 ~/actions-runner/_work
+      - name: Checking out IREE repository
+        uses: actions/[email protected]
+        with:
+          submodules: false
+      - uses: actions/[email protected]
+        with:
+          # Must match the subset of versions built in pkgci_build_packages.
+          python-version: "3.11"
+      - uses: actions/[email protected]
+        with:
+          name: linux_x86_64_release_packages
+          path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
+      - name: Setup venv
+        run: |
+          ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
+            --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
+            --fetch-gh-workflow=${{ inputs.artifact_run_id }}
+
+      # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
+      # # In-tree tests
+      # - name: Run experimental/regression_suite tests
+      #   run: |
+      #     source ${VENV_DIR}/bin/activate
+      #     pytest \
+      #       -rA -s -m "plat_host_cpu and presubmit" \
+      #       experimental/regression_suite
 
-      - name: "Run external tests - SDXL prompt encoder"
-        if: "matrix.sdxl-clip-config-file != '' && !cancelled()"
+      - name: "Running SDXL special model tests"
+        if: "!cancelled()"
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-prompt-encoder-tank \
+          pytest ./experimental/regression_suite/shark-test-suite-models/sdxl \
+            -k ${{ matrix.backend }} \
             -rpfE \
-            -k real_weights \
-            --no-skip-tests-missing-files \
             --capture=no \
             --log-cli-level=info \
             --timeout=1200 \
-            --durations=0 \
-            --config-files=${SDXL_CLIP_CONFIG_FILE_PATH}
+            --durations=0
+        env:
+          ROCM_CHIP: ${{ matrix.rocm-chip }}
 
-      - name: "Run external tests - SDXL vae decode"
-        if: "matrix.sdxl-vae-config-file != '' && !cancelled()"
+      - name: "Running SD3 special model tests"
+        if: "!cancelled()"
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-vae-decode-tank \
+          pytest ./experimental/regression_suite/shark-test-suite-models/sd3 \
+            -k ${{ matrix.backend }} \
             -rpfE \
-            -k real_weights \
-            --no-skip-tests-missing-files \
             --capture=no \
             --log-cli-level=info \
             --timeout=1200 \
-            --durations=0 \
-            --config-files=${SDXL_VAE_CONFIG_FILE_PATH}
+            --durations=0
+        env:
+          ROCM_CHIP: ${{ matrix.rocm-chip }}
 
       # Note: mi250 benchmark times are more lenient than mi300 (allowing about
       # 10% deviation from observed averages), since the mi250 runners we use
@@ -314,7 +330,7 @@ jobs:
         if: contains(matrix.name, 'rocm_mi250_gfx90a')
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
+          pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-rocm-e2e-ms 1450.0 \
             --goldentime-rocm-unet-ms 370.0 \
             --goldentime-rocm-clip-ms 18.5 \
@@ -336,7 +352,7 @@ jobs:
         if: contains(matrix.name, 'rocm_mi300_gfx942')
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
+          pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-rocm-e2e-ms 325.0 \
             --goldentime-rocm-unet-ms 77.0 \
             --goldentime-rocm-clip-ms 15.5 \

diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a_additional_flags.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a_additional_flags.json
diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942_additional_flags.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942_additional_flags.json
diff --git a/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_cpu_llvm_task.json b/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_cpu_llvm_task.json
diff --git a/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx90a.json b/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx90a.json
diff --git a/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx942.json