tests: split DDP & common + running xdist (Lightning-AI#2127)
Borda authored Jan 11, 2024
1 parent 41e942c commit fd2e332
Showing 115 changed files with 250 additions and 254 deletions.
22 changes: 19 additions & 3 deletions .azure/gpu-unittests.yml
@@ -109,7 +109,10 @@ jobs:
ls -lh $(TRANSFORMERS_CACHE)
displayName: "Show caches"
- bash: python -m pytest torchmetrics --timeout=240 --durations=50
- bash: |
python -m pytest torchmetrics --cov=torchmetrics \
--timeout=240 --durations=50 \
--numprocesses=5 --dist=loadfile
env:
DOCTEST_DOWNLOAD_TIMEOUT: "240"
SKIP_SLOW_DOCTEST: "1"
@@ -123,11 +126,24 @@ jobs:
workingDirectory: tests
displayName: "Pull testing data from S3"
- bash: python -m pytest unittests -v --cov=torchmetrics --timeout=240 --durations=500
- bash: |
python -m pytest unittests -v \
-m "not DDP" --numprocesses=5 --dist=loadfile \
--cov=torchmetrics --timeout=240 --durations=500
env:
CUDA_LAUNCH_BLOCKING: "1"
workingDirectory: tests
displayName: "UnitTesting common"
- bash: |
python -m pytest unittests -v \
-m "DDP" \
--cov=torchmetrics --timeout=240 --durations=500
env:
USE_PYTEST_POOL: "1"
CUDA_LAUNCH_BLOCKING: "1"
workingDirectory: tests
displayName: "UnitTesting"
displayName: "UnitTesting DDP"
- bash: |
python -m coverage report
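The GPU job is split in two: common unit tests now run under pytest-xdist (5 workers, --dist=loadfile), while DDP-marked tests keep a single pytest process with USE_PYTEST_POOL=1. A minimal sketch of what loadfile distribution guarantees (hypothetical test module, not part of this commit): all tests from one file go to the same xdist worker, so a module-scoped fixture is built once per file.

import pytest


@pytest.fixture(scope="module")
def reference_data():
    # expensive setup shared by every test in this file
    return list(range(1000))


def test_sum(reference_data):
    assert sum(reference_data) == 499500


def test_len(reference_data):
    assert len(reference_data) == 1000

# Run the way the common job does; both tests land on the same worker:
#   python -m pytest -v -m "not DDP" --numprocesses=5 --dist=loadfile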
84 changes: 0 additions & 84 deletions .github/actions/unittesting/action.yml

This file was deleted.

73 changes: 61 additions & 12 deletions .github/workflows/ci-tests.yml
@@ -7,6 +7,7 @@ on: # Trigger the workflow on push or pull request, but only for the master branch
pull_request:
branches: [master, "release/*"]
types: [opened, reopened, ready_for_review, synchronize]
workflow_dispatch: {}
schedule:
# At the end of every day
- cron: "0 0 * * *"
@@ -52,6 +53,7 @@ jobs:
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
PYPI_CACHE: "_ci-cache_PyPI"
TOKENIZERS_PARALLELISM: false
TEST_DIRS: ${{ needs.check-diff.outputs.test-dirs }}

# Timeout: https://stackoverflow.com/a/59076067/4521646
# seems that MacOS jobs take much longer than other OS
@@ -68,12 +70,12 @@
- name: Setup macOS
if: ${{ runner.os == 'macOS' }}
run: |
echo 'UNITTEST_TIMEOUT=--timeout=120' >> $GITHUB_ENV
echo 'UNITTEST_TIMEOUT=--timeout=75' >> $GITHUB_ENV
brew install gcc libomp ffmpeg # https://github.com/pytorch/pytorch/issues/20030
- name: Setup Linux
if: ${{ runner.os == 'Linux' }}
run: |
echo 'UNITTEST_TIMEOUT=--timeout=120' >> $GITHUB_ENV
echo 'UNITTEST_TIMEOUT=--timeout=75' >> $GITHUB_ENV
sudo apt update --fix-missing
sudo apt install -y ffmpeg dvipng texlive-latex-extra texlive-fonts-recommended cm-super
- name: Setup Windows
@@ -120,17 +122,64 @@ jobs:
python -c "from torch import __version__ as ver; ver = ver.split('+')[0] ; assert ver == '${{ matrix.pytorch-version }}', ver"
python -c 'import torch ; print("TORCH=" + str(torch.__version__))' >> $GITHUB_OUTPUT
- name: Unittests
uses: ./.github/actions/unittesting
if: ${{ needs.check-diff.outputs.test-dirs != '' }}
timeout-minutes: 90
- name: Pull testing data from S3
working-directory: ./tests
env:
S3_DATA: "https://pl-public-data.s3.amazonaws.com/metrics/data.zip"
run: |
pip install -q "urllib3>1.0"
# wget is simpler but does not work on Windows
python -c "from urllib.request import urlretrieve ; urlretrieve('$S3_DATA', 'data.zip')"
unzip -o data.zip
ls -l _data/*
- name: Export README tests
run: python -m phmdoctest README.md --outfile tests/unittests/test_readme.py

- name: Unittests common
working-directory: ./tests
run: |
python -m pytest -v \
$TEST_DIRS \
--cov=torchmetrics \
--durations=50 \
--reruns 3 \
--reruns-delay 1 \
-m "not DDP" \
-n auto \
--dist=loadfile \
${{ env.UNITTEST_TIMEOUT }}
- name: Unittests DDP
working-directory: ./tests
env:
USE_PYTEST_POOL: "1"
run: |
python -m pytest -v \
$TEST_DIRS \
--cov=torchmetrics \
--durations=50 \
-m DDP \
--reruns 3 \
--reruns-delay 1 \
${{ env.UNITTEST_TIMEOUT }}
- name: Statistics
if: success()
working-directory: ./tests
run: |
coverage xml
coverage report
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
requires: ${{ matrix.requires }}
codecov-token: ${{ secrets.CODECOV_TOKEN }}
python-version: ${{ matrix.python-version }}
pytorch-version: ${{ steps.info.outputs.TORCH }}
dirs: ${{ needs.check-diff.outputs.test-dirs }}
test-timeout: ${{ env.UNITTEST_TIMEOUT }}
token: ${{ secrets.CODECOV_TOKEN }}
file: tests/coverage.xml
flags: cpu,${{ runner.os }},python${{ matrix.python-version }},torch${{ steps.info.outputs.TORCH }}
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false

- name: update caching
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
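The steps of the deleted composite action are inlined above. The testing data is fetched with urlretrieve because wget is not available on Windows runners; a rough pure-Python equivalent of the download-and-extract step, using zipfile instead of the unzip binary (a sketch, not the workflow's exact commands):

# Sketch: cross-platform pull of the S3 test data (the workflow pairs urlretrieve
# with the `unzip` binary; zipfile keeps this fully in Python).
from urllib.request import urlretrieve
from zipfile import ZipFile

S3_DATA = "https://pl-public-data.s3.amazonaws.com/metrics/data.zip"

urlretrieve(S3_DATA, "data.zip")  # wget is simpler but does not work on Windows
with ZipFile("data.zip") as archive:
    archive.extractall(".")  # the tests expect the unpacked _data/ directory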
2 changes: 1 addition & 1 deletion .github/workflows/focus-diff.yml
@@ -29,6 +29,6 @@ jobs:
echo $PR_NUMBER
pip install fire requests
# python .github/assistant.py changed-domains $PR_NUMBER
echo "::set-output name=focus::$(python .github/assistant.py changed-domains $PR_NUMBER 2>&1)"
echo "focus=$(python .github/assistant.py changed-domains $PR_NUMBER)" >> $GITHUB_OUTPUT
- run: echo "${{ steps.diff-domains.outputs.focus }}"
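The deprecated ::set-output command is replaced by appending to the file that $GITHUB_OUTPUT points at. The same output could also be written straight from Python; a hedged sketch (the helper below is hypothetical, not part of assistant.py):

import os


def set_step_output(name: str, value: str) -> None:
    """Append a name=value pair to the file GitHub Actions reads step outputs from."""
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
        fh.write(f"{name}={value}\n")


# e.g. set_step_output("focus", "unittests/audio unittests/bases")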
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -19,6 +19,9 @@ addopts = [
"--color=yes",
"--disable-pytest-warnings",
]
markers = [
"DDP: mark a test as Distributed Data Parallel",
]
#filterwarnings = ["error::FutureWarning"] # ToDo
xfail_strict = true
junit_duration_report = "call"
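Registering the marker here stops pytest from warning about an unknown mark and lets -m select on it, which is how the CI jobs above split the suite. A small sketch of the selection behaviour (hypothetical test file):

import pytest


@pytest.mark.DDP()
def test_needs_process_group():
    """Collected only by `pytest -m DDP` (the dedicated DDP jobs)."""


def test_plain():
    """Collected by `pytest -m "not DDP"` (the parallel common jobs) and by a plain run."""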
1 change: 1 addition & 0 deletions requirements/_tests.txt
@@ -7,6 +7,7 @@ pytest-cov ==4.1.0
pytest-doctestplus ==1.1.0
pytest-rerunfailures ==13.0
pytest-timeout ==2.2.0
pytest-xdist ==3.3.1
phmdoctest ==1.4.0

psutil <5.10.0
2 changes: 1 addition & 1 deletion src/torchmetrics/__init__.py
@@ -1,4 +1,4 @@
"""Root package info."""
r"""Root package info."""
import logging as __logging
import os

2 changes: 1 addition & 1 deletion tests/unittests/audio/test_pesq.py
@@ -79,7 +79,7 @@ class TestPESQ(MetricTester):
atol = 1e-2

@pytest.mark.parametrize("num_processes", [1, 2])
@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_pesq(self, preds, target, ref_metric, fs, mode, num_processes, ddp):
"""Test class implementation of metric."""
if num_processes != 1 and ddp:
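Across the audio tests only the ddp=True case gets the marker, so the ddp=False run stays in the fast parallel job. pytest.param(...) is what lets a single parametrize value carry a mark; in isolation the pattern looks like this (minimal sketch, not repository code):

import pytest


@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_metric_class(ddp):
    if ddp:
        ...  # the real tests spawn worker processes / touch torch.distributed here
    assert isinstance(ddp, bool)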
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_pit.py
@@ -155,7 +155,7 @@ class TestPIT(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_pit(self, preds, target, ref_metric, metric_func, mode, eval_func, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_sa_sdr.py
@@ -75,7 +75,7 @@ class TestSASDR(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_si_sdr(self, preds, target, scale_invariant, zero_mean, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_sdr.py
@@ -79,7 +79,7 @@ class TestSDR(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_sdr(self, preds, target, ref_metric, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_si_sdr.py
@@ -77,7 +77,7 @@ class TestSISDR(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_si_sdr(self, preds, target, ref_metric, zero_mean, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_si_snr.py
@@ -72,7 +72,7 @@ class TestSISNR(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_si_snr(self, preds, target, ref_metric, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_snr.py
@@ -74,7 +74,7 @@ class TestSNR(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_snr(self, preds, target, ref_metric, zero_mean, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_srmr.py
@@ -81,7 +81,7 @@ class TestSRMR(MetricTester):

atol = 5e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_srmr(self, preds, fs, fast, norm, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/audio/test_stoi.py
@@ -77,7 +77,7 @@ class TestSTOI(MetricTester):

atol = 1e-2

@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_stoi(self, preds, target, ref_metric, fs, extended, ddp):
"""Test class implementation of metric."""
self.run_class_metric_test(
2 changes: 1 addition & 1 deletion tests/unittests/bases/test_aggregation.py
@@ -82,7 +82,7 @@ def update(self, values, weights):
class TestAggregation(MetricTester):
"""Test aggregation metrics."""

@pytest.mark.parametrize("ddp", [False, True])
@pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False])
def test_aggreagation(self, ddp, metric_class, compare_fn, values, weights):
"""Test modular implementation."""
self.run_class_metric_test(
7 changes: 6 additions & 1 deletion tests/unittests/bases/test_ddp.py
@@ -85,6 +85,7 @@ def _test_ddp_compositional_tensor(rank: int, worldsize: int = NUM_PROCESSES) ->
assert val == 2 * worldsize


@pytest.mark.DDP()
@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")
@pytest.mark.parametrize(
"process",
@@ -121,9 +122,10 @@ def compute(self):
metric.update(torch.randn(10, 5)[:, 0])


@pytest.mark.DDP()
@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")
def test_non_contiguous_tensors():
"""Test that gather_all operation works for non contiguous tensors."""
"""Test that gather_all operation works for non-contiguous tensors."""
pytest.pool.map(_test_non_contiguous_tensors, range(NUM_PROCESSES))


@@ -227,6 +229,7 @@ def reload_state_dict(state_dict, expected_x, expected_c):
torch.save(metric.state_dict(), filepath)


@pytest.mark.DDP()
@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")
def test_state_dict_is_synced(tmpdir):
"""Tests that metrics are synced while creating the state dict but restored after to continue accumulation."""
@@ -254,6 +257,7 @@ def _test_sync_on_compute_list_state(rank, sync_on_compute):
assert val == [tensor(rank + 1)]


@pytest.mark.DDP()
@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")
@pytest.mark.parametrize("sync_on_compute", [True, False])
@pytest.mark.parametrize("test_func", [_test_sync_on_compute_list_state, _test_sync_on_compute_tensor_state])
@@ -268,6 +272,7 @@ def _test_sync_with_empty_lists(rank):
assert val == []


@pytest.mark.DDP()
@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")
def test_sync_with_empty_lists():
"""Test that synchronization of states can be enabled and disabled for compute."""
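The module-level DDP tests dispatch their worker functions through pytest.pool.map(...), a process pool the suite attaches to the pytest module when USE_PYTEST_POOL=1 (set only in the DDP jobs above). A rough sketch of such a session hook, assuming the pool is created per session (the actual conftest is not shown in this diff):

import os

import pytest
import torch.multiprocessing as mp

NUM_PROCESSES = 2  # assumption for the sketch; the suite defines its own constant


def pytest_sessionstart(session):
    """Build a shared worker pool only for the DDP job and expose it as pytest.pool."""
    if os.getenv("USE_PYTEST_POOL", "0") != "1":
        return
    pytest.pool = mp.Pool(processes=NUM_PROCESSES)


def pytest_sessionfinish(session, exitstatus):
    """Tear the pool down at the end of the session."""
    if os.getenv("USE_PYTEST_POOL", "0") != "1":
        return
    pytest.pool.close()
    pytest.pool.join()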