Merge branch 'master' into feature/comet-logger-update

# Conflicts: # src/lightning/pytorch/loggers/comet.py
comet-ml · Nov 25, 2024 · 7358aef · 7358aef
2 parents f11232b + 75d7357
commit 7358aef
Show file tree

Hide file tree

Showing 246 changed files with 1,589 additions and 1,461 deletions.
diff --git a/.actions/assistant.py b/.actions/assistant.py
@@ -18,10 +18,11 @@
 import shutil
 import tempfile
 import urllib.request
+from collections.abc import Iterable, Iterator, Sequence
 from itertools import chain
 from os.path import dirname, isfile
 from pathlib import Path
-from typing import Any, Dict, Iterable, Iterator, List, Optional, Sequence, Tuple
+from typing import Any, Optional
 
 from packaging.requirements import Requirement
 from packaging.version import Version
@@ -127,7 +128,7 @@ def _parse_requirements(lines: Iterable[str]) -> Iterator[_RequirementWithCommen
         pip_argument = None
 
 
-def load_requirements(path_dir: str, file_name: str = "base.txt", unfreeze: str = "all") -> List[str]:
+def load_requirements(path_dir: str, file_name: str = "base.txt", unfreeze: str = "all") -> list[str]:
     """Loading requirements from a file.
 
     >>> path_req = os.path.join(_PROJECT_ROOT, "requirements")
@@ -222,7 +223,7 @@ def _load_aggregate_requirements(req_dir: str = "requirements", freeze_requireme
         fp.writelines([ln + os.linesep for ln in requires] + [os.linesep])
 
 
-def _retrieve_files(directory: str, *ext: str) -> List[str]:
+def _retrieve_files(directory: str, *ext: str) -> list[str]:
     all_files = []
     for root, _, files in os.walk(directory):
         for fname in files:
@@ -232,7 +233,7 @@ def _retrieve_files(directory: str, *ext: str) -> List[str]:
     return all_files
 
 
-def _replace_imports(lines: List[str], mapping: List[Tuple[str, str]], lightning_by: str = "") -> List[str]:
+def _replace_imports(lines: list[str], mapping: list[tuple[str, str]], lightning_by: str = "") -> list[str]:
     """Replace imports of standalone package to lightning.
 
     >>> lns = [
@@ -320,7 +321,7 @@ def copy_replace_imports(
             fo.writelines(lines)
 
 
-def create_mirror_package(source_dir: str, package_mapping: Dict[str, str]) -> None:
+def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> None:
     """Create a mirror package with adjusted imports."""
     # replace imports and copy the code
     mapping = package_mapping.copy()

diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml
@@ -134,13 +134,13 @@ jobs:
         condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'fabric'))
         displayName: "Adjust tests & examples"
 
-      - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest . -v --durations=50
-        workingDirectory: tests/tests_fabric/
+      - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_fabric/ -v --durations=50
+        workingDirectory: tests/
         displayName: "Testing: fabric standard"
         timeoutInMinutes: "10"
 
-      - bash: bash ../run_standalone_tests.sh "."
-        workingDirectory: tests/tests_fabric/
+      - bash: bash ./run_standalone_tests.sh "tests_fabric"
+        workingDirectory: tests/
         env:
           PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
         displayName: "Testing: fabric standalone"
@@ -157,7 +157,7 @@ jobs:
           ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
             --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
           ls -l
-        workingDirectory: tests/tests_fabric/
+        workingDirectory: tests/
         displayName: "Statistics"
 
       - script: |

diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml
@@ -155,13 +155,13 @@ jobs:
           ls -l checkpoints/
         displayName: "Get legacy checkpoints"
 
-      - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest -v --durations=50
-        workingDirectory: tests/tests_pytorch
+      - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_pytorch/ -v --durations=50
+        workingDirectory: tests/
         displayName: "Testing: PyTorch standard"
         timeoutInMinutes: "35"
 
-      - bash: bash ../run_standalone_tests.sh "."
-        workingDirectory: tests/tests_pytorch
+      - bash: bash ./run_standalone_tests.sh "tests_pytorch"
+        workingDirectory: tests/
         env:
           PL_USE_MOCKED_MNIST: "1"
           PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -5,39 +5,16 @@
 # the repo. Unless a later match takes precedence,
 # @global-owner1 and @global-owner2 will be requested for
 # review when someone opens a pull request.
-* @lantiga @borda @tchaton @awaelchli @justusschock
-
-# CI/CD and configs
-/.actions/                  @borda @ethanwharris @justusschock
-/.github/                   @borda @ethanwharris @justusschock
-/.azure/                    @borda @ethanwharris @justusschock
-/dockers/                   @borda @ethanwharris @justusschock
-*.yml                       @borda @ethanwharris @justusschock
+* @lantiga @borda @tchaton @justusschock @ethanwharris
 
 # Docs
-/docs/                                      @lantiga @borda @awaelchli
-/docs/*/conf.py                             @borda @awaelchli
 /.github/*.md                               @williamfalcon @lantiga @borda
-/.github/ISSUE_TEMPLATE/                    @borda @tchaton @awaelchli
-/docs/source-fabric/conf.py                 @borda @awaelchli
-/docs/source-fabric/index.rst               @awaelchli @lantiga
-/docs/source-pytorch/conf.py                @borda @awaelchli
+/docs/source-fabric/index.rst               @williamfalcon @lantiga
 /docs/source-pytorch/index.rst              @williamfalcon @lantiga
 /docs/source-pytorch/levels                 @williamfalcon @lantiga
 
-# PyTorch Lightning
-/src/lightning/pytorch                      @lantiga @borda @tchaton @awaelchli @justusschock
-
-# Lightning Data
-/src/lightning/data/                        @tchaton @lantiga
-
-# Lightning Fabric
-/src/lightning/fabric                       @lantiga @borda @tchaton @awaelchli @justusschock
-
 /.github/CODEOWNERS                  @williamfalcon
 /SECURITY.md                         @williamfalcon @lantiga
 /README.md                           @williamfalcon @lantiga
-/setup.py                            @williamfalcon @borda
-/src/pytorch_lightning/__about__.py  @williamfalcon @borda
-/src/lightning_fabric/__about__.py   @williamfalcon @borda @awaelchli
-/src/*/__setup__.py                  @borda @justusschock
+/src/pytorch_lightning/__about__.py  @williamfalcon @lantiga @borda
+/src/lightning_fabric/__about__.py   @williamfalcon @lantiga @borda
diff --git a/.github/workflows/_legacy-checkpoints.yml b/.github/workflows/_legacy-checkpoints.yml
@@ -60,7 +60,7 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           # Python version here needs to be supported by all PL versions listed in back-compatible-versions.txt.
-          python-version: 3.8
+          python-version: "3.9"
 
       - name: Install PL from source
         env:

diff --git a/.github/workflows/call-clear-cache.yml b/.github/workflows/call-clear-cache.yml
@@ -23,7 +23,7 @@ on:
 jobs:
   cron-clear:
     if: github.event_name == 'schedule' || github.event_name == 'pull_request'
-    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.9
     with:
       scripts-ref: v0.11.8
       dry-run: ${{ github.event_name == 'pull_request' }}
@@ -32,7 +32,7 @@ jobs:
 
   direct-clear:
     if: github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.9
     with:
       scripts-ref: v0.11.8
       dry-run: ${{ github.event_name == 'pull_request' }}

diff --git a/.github/workflows/ci-check-md-links.yml b/.github/workflows/ci-check-md-links.yml
@@ -14,7 +14,7 @@ on:
 
 jobs:
   check-md-links:
-    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.9
     with:
       config-file: ".github/markdown-links-config.json"
       base-branch: "master"
diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml
@@ -8,7 +8,7 @@ on:
 
 jobs:
   check:
-    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.9
     with:
       # skip azure due to the wrong schema file by MSFT
       # https://github.com/Lightning-AI/lightning-flash/pull/1455#issuecomment-1244793607

diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml
@@ -177,7 +177,7 @@ jobs:
           coverage xml
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         # see: https://github.com/actions/toolkit/issues/399
         continue-on-error: true
         with:

diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml
@@ -214,7 +214,7 @@ jobs:
           coverage xml
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         # see: https://github.com/actions/toolkit/issues/399
         continue-on-error: true
         with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -74,7 +74,7 @@ repos:
     hooks:
       # try to fix what is possible
       - id: ruff
-        args: ["--fix"]
+        args: ["--fix", "--unsafe-fixes"]
       # perform formatting updates
       - id: ruff-format
       # validate if all is fine with preview mode

diff --git a/README.md b/README.md
@@ -585,7 +585,6 @@ Lightning is rigorously tested across multiple CPUs, GPUs and TPUs and against m
 |       System / PyTorch ver.        | 1.13                                                                                                                                                                                                                            | 2.0                                                                                                                                                                                                                             |                                                                                                               2.1                                                                                                               |
 | :--------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
 |        Linux py3.9 \[GPUs\]        |  |  | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status%2Fpytorch-lightning%20%28GPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) |
-|        Linux py3.9 \[TPUs\]        |                                                                                                                                                                                                                                 |  [![Test PyTorch - TPU](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml)     |      |
 |  Linux (multiple Python versions)  | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                                 | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                                 |                 [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                 |
 |   OSX (multiple Python versions)   | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                                 | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                                 |                 [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                 |
 | Windows (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                                 | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                                 |                 [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml)                 |

diff --git a/docs/source-pytorch/accelerators/gpu_intermediate.rst b/docs/source-pytorch/accelerators/gpu_intermediate.rst
@@ -26,7 +26,7 @@ Lightning supports multiple ways of doing distributed training.
     If you request multiple GPUs or nodes without setting a strategy, DDP will be automatically used.
 
 For a deeper understanding of what Lightning is doing, feel free to read this
-`guide <https://medium.com/@_willfalcon/9-tips-for-training-lightning-fast-neural-networks-in-pytorch-8e63a502f565>`_.
+`guide <https://towardsdatascience.com/9-tips-for-training-lightning-fast-neural-networks-in-pytorch-8e63a502f565>`_.
 
 
 ----

diff --git a/docs/source-pytorch/accelerators/tpu_faq.rst b/docs/source-pytorch/accelerators/tpu_faq.rst
@@ -40,9 +40,9 @@ Unsupported datatype transfer to TPUs?
 
 .. code-block::
 
-    File "/usr/local/lib/python3.8/dist-packages/torch_xla/utils/utils.py", line 205, in _for_each_instance_rewrite
+    File "/usr/local/lib/python3.9/dist-packages/torch_xla/utils/utils.py", line 205, in _for_each_instance_rewrite
         v = _for_each_instance_rewrite(result.__dict__[k], select_fn, fn, rwmap)
-    File "/usr/local/lib/python3.8/dist-packages/torch_xla/utils/utils.py", line 206, in _for_each_instance_rewrite
+    File "/usr/local/lib/python3.9/dist-packages/torch_xla/utils/utils.py", line 206, in _for_each_instance_rewrite
         result.__dict__[k] = v
     TypeError: 'mappingproxy' object does not support item assignment
 

diff --git a/docs/source-pytorch/advanced/post_training_quantization.rst b/docs/source-pytorch/advanced/post_training_quantization.rst
@@ -33,7 +33,7 @@ Installation
 Prerequisites
 =============
 
-Python version: 3.8, 3.9, 3.10
+Python version: 3.9, 3.10
 
 Install Intel® Neural Compressor
 ================================

diff --git a/examples/fabric/build_your_own_trainer/trainer.py b/examples/fabric/build_your_own_trainer/trainer.py
@@ -1,7 +1,7 @@
 import os
-from collections.abc import Mapping
+from collections.abc import Iterable, Mapping
 from functools import partial
-from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, cast
+from typing import Any, Literal, Optional, Union, cast
 
 import lightning as L
 import torch
@@ -19,11 +19,11 @@ def __init__(
         self,
         accelerator: Union[str, Accelerator] = "auto",
         strategy: Union[str, Strategy] = "auto",
-        devices: Union[List[int], str, int] = "auto",
+        devices: Union[list[int], str, int] = "auto",
         precision: Union[str, int] = "32-true",
         plugins: Optional[Union[str, Any]] = None,
-        callbacks: Optional[Union[List[Any], Any]] = None,
-        loggers: Optional[Union[Logger, List[Logger]]] = None,
+        callbacks: Optional[Union[list[Any], Any]] = None,
+        loggers: Optional[Union[Logger, list[Logger]]] = None,
         max_epochs: Optional[int] = 1000,
         max_steps: Optional[int] = None,
         grad_accum_steps: int = 1,
@@ -465,7 +465,7 @@ def get_latest_checkpoint(checkpoint_dir: str) -> Optional[str]:
 
     def _parse_optimizers_schedulers(
         self, configure_optim_output
-    ) -> Tuple[
+    ) -> tuple[
         Optional[L.fabric.utilities.types.Optimizable],
         Optional[Mapping[str, Union[L.fabric.utilities.types.LRScheduler, bool, str, int]]],
     ]:

diff --git a/examples/fabric/reinforcement_learning/rl/agent.py b/examples/fabric/reinforcement_learning/rl/agent.py
@@ -1,5 +1,4 @@
 import math
-from typing import Dict, Tuple
 
 import gymnasium as gym
 import torch
@@ -43,7 +42,7 @@ def __init__(self, envs: gym.vector.SyncVectorEnv, act_fun: str = "relu", ortho_
             layer_init(torch.nn.Linear(64, envs.single_action_space.n), std=0.01, ortho_init=ortho_init),
         )
 
-    def get_action(self, x: Tensor, action: Tensor = None) -> Tuple[Tensor, Tensor, Tensor]:
+    def get_action(self, x: Tensor, action: Tensor = None) -> tuple[Tensor, Tensor, Tensor]:
         logits = self.actor(x)
         distribution = Categorical(logits=logits)
         if action is None:
@@ -58,12 +57,12 @@ def get_greedy_action(self, x: Tensor) -> Tensor:
     def get_value(self, x: Tensor) -> Tensor:
         return self.critic(x)
 
-    def get_action_and_value(self, x: Tensor, action: Tensor = None) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+    def get_action_and_value(self, x: Tensor, action: Tensor = None) -> tuple[Tensor, Tensor, Tensor, Tensor]:
         action, log_prob, entropy = self.get_action(x, action)
         value = self.get_value(x)
         return action, log_prob, entropy, value
 
-    def forward(self, x: Tensor, action: Tensor = None) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+    def forward(self, x: Tensor, action: Tensor = None) -> tuple[Tensor, Tensor, Tensor, Tensor]:
         return self.get_action_and_value(x, action)
 
     @torch.no_grad()
@@ -77,7 +76,7 @@ def estimate_returns_and_advantages(
         num_steps: int,
         gamma: float,
         gae_lambda: float,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> tuple[Tensor, Tensor]:
         next_value = self.get_value(next_obs).reshape(1, -1)
         advantages = torch.zeros_like(rewards)
         lastgaelam = 0
@@ -143,7 +142,7 @@ def __init__(
         self.avg_value_loss = MeanMetric(**torchmetrics_kwargs)
         self.avg_ent_loss = MeanMetric(**torchmetrics_kwargs)
 
-    def get_action(self, x: Tensor, action: Tensor = None) -> Tuple[Tensor, Tensor, Tensor]:
+    def get_action(self, x: Tensor, action: Tensor = None) -> tuple[Tensor, Tensor, Tensor]:
         logits = self.actor(x)
         distribution = Categorical(logits=logits)
         if action is None:
@@ -158,12 +157,12 @@ def get_greedy_action(self, x: Tensor) -> Tensor:
     def get_value(self, x: Tensor) -> Tensor:
         return self.critic(x)
 
-    def get_action_and_value(self, x: Tensor, action: Tensor = None) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+    def get_action_and_value(self, x: Tensor, action: Tensor = None) -> tuple[Tensor, Tensor, Tensor, Tensor]:
         action, log_prob, entropy = self.get_action(x, action)
         value = self.get_value(x)
         return action, log_prob, entropy, value
 
-    def forward(self, x: Tensor, action: Tensor = None) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+    def forward(self, x: Tensor, action: Tensor = None) -> tuple[Tensor, Tensor, Tensor, Tensor]:
         return self.get_action_and_value(x, action)
 
     @torch.no_grad()
@@ -177,7 +176,7 @@ def estimate_returns_and_advantages(
         num_steps: int,
         gamma: float,
         gae_lambda: float,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> tuple[Tensor, Tensor]:
         next_value = self.get_value(next_obs).reshape(1, -1)
         advantages = torch.zeros_like(rewards)
         lastgaelam = 0
@@ -193,7 +192,7 @@ def estimate_returns_and_advantages(
         returns = advantages + values
         return returns, advantages
 
-    def training_step(self, batch: Dict[str, Tensor]):
+    def training_step(self, batch: dict[str, Tensor]):
         # Get actions and values given the current observations
         _, newlogprob, entropy, newvalue = self(batch["obs"], batch["actions"].long())
         logratio = newlogprob - batch["logprobs"]

diff --git a/examples/fabric/reinforcement_learning/train_fabric.py b/examples/fabric/reinforcement_learning/train_fabric.py
@@ -21,7 +21,6 @@
 import os
 import time
 from datetime import datetime
-from typing import Dict
 
 import gymnasium as gym
 import torch
@@ -38,7 +37,7 @@ def train(
     fabric: Fabric,
     agent: PPOLightningAgent,
     optimizer: torch.optim.Optimizer,
-    data: Dict[str, Tensor],
+    data: dict[str, Tensor],
     global_step: int,
     args: argparse.Namespace,
 ):