diff --git a/CHANGELOG.md b/CHANGELOG.md index 84db9b385009e..c04dd2b673481 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added +- Added argument `trainer.predict(ckpt_path)` ([#7430](https://github.com/PyTorchLightning/pytorch-lightning/pull/7430)) + + - Added `clip_grad_by_value` support for TPUs ([#7025](https://github.com/PyTorchLightning/pytorch-lightning/pull/7025)) ### Changed + - Log epoch metrics before the `on_evaluation_end` hook ([#7272](https://github.com/PyTorchLightning/pytorch-lightning/pull/7272)) @@ -25,6 +29,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Refactored Loops * Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/PyTorchLightning/pytorch-lightning/pull/7437)) + * Refactored result handling in training loop ([#7506](https://github.com/PyTorchLightning/pytorch-lightning/pull/7506)) - `DataModule`s now avoid duplicate `{setup,teardown,prepare_data}` calls for the same stage ([#7238](https://github.com/PyTorchLightning/pytorch-lightning/pull/7238)) @@ -35,10 +40,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed the behaviour when logging evaluation step metrics to no longer append `/epoch_*` to the metric name ([#7351](https://github.com/PyTorchLightning/pytorch-lightning/pull/7351)) -- Changed `resolve_training_type_plugins` to allow setting `num_nodes` and `sync_batchnorm` from `Trainer` setting ([7026](https://github.com/PyTorchLightning/pytorch-lightning/pull/7026)) +- Changed `resolve_training_type_plugins` to allow setting `num_nodes` and `sync_batchnorm` from `Trainer` setting ([#7026](https://github.com/PyTorchLightning/pytorch-lightning/pull/7026)) + + +- Default `seed_everything(workers=True)` in the `LightningCLI` ([#7504](https://github.com/PyTorchLightning/pytorch-lightning/pull/7504)) -- Changed `model.state_dict()` in `CheckpointConnector` to allow `training_type_plugin` to customize the model's `state_dict()` ([7474](https://github.com/PyTorchLightning/pytorch-lightning/pull/7474)) +- Changed `model.state_dict()` in `CheckpointConnector` to allow `training_type_plugin` to customize the model's `state_dict()` ([#7474](https://github.com/PyTorchLightning/pytorch-lightning/pull/7474)) + + +- MLflowLogger now uses the env variable `MLFLOW_TRACKING_URI` as default tracking uri ([#7457](https://github.com/PyTorchLightning/pytorch-lightning/pull/7457)) ### Deprecated @@ -52,14 +63,31 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Pruned deprecated classification
metrics from `pytorch_lightning.metrics.functional.classification` ([#7499](https://github.com/PyTorchLightning/pytorch-lightning/pull/7499)) + + +- Removed deprecated data parallel classes `LightningDataParallel` and `LightningDistributedDataParallel` from `pytorch_lightning.overrides.data_parallel` ([#7510](https://github.com/PyTorchLightning/pytorch-lightning/pull/7510)) + + +- Removed deprecated trainer attributes - `get_model` and `accelerator_backend` ([#7502](https://github.com/PyTorchLightning/pytorch-lightning/pull/7502)) + + +- Removed deprecated utils modules `model_utils`, `warning_utils`, `xla_device_utils` and partially `argparse_utils` ([#7503](https://github.com/PyTorchLightning/pytorch-lightning/pull/7503)) + + +- Removed deprecated trainer attributes - `on_cpu`, `on_tpu`, `use_tpu`, `on_gpu`, `use_dp`, `use_ddp`, `use_ddp2`, `use_horovod`, `use_single_gpu` ([#7501](https://github.com/PyTorchLightning/pytorch-lightning/pull/7501)) + ### Fixed + - Fixed parsing of multiple training dataloaders ([#7433](https://github.com/PyTorchLightning/pytorch-lightning/pull/7433)) + - Fixed recursive passing of `wrong_type` keyword argument in `pytorch_lightning.utilities.apply_to_collection` ([#7433](https://github.com/PyTorchLightning/pytorch-lightning/pull/7433)) + ## [1.3.1] - 2021-05-11 ### Fixed diff --git a/README.md b/README.md index f14205fb7e382..8da7836fb689e 100644 --- a/README.md +++ b/README.md @@ -118,22 +118,22 @@ pip install pytorch-lightning conda install pytorch-lightning -c conda-forge ``` - #### Install stable 1.2.x + #### Install stable 1.3.x - the actual status of 1.2 [stable] is following: + the actual status of 1.3 [stable] is the following: - ![CI base testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20base%20testing/badge.svg?branch=release%2F1.2.x&event=push) - ![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.2.x&event=push) - ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=release%2F1.2.x&event=push) - ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=release%2F1.2.x&event=push) - ![Docs check](https://github.com/PyTorchLightning/pytorch-lightning/workflows/Docs%20check/badge.svg?branch=release%2F1.2.x&event=push) + ![CI base testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20base%20testing/badge.svg?branch=release%2F1.3.x&event=push) + ![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.3.x&event=push) + ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=release%2F1.3.x&event=push) + ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=release%2F1.3.x&event=push) + ![Docs check](https://github.com/PyTorchLightning/pytorch-lightning/workflows/Docs%20check/badge.svg?branch=release%2F1.3.x&event=push) Install future release from the source ```bash - pip install git+https://github.com/PytorchLightning/pytorch-lightning.git@release/1.2.x --upgrade + pip install git+https://github.com/PytorchLightning/pytorch-lightning.git@release/1.3.x --upgrade
``` - #### Install bleeding-edge - future 1.3 + #### Install bleeding-edge - future 1.4 Install nightly from the source (no guarantees) ```bash diff --git a/dockers/README.md b/dockers/README.md index 549006ec62c02..581c03c530d26 100644 --- a/dockers/README.md +++ b/dockers/README.md @@ -71,14 +71,14 @@ Inspiration comes from https://u.group/thinking/how-to-put-jupyter-notebooks-in- 1. Build the docker image: ```bash docker image build \ - -t pytorch-lightning:v1.2.9 \ + -t pytorch-lightning:v1.3.1 \ -f dockers/nvidia/Dockerfile \ - --build-arg LIGHTNING_VERSION=1.2.9 \ + --build-arg LIGHTNING_VERSION=1.3.1 \ . ``` 2. start the server and map ports: ```bash - docker run --rm -it --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -p 8888:8888 pytorch-lightning:v1.2.9 + docker run --rm -it --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -p 8888:8888 pytorch-lightning:v1.3.1 ``` 3. Connect in local browser: - copy the generated path e.g. `http://hostname:8888/?token=0719fa7e1729778b0cec363541a608d5003e26d4910983c6` diff --git a/dockers/tpu-tests/tpu_test_cases.jsonnet b/dockers/tpu-tests/tpu_test_cases.jsonnet index 8c3f3693fda50..13f70deed43ca 100644 --- a/dockers/tpu-tests/tpu_test_cases.jsonnet +++ b/dockers/tpu-tests/tpu_test_cases.jsonnet @@ -22,7 +22,7 @@ local tputests = base.BaseTest { ||| cd pytorch-lightning coverage run --source=pytorch_lightning -m pytest -v --capture=no \ - pytorch_lightning/utilities/xla_device_utils.py \ + pytorch_lightning/utilities/xla_device.py \ tests/accelerators/test_tpu_backend.py \ tests/models/test_tpu.py test_exit_code=$? diff --git a/docs/source/common/lightning_cli.rst b/docs/source/common/lightning_cli.rst index b11d505c502ad..1df80e1ccf830 100644 --- a/docs/source/common/lightning_cli.rst +++ b/docs/source/common/lightning_cli.rst @@ -91,8 +91,8 @@ practice to create a configuration file and provide this to the tool. A way to d The instantiation of the :class:`~pytorch_lightning.utilities.cli.LightningCLI` class takes care of parsing command line and config file options, instantiating the classes, setting up a callback to save the config in the log directory and -finally running :func:`trainer.fit`. The resulting object :code:`cli` can be used for instance to get the result of fit, -i.e., :code:`cli.fit_result`. +finally running the trainer. The resulting object :code:`cli` can be used for example to get the instance of the +model, (:code:`cli.model`). After multiple trainings with different configurations, each run will have in its respective log directory a :code:`config.yaml` file. 
This file can be used for reference to know in detail all the settings that were used for each diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index a574adb40d6e0..8ea03dabc9bdb 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -116,8 +116,7 @@ def test_dataloader(self): def cli_main(): cli = LightningCLI(LitAutoEncoder, MyDataModule, seed_everything_default=1234) - result = cli.trainer.test(cli.model, datamodule=cli.datamodule) - print(result) + cli.trainer.test(cli.model, datamodule=cli.datamodule) if __name__ == '__main__': diff --git a/pl_examples/basic_examples/backbone_image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py index 53a24dfdb221f..57cf97be00023 100644 --- a/pl_examples/basic_examples/backbone_image_classifier.py +++ b/pl_examples/basic_examples/backbone_image_classifier.py @@ -129,8 +129,7 @@ def test_dataloader(self): def cli_main(): cli = LightningCLI(LitClassifier, MyDataModule, seed_everything_default=1234) - result = cli.trainer.test(cli.model, datamodule=cli.datamodule) - print(result) + cli.trainer.test(cli.model, datamodule=cli.datamodule) if __name__ == '__main__': diff --git a/pl_examples/basic_examples/dali_image_classifier.py b/pl_examples/basic_examples/dali_image_classifier.py index 1a9dc46c81137..eca5c21b3242c 100644 --- a/pl_examples/basic_examples/dali_image_classifier.py +++ b/pl_examples/basic_examples/dali_image_classifier.py @@ -222,8 +222,7 @@ def cli_main(): return cli = LightningCLI(LitClassifier, MyDataModule, seed_everything_default=1234) - result = cli.trainer.test(cli.model, datamodule=cli.datamodule) - print(result) + cli.trainer.test(cli.model, datamodule=cli.datamodule) if __name__ == "__main__": diff --git a/pl_examples/basic_examples/simple_image_classifier.py b/pl_examples/basic_examples/simple_image_classifier.py index d401e884a2f18..ffb6434352b2e 100644 --- a/pl_examples/basic_examples/simple_image_classifier.py +++ b/pl_examples/basic_examples/simple_image_classifier.py @@ -77,8 +77,7 @@ def configure_optimizers(self): def cli_main(): cli = LightningCLI(LitClassifier, MNISTDataModule, seed_everything_default=1234) - result = cli.trainer.test(cli.model, datamodule=cli.datamodule) - print(result) + cli.trainer.test(cli.model, datamodule=cli.datamodule) if __name__ == '__main__': diff --git a/pytorch_lightning/core/datamodule.py b/pytorch_lightning/core/datamodule.py index 23626ed9cbeae..84210e9d7b667 100644 --- a/pytorch_lightning/core/datamodule.py +++ b/pytorch_lightning/core/datamodule.py @@ -37,7 +37,7 @@ def __init__(self): def prepare_data(self): # download, split, etc... # only called on 1 GPU/TPU in distributed - def setup(self): + def setup(self, stage): # make assignments here (val/train/test split) # called on every process in DDP def train_dataloader(self): diff --git a/pytorch_lightning/loggers/mlflow.py b/pytorch_lightning/loggers/mlflow.py index 516ed55de9fcf..fbcd4bbcc5183 100644 --- a/pytorch_lightning/loggers/mlflow.py +++ b/pytorch_lightning/loggers/mlflow.py @@ -16,6 +16,7 @@ ------------- """ import logging +import os import re from argparse import Namespace from time import time @@ -85,7 +86,8 @@ def any_lightning_module_function_or_hook(self): Args: experiment_name: The name of the experiment tracking_uri: Address of local or remote tracking server. - If not provided, defaults to `file:`. 
+ If not provided, defaults to `MLFLOW_TRACKING_URI` environment variable if set, otherwise it falls + back to `file:`. tags: A dictionary tags for the experiment. save_dir: A path to a local directory where the MLflow runs get saved. Defaults to `./mlflow` if `tracking_uri` is not provided. @@ -104,7 +106,7 @@ def any_lightning_module_function_or_hook(self): def __init__( self, experiment_name: str = 'default', - tracking_uri: Optional[str] = None, + tracking_uri: Optional[str] = os.getenv('MLFLOW_TRACKING_URI'), tags: Optional[Dict[str, Any]] = None, save_dir: Optional[str] = './mlruns', prefix: str = '', diff --git a/pytorch_lightning/metrics/functional/__init__.py b/pytorch_lightning/metrics/functional/__init__.py index 3b31dad5d3411..2bd5ca9b2e579 100644 --- a/pytorch_lightning/metrics/functional/__init__.py +++ b/pytorch_lightning/metrics/functional/__init__.py @@ -15,13 +15,6 @@ from pytorch_lightning.metrics.functional.auc import auc # noqa: F401 from pytorch_lightning.metrics.functional.auroc import auroc # noqa: F401 from pytorch_lightning.metrics.functional.average_precision import average_precision # noqa: F401 -from pytorch_lightning.metrics.functional.classification import ( # noqa: F401 - dice_score, - get_num_classes, - multiclass_auroc, - stat_scores_multiple_classes, - to_categorical, -) from pytorch_lightning.metrics.functional.confusion_matrix import confusion_matrix # noqa: F401 from pytorch_lightning.metrics.functional.explained_variance import explained_variance # noqa: F401 from pytorch_lightning.metrics.functional.f_beta import f1, fbeta # noqa: F401 diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py deleted file mode 100644 index de2f21e26438a..0000000000000 --- a/pytorch_lightning/metrics/functional/classification.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from functools import wraps -from typing import Callable, Optional, Sequence, Tuple - -import torch -from torchmetrics.utilities import class_reduce, reduce -from torchmetrics.utilities.data import get_num_classes, to_categorical - -from pytorch_lightning.metrics.functional.auc import auc as __auc -from pytorch_lightning.metrics.functional.auroc import auroc as __auroc -from pytorch_lightning.metrics.functional.iou import iou as __iou -from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn - - -def stat_scores( - pred: torch.Tensor, - target: torch.Tensor, - class_index: int, - argmax_dim: int = 1, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.stat_scores`. Will be removed in v1.4.0. 
- """ - if pred.ndim == target.ndim + 1: - pred = to_categorical(pred, argmax_dim=argmax_dim) - - tp = ((pred == class_index) * (target == class_index)).to(torch.long).sum() - fp = ((pred == class_index) * (target != class_index)).to(torch.long).sum() - tn = ((pred != class_index) * (target != class_index)).to(torch.long).sum() - fn = ((pred != class_index) * (target == class_index)).to(torch.long).sum() - sup = (target == class_index).to(torch.long).sum() - - return tp, fp, tn, fn, sup - - -# todo: remove in 1.4 -def stat_scores_multiple_classes( - pred: torch.Tensor, - target: torch.Tensor, - num_classes: Optional[int] = None, - argmax_dim: int = 1, - reduction: str = 'none', -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.stat_scores`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `stat_scores_multiple_classes` was deprecated in v1.2.0 in favor of" - " `from pytorch_lightning.metrics.functional import stat_scores`." - " It will be removed in v1.4.0" - ) - if pred.ndim == target.ndim + 1: - pred = to_categorical(pred, argmax_dim=argmax_dim) - - num_classes = get_num_classes(pred, target, num_classes=num_classes) - - if pred.dtype != torch.bool: - pred = pred.clamp_max(max=num_classes) - if target.dtype != torch.bool: - target = target.clamp_max(max=num_classes) - - possible_reductions = ('none', 'sum', 'elementwise_mean') - if reduction not in possible_reductions: - raise ValueError("reduction type %s not supported" % reduction) - - if reduction == 'none': - pred = pred.view((-1, )).long() - target = target.view((-1, )).long() - - tps = torch.zeros((num_classes + 1, ), device=pred.device) - fps = torch.zeros((num_classes + 1, ), device=pred.device) - fns = torch.zeros((num_classes + 1, ), device=pred.device) - sups = torch.zeros((num_classes + 1, ), device=pred.device) - - match_true = (pred == target).float() - match_false = 1 - match_true - - tps.scatter_add_(0, pred, match_true) - fps.scatter_add_(0, pred, match_false) - fns.scatter_add_(0, target, match_false) - tns = pred.size(0) - (tps + fps + fns) - sups.scatter_add_(0, target, torch.ones_like(match_true)) - - tps = tps[:num_classes] - fps = fps[:num_classes] - tns = tns[:num_classes] - fns = fns[:num_classes] - sups = sups[:num_classes] - - elif reduction == 'sum' or reduction == 'elementwise_mean': - count_match_true = (pred == target).sum().float() - oob_tp, oob_fp, oob_tn, oob_fn, oob_sup = stat_scores(pred, target, num_classes, argmax_dim) - - tps = count_match_true - oob_tp - fps = pred.nelement() - count_match_true - oob_fp - fns = pred.nelement() - count_match_true - oob_fn - tns = pred.nelement() * (num_classes + 1) - (tps + fps + fns + oob_tn) - sups = pred.nelement() - oob_sup.float() - - if reduction == 'elementwise_mean': - tps /= num_classes - fps /= num_classes - fns /= num_classes - tns /= num_classes - sups /= num_classes - - return tps.float(), fps.float(), tns.float(), fns.float(), sups.float() - - -def _confmat_normalize(cm): - """ Normalization function for confusion matrix """ - cm = cm / cm.sum(-1, keepdim=True) - nan_elements = cm[torch.isnan(cm)].nelement() - if nan_elements != 0: - cm[torch.isnan(cm)] = 0 - rank_zero_warn(f'{nan_elements} nan values found in confusion matrix have been replaced with zeros.') - return cm - - -# todo: remove in 1.4 -def precision_recall( - pred: torch.Tensor, - target: torch.Tensor, - num_classes: Optional[int] = None, - class_reduction: str = 'micro', - 
return_support: bool = False, - return_state: bool = False -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.precision_recall`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `precision_recall` was deprecated in v1.2.0 in favor of" - " `from pytorch_lightning.metrcs.functional import precision_recall`." - " It will be removed in v1.4.0" - ) - - tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred=pred, target=target, num_classes=num_classes) - - precision = class_reduce(tps, tps + fps, sups, class_reduction=class_reduction) - recall = class_reduce(tps, tps + fns, sups, class_reduction=class_reduction) - if return_state: - return {'tps': tps, 'fps': fps, 'fns': fns, 'sups': sups} - if return_support: - return precision, recall, sups - return precision, recall - - -# todo: remove in 1.4 -def precision( - pred: torch.Tensor, - target: torch.Tensor, - num_classes: Optional[int] = None, - class_reduction: str = 'micro', -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.precision`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `precision` was deprecated in v1.2.0 in favor of" - " `from pytorch_lightning.metrics.functional import precision`." - " It will be removed in v1.4.0" - ) - - return precision_recall(pred=pred, target=target, num_classes=num_classes, class_reduction=class_reduction)[0] - - -# todo: remove in 1.4 -def recall( - pred: torch.Tensor, - target: torch.Tensor, - num_classes: Optional[int] = None, - class_reduction: str = 'micro', -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.recall`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `recall` was deprecated in v1.2.0 in favor of" - " `from pytorch_lightning.metrics.functional import recall`." - " It will be removed in v1.4.0" - ) - - return precision_recall(pred=pred, target=target, num_classes=num_classes, class_reduction=class_reduction)[1] - - -# todo: remove in 1.4 -def auc( - x: torch.Tensor, - y: torch.Tensor, -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.auc`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `auc` was deprecated in v1.2.0 in favor of" - " `pytorch_lightning.metrics.functional.auc import auc`." - " It will be removed in v1.4.0" - ) - return __auc(x, y) - - -# todo: remove in 1.4 -def _auc_decorator() -> Callable: - - def wrapper(func_to_decorate: Callable) -> Callable: - - @wraps(func_to_decorate) - def new_func(*args, **kwargs) -> torch.Tensor: - x, y = func_to_decorate(*args, **kwargs)[:2] - - return auc(x, y) - - return new_func - - return wrapper - - -# todo: remove in 1.4 -def _multiclass_auc_decorator() -> Callable: - - def wrapper(func_to_decorate: Callable) -> Callable: - - @wraps(func_to_decorate) - def new_func(*args, **kwargs) -> torch.Tensor: - results = [] - for class_result in func_to_decorate(*args, **kwargs): - x, y = class_result[:2] - results.append(auc(x, y)) - - return torch.stack(results) - - return new_func - - return wrapper - - -# todo: remove in 1.4 -def auroc( - pred: torch.Tensor, - target: torch.Tensor, - sample_weight: Optional[Sequence] = None, - pos_label: int = 1., - max_fpr: float = None, -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.auroc`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `auroc` was deprecated in v1.2.0 in favor of `pytorch_lightning.metrics.functional.auroc import auroc`." 
- " It will be removed in v1.4.0" - ) - return __auroc( - preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label, max_fpr=max_fpr, num_classes=1 - ) - - -# todo: remove in 1.4 -def multiclass_auroc( - pred: torch.Tensor, - target: torch.Tensor, - sample_weight: Optional[Sequence] = None, - num_classes: Optional[int] = None, -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.auroc`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `multiclass_auroc` was deprecated in v1.2.0 in favor of" - " `pytorch_lightning.metrics.functional.auroc import auroc`." - " It will be removed in v1.4.0" - ) - - return __auroc(preds=pred, target=target, sample_weights=sample_weight, num_classes=num_classes) - - -def dice_score( - pred: torch.Tensor, - target: torch.Tensor, - bg: bool = False, - nan_score: float = 0.0, - no_fg_score: float = 0.0, - reduction: str = 'elementwise_mean', -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.dice_score`. Will be removed in v1.4.0. - """ - num_classes = pred.shape[1] - bg = (1 - int(bool(bg))) - scores = torch.zeros(num_classes - bg, device=pred.device, dtype=torch.float32) - for i in range(bg, num_classes): - if not (target == i).any(): - # no foreground class - scores[i - bg] += no_fg_score - continue - - tp, fp, tn, fn, sup = stat_scores(pred=pred, target=target, class_index=i) - denom = (2 * tp + fp + fn).to(torch.float) - # nan result - score_cls = (2 * tp).to(torch.float) / denom if torch.is_nonzero(denom) else nan_score - - scores[i - bg] += score_cls - return reduce(scores, reduction=reduction) - - -# todo: remove in 1.4 -def iou( - pred: torch.Tensor, - target: torch.Tensor, - ignore_index: Optional[int] = None, - absent_score: float = 0.0, - num_classes: Optional[int] = None, - reduction: str = 'elementwise_mean', -) -> torch.Tensor: - """ - .. deprecated:: - Use :func:`torchmetrics.functional.iou`. Will be removed in v1.4.0. - """ - rank_zero_deprecation( - "This `iou` was deprecated in v1.2.0 in favor of `from pytorch_lightning.metrics.functional.iou import iou`." - " It will be removed in v1.4.0" - ) - return __iou( - pred, - target, - ignore_index=ignore_index, - absent_score=absent_score, - threshold=0.5, - num_classes=num_classes, - reduction=reduction - ) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 272f4c67502c7..3d6e527ef95a9 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -16,12 +16,9 @@ from typing import Any import torch -from torch.nn import DataParallel -from torch.nn.parallel import DistributedDataParallel from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.overrides.base import _LightningModuleWrapperBase -from pytorch_lightning.overrides.distributed import LightningDistributedModule from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection @@ -35,27 +32,6 @@ def _ignore_scalar_return_in_dp(): ) -class LightningDataParallel(DataParallel): - - def __init__(self, module: LightningModule, *args, **kwargs): - warnings.warn( - "The usage of `LightningDataParallel` is deprecated since v1.2 and will be removed in v1.4." 
- " From now on we recommend to directly subclass `torch.nn.parallel.DataParallel`.", DeprecationWarning - ) - super().__init__(LightningParallelModule(module), *args, **kwargs) - - -class LightningDistributedDataParallel(DistributedDataParallel): - - def __init__(self, module: LightningModule, *args, **kwargs): - warnings.warn( - "The usage of `LightningDistributedDataParallel` is deprecated since v1.2 and will be removed in v1.4." - " From now on we recommend to directly subclass `torch.nn.parallel.DistributedDataParallel`.", - DeprecationWarning - ) - super().__init__(LightningDistributedModule(module), *args, **kwargs) - - class LightningParallelModule(_LightningModuleWrapperBase): """ Wraps the user's LightningModule and redirects the forward call to the appropriate diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 8c09de075147a..1c8298557662b 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -195,14 +195,14 @@ def cache_training_step_metrics(self, opt_closure_result): self._callback_metrics.update(callback_metrics_tmp) self._logged_metrics.update(logged_metrics_tmp) - def log_metrics(self, metrics, grad_norm_dic, step=None): + def log_metrics(self, metrics, grad_norm_dict, step=None): """Logs the metric dict passed in. If `step` parameter is None and `step` key is presented is metrics, uses metrics["step"] as a step Args: metrics (dict): Metric values - grad_norm_dic (dict): Gradient norms + grad_norm_dict (dict): Gradient norms step (int): Step for which metrics should be logged. Default value is `self.global_step` during training or the total validation / test log step count during validation and testing. """ @@ -212,7 +212,7 @@ def log_metrics(self, metrics, grad_norm_dic, step=None): metrics.update(mem_map) # add norms - metrics.update(grad_norm_dic) + metrics.update(grad_norm_dict) # turn all tensors to scalars scalar_metrics = metrics_to_scalars(metrics) @@ -368,11 +368,11 @@ def log_train_step_metrics(self, batch_output): # when metrics should be logged if self.should_update_logs or self.trainer.fast_dev_run is True: # logs user requested information to logger - grad_norm_dic = batch_output.grad_norm_dic - if grad_norm_dic is None: - grad_norm_dic = {} - if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0: - self.log_metrics(batch_log_metrics, grad_norm_dic) + grad_norm_dict = batch_output.grad_norm_dict + if grad_norm_dict is None: + grad_norm_dict = {} + if len(batch_log_metrics) > 0 or len(grad_norm_dict) > 0: + self.log_metrics(batch_log_metrics, grad_norm_dict) self._callback_metrics.update(batch_log_metrics) @property diff --git a/pytorch_lightning/trainer/deprecated_api.py b/pytorch_lightning/trainer/deprecated_api.py index 32dbc8c4088a3..7e7817d277dae 100644 --- a/pytorch_lightning/trainer/deprecated_api.py +++ b/pytorch_lightning/trainer/deprecated_api.py @@ -11,141 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from pytorch_lightning.accelerators import Accelerator -from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector -from pytorch_lightning.utilities import DeviceType, DistributedType, rank_zero_deprecation - -class DeprecatedDistDeviceAttributes: - - num_gpus: int - accelerator_connector: AcceleratorConnector - - @property - def on_cpu(self) -> bool: - rank_zero_deprecation("Internal: `on_cpu` is deprecated in v1.2 and will be removed in v1.4.") - return self.accelerator_connector._device_type == DeviceType.CPU - - @on_cpu.setter - def on_cpu(self, val: bool) -> None: - rank_zero_deprecation("Internal: `on_cpu` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._device_type = DeviceType.CPU - - @property - def on_tpu(self) -> bool: - rank_zero_deprecation("Internal: `on_tpu` is deprecated in v1.2 and will be removed in v1.4.") - return self.accelerator_connector._device_type == DeviceType.TPU - - @on_tpu.setter - def on_tpu(self, val: bool) -> None: - rank_zero_deprecation("Internal: `on_tpu` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._device_type = DeviceType.TPU - - @property - def use_tpu(self) -> bool: - rank_zero_deprecation("Internal: `use_tpu` is deprecated in v1.2 and will be removed in v1.4.") - return self.on_tpu - - @use_tpu.setter - def use_tpu(self, val: bool) -> None: - rank_zero_deprecation("Internal: `use_tpu` is deprecated in v1.2 and will be removed in v1.4.") - self.on_tpu = val - - @property - def on_gpu(self) -> bool: - rank_zero_deprecation("Internal: `on_gpu` is deprecated in v1.2 and will be removed in v1.4.") - return self.accelerator_connector._device_type == DeviceType.GPU - - @on_gpu.setter - def on_gpu(self, val: bool) -> None: - rank_zero_deprecation("Internal: `on_gpu` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._device_type = DeviceType.GPU - - @property - def use_dp(self) -> bool: - rank_zero_deprecation("Internal: `use_dp` is deprecated in v1.2 and will be removed in v1.4.") - return self.accelerator_connector._distrib_type == DistributedType.DP - - @use_dp.setter - def use_dp(self, val: bool) -> None: - rank_zero_deprecation("Internal: `use_dp` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._distrib_type = DistributedType.DP - - @property - def use_ddp(self) -> bool: - rank_zero_deprecation("Internal: `use_ddp` is deprecated in v1.2 and will be removed in v1.4.") - return self.accelerator_connector._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN) - - @use_ddp.setter - def use_ddp(self, val: bool) -> None: - rank_zero_deprecation("Internal: `use_ddp` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._distrib_type = DistributedType.DDP - - @property - def use_ddp2(self) -> bool: - rank_zero_deprecation("Internal: `use_ddp2` is deprecated in v1.2 and will be removed in v1.4.") - return self.accelerator_connector._distrib_type == DistributedType.DDP2 - - @use_ddp2.setter - def use_ddp2(self, val: bool) -> None: - rank_zero_deprecation("Internal: `use_ddp2` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._distrib_type = DistributedType.DDP2 - - @property - def use_horovod(self) -> bool: - rank_zero_deprecation("Internal: `use_horovod` is deprecated in v1.2 and will be removed in 
v1.4.") - return self.accelerator_connector._distrib_type == DistributedType.HOROVOD - - @use_horovod.setter - def use_horovod(self, val: bool) -> None: - rank_zero_deprecation("Internal: `use_horovod` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._distrib_type = DistributedType.HOROVOD - - @property - def use_single_gpu(self) -> bool: - rank_zero_deprecation("Internal: `use_single_gpu` is deprecated in v1.2 and will be removed in v1.4.") - # todo, limiting to exclude DDP2 is not clear but it comes from connectors... - return ( - self.accelerator_connector._device_type and self.accelerator_connector._device_type == DeviceType.GPU - and self.num_gpus == 1 and self.accelerator_connector._distrib_type not in (DistributedType.DDP2, ) - ) - - @use_single_gpu.setter - def use_single_gpu(self, val: bool) -> None: - rank_zero_deprecation("Internal: `use_single_gpu` is deprecated in v1.2 and will be removed in v1.4.") - if val: - self.accelerator_connector._device_type = DeviceType.GPU +from pytorch_lightning.utilities import rank_zero_deprecation class DeprecatedTrainerAttributes: - accelerator: Accelerator - lightning_module: LightningModule sanity_checking: bool - @property - def accelerator_backend(self) -> Accelerator: - rank_zero_deprecation( - "The `Trainer.accelerator_backend` attribute is deprecated in favor of `Trainer.accelerator`" - " since 1.2 and will be removed in v1.4." - ) - return self.accelerator - - def get_model(self) -> LightningModule: - rank_zero_deprecation( - "The use of `Trainer.get_model()` is deprecated in favor of `Trainer.lightning_module`" - " and will be removed in v1.4." - ) - return self.lightning_module - @property def running_sanity_check(self) -> bool: rank_zero_deprecation( diff --git a/pytorch_lightning/trainer/predict_loop.py b/pytorch_lightning/trainer/predict_loop.py index fb1ad3b054c9e..77dfde7f771da 100644 --- a/pytorch_lightning/trainer/predict_loop.py +++ b/pytorch_lightning/trainer/predict_loop.py @@ -60,6 +60,7 @@ def should_store_predictions(self) -> bool: def on_trainer_init(self): self.trainer.num_predict_batches = [] + self.trainer.predicted_ckpt_path = None def get_predict_dataloaders(self): self.trainer.reset_predict_dataloader(self.trainer.lightning_module) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 8732d8c33dce7..a9a431ddbba5e 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -47,7 +47,7 @@ from pytorch_lightning.trainer.connectors.slurm_connector import SLURMConnector from pytorch_lightning.trainer.connectors.training_trick_connector import TrainingTricksConnector from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin -from pytorch_lightning.trainer.deprecated_api import DeprecatedDistDeviceAttributes, DeprecatedTrainerAttributes +from pytorch_lightning.trainer.deprecated_api import DeprecatedTrainerAttributes from pytorch_lightning.trainer.evaluation_loop import EvaluationLoop from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin @@ -83,7 +83,6 @@ class Trainer( TrainerLoggingMixin, TrainerTrainingTricksMixin, TrainerDataLoadingMixin, - DeprecatedDistDeviceAttributes, DeprecatedTrainerAttributes, ): @@ -379,6 +378,7 @@ def __init__( terminate_on_nan, ) self.evaluation_loop.on_trainer_init() + self.predict_loop.on_trainer_init() # configure tuner self.tuner.on_trainer_init(auto_lr_find, 
auto_scale_batch_size) @@ -585,6 +585,7 @@ def predict( dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, datamodule: Optional[LightningDataModule] = None, return_predictions: Optional[bool] = None, + ckpt_path: Optional[str] = 'best', ) -> Optional[_PREDICT_OUTPUT]: r""" @@ -601,6 +602,10 @@ def predict( return_predictions: Whether to return predictions. ``True`` by default except when an accelerator that spawns processes is used (not supported). + ckpt_path: Either ``best`` or path to the checkpoint you wish to use to predict. + If ``None``, use the current weights of the model. + When the model is given as argument, this parameter will not apply. + Returns: Returns a list of dictionaries, one for each provided dataloader containing their respective predictions. """ @@ -610,8 +615,6 @@ def predict( # -------------------- Trainer._log_api_event("predict") - model = model or self.lightning_module - self.state.fn = TrainerFn.PREDICTING self.state.status = TrainerStatus.RUNNING self.predicting = True @@ -621,9 +624,15 @@ def predict( if dataloaders is not None and datamodule: raise MisconfigurationException('You cannot pass both `trainer.predict(dataloaders=..., datamodule=...)`') + model_provided = model is not None + model = model or self.lightning_module + # links data to the trainer self.data_connector.attach_data(model, predict_dataloaders=dataloaders, datamodule=datamodule) + if not model_provided: + self.predicted_ckpt_path = self.__load_ckpt_weights(ckpt_path) + results = self._run(model) assert self.state.stopped diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 43ed2c7ffa964..3f269a1cbc146 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -51,8 +51,6 @@ def __init__( self.warning_cache = WarningCache() self._teardown_already_run = False self.running_loss = TensorRunningAccum(window_length=20) - self._curr_step_result = None - self._cur_grad_norm_dict = None self._multiple_trainloader_mode = multiple_trainloader_mode self._skip_backward = False self.trainer._multiple_trainloader_mode = multiple_trainloader_mode @@ -437,15 +435,15 @@ def on_before_zero_grad(self, optimizer): def optimizer_zero_grad(self, batch_idx, optimizer, opt_idx): self.trainer.accelerator.optimizer_zero_grad(self.trainer.current_epoch, batch_idx, optimizer, opt_idx) - def track_and_norm_grad(self, optimizer): + def track_and_norm_grad(self, optimizer) -> dict: # track gradient norms - grad_norm_dic = self._track_gradient_norm() + grad_norm_dict = self._track_gradient_norm() # clip gradients self.trainer.accelerator.clip_gradients( optimizer, self.trainer.gradient_clip_val, gradient_clip_algorithm=self.trainer.gradient_clip_algorithm ) - self._cur_grad_norm_dict = grad_norm_dic + return grad_norm_dict def _track_gradient_norm(self): grad_norm_dict = {} @@ -654,7 +652,7 @@ def _on_train_epoch_end_hook(self, processed_epoch_output) -> None: def run_training_batch(self, batch, batch_idx, dataloader_idx): # track grad norms - grad_norm_dic = {} + grad_norm_dict = {} # bookkeeping self.trainer.hiddens = None @@ -668,19 +666,19 @@ def run_training_batch(self, batch, batch_idx, dataloader_idx): self.warning_cache.warn("train_dataloader yielded None. 
If this was on purpose, ignore this warning...") return AttributeDict( signal=0, - grad_norm_dic=grad_norm_dic, + grad_norm_dict={}, training_step_output_for_epoch_end=batch_outputs, ) # hook response = self.trainer.call_hook("on_batch_start") if response == -1: - return AttributeDict(signal=-1, grad_norm_dic=grad_norm_dic) + return AttributeDict(signal=-1, grad_norm_dict={}) # hook response = self.trainer.call_hook("on_train_batch_start", batch, batch_idx, dataloader_idx) if response == -1: - return AttributeDict(signal=-1, grad_norm_dic=grad_norm_dic) + return AttributeDict(signal=-1, grad_norm_dict={}) # lightning module hook splits = self._tbptt_split_batch(batch) @@ -693,6 +691,7 @@ def run_training_batch(self, batch, batch_idx, dataloader_idx): # toggle model params + set info to logger_connector self.run_train_split_start(split_idx, split_batch, opt_idx, optimizer) + result = AttributeDict() if self.should_accumulate(): # For gradient accumulation @@ -703,24 +702,19 @@ def run_training_batch(self, batch, batch_idx, dataloader_idx): # automatic_optimization=True: perform dpp sync only when performing optimizer_step # automatic_optimization=False: don't block synchronization here with self.block_ddp_sync_behaviour(): - self.training_step_and_backward( + result = self.training_step_and_backward( split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens ) - batch_outputs = self._process_closure_result( - batch_outputs=batch_outputs, - opt_idx=opt_idx, - ) - # ------------------------------ # BACKWARD PASS # ------------------------------ # gradient update with accumulated gradients - else: if self.trainer.lightning_module.automatic_optimization: def train_step_and_backward_closure(): + nonlocal result result = self.training_step_and_backward( split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens ) @@ -730,30 +724,28 @@ def train_step_and_backward_closure(): self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure) else: - self._curr_step_result = self.training_step( - split_batch, batch_idx, opt_idx, self.trainer.hiddens - ) + result = self.training_step(split_batch, batch_idx, opt_idx, self.trainer.hiddens) - if self._curr_step_result is None: + if not result: # user decided to skip optimization # make sure to zero grad. 
continue - batch_outputs = self._process_closure_result( - batch_outputs=batch_outputs, - opt_idx=opt_idx, - ) - # todo: Properly aggregate grad_norm accros opt_idx and split_idx - grad_norm_dic = self._cur_grad_norm_dict - self._cur_grad_norm_dict = None + grad_norm_dict = result.get("grad_norm_dict", {}) # update running loss + reset accumulated loss - self.update_running_loss() + self.update_running_loss(result.loss) + + batch_outputs = self._process_closure_result( + opt_closure_result=result, + batch_outputs=batch_outputs, + opt_idx=opt_idx, + ) result = AttributeDict( signal=0, - grad_norm_dic=grad_norm_dic, + grad_norm_dict=grad_norm_dict, training_step_output_for_epoch_end=batch_outputs, ) return result @@ -782,11 +774,10 @@ def block_ddp_sync_behaviour(self, should_block_sync: bool = False): else: yield None - def _process_closure_result(self, batch_outputs: list, opt_idx: int) -> list: - opt_closure_result = self._curr_step_result - - if opt_closure_result is not None: - + def _process_closure_result( + self, opt_closure_result: Optional[AttributeDict], batch_outputs: list, opt_idx: int + ) -> list: + if opt_closure_result: # cache metrics self.trainer.logger_connector.cache_training_step_metrics(opt_closure_result) @@ -798,12 +789,6 @@ def _process_closure_result(self, batch_outputs: list, opt_idx: int) -> list: batch_opt_idx = opt_idx if len(batch_outputs) > 1 else 0 batch_outputs[batch_opt_idx].append(opt_closure_result.training_step_output_for_epoch_end) - if self.trainer.lightning_module.automatic_optimization: - # track total loss for logging (avoid mem leaks) - self.accumulated_loss.append(opt_closure_result.loss) - - self._curr_step_result = None - return batch_outputs def training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer, hiddens): @@ -811,7 +796,6 @@ def training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer, with self.trainer.profiler.profile("training_step_and_backward"): # lightning module hook result = self.training_step(split_batch, batch_idx, opt_idx, hiddens) - self._curr_step_result = result if not self._skip_backward and self.trainer.lightning_module.automatic_optimization: is_first_batch_to_accumulate = batch_idx % self.trainer.accumulate_grad_batches == 0 @@ -866,7 +850,7 @@ def backward(self, result, optimizer, opt_idx, *args, **kwargs): if not self.should_accumulate(): # track gradients - self.track_and_norm_grad(optimizer=optimizer) + result.grad_norm_dict = self.track_and_norm_grad(optimizer=optimizer) def update_train_loop_lr_schedulers(self, monitor_metrics=None): num_accumulated_batches_reached = self._accumulated_batches_reached() @@ -995,7 +979,11 @@ def run_train_split_start(self, split_idx, split_batch, opt_idx, optimizer): # use to track metrics internally self.trainer.logger_connector.on_train_split_start(split_idx, opt_idx, split_batch) - def update_running_loss(self): + def update_running_loss(self, current_loss: torch.Tensor) -> None: + if self.trainer.lightning_module.automatic_optimization: + # track total loss for logging (avoid mem leaks) + self.accumulated_loss.append(current_loss) + accumulated_loss = self.accumulated_loss.mean() if accumulated_loss is not None: diff --git a/pytorch_lightning/utilities/argparse_utils.py b/pytorch_lightning/utilities/argparse_utils.py index 17f0e9b8cc4a9..92cb7804da878 100644 --- a/pytorch_lightning/utilities/argparse_utils.py +++ b/pytorch_lightning/utilities/argparse_utils.py @@ -2,8 +2,6 @@ rank_zero_deprecation("`argparse_utils` package has been renamed 
to `argparse` since v1.2 and will be removed in v1.4") -from pytorch_lightning.utilities.argparse import * # noqa: F403 E402 F401 - # for backward compatibility with old checkpoints (versions < 1.2.0) # that need to be able to unpickle the function from the checkpoint from pytorch_lightning.utilities.argparse import _gpus_arg_default # noqa: E402 F401 # isort: skip diff --git a/pytorch_lightning/utilities/cli.py b/pytorch_lightning/utilities/cli.py index 413b06f39f7a6..da6592ae66c18 100644 --- a/pytorch_lightning/utilities/cli.py +++ b/pytorch_lightning/utilities/cli.py @@ -128,13 +128,14 @@ def __init__( .. warning:: ``LightningCLI`` is in beta and subject to change. Args: - model_class: The LightningModule class to train on. - datamodule_class: An optional LightningDataModule class. + model_class: :class:`~pytorch_lightning.core.lightning.LightningModule` class to train on. + datamodule_class: An optional :class:`~pytorch_lightning.core.datamodule.LightningDataModule` class. save_config_callback: A callback class to save the training config. - trainer_class: An optional extension of the Trainer class. + trainer_class: An optional subclass of the :class:`~pytorch_lightning.trainer.trainer.Trainer` class. trainer_defaults: Set to override Trainer defaults or add persistent callbacks. - seed_everything_default: Default value for seed_everything argument. - description: Description of the tool shown when running --help. + seed_everything_default: Default value for the :func:`~pytorch_lightning.utilities.seed.seed_everything` + seed argument. + description: Description of the tool shown when running ``--help``. env_prefix: Prefix for environment variables. env_parse: Whether environment variable parsing is enabled. parser_kwargs: Additional arguments to instantiate LightningArgumentParser. @@ -165,7 +166,7 @@ def __init__( self.add_arguments_to_parser(self.parser) self.parse_arguments() if self.config['seed_everything'] is not None: - seed_everything(self.config['seed_everything']) + seed_everything(self.config['seed_everything'], workers=True) self.before_instantiate_classes() self.instantiate_classes() self.prepare_fit_kwargs() diff --git a/pytorch_lightning/utilities/model_utils.py b/pytorch_lightning/utilities/model_utils.py deleted file mode 100644 index 728f73f4f0d32..0000000000000 --- a/pytorch_lightning/utilities/model_utils.py +++ /dev/null @@ -1,7 +0,0 @@ -from pytorch_lightning.utilities import rank_zero_deprecation - -rank_zero_deprecation( - "`model_utils` package has been renamed to `model_helpers` since v1.2 and will be removed in v1.4" -) - -from pytorch_lightning.utilities.model_helpers import * # noqa: F403 E402 F401 diff --git a/pytorch_lightning/utilities/types.py b/pytorch_lightning/utilities/types.py index ecb0101a2279e..fdfdb95b08692 100644 --- a/pytorch_lightning/utilities/types.py +++ b/pytorch_lightning/utilities/types.py @@ -1,12 +1,26 @@ -from typing import Any, Dict, Iterator, List, Union - -import torch -from torchmetrics import Metric +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. """ Convention: - Do not include any `_TYPE` suffix - Types used in public hooks (as those in the `LightningModule` and `Callback`) should be public (no trailing `_`) """ +from typing import Any, Dict, Iterator, List, Union + +import torch +from torchmetrics import Metric + _METRIC = Union[Metric, torch.Tensor, int, float] STEP_OUTPUT = Union[torch.Tensor, Dict[str, Any]] EPOCH_OUTPUT = List[STEP_OUTPUT] diff --git a/pytorch_lightning/utilities/warning_utils.py b/pytorch_lightning/utilities/warning_utils.py deleted file mode 100644 index 0668bababa609..0000000000000 --- a/pytorch_lightning/utilities/warning_utils.py +++ /dev/null @@ -1,5 +0,0 @@ -from pytorch_lightning.utilities import rank_zero_deprecation - -rank_zero_deprecation("`warning_utils` package has been renamed to `warnings` since v1.2 and will be removed in v1.4") - -from pytorch_lightning.utilities.warnings import * # noqa: F403 E402 F401 diff --git a/pytorch_lightning/utilities/xla_device_utils.py b/pytorch_lightning/utilities/xla_device_utils.py deleted file mode 100644 index f028222e3930b..0000000000000 --- a/pytorch_lightning/utilities/xla_device_utils.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from pytorch_lightning.utilities import rank_zero_deprecation - -rank_zero_deprecation( - "`xla_device_utils` package has been renamed to `xla_device` since v1.2 and will be removed in v1.4" -) - -from pytorch_lightning.utilities.xla_device import * # noqa: F403 E402 F401 diff --git a/setup.cfg b/setup.cfg index 34ad80e7cbed0..d747530ec2c2e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,7 +44,6 @@ exclude_lines = # *metrics (94%+) are temporarily removed from testing while tests speed up omit = pytorch_lightning/cluster_environments/*.py - pytorch_lightning/utilities/xla_device_utils.py pytorch_lightning/utilities/distributed.py pytorch_lightning/tuner/auto_gpu_select.py diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index a57fbb4afcbdc..fc24401aa106c 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -105,7 +105,6 @@ def test_accelerator_choice_ddp_slurm(setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp assert trainer.accelerator_connector.is_slurm_managing_tasks assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) @@ -144,7 +143,6 @@ def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp2 assert trainer.accelerator_connector.is_slurm_managing_tasks assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDP2Plugin) @@ -183,7 +181,6 @@ def test_accelerator_choice_ddp_te(device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment) @@ -221,7 +218,6 @@ def test_accelerator_choice_ddp2_te(device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp2 assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDP2Plugin) assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment) @@ -257,7 +253,6 @@ def test_accelerator_choice_ddp_cpu_te(device_count_mock, setup_distributed_mock class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp assert isinstance(trainer.accelerator, CPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment) @@ -294,7 +289,6 @@ def test_accelerator_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_m class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp assert trainer.accelerator_connector.is_slurm_managing_tasks assert isinstance(trainer.accelerator, CPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) @@ -342,7 +336,6 @@ def creates_children(self) -> bool: class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.use_ddp assert isinstance(trainer.accelerator, CPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, 
CustomCluster) diff --git a/tests/deprecated_api/test_remove_1-4.py b/tests/deprecated_api/test_remove_1-4.py index 99e1b31f6edad..37d8abfdf905d 100644 --- a/tests/deprecated_api/test_remove_1-4.py +++ b/tests/deprecated_api/test_remove_1-4.py @@ -14,176 +14,16 @@ """Test deprecated functionality which will be removed in v1.4.0""" import pytest -import torch from pytorch_lightning import Trainer -from pytorch_lightning.overrides.data_parallel import ( - LightningDataParallel, - LightningDistributedDataParallel, - LightningParallelModule, -) -from pytorch_lightning.overrides.distributed import LightningDistributedModule -from pytorch_lightning.plugins import DDPSpawnPlugin -from pytorch_lightning.plugins.environments import LightningEnvironment from tests.deprecated_api import _soft_unimport_module from tests.helpers import BoringModel -from tests.helpers.runif import RunIf - - -def test_v1_4_0_deprecated_trainer_attributes(): - with pytest.deprecated_call(match="will be removed in v1.4."): - trainer = Trainer() - _ = trainer.accelerator_backend - assert trainer.accelerator == trainer.accelerator_backend - - -def test_v1_4_0_deprecated_trainer_methods(): - with pytest.deprecated_call(match='will be removed in v1.4'): - trainer = Trainer() - _ = trainer.get_model() - assert trainer.get_model() == trainer.lightning_module def test_v1_4_0_deprecated_imports(): _soft_unimport_module('pytorch_lightning.utilities.argparse_utils') with pytest.deprecated_call(match='will be removed in v1.4'): - from pytorch_lightning.utilities.argparse_utils import from_argparse_args # noqa: F811 F401 - - _soft_unimport_module('pytorch_lightning.utilities.model_utils') - with pytest.deprecated_call(match='will be removed in v1.4'): - from pytorch_lightning.utilities.model_utils import is_overridden # noqa: F811 F401 - - _soft_unimport_module('pytorch_lightning.utilities.warning_utils') - with pytest.deprecated_call(match='will be removed in v1.4'): - from pytorch_lightning.utilities.warning_utils import WarningCache # noqa: F811 F401 - - _soft_unimport_module('pytorch_lightning.utilities.xla_device_utils') - with pytest.deprecated_call(match='will be removed in v1.4'): - from pytorch_lightning.utilities.xla_device_utils import XLADeviceUtils # noqa: F811 F401 - - -def test_v1_4_0_deprecated_trainer_device_distrib(): - """Test that Trainer attributes works fine.""" - trainer = Trainer() - trainer.accelerator_connector._distrib_type = None - trainer.accelerator_connector._device_type = None - - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - trainer.on_cpu = True - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - assert trainer.on_cpu - - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - trainer.on_gpu = True - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - assert trainer.on_gpu - - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - trainer.on_tpu = True - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - assert trainer.on_tpu - trainer.accelerator_connector._device_type = None - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - trainer.use_tpu = True - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - assert trainer.use_tpu - - with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'): - 
-        trainer.use_dp = True
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        assert trainer.use_dp
-
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        trainer.use_ddp = True
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        assert trainer.use_ddp
-
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        trainer.use_ddp2 = True
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        assert trainer.use_ddp2
-
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        trainer.use_horovod = True
-    with pytest.deprecated_call(match='deprecated in v1.2 and will be removed in v1.4'):
-        assert trainer.use_horovod
-
-
-def test_v1_4_0_deprecated_metrics():
-    from pytorch_lightning.metrics.functional.classification import stat_scores_multiple_classes
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        stat_scores_multiple_classes(pred=torch.tensor([0, 1]), target=torch.tensor([0, 1]))
-
-    from pytorch_lightning.metrics.functional.classification import iou
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        iou(torch.randint(0, 2, (10, 3, 3)), torch.randint(0, 2, (10, 3, 3)))
-
-    from pytorch_lightning.metrics.functional.classification import recall
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        recall(torch.randint(0, 2, (10, 3, 3)), torch.randint(0, 2, (10, 3, 3)))
-
-    from pytorch_lightning.metrics.functional.classification import precision
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        precision(torch.randint(0, 2, (10, 3, 3)), torch.randint(0, 2, (10, 3, 3)))
-
-    from pytorch_lightning.metrics.functional.classification import precision_recall
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        precision_recall(torch.randint(0, 2, (10, 3, 3)), torch.randint(0, 2, (10, 3, 3)))
-
-    from pytorch_lightning.metrics.functional.classification import auc
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        auc(torch.rand(10, ).sort().values, torch.rand(10, ))
-
-    from pytorch_lightning.metrics.functional.classification import auroc
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        auroc(torch.rand(10, ), torch.randint(0, 2, (10, )))
-
-    from pytorch_lightning.metrics.functional.classification import multiclass_auroc
-    with pytest.deprecated_call(match='will be removed in v1.4'):
-        multiclass_auroc(torch.rand(20, 5).softmax(dim=-1), torch.randint(0, 5, (20, )), num_classes=5)
-
-
-class CustomDDPPlugin(DDPSpawnPlugin):
-
-    def configure_ddp(self):
-        # old, deprecated implementation
-        with pytest.deprecated_call(
-            match='`LightningDistributedDataParallel` is deprecated since v1.2 and will be removed in v1.4.'
-        ):
-            self._model = LightningDistributedDataParallel(
-                module=self.lightning_module,
-                device_ids=self.determine_ddp_device_ids(),
-                **self._ddp_kwargs,
-            )
-        assert isinstance(self.model, torch.nn.parallel.DistributedDataParallel)
-        assert isinstance(self.model.module, LightningDistributedModule)
-
-
-@RunIf(min_gpus=2, skip_windows=True)
-def test_v1_4_0_deprecated_lightning_distributed_data_parallel(tmpdir):
-    model = BoringModel()
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        fast_dev_run=True,
-        gpus=2,
-        accelerator="ddp_spawn",
-        plugins=[
-            CustomDDPPlugin(
-                parallel_devices=[torch.device("cuda", 0), torch.device("cuda", 1)],
-                cluster_environment=LightningEnvironment(),
-            )
-        ]
-    )
-    trainer.fit(model)
-
-
-@RunIf(min_gpus=1)
-def test_v1_4_0_deprecated_lightning_data_parallel():
-    model = BoringModel()
-    with pytest.deprecated_call(match="`LightningDataParallel` is deprecated since v1.2 and will be removed in v1.4."):
-        dp_model = LightningDataParallel(model, device_ids=[0])
-    assert isinstance(dp_model, torch.nn.DataParallel)
-    assert isinstance(dp_model.module, LightningParallelModule)
+        from pytorch_lightning.utilities.argparse_utils import _gpus_arg_default  # noqa: F811 F401


 def test_v1_4_0_deprecated_manual_optimization_optimizer(tmpdir):
diff --git a/tests/trainer/loops/test_evaluation_loop_flow.py b/tests/trainer/loops/test_evaluation_loop_flow.py
index 8fdb321b6f230..3177a3aa09156 100644
--- a/tests/trainer/loops/test_evaluation_loop_flow.py
+++ b/tests/trainer/loops/test_evaluation_loop_flow.py
@@ -71,7 +71,7 @@ def backward(self, loss, optimizer, optimizer_idx):
     out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
     assert out.signal == 0
-    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)
+    assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict)

     train_step_out = out.training_step_output_for_epoch_end
     assert len(train_step_out) == 1
@@ -140,7 +140,7 @@ def backward(self, loss, optimizer, optimizer_idx):
     out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
     assert out.signal == 0
-    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)
+    assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict)

     train_step_out = out.training_step_output_for_epoch_end
     assert len(train_step_out) == 1
diff --git a/tests/trainer/loops/test_training_loop_flow_scalar.py b/tests/trainer/loops/test_training_loop_flow_scalar.py
index 816134ee52941..f14f7d339d83f 100644
--- a/tests/trainer/loops/test_training_loop_flow_scalar.py
+++ b/tests/trainer/loops/test_training_loop_flow_scalar.py
@@ -155,7 +155,7 @@ def backward(self, loss, optimizer, optimizer_idx):
     out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
     assert out.signal == 0
-    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)
+    assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict)

     train_step_out = out.training_step_output_for_epoch_end
     assert len(train_step_out) == 1
@@ -231,7 +231,7 @@ def backward(self, loss, optimizer, optimizer_idx):
     out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
     assert out.signal == 0
-    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)
+    assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict)

     train_step_out = out.training_step_output_for_epoch_end
     assert len(train_step_out) == 1
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index f04061a23e096..19de7b0a985d4 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -38,6 +38,7 @@
 from pytorch_lightning.plugins import DDPSpawnPlugin
 from pytorch_lightning.profiler import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler
 from pytorch_lightning.trainer.states import TrainerFn
+from pytorch_lightning.utilities import DeviceType, DistributedType
 from pytorch_lightning.utilities.cloud_io import load as pl_load
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.seed import seed_everything
@@ -611,7 +612,7 @@ def test_benchmark_option(tmpdir):

 @pytest.mark.parametrize("ckpt_path", (None, "best", "specific"))
 @pytest.mark.parametrize("save_top_k", (-1, 0, 1, 2))
-@pytest.mark.parametrize("fn", ("validate", "test"))
+@pytest.mark.parametrize("fn", ("validate", "test", "predict"))
 def test_tested_checkpoint_path(tmpdir, ckpt_path, save_top_k, fn):

     class TestModel(BoringModel):
@@ -620,48 +621,55 @@ def validation_step(self, batch, batch_idx):
             self.log("foo", -batch_idx)
             return super().validation_step(batch, batch_idx)

+        def test_step(self, *args):
+            return self.validation_step(*args)
+
+        def predict_step(self, *args):
+            args = args[:-1]  # remove `dataloader_idx`
+            return self.validation_step(*args)
+
     model = TestModel()
+    model.test_epoch_end = None
     trainer = Trainer(
         max_epochs=2,
+        limit_val_batches=1,
+        limit_test_batches=1,
+        limit_predict_batches=1,
         progress_bar_refresh_rate=0,
         default_root_dir=tmpdir,
         callbacks=[ModelCheckpoint(monitor="foo", save_top_k=save_top_k)],
     )
     trainer.fit(model)

-    test_or_validate = getattr(trainer, fn)
+    trainer_fn = getattr(trainer, fn)
+    path_attr = f"{fn}{'d' if fn == 'validate' else 'ed'}_ckpt_path"
+    assert getattr(trainer, path_attr) is None
+
     if ckpt_path == "best":
         # ckpt_path is 'best', meaning we load the best weights
         if save_top_k == 0:
             with pytest.raises(MisconfigurationException, match=".*is not configured to save the best.*"):
-                test_or_validate(ckpt_path=ckpt_path)
+                trainer_fn(ckpt_path=ckpt_path)
         else:
-            test_or_validate(ckpt_path=ckpt_path)
-            if fn == "test":
-                assert trainer.tested_ckpt_path == trainer.checkpoint_callback.best_model_path
-            else:
-                assert trainer.validated_ckpt_path == trainer.checkpoint_callback.best_model_path
+            trainer_fn(ckpt_path=ckpt_path)
+            assert getattr(trainer, path_attr) == trainer.checkpoint_callback.best_model_path
     elif ckpt_path is None:
         # ckpt_path is None, meaning we don't load any checkpoints and
         # use the weights from the end of training
-        test_or_validate(ckpt_path=ckpt_path)
-        assert trainer.tested_ckpt_path is None
-        assert trainer.validated_ckpt_path is None
+        trainer_fn(ckpt_path=ckpt_path)
+        assert getattr(trainer, path_attr) is None
     else:
         # specific checkpoint, pick one from saved ones
         if save_top_k == 0:
             with pytest.raises(FileNotFoundError):
-                test_or_validate(ckpt_path="random.ckpt")
+                trainer_fn(ckpt_path="random.ckpt")
         else:
             ckpt_path = str(
                 list((Path(tmpdir) / f"lightning_logs/version_{trainer.logger.version}/checkpoints").iterdir()
                      )[0].absolute()
             )
-            test_or_validate(ckpt_path=ckpt_path)
-            if fn == "test":
-                assert trainer.tested_ckpt_path == ckpt_path
-            else:
-                assert trainer.validated_ckpt_path == ckpt_path
+            trainer_fn(ckpt_path=ckpt_path)
+            assert getattr(trainer, path_attr) == ckpt_path


 def test_disabled_training(tmpdir):
@@ -1042,14 +1050,8 @@ def test_gpu_choice(tmpdir):

 @pytest.mark.parametrize(
-    ["limit_val_batches"],
-    [
-        pytest.param(0.0),  # this should run no sanity checks
-        pytest.param(1),
-        pytest.param(1.0),
-        pytest.param(0.5),
-        pytest.param(5),
-    ],
+    "limit_val_batches",
+    [0.0, 1, 1.0, 0.5, 5],
 )
 def test_num_sanity_val_steps(tmpdir, limit_val_batches):
     """
@@ -1079,15 +1081,7 @@ def test_num_sanity_val_steps(tmpdir, limit_val_batches):
     )


-@pytest.mark.parametrize(
-    ["limit_val_batches"],
-    [
-        pytest.param(0.0),  # this should run no sanity checks
-        pytest.param(1),
-        pytest.param(1.0),
-        pytest.param(0.3),
-    ],
-)
+@pytest.mark.parametrize("limit_val_batches", [0.0, 1, 1.0, 0.3])
 def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
     """
     Test that `num_sanity_val_steps=-1` runs through all validation data once, and as many batches as
@@ -1118,207 +1112,67 @@
     [
         (
             dict(accelerator=None, gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
         ),
         (
             dict(accelerator="dp", gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
-        ),
-        (
-            dict(accelerator="dp", gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
         ),
         (
             dict(accelerator="ddp", gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
         ),
         (
             dict(accelerator="ddp", num_processes=2, gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=2,
-            ),
+            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(accelerator="ddp", num_nodes=2, gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
         ),
         (
             dict(accelerator="ddp_cpu", num_processes=2, gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=2,
-            ),
+            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(accelerator="ddp2", gpus=None),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
        ),
         (
             dict(accelerator=None, gpus=1),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=1,
-                on_gpu=True,
-                use_single_gpu=True,
-                num_processes=1,
-            ),
+            dict(_distrib_type=None, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(accelerator="dp", gpus=1),
-            dict(
-                use_dp=True,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=1,
-                on_gpu=True,
-                use_single_gpu=True,
-                num_processes=1,
-            ),
+            dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(accelerator="ddp", gpus=1),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=1,
-                on_gpu=True,
-                use_single_gpu=True,
-                num_processes=1,
-            ),
+            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(accelerator="ddp_cpu", num_processes=2, gpus=1),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=0,
-                on_gpu=False,
-                use_single_gpu=False,
-                num_processes=2,
-            ),
+            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(accelerator="ddp2", gpus=1),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=True,
-                num_gpus=1,
-                on_gpu=True,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(accelerator=None, gpus=2),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=2,
-                on_gpu=True,
-                use_single_gpu=False,
-                num_processes=2,
-            ),
+            dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2),
         ),
         (
             dict(accelerator="dp", gpus=2),
-            dict(
-                use_dp=True,
-                use_ddp=False,
-                use_ddp2=False,
-                num_gpus=2,
-                on_gpu=True,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(accelerator="ddp", gpus=2),
-            dict(
-                use_dp=False,
-                use_ddp=True,
-                use_ddp2=False,
-                num_gpus=2,
-                on_gpu=True,
-                use_single_gpu=False,
-                num_processes=2,
-            ),
+            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2),
         ),
         (
             dict(accelerator="ddp2", gpus=2),
-            dict(
-                use_dp=False,
-                use_ddp=False,
-                use_ddp2=True,
-                num_gpus=2,
-                on_gpu=True,
-                use_single_gpu=False,
-                num_processes=1,
-            ),
+            dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
     ],
 )
@@ -1327,7 +1181,7 @@ def test_trainer_config(trainer_kwargs, expected, monkeypatch):
     monkeypatch.setattr(torch.cuda, "is_available", lambda: True)
     monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"])
     trainer = Trainer(**trainer_kwargs)
-    assert len(expected) == 7
+    assert len(expected) == 4
     for k, v in expected.items():
         assert getattr(trainer, k) == v, f"Failed {k}: {v}"
@@ -1371,17 +1225,10 @@ def __init__(self, **kwargs):

 @pytest.mark.parametrize(
-    "trainer_params",
-    [
-        OmegaConf.create({
-            "max_epochs": 1,
-            "gpus": 1
-        }),
-        OmegaConf.create({
-            "max_epochs": 1,
-            "gpus": [0]
-        }),
-    ],
+    "trainer_params", [
+        OmegaConf.create(dict(max_epochs=1, gpus=1)),
+        OmegaConf.create(dict(max_epochs=1, gpus=[0])),
+    ]
 )
 @RunIf(min_gpus=1)
 def test_trainer_omegaconf(trainer_params):
@@ -2001,8 +1848,9 @@ def on_predict_start(self) -> None:
         assert not self.training


-@pytest.mark.parametrize(['accelerator', 'num_processes'],
-                         [(None, 1), pytest.param('ddp', 2, marks=RunIf(skip_windows=True))])
+@pytest.mark.parametrize(
+    'accelerator,num_processes', [(None, 1), pytest.param('ddp', 2, marks=RunIf(skip_windows=True))]
+)
 def test_model_in_correct_mode_during_stages(tmpdir, accelerator, num_processes):
     model = TrainerStagesModel()
     trainer = Trainer(default_root_dir=tmpdir, accelerator=accelerator, num_processes=num_processes, fast_dev_run=True)