1/n Move precision plugin into strategy - update reference (Lightning-AI#10570)

* 1/n move precision plugin into strategy - update reference * update precision plugin reference in tpu_spawn * add missing reference in error message * add back removed license line * update references in tests * update reference in trainer * update return annotation for precision_plugin property on TTP * simplify access to precision plugin reference in sharded plug * add changelog * remove precision property from ttp and add deprecation message * fix make doc and update precision reference * simplify a reference to precision accidentally overridden Adrian's change, now add it back * Update CHANGELOG.md add Adrian's change back * Update accelerator precision Add Adrian's change back * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add none check for precision plugin just to be safe * Update ipu.py * update precision_plugin param deprecation message * Update accelerator.py * Remove deprecated warning Tests will fail after 9940 Co-authored-by: Adrian Wälchli <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
neptune-ai · Nov 23, 2021 · e913725 · e913725
1 parent 69ae318
commit e913725
Show file tree

Hide file tree

Showing 24 changed files with 142 additions and 59 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -37,6 +37,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Raise `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/PyTorchLightning/pytorch-lightning/issues/10520))
 
 
+- Moved ownership of the `PrecisionPlugin` into `TrainingTypePlugin` and updated all references ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
+
+
 -
 
 
@@ -50,7 +53,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505))
 
 
--
+- Deprecated the `precision_plugin` constructor argument from `Accelerator` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
 
 
 -
@@ -139,6 +142,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favour of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/PyTorchLightning/pytorch-lightning/pull/10481))
 
 
+- Removed the `precision_plugin` attribute from `Accelerator` in favor of its equivalent attribute `precision_plugin` in the `TrainingTypePlugin` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
 
 ### Fixed
 

diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py
@@ -25,6 +25,7 @@
 from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin
 from pytorch_lightning.plugins.training_type import DataParallelPlugin, TrainingTypePlugin
 from pytorch_lightning.trainer.states import TrainerFn
+from pytorch_lightning.utilities import rank_zero_deprecation
 from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device
 from pytorch_lightning.utilities.enums import AMPType, LightningEnum
 from pytorch_lightning.utilities.types import STEP_OUTPUT
@@ -44,15 +45,23 @@ class Accelerator:
     One to handle differences from the training routine and one to handle different precisions.
     """
 
-    def __init__(self, precision_plugin: PrecisionPlugin, training_type_plugin: TrainingTypePlugin) -> None:
+    def __init__(self, precision_plugin: Optional[PrecisionPlugin], training_type_plugin: TrainingTypePlugin) -> None:
         """
         Args:
             precision_plugin: the plugin to handle precision-specific parts
+
+                .. deprecated::
+                    The ``precision_plugin`` parameter has been deprecated and will be removed soon.
+                    Pass the precision plugin as a parameter to the ``TrainingTypePlugin`` instead.
+
             training_type_plugin: the plugin to handle different training routines
         """
-        self.precision_plugin = precision_plugin
+
         self.training_type_plugin = training_type_plugin
 
+        if precision_plugin is not None:
+            self.training_type_plugin._precision_plugin = precision_plugin
+
         self.optimizers: List = []
         self.lr_schedulers: List = []
         self.optimizer_frequencies: List = []
@@ -84,7 +93,7 @@ def pre_dispatch(self, trainer: "pl.Trainer") -> None:
         if self.training_type_plugin.setup_optimizers_in_pre_dispatch:
             self.setup_optimizers(trainer)
 
-        self.precision_plugin.pre_dispatch()
+        self.training_type_plugin.precision_plugin.pre_dispatch()
 
     def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None:
         """Moves the state of the optimizers to the GPU if needed."""
@@ -96,12 +105,12 @@ def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None:
     def dispatch(self, trainer: "pl.Trainer") -> None:
         """Hook to do something before the training/evaluation/prediction starts."""
         self.training_type_plugin.dispatch(trainer)
-        self.precision_plugin.dispatch(trainer)
+        self.training_type_plugin.precision_plugin.dispatch(trainer)
 
     def post_dispatch(self, trainer: "pl.Trainer") -> None:
         """Hook to do something after the training/evaluation/prediction starts."""
         self.training_type_plugin.post_dispatch(trainer)
-        self.precision_plugin.post_dispatch()
+        self.training_type_plugin.precision_plugin.post_dispatch()
 
     @property
     def model(self) -> Module:
@@ -159,31 +168,31 @@ def training_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> STEP_OUTPUT:
 
         See :meth:`~pytorch_lightning.core.lightning.LightningModule.training_step` for more details
         """
-        with self.precision_plugin.train_step_context():
+        with self.training_type_plugin.precision_plugin.train_step_context():
             return self.training_type_plugin.training_step(*step_kwargs.values())
 
     def validation_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> Optional[STEP_OUTPUT]:
         """The actual validation step.
 
         See :meth:`~pytorch_lightning.core.lightning.LightningModule.validation_step` for more details
         """
-        with self.precision_plugin.val_step_context():
+        with self.training_type_plugin.precision_plugin.val_step_context():
             return self.training_type_plugin.validation_step(*step_kwargs.values())
 
     def test_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> Optional[STEP_OUTPUT]:
         """The actual test step.
 
         See :meth:`~pytorch_lightning.core.lightning.LightningModule.test_step` for more details
         """
-        with self.precision_plugin.test_step_context():
+        with self.training_type_plugin.precision_plugin.test_step_context():
             return self.training_type_plugin.test_step(*step_kwargs.values())
 
     def predict_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> STEP_OUTPUT:
         """The actual predict step.
 
         See :meth:`~pytorch_lightning.core.lightning.LightningModule.predict_step` for more details
         """
-        with self.precision_plugin.predict_step_context():
+        with self.training_type_plugin.precision_plugin.predict_step_context():
             return self.training_type_plugin.predict_step(*step_kwargs.values())
 
     def backward(self, closure_loss: Tensor, *args: Any, **kwargs: Any) -> Tensor:
@@ -193,11 +202,11 @@ def backward(self, closure_loss: Tensor, *args: Any, **kwargs: Any) -> Tensor:
             closure_loss: a tensor holding the loss value to backpropagate
         """
         self.training_type_plugin.pre_backward(closure_loss)
-        closure_loss = self.precision_plugin.pre_backward(self.lightning_module, closure_loss)
+        closure_loss = self.training_type_plugin.precision_plugin.pre_backward(self.lightning_module, closure_loss)
 
-        self.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)
+        self.training_type_plugin.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)
 
-        closure_loss = self.precision_plugin.post_backward(self.lightning_module, closure_loss)
+        closure_loss = self.training_type_plugin.precision_plugin.post_backward(self.lightning_module, closure_loss)
         self.training_type_plugin.post_backward(closure_loss)
 
         return closure_loss
@@ -208,7 +217,7 @@ def optimizer_step(
         opt_idx: int,
         closure: Callable[[], Any],
         model: Optional[Union["pl.LightningModule", Module]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> None:
         """performs the actual optimizer step.
 
@@ -220,7 +229,7 @@ def optimizer_step(
             **kwargs: Any extra arguments to ``optimizer.step``
         """
         model = model or self.lightning_module
-        self.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, **kwargs)
+        self.training_type_plugin.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, **kwargs)
 
     def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None:
         """Zeros all model parameter's gradients."""
@@ -248,26 +257,38 @@ def setup_training_type_plugin(self) -> None:
 
     def setup_precision_plugin(self) -> None:
         """Attaches the precision plugin to the accelerator."""
-        model, optimizers, schedulers = self.precision_plugin.connect(self.model, self.optimizers, self.lr_schedulers)
+        model, optimizers, schedulers = self.training_type_plugin.precision_plugin.connect(
+            self.model, self.optimizers, self.lr_schedulers
+        )
         self.model = model
         self.optimizers = optimizers
         self.lr_schedulers = schedulers
 
     @property
     def amp_backend(self) -> Optional[LightningEnum]:
-        if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin):
+        if isinstance(self.training_type_plugin.precision_plugin, ApexMixedPrecisionPlugin):
             return AMPType.APEX
-        if isinstance(self.precision_plugin, NativeMixedPrecisionPlugin):
+        if isinstance(self.training_type_plugin.precision_plugin, NativeMixedPrecisionPlugin):
             return AMPType.NATIVE
         return None
 
     @property
     def precision(self) -> Union[str, int]:
-        return self.precision_plugin.precision
+        """The type of precision being used with this accelerator.
+
+        .. deprecated::
+            This property has been deprecated and will be removed soon.
+            Use ``training_type_plugin.precision_plugin.precision`` instead.
+        """
+        rank_zero_deprecation(
+            f"`{self.__class__.__name__}.precision` has been deprecated and will be removed soon"
+            f" Use `training_type_plugin.precision_plugin.precision` instead."
+        )
+        return self.training_type_plugin.precision_plugin.precision
 
     @property
     def scaler(self) -> Optional["GradScaler"]:
-        return getattr(self.precision_plugin, "scaler", None)
+        return getattr(self.training_type_plugin.precision_plugin, "scaler", None)
 
     def optimizer_state(self, optimizer: Optimizer) -> Dict[str, Tensor]:
         """Returns state of an optimizer.

diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py
@@ -36,10 +36,11 @@ def setup(self, trainer: "pl.Trainer") -> None:
             ValueError:
                 If the precision or training type plugin are unsupported.
         """
-        if not isinstance(self.precision_plugin, TPUPrecisionPlugin):
+        if not isinstance(self.training_type_plugin.precision_plugin, TPUPrecisionPlugin):
             # this configuration should have been avoided in the accelerator connector
             raise ValueError(
-                f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`, found: {self.precision_plugin}."
+                f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`,"
+                f" found: {self.training_type_plugin.precision_plugin}."
             )
         if not isinstance(self.training_type_plugin, (SingleTPUPlugin, TPUSpawnPlugin)):
             raise ValueError(

diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py
@@ -108,7 +108,7 @@ def __init__(
         )
         self._accelerator = self._accelerator_connector.accelerator
         self._strategy = self._accelerator.training_type_plugin
-        self._precision_plugin = self._accelerator.precision_plugin
+        self._precision_plugin = self._strategy.precision_plugin
         self._models_setup: int = 0
 
         # wrap the run method so we can inject setup logic or spawn processes for the user

diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py
@@ -36,6 +36,7 @@
 from pytorch_lightning.overrides.distributed import prepare_for_backward
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
+from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.trainer.states import TrainerFn
 from pytorch_lightning.utilities import (
@@ -86,6 +87,7 @@ def __init__(
         parallel_devices: Optional[List[torch.device]] = None,
         cluster_environment: Optional[ClusterEnvironment] = None,
         checkpoint_io: Optional[CheckpointIO] = None,
+        precision_plugin: Optional[PrecisionPlugin] = None,
         ddp_comm_state: Optional[object] = None,
         ddp_comm_hook: Optional[callable] = None,
         ddp_comm_wrapper: Optional[callable] = None,
@@ -96,6 +98,7 @@ def __init__(
             parallel_devices=parallel_devices,
             cluster_environment=cluster_environment,
             checkpoint_io=checkpoint_io,
+            precision_plugin=precision_plugin,
         )
         self.interactive_ddp_procs = []
         self._num_nodes = 1

diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -29,6 +29,7 @@
 from pytorch_lightning.overrides.distributed import prepare_for_backward
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
+from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.trainer.states import TrainerFn
 from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_8, rank_zero_warn
@@ -65,6 +66,7 @@ def __init__(
         parallel_devices: Optional[List[torch.device]] = None,
         cluster_environment: Optional[ClusterEnvironment] = None,
         checkpoint_io: Optional[CheckpointIO] = None,
+        precision_plugin: Optional[PrecisionPlugin] = None,
         ddp_comm_state: Optional[object] = None,
         ddp_comm_hook: Optional[callable] = None,
         ddp_comm_wrapper: Optional[callable] = None,
@@ -74,6 +76,7 @@ def __init__(
             parallel_devices=parallel_devices,
             cluster_environment=cluster_environment,
             checkpoint_io=checkpoint_io,
+            precision_plugin=precision_plugin,
         )
         self._num_nodes = 1
         self.sync_batchnorm = False

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -30,6 +30,7 @@
 from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
+from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
 from pytorch_lightning.trainer.optimizers import _get_default_scheduler_config
 from pytorch_lightning.trainer.states import TrainerFn
@@ -129,6 +130,7 @@ def __init__(
         synchronize_checkpoint_boundary: bool = False,
         load_full_weights: bool = False,
         partition_module: bool = True,
+        precision_plugin: Optional[PrecisionPlugin] = None,
     ) -> None:
         """Provides capabilities to run training using the DeepSpeed library, with training optimizations for large
         billion parameter models. `For more information: https://pytorch-
@@ -273,6 +275,7 @@ def __init__(
         super().__init__(
             parallel_devices=parallel_devices,
             cluster_environment=cluster_environment,
+            precision_plugin=precision_plugin,
         )
 
         self.config = self._load_config(config)
@@ -331,7 +334,7 @@ def __init__(
 
     @property
     def precision(self) -> Union[str, int]:
-        return self._precision or self.lightning_module.trainer.precision
+        return self._precision or self.precision_plugin.precision
 
     @property
     def amp_level(self) -> Optional[str]:
@@ -456,8 +459,7 @@ def init_deepspeed(self):
                 "DeepSpeed currently does not support different `accumulate_grad_batches` at different epochs."
             )
 
-        precision = self.lightning_module.trainer.accelerator.precision
-        model = LightningDeepSpeedModule(pl_module=self.model, precision=precision)
+        model = LightningDeepSpeedModule(pl_module=self.model, precision=self.precision)
 
         if self.zero_stage_3 and self.partition_module:
             # Ensure the entire model has been moved to the appropriate device

diff --git a/pytorch_lightning/plugins/training_type/dp.py b/pytorch_lightning/plugins/training_type/dp.py
@@ -18,6 +18,7 @@
 
 from pytorch_lightning.overrides.data_parallel import LightningParallelModule
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
+from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.utilities.apply_func import apply_to_collection
 from pytorch_lightning.utilities.enums import _StrategyType
@@ -35,8 +36,14 @@ def __init__(
         self,
         parallel_devices: Optional[List[torch.device]] = None,
         checkpoint_io: Optional[CheckpointIO] = None,
+        precision_plugin: Optional[PrecisionPlugin] = None,
     ):
-        super().__init__(parallel_devices=parallel_devices, cluster_environment=None, checkpoint_io=checkpoint_io)
+        super().__init__(
+            parallel_devices=parallel_devices,
+            cluster_environment=None,
+            checkpoint_io=checkpoint_io,
+            precision_plugin=precision_plugin,
+        )
 
     @property
     def global_rank(self) -> int:

diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py
@@ -18,6 +18,7 @@
 
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
+from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
 from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE
 from pytorch_lightning.utilities.enums import _StrategyType
@@ -46,6 +47,7 @@ def __init__(
         parallel_devices: Optional[List[torch.device]] = None,
         cluster_environment: Optional[ClusterEnvironment] = None,
         checkpoint_io: Optional[CheckpointIO] = None,
+        precision_plugin: Optional[PrecisionPlugin] = None,
     ):
         """Plugin for Fully Sharded Data Parallel provided by FairScale.
 
@@ -97,6 +99,7 @@ def __init__(
             parallel_devices=parallel_devices,
             cluster_environment=cluster_environment,
             checkpoint_io=checkpoint_io,
+            precision_plugin=precision_plugin,
         )
         self.cpu_offload = cpu_offload
         self.move_grads_to_cpu = move_grads_to_cpu
@@ -124,7 +127,7 @@ def setup_distributed(self) -> None:
 
     @contextlib.contextmanager
     def model_sharded_context(self) -> Generator:
-        precision = self.lightning_module.trainer.precision
+        precision = self.precision_plugin.precision
 
         def wrap_policy(*args, **kwargs):
             return default_auto_wrap_policy(*args, **kwargs, min_num_params=self.min_num_params)

diff --git a/pytorch_lightning/plugins/training_type/horovod.py b/pytorch_lightning/plugins/training_type/horovod.py
@@ -21,6 +21,7 @@
 
 from pytorch_lightning.core.optimizer import LightningOptimizer
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
+from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.utilities import _HOROVOD_AVAILABLE
 from pytorch_lightning.utilities.distributed import distributed_available
@@ -41,8 +42,14 @@ def __init__(
         self,
         parallel_devices: Optional[List[torch.device]] = None,
         checkpoint_io: Optional[CheckpointIO] = None,
+        precision_plugin: Optional[PrecisionPlugin] = None,
     ):
-        super().__init__(parallel_devices=parallel_devices, cluster_environment=None, checkpoint_io=checkpoint_io)
+        super().__init__(
+            parallel_devices=parallel_devices,
+            cluster_environment=None,
+            checkpoint_io=checkpoint_io,
+            precision_plugin=precision_plugin,
+        )
         rank_zero_only.rank = self.global_rank
 
     @property