From 51a52fc05568ffe3a261894871c171c0ef666af7 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Sat, 18 Sep 2021 01:10:06 +0530 Subject: [PATCH 01/25] init hook --- pytorch_lightning/core/lightning.py | 25 ++++++++++++++++++- .../loops/optimization/optimizer_loop.py | 8 +++--- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 00eeae5f13537..cac49323afbb3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -36,7 +36,12 @@ from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator -from pytorch_lightning.utilities import _TORCH_SHARDED_TENSOR_AVAILABLE, rank_zero_deprecation, rank_zero_warn +from pytorch_lightning.utilities import ( + _TORCH_SHARDED_TENSOR_AVAILABLE, + GradClipAlgorithmType, + rank_zero_deprecation, + rank_zero_warn, +) from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.distributed import distributed_available, sync_ddp @@ -1468,6 +1473,24 @@ def untoggle_optimizer(self, optimizer_idx: int): # save memory self._param_requires_grad_state = {} + def clip_gradients( + self, + optimizer: Optimizer, + optimizer_idx: int, + gradient_clip_val: Union[int, float] = 0.0, + gradient_clip_algorithm: str = "norm", + ): + gradient_clip_val = gradient_clip_val or self.trainer.gradient_clip_val + gradient_clip_algorithm = gradient_clip_algorithm or self.trainer.gradient_clip_algorithm + + if not isinstance(gradient_clip_val, (int, float)): + raise MisconfigurationException("`gradient_clip_val` should be either an int or a float") + + if gradient_clip_algorithm not in list(GradClipAlgorithmType): + raise MisconfigurationException(f"`gradient_clip_algorithm` should be in {list(GradClipAlgorithmType)}") + + self.trainer.accelerator.clip_gradients(optimizer, gradient_clip_val, gradient_clip_algorithm) + def optimizer_step( self, epoch: int = None, diff --git a/pytorch_lightning/loops/optimization/optimizer_loop.py b/pytorch_lightning/loops/optimization/optimizer_loop.py index 590160c645afc..f13a55d05373f 100644 --- a/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -240,7 +240,7 @@ def _backward( if not self.trainer.fit_loop._should_accumulate(): # track gradients - grad_norm_dict = self._track_and_norm_grad(optimizer=optimizer) + grad_norm_dict = self._track_and_norm_grad(optimizer=optimizer, opt_idx=opt_idx) if grad_norm_dict: self.trainer.lightning_module._current_fx_name = "on_after_backward" self.trainer.lightning_module.log_grad_norm(grad_norm_dict) @@ -470,7 +470,7 @@ def _training_step(self, split_batch: Any, batch_idx: int, opt_idx: int) -> Clos return result - def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer) -> Dict[str, float]: + def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer, opt_idx: int) -> Dict[str, float]: """Tracks gradient norms and clips the gradients of all parameters optimized by the current optimizer. 
Args: @@ -484,7 +484,5 @@ def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer) -> Dict[str, fl grad_norm_dict = grad_norm(self.trainer.lightning_module, self.trainer.track_grad_norm) # clip gradients - self.trainer.accelerator.clip_gradients( - optimizer, self.trainer.gradient_clip_val, gradient_clip_algorithm=self.trainer.gradient_clip_algorithm - ) + self.trainer.lightning_module.clip_gradients(optimizer, opt_idx) return grad_norm_dict From 3d91f7ae228a6bd50563aa068eaae614471cc0a6 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Sat, 18 Sep 2021 01:19:00 +0530 Subject: [PATCH 02/25] docs --- pytorch_lightning/core/lightning.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index cac49323afbb3..53f1d762b2757 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1480,6 +1480,23 @@ def clip_gradients( gradient_clip_val: Union[int, float] = 0.0, gradient_clip_algorithm: str = "norm", ): + """Perform Gradient Clipping for the optimizer parameters. Called before :meth:`optimizer_step`. + + Args: + optimizer: Current optimizer being used. ``None`` if using manual optimization. + optimizer_idx: Index of the current optimizer being used. ``None`` if using manual optimization. + gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=0`` disables gradient + clipping. + gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` + for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. + + Example:: + + # Perform gradient clipping on discriminator (optimizer_idx=1) + def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + if optimizer_idx == 1: + super().clip_gradients(optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm) + """ gradient_clip_val = gradient_clip_val or self.trainer.gradient_clip_val gradient_clip_algorithm = gradient_clip_algorithm or self.trainer.gradient_clip_algorithm From fa4c2445d47753affd5247f01878ddbf48296a26 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Thu, 23 Sep 2021 01:47:18 +0530 Subject: [PATCH 03/25] dep train args --- .../trainer/connectors/training_trick_connector.py | 5 +++-- pytorch_lightning/trainer/trainer.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/training_trick_connector.py b/pytorch_lightning/trainer/connectors/training_trick_connector.py index 84cfdb16846cb..1d4e3ed73228c 100644 --- a/pytorch_lightning/trainer/connectors/training_trick_connector.py +++ b/pytorch_lightning/trainer/connectors/training_trick_connector.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Union +from typing import Dict, Optional, Union -from pytorch_lightning.utilities import GradClipAlgorithmType +from pytorch_lightning.callbacks import GradientAccumulationScheduler +from pytorch_lightning.utilities import GradClipAlgorithmType, rank_zero_deprecation from pytorch_lightning.utilities.exceptions import MisconfigurationException diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 01a6c1143a69a..e93d083911a63 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -123,8 +123,13 @@ def __init__( checkpoint_callback: bool = True, callbacks: Optional[Union[List[Callback], Callback]] = None, default_root_dir: Optional[str] = None, +<<<<<<< HEAD gradient_clip_val: Union[int, float] = 0.0, gradient_clip_algorithm: str = "norm", +======= + gradient_clip_val: Optional[float] = None, + gradient_clip_algorithm: Optional[str] = None, +>>>>>>> 0a8c5a0d8 (dep train args) process_position: int = 0, num_nodes: int = 1, num_processes: int = 1, @@ -245,9 +250,17 @@ def __init__( gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=0`` disables gradient clipping. + .. deprecated:: v1.5 + ``gradient_clip_val`` has been deprecated in v1.5 and will be removed in v1.7. + Please configure gradient clipping directly in ``LightningModule.clip_gradients`` instead. + gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. + .. deprecated:: v1.5 + ``gradient_clip_algorithm`` has been deprecated in v1.5 and will be removed in v1.7. + Please configure gradient clipping directly in ``LightningModule.clip_gradients`` instead. + limit_train_batches: How much of training dataset to check (float = fraction, int = num_batches). limit_val_batches: How much of validation dataset to check (float = fraction, int = num_batches). 
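For reference, a minimal sketch of how the override point introduced in the patches above is meant to be used at this stage of the series (it mirrors the docstring example added in PATCH 02; the two-optimizer GAN setup and the 1e-3 clip value are illustrative assumptions only, and later patches in this series move the user-facing override to `configure_gradient_clipping`):

    import pytorch_lightning as pl

    class LitGAN(pl.LightningModule):
        # Illustrative module: assumes `configure_optimizers` returns two optimizers,
        # with index 0 = generator and index 1 = discriminator.

        def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val=0.0, gradient_clip_algorithm="norm"):
            # Clip only the discriminator's gradients; the generator is left untouched.
            # `super().clip_gradients` validates the arguments and delegates to
            # `trainer.accelerator.clip_gradients`, as added in PATCH 01.
            if optimizer_idx == 1:
                super().clip_gradients(
                    optimizer, optimizer_idx, gradient_clip_val=1e-3, gradient_clip_algorithm="value"
                )

With such an override in place, the `gradient_clip_val` / `gradient_clip_algorithm` Trainer arguments are no longer needed, which is what the updated `test_gradient_clipping_by_value` in PATCH 04 below exercises.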
From 33ead9bd6c4b09f9af6f77a2f66859955ab65889 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Thu, 23 Sep 2021 01:47:38 +0530 Subject: [PATCH 04/25] update tests --- tests/models/test_hooks.py | 1 + tests/trainer/test_trainer.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index de849f90a079f..21af2bada0baf 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -306,6 +306,7 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre dict(name="Callback.on_after_backward", args=(trainer, model)), dict(name="on_after_backward"), *(on_before_optimizer_step if using_plugin else []), + dict(name="clip_gradients", args=(ANY, 0)), dict( name="optimizer_step", args=(current_epoch, i, ANY, 0, ANY), diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index a1e6ce01107db..f42556d29d1c2 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1049,17 +1049,21 @@ def backward(*args, **kwargs): def test_gradient_clipping_by_value(tmpdir, precision): """Test gradient clipping by value.""" tutils.reset_seed() + grad_clip_val = 1e-10 - model = BoringModel() + class CustomBoringModel(BoringModel): + def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val=0.0, gradient_clip_algorithm="norm"): + super().clip_gradients( + optimizer, optimizer_idx, gradient_clip_val=grad_clip_val, gradient_clip_algorithm="value" + ) + + model = CustomBoringModel() - grad_clip_val = 1e-10 trainer = Trainer( max_steps=1, max_epochs=1, precision=precision, gpus=int(torch.cuda.is_available()), - gradient_clip_val=grad_clip_val, - gradient_clip_algorithm="value", default_root_dir=tmpdir, ) From 277039e55161e1c052e2e7eacb77fa54b4260cef Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Thu, 23 Sep 2021 01:51:49 +0530 Subject: [PATCH 05/25] doc --- pytorch_lightning/core/lightning.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 53f1d762b2757..0dc99129c5cdc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1493,9 +1493,11 @@ def clip_gradients( Example:: # Perform gradient clipping on discriminator (optimizer_idx=1) - def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val=0.0, gradient_clip_algorithm="norm"): if optimizer_idx == 1: - super().clip_gradients(optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm) + super().clip_gradients( + optimizer, optimizer_idx, gradient_clip_val=1e-7, gradient_clip_algorithm="value" + ) """ gradient_clip_val = gradient_clip_val or self.trainer.gradient_clip_val gradient_clip_algorithm = gradient_clip_algorithm or self.trainer.gradient_clip_algorithm From 55c0f655277af9429d0c37adb4a0d733190c3ec2 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Thu, 23 Sep 2021 02:06:44 +0530 Subject: [PATCH 06/25] doc --- pytorch_lightning/trainer/trainer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index e93d083911a63..9618f4cc26577 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -248,14 +248,15 @@ def __init__( gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node 
gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=0`` disables gradient - clipping. + clipping. Default (None) which sets it's value to 0.0 internally. .. deprecated:: v1.5 ``gradient_clip_val`` has been deprecated in v1.5 and will be removed in v1.7. Please configure gradient clipping directly in ``LightningModule.clip_gradients`` instead. gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` - for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. + for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. Default (None) which sets + it's value to "norm" internally. .. deprecated:: v1.5 ``gradient_clip_algorithm`` has been deprecated in v1.5 and will be removed in v1.7. From 3a74d95dc53b6c681240f445492a1230a5642a7d Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 16:06:10 +0530 Subject: [PATCH 07/25] .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6ad0671fb3306..7b1247433e7b4 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,4 @@ cifar-10-batches-py *.pt # ctags tags +.tags From 2e7d66582cc9990554e2d24169711712df6bf279 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 16:28:00 +0530 Subject: [PATCH 08/25] not dep --- pytorch_lightning/core/lightning.py | 63 +++++++++++++++++++--------- pytorch_lightning/trainer/trainer.py | 18 +------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 0dc99129c5cdc..c89c7ed796b9c 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1473,42 +1473,67 @@ def untoggle_optimizer(self, optimizer_idx: int): # save memory self._param_requires_grad_state = {} - def clip_gradients( + def clip_gradients(self, optimizer: Optimizer, gradient_clip_val: Union[int, float], gradient_clip_algorithm: str): + """Handles Gradient Clipping internally. + + Args: + optimizer: Current optimizer being used. ``None`` if using manual optimization. + gradient_clip_val: The value at which to clip gradients. + gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` + for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. + + Note: + Do not override this method. If you want to customize gradient clipping, consider + using :meth:`configure_gradient_clipping`. + """ + gradient_clip_val = gradient_clip_val or self.trainer.gradient_clip_val + gradient_clip_algorithm = gradient_clip_algorithm or self.trainer.gradient_clip_algorithm + + # gradient clipping + if not isinstance(gradient_clip_val, (int, float)): + raise TypeError(f"`gradient_clip_val` should be an int or a float. Got {gradient_clip_val}.") + + if not GradClipAlgorithmType.supported_type(gradient_clip_algorithm.lower()): + raise MisconfigurationException( + f"`gradient_clip_algorithm` {gradient_clip_algorithm} is invalid. " + f"Allowed algorithms: {GradClipAlgorithmType.supported_types()}." + ) + + self.trainer.accelerator.clip_gradients(optimizer, gradient_clip_val, gradient_clip_algorithm) + + def configure_gradient_clipping( self, optimizer: Optimizer, optimizer_idx: int, - gradient_clip_val: Union[int, float] = 0.0, - gradient_clip_algorithm: str = "norm", + gradient_clip_val: Union[int, float], + gradient_clip_algorithm: str, ): """Perform Gradient Clipping for the optimizer parameters. 
Called before :meth:`optimizer_step`. Args: optimizer: Current optimizer being used. ``None`` if using manual optimization. optimizer_idx: Index of the current optimizer being used. ``None`` if using manual optimization. - gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=0`` disables gradient - clipping. + gradient_clip_val: The value at which to clip gradients. By default value passed in Trainer + will be available here. gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` - for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. + for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. By default value + passed in Trainer will be available here. Example:: # Perform gradient clipping on discriminator (optimizer_idx=1) - def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val=0.0, gradient_clip_algorithm="norm"): + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): if optimizer_idx == 1: - super().clip_gradients( - optimizer, optimizer_idx, gradient_clip_val=1e-7, gradient_clip_algorithm="value" + # lightning will handle the gradient clipping + self.clip_gradients( + optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm ) + else: + # implement your own logic """ - gradient_clip_val = gradient_clip_val or self.trainer.gradient_clip_val - gradient_clip_algorithm = gradient_clip_algorithm or self.trainer.gradient_clip_algorithm - - if not isinstance(gradient_clip_val, (int, float)): - raise MisconfigurationException("`gradient_clip_val` should be either an int or a float") - - if gradient_clip_algorithm not in list(GradClipAlgorithmType): - raise MisconfigurationException(f"`gradient_clip_algorithm` should be in {list(GradClipAlgorithmType)}") - - self.trainer.accelerator.clip_gradients(optimizer, gradient_clip_val, gradient_clip_algorithm) + self.clip_gradients( + optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm + ) def optimizer_step( self, diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 9618f4cc26577..01a6c1143a69a 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -123,13 +123,8 @@ def __init__( checkpoint_callback: bool = True, callbacks: Optional[Union[List[Callback], Callback]] = None, default_root_dir: Optional[str] = None, -<<<<<<< HEAD gradient_clip_val: Union[int, float] = 0.0, gradient_clip_algorithm: str = "norm", -======= - gradient_clip_val: Optional[float] = None, - gradient_clip_algorithm: Optional[str] = None, ->>>>>>> 0a8c5a0d8 (dep train args) process_position: int = 0, num_nodes: int = 1, num_processes: int = 1, @@ -248,19 +243,10 @@ def __init__( gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=0`` disables gradient - clipping. Default (None) which sets it's value to 0.0 internally. - - .. deprecated:: v1.5 - ``gradient_clip_val`` has been deprecated in v1.5 and will be removed in v1.7. - Please configure gradient clipping directly in ``LightningModule.clip_gradients`` instead. + clipping. gradient_clip_algorithm: The gradient clipping algorithm to use. 
Pass ``gradient_clip_algorithm="value"`` - for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. Default (None) which sets - it's value to "norm" internally. - - .. deprecated:: v1.5 - ``gradient_clip_algorithm`` has been deprecated in v1.5 and will be removed in v1.7. - Please configure gradient clipping directly in ``LightningModule.clip_gradients`` instead. + for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. limit_train_batches: How much of training dataset to check (float = fraction, int = num_batches). From b9b393305938c28f8ce780982d4115211cf0fece Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 16:59:41 +0530 Subject: [PATCH 09/25] add trainer args --- pytorch_lightning/loops/optimization/optimizer_loop.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/loops/optimization/optimizer_loop.py b/pytorch_lightning/loops/optimization/optimizer_loop.py index f13a55d05373f..eabb716a3fe2b 100644 --- a/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -484,5 +484,7 @@ def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer, opt_idx: int) - grad_norm_dict = grad_norm(self.trainer.lightning_module, self.trainer.track_grad_norm) # clip gradients - self.trainer.lightning_module.clip_gradients(optimizer, opt_idx) + self.trainer.lightning_module.configure_gradient_clipping( + optimizer, opt_idx, self.trainer.gradient_clip_val, self.trainer.gradient_clip_algorithm + ) return grad_norm_dict From 6ec8cb0780de62ccd746f9a9b1ab5c8cdf73dea1 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 17:15:25 +0530 Subject: [PATCH 10/25] add & update tests --- tests/core/test_lightning_module.py | 31 +++++++++++++++++++++++++++++ tests/trainer/test_trainer.py | 12 ++++------- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index 8b787e0f57fcb..9c4365ea93b6c 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -335,3 +335,34 @@ def test_sharded_tensor_state_dict(tmpdir, single_process_pg): assert torch.allclose( m_1.sharded_tensor.local_shards()[0].tensor, m_0.sharded_tensor.local_shards()[0].tensor ), "Expect the shards to be same after `m_1` loading `m_0`'s state dict" + + +def test_lightning_module_configure_gradient_clipping(tmpdir): + class TestModel(BoringModel): + + has_validated_gradients = False + custom_gradient_clip_val = 1e-2 + + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + assert gradient_clip_val == self.trainer.gradient_clip_val + assert gradient_clip_algorithm == self.trainer.gradient_clip_algorithm + + for pg in optimizer.param_groups: + for p in pg["params"]: + p.grad[p.grad > self.custom_gradient_clip_val] = self.custom_gradient_clip_val + p.grad[p.grad <= 0] = 0 + + def on_before_optimizer_step(self, optimizer, optimizer_idx): + for pg in optimizer.param_groups: + for p in pg["params"]: + if p.grad is not None and p.grad.abs().sum() > 0: + self.has_validated_gradients = True + assert p.grad.min() >= 0 + assert p.grad.max() <= self.custom_gradient_clip_val + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, max_epochs=1, limit_train_batches=2, limit_val_batches=0, gradient_clip_val=1e-4 + ) + trainer.fit(model) + assert model.has_validated_gradients diff --git a/tests/trainer/test_trainer.py 
b/tests/trainer/test_trainer.py index f42556d29d1c2..a1e6ce01107db 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1049,21 +1049,17 @@ def backward(*args, **kwargs): def test_gradient_clipping_by_value(tmpdir, precision): """Test gradient clipping by value.""" tutils.reset_seed() - grad_clip_val = 1e-10 - - class CustomBoringModel(BoringModel): - def clip_gradients(self, optimizer, optimizer_idx, gradient_clip_val=0.0, gradient_clip_algorithm="norm"): - super().clip_gradients( - optimizer, optimizer_idx, gradient_clip_val=grad_clip_val, gradient_clip_algorithm="value" - ) - model = CustomBoringModel() + model = BoringModel() + grad_clip_val = 1e-10 trainer = Trainer( max_steps=1, max_epochs=1, precision=precision, gpus=int(torch.cuda.is_available()), + gradient_clip_val=grad_clip_val, + gradient_clip_algorithm="value", default_root_dir=tmpdir, ) From deed69bb2ba2776d735c42451ac7b3fcb59d347a Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 17:43:21 +0530 Subject: [PATCH 11/25] fix tests --- pytorch_lightning/core/lightning.py | 7 ++----- .../loops/optimization/optimizer_loop.py | 5 ++++- tests/models/test_hooks.py | 11 ++++++++++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c89c7ed796b9c..cfa335da10c4e 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1486,9 +1486,6 @@ def clip_gradients(self, optimizer: Optimizer, gradient_clip_val: Union[int, flo Do not override this method. If you want to customize gradient clipping, consider using :meth:`configure_gradient_clipping`. """ - gradient_clip_val = gradient_clip_val or self.trainer.gradient_clip_val - gradient_clip_algorithm = gradient_clip_algorithm or self.trainer.gradient_clip_algorithm - # gradient clipping if not isinstance(gradient_clip_val, (int, float)): raise TypeError(f"`gradient_clip_val` should be an int or a float. 
Got {gradient_clip_val}.") @@ -1521,7 +1518,7 @@ def configure_gradient_clipping( Example:: - # Perform gradient clipping on discriminator (optimizer_idx=1) + # Perform gradient clipping on discriminator (optimizer_idx=1) in GAN def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): if optimizer_idx == 1: # lightning will handle the gradient clipping @@ -1529,7 +1526,7 @@ def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_va optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm ) else: - # implement your own logic + # implement your own custom logic to clip gradients for generator (optimizer_idx=0) """ self.clip_gradients( optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm diff --git a/pytorch_lightning/loops/optimization/optimizer_loop.py b/pytorch_lightning/loops/optimization/optimizer_loop.py index eabb716a3fe2b..56497eee86dd2 100644 --- a/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -485,6 +485,9 @@ def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer, opt_idx: int) - # clip gradients self.trainer.lightning_module.configure_gradient_clipping( - optimizer, opt_idx, self.trainer.gradient_clip_val, self.trainer.gradient_clip_algorithm + optimizer, + opt_idx, + gradient_clip_val=self.trainer.gradient_clip_val, + gradient_clip_algorithm=self.trainer.gradient_clip_algorithm, ) return grad_norm_dict diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 21af2bada0baf..1b74c6be222a2 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -306,7 +306,16 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre dict(name="Callback.on_after_backward", args=(trainer, model)), dict(name="on_after_backward"), *(on_before_optimizer_step if using_plugin else []), - dict(name="clip_gradients", args=(ANY, 0)), + dict( + name="clip_gradients", + args=(ANY,), + kwargs=dict(gradient_clip_val=0.0, gradient_clip_algorithm="norm"), + ), + dict( + name="configure_gradient_clipping", + args=(ANY, 0), + kwargs=dict(gradient_clip_val=0.0, gradient_clip_algorithm="norm"), + ), dict( name="optimizer_step", args=(current_epoch, i, ANY, 0, ANY), From fb9d34e274eb5fe0d32bc3d4bde4b555d81a12ab Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 17:47:24 +0530 Subject: [PATCH 12/25] pre-commit --- .../trainer/connectors/training_trick_connector.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/training_trick_connector.py b/pytorch_lightning/trainer/connectors/training_trick_connector.py index 1d4e3ed73228c..84cfdb16846cb 100644 --- a/pytorch_lightning/trainer/connectors/training_trick_connector.py +++ b/pytorch_lightning/trainer/connectors/training_trick_connector.py @@ -11,10 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, Optional, Union +from typing import Union -from pytorch_lightning.callbacks import GradientAccumulationScheduler -from pytorch_lightning.utilities import GradClipAlgorithmType, rank_zero_deprecation +from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException From adcfcf21cbf10bd612f7366d362c4eea5c8e9db2 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 28 Sep 2021 17:55:28 +0530 Subject: [PATCH 13/25] docs --- pytorch_lightning/core/lightning.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index cfa335da10c4e..9108bf7b3a94f 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1477,7 +1477,7 @@ def clip_gradients(self, optimizer: Optimizer, gradient_clip_val: Union[int, flo """Handles Gradient Clipping internally. Args: - optimizer: Current optimizer being used. ``None`` if using manual optimization. + optimizer: Current optimizer being used. gradient_clip_val: The value at which to clip gradients. gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. @@ -1508,12 +1508,11 @@ def configure_gradient_clipping( """Perform Gradient Clipping for the optimizer parameters. Called before :meth:`optimizer_step`. Args: - optimizer: Current optimizer being used. ``None`` if using manual optimization. - optimizer_idx: Index of the current optimizer being used. ``None`` if using manual optimization. + optimizer: Current optimizer being used. + optimizer_idx: Index of the current optimizer being used. gradient_clip_val: The value at which to clip gradients. By default value passed in Trainer will be available here. - gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` - for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. By default value + gradient_clip_algorithm: The gradient clipping algorithm to use. By default value passed in Trainer will be available here. Example:: From 5cbc044204f143c312e4c306b9fb5bcd83143612 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 29 Sep 2021 14:26:16 +0530 Subject: [PATCH 14/25] add docs --- docs/source/common/lightning_module.rst | 7 ++++ docs/source/common/optimizers.rst | 46 +++++++++++++++++++++++++ pytorch_lightning/core/lightning.py | 22 ++++++------ pytorch_lightning/trainer/trainer.py | 2 +- 4 files changed, 66 insertions(+), 11 deletions(-) diff --git a/docs/source/common/lightning_module.rst b/docs/source/common/lightning_module.rst index ba2694286739e..6ee0ebe7b1110 100644 --- a/docs/source/common/lightning_module.rst +++ b/docs/source/common/lightning_module.rst @@ -1195,6 +1195,7 @@ for more information. on_after_backward() on_before_optimizer_step() + configure_gradient_clipping() optimizer_step() on_train_batch_end() @@ -1452,6 +1453,12 @@ on_before_optimizer_step .. automethod:: pytorch_lightning.core.hooks.ModelHooks.on_before_optimizer_step :noindex: +configure_gradient_clipping +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
automethod:: pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping + :noindex: + optimizer_step ~~~~~~~~~~~~~~ diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst index 39a583d9c94d8..b5ec0ea64952c 100644 --- a/docs/source/common/optimizers.rst +++ b/docs/source/common/optimizers.rst @@ -516,3 +516,49 @@ to perform a step, Lightning won't be able to support accelerators and precision ): optimizer = optimizer.optimizer optimizer.step(closure=optimizer_closure) + +----- + +Configure Gradient Clipping +--------------------------- +To configure custom gradient clipping, consider overriding +the :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping` function. +By default it will get arguments :attr:`~pytorch_lightning.trainer.Trainer.gradient_clip_val` and +:attr:`~pytorch_lightning.trainer.Trainer.gradient_clip_algorithm` from ``Trainer`` in their respective +fields and lightning will handle gradient clipping on its own. In case you want to set different values +for your arguments of your choice and let Lightning handle the gradient clipping, you can use the inbuilt +:meth:`~pytorch_lightning.core.lightning.LightningModule.clip_gradients` function and pass the arguments +along with your optimizer. + +.. note:: + Make sure to not override :meth:`~pytorch_lightning.core.lightning.LightningModule.clip_gradients` + function. If you want to customize gradient clipping, consider using + :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping` function. + +For example, here we will apply gradient clipping only to optimizer A. + +.. testcode:: python + + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + if optimizer_idx == 0: + # Lightning will handle the gradient clipping + self.clip_gradients( + optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm + ) + +Here we configure gradient clipping differently for optimizer B. + +.. testcode:: python + + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + if optimizer_idx == 0: + # Lightning will handle the gradient clipping + self.clip_gradients( + optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm + ) + elif optimizer_idx == 1: + custom_gradient_clip_val = 1e-2 + for pg in optimizer.param_groups: + for p in pg["params"]: + p.grad[p.grad > custom_gradient_clip_val] = custom_gradient_clip_val + p.grad[p.grad <= 0] = 0 diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 9108bf7b3a94f..69d7c70924639 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1462,7 +1462,7 @@ def untoggle_optimizer(self, optimizer_idx: int): optimizer_idx: Current optimizer idx in the training loop Note: - Only called when using multiple optimizers + Only called when using multiple_optimizers """ for opt_idx, opt in enumerate(self.optimizers(use_pl_optimizer=False)): if optimizer_idx != opt_idx: @@ -1474,17 +1474,17 @@ def untoggle_optimizer(self, optimizer_idx: int): self._param_requires_grad_state = {} def clip_gradients(self, optimizer: Optimizer, gradient_clip_val: Union[int, float], gradient_clip_algorithm: str): - """Handles Gradient Clipping internally. + """Handles gradient clipping internally. + + Note: + Do not override this method. 
If you want to customize gradient clipping, consider + using :meth:`configure_gradient_clipping`. Args: optimizer: Current optimizer being used. gradient_clip_val: The value at which to clip gradients. gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` - for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. - - Note: - Do not override this method. If you want to customize gradient clipping, consider - using :meth:`configure_gradient_clipping`. + to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. """ # gradient clipping if not isinstance(gradient_clip_val, (int, float)): @@ -1505,7 +1505,7 @@ def configure_gradient_clipping( gradient_clip_val: Union[int, float], gradient_clip_algorithm: str, ): - """Perform Gradient Clipping for the optimizer parameters. Called before :meth:`optimizer_step`. + """Perform gradient clipping for the optimizer parameters. Called before :meth:`optimizer_step`. Args: optimizer: Current optimizer being used. @@ -1520,9 +1520,11 @@ def configure_gradient_clipping( # Perform gradient clipping on discriminator (optimizer_idx=1) in GAN def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): if optimizer_idx == 1: - # lightning will handle the gradient clipping + # Lightning will handle the gradient clipping self.clip_gradients( - optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm + optimizer, + gradient_clip_val=gradient_clip_val, + gradient_clip_algorithm=gradient_clip_algorithm ) else: # implement your own custom logic to clip gradients for generator (optimizer_idx=0) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 01a6c1143a69a..af6096a190fe1 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -246,7 +246,7 @@ def __init__( clipping. gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` - for clip_by_value, and ``gradient_clip_algorithm="norm"`` for clip_by_norm. + to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. limit_train_batches: How much of training dataset to check (float = fraction, int = num_batches). From bb8e23bb60edcd96a0d838e98d6c42ddfae0b8fd Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Thu, 30 Sep 2021 16:54:01 +0530 Subject: [PATCH 15/25] add exception --- pytorch_lightning/core/lightning.py | 38 +++++++++++++++--- .../trainer/configuration_validator.py | 2 +- .../connectors/training_trick_connector.py | 18 ++++++--- pytorch_lightning/trainer/trainer.py | 11 ++--- tests/core/test_lightning_module.py | 40 +++++++++++++++++++ tests/models/test_hooks.py | 4 +- 6 files changed, 93 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 69d7c70924639..f4e3263f734bc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1473,7 +1473,12 @@ def untoggle_optimizer(self, optimizer_idx: int): # save memory self._param_requires_grad_state = {} - def clip_gradients(self, optimizer: Optimizer, gradient_clip_val: Union[int, float], gradient_clip_algorithm: str): + def clip_gradients( + self, + optimizer: Optimizer, + gradient_clip_val: Optional[Union[int, float]] = None, + gradient_clip_algorithm: Optional[Union[str, GradClipAlgorithmType]] = None, + ): """Handles gradient clipping internally. 
Note: @@ -1486,24 +1491,45 @@ def clip_gradients(self, optimizer: Optimizer, gradient_clip_val: Union[int, flo gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. """ - # gradient clipping + if gradient_clip_val is None: + gradient_clip_val = self.trainer.gradient_clip_val or 0.0 + elif self.trainer.gradient_clip_val is not None and self.trainer.gradient_clip_val != gradient_clip_val: + raise MisconfigurationException( + "You have set `Trainer(gradient_clip_val)` and have passed" + " `gradient_clip_val` inside `clip_gradients`. Please use only one of them." + ) + + if gradient_clip_algorithm is None: + gradient_clip_algorithm = self.trainer.gradient_clip_algorithm or "norm" + else: + gradient_clip_algorithm = gradient_clip_algorithm.lower() + if ( + self.trainer.gradient_clip_algorithm is not None + and self.trainer.gradient_clip_algorithm != gradient_clip_algorithm + ): + raise MisconfigurationException( + "You have set `Trainer(gradient_clip_algorithm)` and have passed" + " `gradient_clip_algorithm` inside `clip_gradients`. Please use only one of them." + ) + if not isinstance(gradient_clip_val, (int, float)): raise TypeError(f"`gradient_clip_val` should be an int or a float. Got {gradient_clip_val}.") if not GradClipAlgorithmType.supported_type(gradient_clip_algorithm.lower()): raise MisconfigurationException( - f"`gradient_clip_algorithm` {gradient_clip_algorithm} is invalid. " - f"Allowed algorithms: {GradClipAlgorithmType.supported_types()}." + f"`gradient_clip_algorithm` {gradient_clip_algorithm} is invalid." + f" Allowed algorithms: {GradClipAlgorithmType.supported_types()}." ) + gradient_clip_algorithm = GradClipAlgorithmType(gradient_clip_algorithm) self.trainer.accelerator.clip_gradients(optimizer, gradient_clip_val, gradient_clip_algorithm) def configure_gradient_clipping( self, optimizer: Optimizer, optimizer_idx: int, - gradient_clip_val: Union[int, float], - gradient_clip_algorithm: str, + gradient_clip_val: Optional[Union[int, float]] = None, + gradient_clip_algorithm: Optional[str] = None, ): """Perform gradient clipping for the optimizer parameters. Called before :meth:`optimizer_step`. diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py index ee5be467b86bf..7249c27473d8e 100644 --- a/pytorch_lightning/trainer/configuration_validator.py +++ b/pytorch_lightning/trainer/configuration_validator.py @@ -184,7 +184,7 @@ def __verify_dp_batch_transfer_support(self, model: "pl.LightningModule") -> Non def __verify_manual_optimization_support(self, model: "pl.LightningModule") -> None: if model.automatic_optimization: return - if self.trainer.gradient_clip_val > 0: + if self.trainer.gradient_clip_val is not None and self.trainer.gradient_clip_val > 0: raise MisconfigurationException( "Automatic gradient clipping is not supported for manual optimization." f" Remove `Trainer(gradient_clip_val={self.trainer.gradient_clip_val})`" diff --git a/pytorch_lightning/trainer/connectors/training_trick_connector.py b/pytorch_lightning/trainer/connectors/training_trick_connector.py index 84cfdb16846cb..2bcd4bd9c5750 100644 --- a/pytorch_lightning/trainer/connectors/training_trick_connector.py +++ b/pytorch_lightning/trainer/connectors/training_trick_connector.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -from typing import Union +from typing import Optional, Union from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -23,8 +23,8 @@ def __init__(self, trainer): def on_trainer_init( self, - gradient_clip_val: Union[int, float], - gradient_clip_algorithm: str, + gradient_clip_val: Optional[Union[int, float]], + gradient_clip_algorithm: Optional[str], track_grad_norm: Union[int, float, str], terminate_on_nan: bool, ): @@ -32,10 +32,12 @@ def on_trainer_init( raise TypeError(f"`terminate_on_nan` should be a bool, got {terminate_on_nan}.") # gradient clipping - if not isinstance(gradient_clip_val, (int, float)): + if gradient_clip_val is not None and not isinstance(gradient_clip_val, (int, float)): raise TypeError(f"`gradient_clip_val` should be an int or a float. Got {gradient_clip_val}.") - if not GradClipAlgorithmType.supported_type(gradient_clip_algorithm.lower()): + if gradient_clip_algorithm is not None and not GradClipAlgorithmType.supported_type( + gradient_clip_algorithm.lower() + ): raise MisconfigurationException( f"`gradient_clip_algorithm` {gradient_clip_algorithm} is invalid. " f"Allowed algorithms: {GradClipAlgorithmType.supported_types()}." @@ -49,5 +51,9 @@ def on_trainer_init( self.trainer.terminate_on_nan = terminate_on_nan self.trainer.gradient_clip_val = gradient_clip_val - self.trainer.gradient_clip_algorithm = GradClipAlgorithmType(gradient_clip_algorithm.lower()) + self.trainer.gradient_clip_algorithm = ( + GradClipAlgorithmType(gradient_clip_algorithm.lower()) + if gradient_clip_algorithm is not None + else gradient_clip_algorithm + ) self.trainer.track_grad_norm = float(track_grad_norm) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index af6096a190fe1..44a7f689189de 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -123,8 +123,8 @@ def __init__( checkpoint_callback: bool = True, callbacks: Optional[Union[List[Callback], Callback]] = None, default_root_dir: Optional[str] = None, - gradient_clip_val: Union[int, float] = 0.0, - gradient_clip_algorithm: str = "norm", + gradient_clip_val: Optional[Union[int, float]] = None, + gradient_clip_algorithm: Optional[str] = None, process_position: int = 0, num_nodes: int = 1, num_processes: int = 1, @@ -242,11 +242,12 @@ def __init__( gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node - gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=0`` disables gradient - clipping. + gradient_clip_val: The value at which to clip gradients. Passing ``gradient_clip_val=None`` disables + gradient clipping. gradient_clip_algorithm: The gradient clipping algorithm to use. Pass ``gradient_clip_algorithm="value"`` - to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. + to clip by value, and ``gradient_clip_algorithm="norm"`` to clip by norm. By default it will + be set to ``"norm"``. limit_train_batches: How much of training dataset to check (float = fraction, int = num_batches). 
diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index 9c4365ea93b6c..692044d91b894 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -22,6 +22,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.utilities import _TORCH_SHARDED_TENSOR_AVAILABLE +from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel from tests.helpers.runif import RunIf @@ -338,6 +339,8 @@ def test_sharded_tensor_state_dict(tmpdir, single_process_pg): def test_lightning_module_configure_gradient_clipping(tmpdir): + """Test custom gradient clipping inside `configure_gradient_clipping` hook.""" + class TestModel(BoringModel): has_validated_gradients = False @@ -366,3 +369,40 @@ def on_before_optimizer_step(self, optimizer, optimizer_idx): ) trainer.fit(model) assert model.has_validated_gradients + + +def test_lightning_module_configure_gradient_clipping_different_argument_values(tmpdir): + """Test that setting gradient clipping arguments in `Trainer` and cusotmizing gradient clipping inside + `configure_gradient_clipping` with different values raises an exception.""" + + class TestModel(BoringModel): + custom_gradient_clip_val = 1e-2 + + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + self.clip_gradients(optimizer, gradient_clip_val=self.custom_gradient_clip_val) + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, max_epochs=1, limit_train_batches=2, limit_val_batches=0, gradient_clip_val=1e-4 + ) + with pytest.raises(MisconfigurationException, match=r".*have set `Trainer\(gradient_clip_val\)` and have passed.*"): + trainer.fit(model) + + class TestModel(BoringModel): + custom_gradient_clip_algorithm = "value" + + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + self.clip_gradients(optimizer, gradient_clip_algorithm=self.custom_gradient_clip_algorithm) + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=2, + limit_val_batches=0, + gradient_clip_algorithm="norm", + ) + with pytest.raises( + MisconfigurationException, match=r".*have set `Trainer\(gradient_clip_algorithm\)` and have passed.*" + ): + trainer.fit(model) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 1b74c6be222a2..6a78e632e9d39 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -309,12 +309,12 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre dict( name="clip_gradients", args=(ANY,), - kwargs=dict(gradient_clip_val=0.0, gradient_clip_algorithm="norm"), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), ), dict( name="configure_gradient_clipping", args=(ANY, 0), - kwargs=dict(gradient_clip_val=0.0, gradient_clip_algorithm="norm"), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), ), dict( name="optimizer_step", From a3d297f8f2b1b49bb269c040a27829b7d3a70c5c Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Fri, 1 Oct 2021 15:18:48 +0530 Subject: [PATCH 16/25] code review --- docs/source/common/optimizers.rst | 32 ++++++++++++++--------------- pytorch_lightning/core/lightning.py | 4 ++-- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst index 
b5ec0ea64952c..0405b9a4365af 100644 --- a/docs/source/common/optimizers.rst +++ b/docs/source/common/optimizers.rst @@ -69,7 +69,7 @@ Here is a minimal example of manual optimization. Gradient accumulation --------------------- You can accumulate gradients over batches similarly to -:attr:`~pytorch_lightning.trainer.Trainer.accumulate_grad_batches` of automatic optimization. +:attr:`~pytorch_lightning.trainer.trainer.Trainer.accumulate_grad_batches` of automatic optimization. To perform gradient accumulation with one optimizer, you can do as such. .. testcode:: python @@ -519,23 +519,23 @@ to perform a step, Lightning won't be able to support accelerators and precision ----- -Configure Gradient Clipping +Configure gradient clipping --------------------------- To configure custom gradient clipping, consider overriding -the :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping` function. -By default it will get arguments :attr:`~pytorch_lightning.trainer.Trainer.gradient_clip_val` and -:attr:`~pytorch_lightning.trainer.Trainer.gradient_clip_algorithm` from ``Trainer`` in their respective -fields and lightning will handle gradient clipping on its own. In case you want to set different values -for your arguments of your choice and let Lightning handle the gradient clipping, you can use the inbuilt -:meth:`~pytorch_lightning.core.lightning.LightningModule.clip_gradients` function and pass the arguments -along with your optimizer. +the :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping` method. +Attributes :attr:`~pytorch_lightning.trainer.trainer.Trainer.gradient_clip_val` and +:attr:`~pytorch_lightning.trainer.trainer.Trainer.gradient_clip_algorithm` will be passed in the respective +arguments here and Lightning will handle gradient clipping for you. In case you want to set +different values for your arguments of your choice and let Lightning handle the gradient clipping, you can +use the inbuilt :meth:`~pytorch_lightning.core.lightning.LightningModule.clip_gradients` method and pass +the arguments along with your optimizer. .. note:: Make sure to not override :meth:`~pytorch_lightning.core.lightning.LightningModule.clip_gradients` - function. If you want to customize gradient clipping, consider using - :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping` function. + method. If you want to customize gradient clipping, consider using + :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_gradient_clipping` method. -For example, here we will apply gradient clipping only to optimizer A. +For example, here we will apply gradient clipping only to the gradients associated with optimizer A. .. testcode:: python @@ -557,8 +557,6 @@ Here we configure gradient clipping differently for optimizer B. 
optimizer, gradient_clip_val=gradient_clip_val, gradient_clip_algorithm=gradient_clip_algorithm ) elif optimizer_idx == 1: - custom_gradient_clip_val = 1e-2 - for pg in optimizer.param_groups: - for p in pg["params"]: - p.grad[p.grad > custom_gradient_clip_val] = custom_gradient_clip_val - p.grad[p.grad <= 0] = 0 + self.clip_gradients( + optimizer, gradient_clip_val=gradient_clip_val * 2, gradient_clip_algorithm=gradient_clip_algorithm + ) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index f4e3263f734bc..6003322dcec56 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1483,7 +1483,7 @@ def clip_gradients( Note: Do not override this method. If you want to customize gradient clipping, consider - using :meth:`configure_gradient_clipping`. + using :meth:`configure_gradient_clipping` method. Args: optimizer: Current optimizer being used. @@ -1543,7 +1543,7 @@ def configure_gradient_clipping( Example:: - # Perform gradient clipping on discriminator (optimizer_idx=1) in GAN + # Perform gradient clipping on gradients associated with discriminator (optimizer_idx=1) in GAN def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): if optimizer_idx == 1: # Lightning will handle the gradient clipping From 36ff5b5eaa76d314defdd95ba834da5bde159477 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 4 Oct 2021 15:03:52 +0530 Subject: [PATCH 17/25] deepspeed --- .../plugins/training_type/deepspeed.py | 16 ++++++++- tests/plugins/test_deepspeed_plugin.py | 34 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 1785b3644e2c7..6eeed6185ca50 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -33,8 +33,10 @@ from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import log, rank_zero_info, rank_zero_only +from pytorch_lightning.utilities.enums import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE +from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import _PATH, LRSchedulerTypeTuple from pytorch_lightning.utilities.warnings import rank_zero_warn, WarningCache @@ -346,6 +348,18 @@ def setup_distributed(self): self._format_config() self._config_initialized = True + def setup(self) -> None: + # check that `configure_gradient_clipping` hook isn't overriden since deepspeed handles + # gradient clipping internally + if is_overridden("configure_gradient_clipping", self.lightning_module): + raise MisconfigurationException( + "Deepspeed handles gradient clipping internally. Consider setting" + " `gradient_clip_val` and `gradient_clip_algorithm` inside `Trainer`." 
+ ) + + if self.trainer.gradient_clip_algorithm == GradClipAlgorithmType.VALUE: + raise MisconfigurationException("Deepspeed does not support clipping gradients by value.") + def _init_deepspeed_distributed(self) -> None: if platform.system() != "Windows": # do not set env variables on windows, allow deepspeed to control setup @@ -566,7 +580,7 @@ def _format_batch_size_and_grad_accum_config(self): batch_size = self._auto_select_batch_size() self.config["train_micro_batch_size_per_gpu"] = batch_size if "gradient_clipping" not in self.config: - self.config["gradient_clipping"] = self.lightning_module.trainer.gradient_clip_val + self.config["gradient_clipping"] = self.lightning_module.trainer.gradient_clip_val or 0.0 def _auto_select_batch_size(self): # train_micro_batch_size_per_gpu is used for throughput logging purposes diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 96e132d12c1c8..a3a19979f732a 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -961,3 +961,37 @@ def configure_optimizers(self): else: # assert called once at init and once during training assert mock_step.call_count == 1 + (max_epoch * limit_train_batches) + + +@RunIf(min_gpus=1, deepspeed=True, special=True) +def test_deepspeed_configure_gradient_clipping(tmpdir): + """Test to ensure that an exception is raised when `LightningModule.configure_gradient_clipping` is overridden + in case of deepspeed.""" + + class TestModel(BoringModel): + def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): + if optimizer_idx == 0: + self.clip_gradients(optimizer, gradient_clip_val, gradient_clip_algorithm) + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, + gpus=1, + plugins="deepspeed", + ) + with pytest.raises(MisconfigurationException, match="handles gradient clipping internally"): + trainer.fit(model) + + +@RunIf(min_gpus=1, deepspeed=True, special=True) +def test_deepspeed_gradient_clip_by_value(tmpdir): + """Test to ensure that an exception is raised when using `gradient_clip_algorithm='value'`.""" + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + gpus=1, + plugins="deepspeed", + gradient_clip_algorithm="value", + ) + with pytest.raises(MisconfigurationException, match="does not support clipping gradients by value"): + trainer.fit(model) From 5471130dc2ee8b1779ad7b45f7d0e17438a7d45d Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 5 Oct 2021 15:34:55 +0530 Subject: [PATCH 18/25] update tests --- .../plugins/training_type/deepspeed.py | 2 +- tests/models/test_hooks.py | 25 ++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 6eeed6185ca50..8552e53caf6b9 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -357,7 +357,7 @@ def setup(self) -> None: " `gradient_clip_val` and `gradient_clip_algorithm` inside `Trainer`." 
) - if self.trainer.gradient_clip_algorithm == GradClipAlgorithmType.VALUE: + if self.lightning_module.trainer.gradient_clip_algorithm == GradClipAlgorithmType.VALUE: raise MisconfigurationException("Deepspeed does not support clipping gradients by value.") def _init_deepspeed_distributed(self) -> None: diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 6a78e632e9d39..6c3f9a0e2f324 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -306,15 +306,22 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre dict(name="Callback.on_after_backward", args=(trainer, model)), dict(name="on_after_backward"), *(on_before_optimizer_step if using_plugin else []), - dict( - name="clip_gradients", - args=(ANY,), - kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), - ), - dict( - name="configure_gradient_clipping", - args=(ANY, 0), - kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + # DeepSpeed handles gradient clipping internally + *( + [ + dict( + name="clip_gradients", + args=(ANY,), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + ), + dict( + name="configure_gradient_clipping", + args=(ANY, 0), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + ), + ] + if using_deepspeed + else [] ), dict( name="optimizer_step", From 217cc059d9efb69f36c51016ad04c6b0e3138ce3 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 5 Oct 2021 16:04:07 +0530 Subject: [PATCH 19/25] not --- tests/models/test_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 6c3f9a0e2f324..466a62f6e0834 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -320,7 +320,7 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), ), ] - if using_deepspeed + if not using_deepspeed else [] ), dict( From ae7a79e4cf4fa2804f6b321cf574010608b37e46 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 5 Oct 2021 18:06:59 +0530 Subject: [PATCH 20/25] try fix --- tests/models/test_hooks.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 466a62f6e0834..a072a655c5608 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -281,6 +281,18 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre dict(name="Callback.on_before_optimizer_step", args=(trainer, model, ANY, 0)), dict(name="on_before_optimizer_step", args=(ANY, 0)), ] + configure_gradient_clipping = [ + dict( + name="clip_gradients", + args=(ANY,), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + ), + dict( + name="configure_gradient_clipping", + args=(ANY, 0), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + ), + ] for i in range(batches): out.extend( [ @@ -305,24 +317,8 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre *([dict(name="backward", args=(ANY, ANY, 0))] if not using_deepspeed else []), dict(name="Callback.on_after_backward", args=(trainer, model)), dict(name="on_after_backward"), + *configure_gradient_clipping, *(on_before_optimizer_step if using_plugin else []), - # DeepSpeed handles gradient clipping internally - *( - [ - dict( - name="clip_gradients", - args=(ANY,), - kwargs=dict(gradient_clip_val=None, 
gradient_clip_algorithm=None), - ), - dict( - name="configure_gradient_clipping", - args=(ANY, 0), - kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), - ), - ] - if not using_deepspeed - else [] - ), dict( name="optimizer_step", args=(current_epoch, i, ANY, 0, ANY), From 79f696af75fc0dd8d27d061c3431508c6a34d82c Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Tue, 12 Oct 2021 17:19:12 +0530 Subject: [PATCH 21/25] Apply suggestions from code review --- tests/plugins/test_deepspeed_plugin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 4261d10f18936..4d873d072c6aa 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -962,6 +962,7 @@ def configure_optimizers(self): assert mock_step.call_count == 1 + (max_epoch * limit_train_batches) +@RunIf(min_gpus=1, deepspeed=True, special=True) def test_deepspeed_configure_gradient_clipping(tmpdir): """Test to ensure that an exception is raised when `LightningModule.configure_gradient_clipping` is overridden in case of deepspeed.""" @@ -995,6 +996,7 @@ def test_deepspeed_gradient_clip_by_value(tmpdir): trainer.fit(model) +@RunIf(min_gpus=1, deepspeed=True, special=True) def test_different_accumulate_grad_batches_fails(tmpdir): model = BoringModel() trainer = Trainer(default_root_dir=tmpdir, accumulate_grad_batches={1: 2}, gpus=1, plugins="deepspeed") From 1101d2858c4892ecb2ef17eb0c1a1f900d331d54 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 13 Oct 2021 18:05:51 +0530 Subject: [PATCH 22/25] update deepspeed --- pytorch_lightning/core/lightning.py | 4 +++ .../loops/optimization/optimizer_loop.py | 13 ++++---- .../plugins/training_type/deepspeed.py | 24 +++++++-------- tests/models/test_hooks.py | 30 +++++++++++-------- tests/plugins/test_deepspeed_plugin.py | 6 ++-- 5 files changed, 44 insertions(+), 33 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 184ccd8418ac7..995d4f7ace3cf 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1536,6 +1536,10 @@ def configure_gradient_clipping( ): """Perform gradient clipping for the optimizer parameters. Called before :meth:`optimizer_step`. + Note: + This hook won't be called when using deepspeed since it handles gradient clipping internally. + Consider setting ``gradient_clip_val`` and ``gradient_clip_algorithm`` inside ``Trainer``." + Args: optimizer: Current optimizer being used. optimizer_idx: Index of the current optimizer being used. 
diff --git a/pytorch_lightning/loops/optimization/optimizer_loop.py b/pytorch_lightning/loops/optimization/optimizer_loop.py index 56497eee86dd2..50e58d393d9be 100644 --- a/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -484,10 +484,11 @@ def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer, opt_idx: int) - grad_norm_dict = grad_norm(self.trainer.lightning_module, self.trainer.track_grad_norm) # clip gradients - self.trainer.lightning_module.configure_gradient_clipping( - optimizer, - opt_idx, - gradient_clip_val=self.trainer.gradient_clip_val, - gradient_clip_algorithm=self.trainer.gradient_clip_algorithm, - ) + if not self.trainer.accelerator_connector.use_deepspeed: + self.trainer.lightning_module.configure_gradient_clipping( + optimizer, + opt_idx, + gradient_clip_val=self.trainer.gradient_clip_val, + gradient_clip_algorithm=self.trainer.gradient_clip_algorithm, + ) return grad_norm_dict diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 3620e522acc18..e2e8c316f48d1 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -349,18 +349,6 @@ def setup_distributed(self): self._format_config() self._config_initialized = True - def setup(self) -> None: - # check that `configure_gradient_clipping` hook isn't overriden since deepspeed handles - # gradient clipping internally - if is_overridden("configure_gradient_clipping", self.lightning_module): - raise MisconfigurationException( - "Deepspeed handles gradient clipping internally. Consider setting" - " `gradient_clip_val` and `gradient_clip_algorithm` inside `Trainer`." - ) - - if self.lightning_module.trainer.gradient_clip_algorithm == GradClipAlgorithmType.VALUE: - raise MisconfigurationException("Deepspeed does not support clipping gradients by value.") - def _init_deepspeed_distributed(self) -> None: if platform.system() != "Windows": # do not set env variables on windows, allow deepspeed to control setup @@ -390,6 +378,18 @@ def pre_dispatch(self): self.barrier() def init_deepspeed(self): + # check that `configure_gradient_clipping` hook isn't overriden since deepspeed handles + # gradient clipping internally + if is_overridden("configure_gradient_clipping", self.lightning_module): + rank_zero_warn( + "Since deepspeed handles gradient clipping internally, this hook will" + " be ignored. Consider setting `gradient_clip_val` and `gradient_clip_algorithm`" + " inside `Trainer`." 
+ ) + + if self.lightning_module.trainer.gradient_clip_algorithm == GradClipAlgorithmType.VALUE: + raise MisconfigurationException("Deepspeed does not support clipping gradients by value.") + accumulation_scheduler = self.lightning_module.trainer.accumulation_scheduler if accumulation_scheduler.epochs != [0]: diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index bf4940ba01f16..9e62a89f3a21c 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -281,18 +281,24 @@ def _auto_train_batch(trainer, model, batches, device=torch.device("cpu"), curre dict(name="Callback.on_before_optimizer_step", args=(trainer, model, ANY, 0)), dict(name="on_before_optimizer_step", args=(ANY, 0)), ] - configure_gradient_clipping = [ - dict( - name="clip_gradients", - args=(ANY,), - kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), - ), - dict( - name="configure_gradient_clipping", - args=(ANY, 0), - kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), - ), - ] + + # deepspeed handles gradient clipping internally + configure_gradient_clipping = ( + [] + if using_deepspeed + else [ + dict( + name="clip_gradients", + args=(ANY,), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + ), + dict( + name="configure_gradient_clipping", + args=(ANY, 0), + kwargs=dict(gradient_clip_val=None, gradient_clip_algorithm=None), + ), + ] + ) for i in range(batches): out.extend( [ diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 4d873d072c6aa..ca87ffa262788 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -964,8 +964,8 @@ def configure_optimizers(self): @RunIf(min_gpus=1, deepspeed=True, special=True) def test_deepspeed_configure_gradient_clipping(tmpdir): - """Test to ensure that an exception is raised when `LightningModule.configure_gradient_clipping` is overridden - in case of deepspeed.""" + """Test to ensure that a warning is raised when `LightningModule.configure_gradient_clipping` is overridden in + case of deepspeed.""" class TestModel(BoringModel): def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_val, gradient_clip_algorithm): @@ -978,7 +978,7 @@ def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_va gpus=1, plugins="deepspeed", ) - with pytest.raises(MisconfigurationException, match="handles gradient clipping internally"): + with pytest.warns(UserWarning, match="handles gradient clipping internally"): trainer.fit(model) From 87b48f006fc9bb1820fc534fe6deb3d5220513ca Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 13 Oct 2021 18:06:12 +0530 Subject: [PATCH 23/25] disable some tests --- .azure-pipelines/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index a0959541d0a7c..68ac20ac42148 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -68,7 +68,7 @@ jobs: displayName: 'Get legacy checkpoints' - bash: | - python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 + python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests/models/ -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 displayName: 'Testing: standard' - bash: | From 2bc551fca401b95ba501bf26c85be6fb1b5f5466 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: 
Wed, 13 Oct 2021 18:35:17 +0530 Subject: [PATCH 24/25] disable some tests --- .azure-pipelines/gpu-tests.yml | 2 +- tests/plugins/test_deepspeed_plugin.py | 1 + tests/special_tests.sh | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 68ac20ac42148..d422ff011e696 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -68,7 +68,7 @@ jobs: displayName: 'Get legacy checkpoints' - bash: | - python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests/models/ -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 + python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests/models/test_hooks.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 displayName: 'Testing: standard' - bash: | diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index ca87ffa262788..53b7bdbf7f0f0 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -977,6 +977,7 @@ def configure_gradient_clipping(self, optimizer, optimizer_idx, gradient_clip_va default_root_dir=tmpdir, gpus=1, plugins="deepspeed", + fast_dev_run=True, ) with pytest.warns(UserWarning, match="handles gradient clipping internally"): trainer.fit(model) diff --git a/tests/special_tests.sh b/tests/special_tests.sh index 1346cea295d54..ee27e4bbb37d0 100755 --- a/tests/special_tests.sh +++ b/tests/special_tests.sh @@ -20,7 +20,7 @@ export PL_RUNNING_SPECIAL_TESTS=1 defaults='-m coverage run --source pytorch_lightning --append -m pytest --durations=0 --capture=no --disable-warnings' # find tests marked as `@RunIf(special=True)` -grep_output=$(grep --recursive --line-number --word-regexp 'tests' 'benchmarks' --regexp 'special=True') +grep_output=$(grep --recursive --line-number --word-regexp 'tests/plugins' 'benchmarks' --regexp 'special=True') # file paths files=$(echo "$grep_output" | cut -f1 -d:) files_arr=($files) From 84003ec9e34eb55e500e1c488fb364ec7a2ac465 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 13 Oct 2021 18:48:07 +0530 Subject: [PATCH 25/25] enable all tests --- .azure-pipelines/gpu-tests.yml | 2 +- tests/special_tests.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index d422ff011e696..a0959541d0a7c 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -68,7 +68,7 @@ jobs: displayName: 'Get legacy checkpoints' - bash: | - python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests/models/test_hooks.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 + python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 displayName: 'Testing: standard' - bash: | diff --git a/tests/special_tests.sh b/tests/special_tests.sh index ee27e4bbb37d0..1346cea295d54 100755 --- a/tests/special_tests.sh +++ b/tests/special_tests.sh @@ -20,7 +20,7 @@ export PL_RUNNING_SPECIAL_TESTS=1 defaults='-m coverage run --source pytorch_lightning --append -m pytest --durations=0 --capture=no --disable-warnings' # find tests marked as `@RunIf(special=True)` -grep_output=$(grep --recursive --line-number --word-regexp 'tests/plugins' 'benchmarks' --regexp 'special=True') +grep_output=$(grep --recursive --line-number --word-regexp 
'tests' 'benchmarks' --regexp 'special=True') # file paths files=$(echo "$grep_output" | cut -f1 -d:) files_arr=($files)
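
The clipping threshold can also be supplied directly in a user-provided deepspeed config, since the plugin only fills in `gradient_clipping` from `Trainer.gradient_clip_val` when that key is absent (see the `_format_batch_size_and_grad_accum_config` change earlier in this series). A minimal sketch, assuming `DeepSpeedPlugin` accepts a config dict; all other config keys are omitted:

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import DeepSpeedPlugin

    # A pre-set "gradient_clipping" key is left untouched by the plugin,
    # so this behaves like Trainer(plugins="deepspeed", gradient_clip_val=0.5).
    trainer = Trainer(gpus=1, plugins=DeepSpeedPlugin(config={"gradient_clipping": 0.5}))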