From 013cb7b526901fafbd0fa1a079911ba419709404 Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 5 Jul 2021 11:43:30 +0200
Subject: [PATCH 1/6] add fix

---
 pytorch_lightning/loops/batch/training_batch_loop.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pytorch_lightning/loops/batch/training_batch_loop.py b/pytorch_lightning/loops/batch/training_batch_loop.py
index ef6c885930914..4155a7f90efec 100644
--- a/pytorch_lightning/loops/batch/training_batch_loop.py
+++ b/pytorch_lightning/loops/batch/training_batch_loop.py
@@ -178,6 +178,10 @@ def _run_optimization(
             with self.block_ddp_sync_behaviour():
                 closure()
 
+            if self.trainer.lightning_module.automatic_optimization and len(self.trainer.optimizers) > 1:
+                # revert back to previous state
+                self.trainer.lightning_module.untoggle_optimizer(opt_idx)
+
         # ------------------------------
         # BACKWARD PASS
         # ------------------------------

From e6aff65dc7b732707dd0c31e43d3b5e4f06f351e Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 5 Jul 2021 12:02:36 +0200
Subject: [PATCH 2/6] toggle test

---
 tests/core/test_lightning_module.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py
index f05305c785c7e..09c7b3a7115c5 100644
--- a/tests/core/test_lightning_module.py
+++ b/tests/core/test_lightning_module.py
@@ -197,7 +197,7 @@ def optimizer_step(
         max_epochs=1,
         default_root_dir=tmpdir,
         limit_train_batches=8,
-        accumulate_grad_batches=1,
+        accumulate_grad_batches=2,
         limit_val_batches=0,
     )
     trainer.fit(model)
@@ -331,7 +331,7 @@ def configure_optimizers(self):
         max_epochs=1,
         default_root_dir=tmpdir,
         limit_train_batches=8,
-        accumulate_grad_batches=1,
+        accumulate_grad_batches=2,
     )
     trainer.fit(model)
 

From ab8336b12b2523799ef64b09860fcbe2c911a8e8 Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 5 Jul 2021 12:02:44 +0200
Subject: [PATCH 3/6] re-structure

---
 .../loops/batch/training_batch_loop.py | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/pytorch_lightning/loops/batch/training_batch_loop.py b/pytorch_lightning/loops/batch/training_batch_loop.py
index 4155a7f90efec..4f0c454af4885 100644
--- a/pytorch_lightning/loops/batch/training_batch_loop.py
+++ b/pytorch_lightning/loops/batch/training_batch_loop.py
@@ -130,6 +130,7 @@ def advance(self, batch, batch_idx, dataloader_idx):
 
         if self.trainer.lightning_module.automatic_optimization:
             for opt_idx, optimizer in self.get_active_optimizers(batch_idx):
+                # assert self.trainer.lightning_module.layer.weight.requires_grad
                 result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
                 if result:
                     self.batch_outputs[opt_idx].append(result.training_step_output)
@@ -178,10 +179,6 @@ def _run_optimization(
             with self.block_ddp_sync_behaviour():
                 closure()
 
-            if self.trainer.lightning_module.automatic_optimization and len(self.trainer.optimizers) > 1:
-                # revert back to previous state
-                self.trainer.lightning_module.untoggle_optimizer(opt_idx)
-
         # ------------------------------
         # BACKWARD PASS
         # ------------------------------
@@ -189,20 +186,16 @@ def _run_optimization(
         else:
             if self.trainer.lightning_module.automatic_optimization:
                 self._optimizer_step(optimizer, opt_idx, batch_idx, closure)
-                if len(self.trainer.optimizers) > 1:
-                    # revert back to previous state
-                    self.trainer.lightning_module.untoggle_optimizer(opt_idx)
             else:
                 result = self._training_step(split_batch, batch_idx, opt_idx, self._hiddens)
 
-        if not result:
-            # user decided to skip optimization
-            return result
-
+        if result:
             # update running loss + reset accumulated loss
             self._update_running_loss(result.loss)
+            self._process_closure_result(result)
 
-        self._process_closure_result(result)
+        # untoggle model params
+        self._run_optimization_end(opt_idx)
         return result
 
     def _training_step_and_backward_closure(
@@ -494,6 +487,11 @@ def _run_optimization_start(self, opt_idx: int, optimizer: torch.optim.Optimizer
             model = self.trainer.lightning_module
             model.toggle_optimizer(optimizer, opt_idx)
 
+    def _run_optimization_end(self, opt_idx: int) -> None:
+        if self.trainer.lightning_module.automatic_optimization and len(self.trainer.optimizers) > 1:
+            model = self.trainer.lightning_module
+            model.untoggle_optimizer(opt_idx)
+
     @contextmanager
     def block_ddp_sync_behaviour(self, should_block_sync: bool = False) -> Generator[None, None, None]:
         """

From 2eb191207684bcb6ec00c79548d5c47e10effaae Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 5 Jul 2021 12:07:34 +0200
Subject: [PATCH 4/6] update changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 26c863be63d83..9666cd9e54e2e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -354,6 +354,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed passing a custom `DDPPlugin` when choosing `accelerator="ddp_cpu"` for the accelerator ([#6208](https://github.com/PyTorchLightning/pytorch-lightning/pull/6208))
 
 
+- Fixed missing call to `LightningModule.untoggle_optimizer` in training loop when running gradient accumulation with multiple optimizers ([#8284](https://github.com/PyTorchLightning/pytorch-lightning/pull/8284))
+
+
 ## [1.3.8] - 2021-07-01
 
 ### Fixed

From 521ff99ef79eca0d3890e4ad90e250a2cc951daa Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 5 Jul 2021 12:35:18 +0200
Subject: [PATCH 5/6] update comment

---
 pytorch_lightning/loops/batch/training_batch_loop.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytorch_lightning/loops/batch/training_batch_loop.py b/pytorch_lightning/loops/batch/training_batch_loop.py
index 4f0c454af4885..f79b9c17d8cbc 100644
--- a/pytorch_lightning/loops/batch/training_batch_loop.py
+++ b/pytorch_lightning/loops/batch/training_batch_loop.py
@@ -190,7 +190,8 @@ def _run_optimization(
                 result = self._training_step(split_batch, batch_idx, opt_idx, self._hiddens)
 
         if result:
-            # update running loss + reset accumulated loss
+            # if no result, user decided to skip optimization
+            # otherwise update running loss + reset accumulated loss
             self._update_running_loss(result.loss)
             self._process_closure_result(result)
 

From 43c7fb0d59cbc204c9d086d4c2b82881ac054912 Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 5 Jul 2021 13:29:20 +0200
Subject: [PATCH 6/6] remove debugging assertion

---
 pytorch_lightning/loops/batch/training_batch_loop.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/loops/batch/training_batch_loop.py b/pytorch_lightning/loops/batch/training_batch_loop.py
index f79b9c17d8cbc..e1e9a1bf3bcf6 100644
--- a/pytorch_lightning/loops/batch/training_batch_loop.py
+++ b/pytorch_lightning/loops/batch/training_batch_loop.py
@@ -130,7 +130,6 @@ def advance(self, batch, batch_idx, dataloader_idx):
 
         if self.trainer.lightning_module.automatic_optimization:
             for opt_idx, optimizer in self.get_active_optimizers(batch_idx):
-                # assert self.trainer.lightning_module.layer.weight.requires_grad
                 result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
                 if result:
                     self.batch_outputs[opt_idx].append(result.training_step_output)
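
Note (not part of the patch series): the sketch below is a plain-PyTorch illustration of the invariant these patches restore. The fix moves the `untoggle_optimizer` call into `_run_optimization_end`, which runs after every optimization run, so with multiple optimizers plus gradient accumulation the parameters frozen by toggling are always unfrozen again, including on accumulation-only batches that never reach `optimizer.step()`. The `toggle`/`untoggle` helpers, the two-`Linear` model, and the loop structure here are simplified stand-ins invented for the example, not Lightning internals.

# Minimal sketch, assuming two optimizers and accumulate_grad_batches=2.
import torch
from torch import nn

model = nn.ModuleDict({"gen": nn.Linear(4, 4), "disc": nn.Linear(4, 4)})
optimizers = [
    torch.optim.SGD(model["gen"].parameters(), lr=0.1),
    torch.optim.SGD(model["disc"].parameters(), lr=0.1),
]
accumulate_grad_batches = 2


def toggle(opt_idx: int) -> None:
    # freeze every parameter that does not belong to the active optimizer
    active = {id(p) for group in optimizers[opt_idx].param_groups for p in group["params"]}
    for p in model.parameters():
        p.requires_grad = id(p) in active


def untoggle() -> None:
    # revert to the previous state: here, make everything trainable again
    for p in model.parameters():
        p.requires_grad = True


for batch_idx in range(4):
    batch = torch.randn(8, 4)
    for opt_idx, optimizer in enumerate(optimizers):
        toggle(opt_idx)
        loss = (model["gen"] if opt_idx == 0 else model["disc"])(batch).sum()
        (loss / accumulate_grad_batches).backward()
        if (batch_idx + 1) % accumulate_grad_batches == 0:
            optimizer.step()
            optimizer.zero_grad()
        # before the fix, the untoggle happened only in the stepping branch above,
        # so accumulation-only batches left the other optimizer's params frozen
        untoggle()

    # with the untoggle at the end of every optimization run, this holds on every batch
    assert all(p.requires_grad for p in model.parameters())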