From ad50e7e4a2b96ced834dd4bb1440c9bec60c7034 Mon Sep 17 00:00:00 2001
From: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com>
Date: Tue, 17 Oct 2023 15:47:03 -0700
Subject: [PATCH] Add NLPDDPStrategyNotebook and change trainer gpus to devices
 (#7741)

* Add NLPDDPStrategyNotebook and change trainer gpus to devices

Signed-off-by: Abhishree <abhishreetm@gmail.com>

* Add NLPDDPStrategyNotebook for strategy_eval in lora.ipynb

Signed-off-by: Abhishree <abhishreetm@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Abhishree <abhishreetm@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .../asr/experimental/k2/align_speech_parallel.py    |  2 +-
 scripts/nemo_legacy_import/nlp_checkpoint_port.py   |  2 +-
 tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb    |  6 +++---
 tutorials/nlp/lora.ipynb                            | 13 ++++++++++---
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/examples/asr/experimental/k2/align_speech_parallel.py b/examples/asr/experimental/k2/align_speech_parallel.py
index bd03420e94c1..abfffa0cdfdb 100644
--- a/examples/asr/experimental/k2/align_speech_parallel.py
+++ b/examples/asr/experimental/k2/align_speech_parallel.py
@@ -101,7 +101,7 @@ class ParallelAlignmentConfig:
     output_path: str = MISSING
     model_stride: int = 8
 
-    trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(gpus=-1, accelerator="ddp"))
+    trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(devices=-1, accelerator="ddp"))
 
     # there arguments will be ignored
     return_predictions: bool = False
diff --git a/scripts/nemo_legacy_import/nlp_checkpoint_port.py b/scripts/nemo_legacy_import/nlp_checkpoint_port.py
index 162e4e4bef7a..909c1b7562c9 100644
--- a/scripts/nemo_legacy_import/nlp_checkpoint_port.py
+++ b/scripts/nemo_legacy_import/nlp_checkpoint_port.py
@@ -81,7 +81,7 @@ def nemo_convert(argv):
 
     # Create a PL trainer object which is required for restoring Megatron models
     cfg_trainer = TrainerConfig(
-        gpus=1,
+        devices=1,
         accelerator="ddp",
         num_nodes=1,
         # Need to set the following two to False as ExpManager will take care of them differently.
diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
index 004014ebdeeb..076a8ffad3df 100644
--- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
+++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
@@ -596,7 +596,7 @@
     "source": [
      "import torch\n",
      "import pytorch_lightning as pl\n",
-     "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
+     "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook\n",
      "from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n",
      "\n",
      "# let's modify some trainer configs\n",
@@ -618,7 +618,7 @@
      "os.environ[\"RANK\"] = '0'\n",
      "os.environ[\"WORLD_SIZE\"] = '1'\n",
      "\n",
-     "strategy = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
+     "strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
      "plugins = [TorchElasticEnvironment()]\n",
      "trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n",
      "\n",
@@ -783,4 +783,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 5
- }
\ No newline at end of file
+ }
diff --git a/tutorials/nlp/lora.ipynb b/tutorials/nlp/lora.ipynb
index 8603bbb62411..21f99f2b8e69 100644
--- a/tutorials/nlp/lora.ipynb
+++ b/tutorials/nlp/lora.ipynb
@@ -423,7 +423,8 @@
     "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
     "import torch\n",
     "import pytorch_lightning as pl\n",
-    "from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder\n",
+    "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook\n",
+    "from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n",
     "\n",
     "# let's modify some trainer configs\n",
     "# check if we have GPU available and uses it\n",
@@ -441,7 +442,9 @@
     "os.environ[\"RANK\"] = '0'\n",
     "os.environ[\"WORLD_SIZE\"] = '1'\n",
     "\n",
-    "trainer = MegatronTrainerBuilder(config).create_trainer()\n",
+    "strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
+    "plugins = [TorchElasticEnvironment()]\n",
+    "trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n",
     "\n",
     "print(\"Trainer config - \\n\")\n",
     "print(OmegaConf.to_yaml(config.trainer))"
@@ -685,7 +688,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "trainer_eval = MegatronTrainerBuilder(config_eval).create_trainer()"
+    "strategy_eval = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
+    "plugins_eval = [TorchElasticEnvironment()]\n",
+    "# notice the plugins, strategy and config.trainer args are the same as in the training portion of this tutorial\n",
+    "# we just create a new object with no overlap from the training section of this tutorial\n",
+    "trainer_eval = pl.Trainer(plugins= plugins_eval, strategy=strategy_eval, **config_eval.trainer) "
    ]
   },
   {