diff --git a/examples/asr/experimental/k2/align_speech_parallel.py b/examples/asr/experimental/k2/align_speech_parallel.py
index bd03420e94c1..abfffa0cdfdb 100644
--- a/examples/asr/experimental/k2/align_speech_parallel.py
+++ b/examples/asr/experimental/k2/align_speech_parallel.py
@@ -101,7 +101,7 @@ class ParallelAlignmentConfig:
     output_path: str = MISSING
     model_stride: int = 8
 
-    trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(gpus=-1, accelerator="ddp"))
+    trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(devices=-1, accelerator="ddp"))
 
     # there arguments will be ignored
     return_predictions: bool = False
diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py
index 65db066765ee..8b2e06b4eb0c 100644
--- a/nemo/collections/nlp/parts/nlp_overrides.py
+++ b/nemo/collections/nlp/parts/nlp_overrides.py
@@ -436,6 +436,18 @@ def restore_checkpoint_after_setup(self) -> bool:
         return True
 
 
+class NLPDDPStrategyNotebook(NLPDDPStrategy):
+    """ Version of NLPDDPStrategy to be used in a Jupyter notebook.
+    A large portion of Megatron code has a DDP dependency, so it has been necessary to use NLPDDPStrategy even for
+    single-GPU training (e.g. in a Jupyter notebook).
+    A PTL 2.0 change has prevented DDPStrategy from being used in a notebook.
+    This version of NLPDDPStrategy enables Megatron training in a notebook with PTL 2.0.
+    """
+
+    def _configure_launcher(self):
+        self._launcher = None
+
+
 class NLPSaveRestoreConnector(SaveRestoreConnector):
     def __init__(self) -> None:
         if not HAVE_APEX:
diff --git a/scripts/nemo_legacy_import/nlp_checkpoint_port.py b/scripts/nemo_legacy_import/nlp_checkpoint_port.py
index 162e4e4bef7a..909c1b7562c9 100644
--- a/scripts/nemo_legacy_import/nlp_checkpoint_port.py
+++ b/scripts/nemo_legacy_import/nlp_checkpoint_port.py
@@ -81,7 +81,7 @@ def nemo_convert(argv):
 
     # Create a PL trainer object which is required for restoring Megatron models
     cfg_trainer = TrainerConfig(
-        gpus=1,
+        devices=1,
         accelerator="ddp",
         num_nodes=1,
         # Need to set the following two to False as ExpManager will take care of them differently.
diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
index d52b8e9b301b..557e94e39f5e 100644
--- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
+++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
@@ -596,7 +596,7 @@
    "source": [
     "import torch\n",
     "import pytorch_lightning as pl\n",
-    "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
+    "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook\n",
     "from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n",
     "\n",
     "# let's modify some trainer configs\n",
@@ -618,7 +618,7 @@
     "os.environ[\"RANK\"] = '0'\n",
     "os.environ[\"WORLD_SIZE\"] = '1'\n",
     "\n",
-    "strategy = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
+    "strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
     "plugins = [TorchElasticEnvironment()]\n",
     "trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n",
     "\n",
@@ -783,4 +783,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/tutorials/nlp/lora.ipynb b/tutorials/nlp/lora.ipynb
index 3e3090afe305..1afe711654df 100644
--- a/tutorials/nlp/lora.ipynb
+++ b/tutorials/nlp/lora.ipynb
@@ -673,7 +673,7 @@
    "source": [
     "import torch\n",
     "import pytorch_lightning as pl\n",
-    "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
+    "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook\n",
     "from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n",
     "\n",
     "# let's modify some trainer configs\n",
@@ -695,7 +695,7 @@
     "os.environ[\"RANK\"] = '0'\n",
     "os.environ[\"WORLD_SIZE\"] = '1'\n",
     "\n",
-    "strategy = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
+    "strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
     "plugins = [TorchElasticEnvironment()]\n",
     "trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n",
     "\n",
@@ -1311,7 +1311,7 @@
    }
   ],
   "source": [
-    "strategy_eval = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
+    "strategy_eval = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
     "plugins_eval = [TorchElasticEnvironment()]\n",
     "# notice the plugins, strategy and config.trainer args are the same as is training portion of this tutorial\n",
     "# we just create a new object with no overlap from the training section of this tutorial\n",
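
Usage note: a minimal sketch of how the new NLPDDPStrategyNotebook is wired into a single-GPU notebook session, following the tutorial cells updated above. The Trainer arguments at the end are illustrative placeholders; the tutorials expand their own **config.trainer instead.

    import os
    import pytorch_lightning as pl
    from pytorch_lightning.plugins.environments import TorchElasticEnvironment
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook

    # Fake a single-process distributed environment so Megatron's DDP-dependent code paths initialize.
    os.environ["LOCAL_RANK"] = '0'
    os.environ["RANK"] = '0'
    os.environ["WORLD_SIZE"] = '1'

    # NLPDDPStrategyNotebook overrides _configure_launcher to skip the subprocess launcher,
    # which PTL 2.0 otherwise refuses to start from an interactive (notebook) session.
    strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)
    plugins = [TorchElasticEnvironment()]

    # Illustrative trainer settings; the tutorials pass **config.trainer here.
    trainer = pl.Trainer(plugins=plugins, strategy=strategy, accelerator="gpu", devices=1, max_steps=100)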