Skip to content

Commit

Permalink
Add NLPDDPStrategyNotebook and change trainer gpus to devices (#7741)
Browse files Browse the repository at this point in the history
* Add NLPDDPStrategyNotebook and change trainer gpus to devices

Signed-off-by: Abhishree <[email protected]>

* Add NLPDDPStrategyNotebook for strategy_eval in lora.ipynb

Signed-off-by: Abhishree <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Abhishree <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and web-flow committed Oct 17, 2023
1 parent 3f31216 commit ad50e7e
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion examples/asr/experimental/k2/align_speech_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class ParallelAlignmentConfig:
output_path: str = MISSING
model_stride: int = 8

trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(gpus=-1, accelerator="ddp"))
trainer: TrainerConfig = field(default_factory=lambda: TrainerConfig(devices=-1, accelerator="ddp"))

# these arguments will be ignored
return_predictions: bool = False
Expand Down
2 changes: 1 addition & 1 deletion scripts/nemo_legacy_import/nlp_checkpoint_port.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def nemo_convert(argv):

# Create a PL trainer object which is required for restoring Megatron models
cfg_trainer = TrainerConfig(
gpus=1,
devices=1,
accelerator="ddp",
num_nodes=1,
# Need to set the following two to False as ExpManager will take care of them differently.
Expand Down
6 changes: 3 additions & 3 deletions tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@
"source": [
"import torch\n",
"import pytorch_lightning as pl\n",
"from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
"from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook\n",
"from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n",
"\n",
"# let's modify some trainer configs\n",
Expand All @@ -618,7 +618,7 @@
"os.environ[\"RANK\"] = '0'\n",
"os.environ[\"WORLD_SIZE\"] = '1'\n",
"\n",
"strategy = NLPDDPStrategy(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
"strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
"plugins = [TorchElasticEnvironment()]\n",
"trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n",
"\n",
Expand Down Expand Up @@ -783,4 +783,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
13 changes: 10 additions & 3 deletions tutorials/nlp/lora.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,8 @@
"from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
"import torch\n",
"import pytorch_lightning as pl\n",
"from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder\n",
"from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategyNotebook\n",
"from pytorch_lightning.plugins.environments import TorchElasticEnvironment\n",
"\n",
"# let's modify some trainer configs\n",
"# check if we have GPU available and uses it\n",
Expand All @@ -441,7 +442,9 @@
"os.environ[\"RANK\"] = '0'\n",
"os.environ[\"WORLD_SIZE\"] = '1'\n",
"\n",
"trainer = MegatronTrainerBuilder(config).create_trainer()\n",
"strategy = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
"plugins = [TorchElasticEnvironment()]\n",
"trainer = pl.Trainer(plugins= plugins, strategy=strategy, **config.trainer)\n",
"\n",
"print(\"Trainer config - \\n\")\n",
"print(OmegaConf.to_yaml(config.trainer))"
Expand Down Expand Up @@ -685,7 +688,11 @@
"metadata": {},
"outputs": [],
"source": [
"trainer_eval = MegatronTrainerBuilder(config_eval).create_trainer()"
"strategy_eval = NLPDDPStrategyNotebook(find_unused_parameters=False, no_ddp_communication_hook=True)\n",
"plugins_eval = [TorchElasticEnvironment()]\n",
"# notice the plugins, strategy and config.trainer args are the same as is training portion of this tutorial\n",
"# we just create a new object with no overlap from the training section of this tutorial\n",
"trainer_eval = pl.Trainer(plugins= plugins_eval, strategy=strategy_eval, **config_eval.trainer) "
]
},
{
Expand Down

0 comments on commit ad50e7e

Please sign in to comment.