From 7bb4049e98914a6e874f9fefef5ab33d6a3328e7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 24 Oct 2023 11:48:13 -0700
Subject: [PATCH] Change accelerator to 'auto' in nlp_checkpoint_port.py (#7761)

* Change accelerator to 'auto' in nlp_checkpoint_port.py (#7747)

* Change accelerator to auto

Signed-off-by: Abhishree

* Pass omegaconf object to trainer in nlp_checkpoint_port.py

Signed-off-by: Abhishree

* Pass omegaconf object to trainer in export.py

Signed-off-by: Abhishree

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Abhishree
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Eric Harper
Signed-off-by: Abhishree

* docs: fix typos (#7758)

Signed-off-by: shuoer86 <129674997+shuoer86@users.noreply.github.com>
Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Signed-off-by: Abhishree

* Snake act (#7736)

Signed-off-by: Abhishree

* Update gpt_dataset.py (#6963)

Signed-off-by: Xin Yao
Co-authored-by: Sandeep Subramanian
Signed-off-by: Abhishree

---------

Signed-off-by: Abhishree
Signed-off-by: shuoer86 <129674997+shuoer86@users.noreply.github.com>
Signed-off-by: Xin Yao
Co-authored-by: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Eric Harper
Co-authored-by: shuoer86 <129674997+shuoer86@users.noreply.github.com>
Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Co-authored-by: Nithin Rao
Co-authored-by: Xin Yao
Co-authored-by: Sandeep Subramanian
---
 nemo/core/config/pytorch_lightning.py             | 6 +++---
 scripts/export.py                                 | 4 +++-
 scripts/nemo_legacy_import/nlp_checkpoint_port.py | 5 +++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/nemo/core/config/pytorch_lightning.py b/nemo/core/config/pytorch_lightning.py
index dce2c14851da..98c1a1157f3f 100644
--- a/nemo/core/config/pytorch_lightning.py
+++ b/nemo/core/config/pytorch_lightning.py
@@ -54,7 +54,7 @@ class TrainerConfig:
     limit_test_batches: Any = 1.0
     val_check_interval: Any = 1.0
     log_every_n_steps: int = 50
-    accelerator: Optional[str] = None
+    accelerator: Optional[str] = 'auto'
     sync_batchnorm: bool = False
     precision: Any = 32
     num_sanity_val_steps: int = 2
@@ -68,8 +68,8 @@ class TrainerConfig:
     gradient_clip_algorithm: str = 'norm'
     max_time: Optional[Any] = None  # can be one of Union[str, timedelta, Dict[str, int], None]
     reload_dataloaders_every_n_epochs: int = 0
-    devices: Any = None
-    strategy: Any = None
+    devices: Any = 'auto'
+    strategy: Any = 'auto'
     enable_checkpointing: bool = False
     enable_model_summary: bool = True
     inference_mode: bool = True
diff --git a/scripts/export.py b/scripts/export.py
index 8fa44bb305f9..dbe5b2b7fe2b 100644
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -30,6 +30,7 @@
 import sys

 import torch
+from omegaconf import OmegaConf
 from pytorch_lightning import Trainer

 import nemo
@@ -103,7 +104,8 @@ def nemo_export(argv):
         logger=False,
         enable_checkpointing=False,
     )
-    trainer = Trainer(cfg_trainer)
+    cfg_trainer = OmegaConf.to_container(OmegaConf.create(cfg_trainer))
+    trainer = Trainer(**cfg_trainer)

     logging.info("Restoring NeMo model from '{}'".format(nemo_in))
     try:
diff --git a/scripts/nemo_legacy_import/nlp_checkpoint_port.py b/scripts/nemo_legacy_import/nlp_checkpoint_port.py
index 909c1b7562c9..b7541ffdb8cd 100644
--- a/scripts/nemo_legacy_import/nlp_checkpoint_port.py
+++ b/scripts/nemo_legacy_import/nlp_checkpoint_port.py
@@ -82,13 +82,14 @@ def nemo_convert(argv):
     # Create a PL trainer object which is required for restoring Megatron models
     cfg_trainer = TrainerConfig(
         devices=1,
-        accelerator="ddp",
+        accelerator='auto',
         num_nodes=1,
         # Need to set the following two to False as ExpManager will take care of them differently.
         logger=False,
         enable_checkpointing=False,
     )
-    trainer = pl.Trainer(cfg_trainer)
+    cfg_trainer = OmegaConf.to_container(OmegaConf.create(cfg_trainer))
+    trainer = pl.Trainer(**cfg_trainer)

     logging.info("Restoring NeMo model from '{}'".format(nemo_in))
     try:
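
Note (not part of the patch): a minimal sketch of the pattern the two scripts now follow, assuming current OmegaConf and PyTorch Lightning behavior. Trainer expects keyword arguments rather than a config object, so the structured TrainerConfig is first converted to a plain dict and then unpacked with **:

    from omegaconf import OmegaConf
    import pytorch_lightning as pl

    from nemo.core.config.pytorch_lightning import TrainerConfig

    # Structured config; accelerator/devices/strategy now default to 'auto'.
    cfg_trainer = TrainerConfig(devices=1, num_nodes=1, logger=False, enable_checkpointing=False)

    # Convert the dataclass to an OmegaConf object, then to a plain Python dict,
    # so its fields can be passed to Trainer as keyword arguments.
    trainer_kwargs = OmegaConf.to_container(OmegaConf.create(cfg_trainer))
    trainer = pl.Trainer(**trainer_kwargs)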