Disable nvfuser for gpt (#3845)

* disable nvfuser

Signed-off-by: ericharper <[email protected]>

* disable nvfuser

Signed-off-by: ericharper <[email protected]>

* delete line

Signed-off-by: ericharper <[email protected]>
NVIDIA · Mar 16, 2022 · 8c80f1b · 8c80f1b
1 parent 3ffea73
commit 8c80f1b
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 5 deletions.
diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -106,7 +106,7 @@ model:
     splits_string: 900,50,50
     seq_length: ${model.encoder_seq_length}
     skip_warmup: True
-    num_workers: 0
+    num_workers: 2
     dataloader_type: single # cyclic
     reset_position_ids: False # Reset position ids after end-of-document token
     reset_attention_mask: False # Reset attention mask after end-of-document token

diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -106,7 +106,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer):
         self._validate_trainer()
 
         # used in NVIDIA NGC PyTorch containers
-        self._enable_nvidia_optimizations()
+        # self._enable_nvidia_optimizations()
 
         if self.cfg.get('use_cpu_initialization', False) is False:
             torch.cuda.set_device(trainer.local_rank)
@@ -558,9 +558,8 @@ def build_train_valid_test_datasets(self):
             return
 
         logging.info('Building GPT datasets.')
-        global_batch_size = self.trainer.world_size * self.cfg.micro_batch_size / self.cfg.tensor_model_parallel_size
-        # Compute trianing micro-batch steps: total_global_batch_steps x grad_acumms_per_global_batch
-        max_train_steps = self.trainer.max_steps * self.trainer.accumulate_grad_batches
+        global_batch_size = self.cfg.global_batch_size
+        max_train_steps = self.trainer.max_steps
         eval_iters = (max_train_steps // self.trainer.val_check_interval + 1) * self.trainer.limit_val_batches
         test_iters = self.trainer.limit_test_batches