diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
index 6bf0953e7947..7167cd5a8bb6 100755
--- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -106,7 +106,7 @@ model:
     splits_string: 900,50,50
     seq_length: ${model.encoder_seq_length}
     skip_warmup: True
-    num_workers: 0
+    num_workers: 2
     dataloader_type: single # cyclic
     reset_position_ids: False # Reset position ids after end-of-document token
     reset_attention_mask: False # Reset attention mask after end-of-document token
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index db50f5c604f2..8befb347d275 100755
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -559,8 +559,8 @@ def build_train_valid_test_datasets(self):
         logging.info('Building GPT datasets.')
         global_batch_size = self.trainer.world_size * self.cfg.micro_batch_size / self.cfg.tensor_model_parallel_size
-        # Compute trianing micro-batch steps: total_global_batch_steps x grad_acumms_per_global_batch
-        max_train_steps = self.trainer.max_steps * self.trainer.accumulate_grad_batches
+        global_batch_size = self.cfg.global_batch_size
+        max_train_steps = self.trainer.max_steps
         eval_iters = (max_train_steps // self.trainer.val_check_interval + 1) * self.trainer.limit_val_batches
         test_iters = self.trainer.limit_test_batches
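
Not part of the patch: a minimal standalone sketch of the arithmetic the second hunk touches, using hypothetical trainer/config values (the real values come from the trainer and the model config, and the per-split sample-count step is an assumption about code outside the excerpt shown). After the change, global_batch_size is read directly from the config instead of being derived from world_size, micro_batch_size, and tensor parallel size, and max_train_steps no longer multiplies in accumulate_grad_batches.

# sketch.py -- illustrative only, values are hypothetical
global_batch_size = 256        # hypothetical cfg.global_batch_size
max_train_steps = 10000        # hypothetical trainer.max_steps
val_check_interval = 1000      # hypothetical trainer.val_check_interval
limit_val_batches = 50         # hypothetical trainer.limit_val_batches
limit_test_batches = 25        # hypothetical trainer.limit_test_batches

# Same formulas as the context lines in the hunk:
eval_iters = (max_train_steps // val_check_interval + 1) * limit_val_batches
test_iters = limit_test_batches

# Each split then needs roughly iters * global_batch_size samples
# (the sample-count computation itself is outside the excerpt shown).
train_samples = max_train_steps * global_batch_size
valid_samples = eval_iters * global_batch_size
test_samples = test_iters * global_batch_size

print(train_samples, valid_samples, test_samples)  # 2560000 140800 6400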