NVIDIA · ericharper · Mar 17, 2022 · Mar 16, 2022 · Mar 17, 2022 · Mar 17, 2022
diff --git a/examples/nlp/language_modeling/conf/megatron_t5_config_finetune_glue_xnli_eval.yaml b/examples/nlp/language_modeling/conf/megatron_t5_config_finetune_glue_xnli_eval.yaml
@@ -0,0 +1,34 @@
+# Evaluation-only config for megatron_t5_xnli_eval.py: the script restores a fine-tuned
+# T5 .nemo checkpoint, freezes it and runs trainer.test() on it.
+name: megatron_t5_glue_xnli_eval
+
+# Every key in this section is forwarded to pytorch_lightning.Trainer as a keyword
+# argument (the eval script calls Trainer(plugins=plugins, **cfg.trainer)).
+trainer:
+  gpus: 1
+  # num_nodes is also handed to NLPDDPPlugin by the eval script.
+  num_nodes: 1
+  accelerator: ddp
+  # A value of 16 or bf16 makes the eval script add a mixed-precision plugin;
+  # with 16 it additionally creates a GradScaler.
+  precision: 16
+  logger: False # logger provided by exp_manager
+  checkpoint_callback: False
+  replace_sampler_ddp: False
+  log_every_n_steps: 10
+
+# Passed unchanged to exp_manager(trainer, cfg.exp_manager) by the eval script.
+exp_manager:
+  explicit_log_dir: null
+  exp_dir: null
+  name: megatron_t5_glue_xnli_eval
+  create_wandb_logger: False
+
+model:
+  # '???' is OmegaConf's marker for a mandatory value: it must be supplied by the user.
+  restore_from_finetuned_path: ??? # Path to a finetuned T5 .nemo file
+  tensor_model_parallel_size: 1
+  # Forwarded to NLPDDPPlugin by the eval script.
+  gradient_as_bucket_view: True # Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
+  # When True (and precision is 16/bf16) the eval script uses MegatronHalfPrecisionPlugin
+  # instead of NativeMixedPrecisionPlugin.
+  megatron_amp_O2: False # Enable O2 optimization for megatron amp
+  # Copied onto the restored model's config by the eval script before testing.
+  eval_languages: ['fr', 'de', 'en', 'es'] # List of languages to evaluate zero-shot XNLI performance.
+
+  # NOTE(review): the eval script in this change does not read this section itself;
+  # presumably the restored model builds its test dataloader from equivalent keys in
+  # its own config - confirm that the values below actually take effect.
+  data:
+    test_ds:
+      task_name: 'xnli'
+      file_path: ??? # Path to the TSV file for XNLI dev ex: '/raid/Data/GLUE/MNLI/dev_matched.tsv'
+      batch_size: 32
+      shuffle: False
+      num_workers: 8
+      pin_memory: True
+      max_seq_length: 512
diff --git a/examples/nlp/language_modeling/megatron_t5_xnli_eval.py b/examples/nlp/language_modeling/megatron_t5_xnli_eval.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from omegaconf.omegaconf import OmegaConf, open_dict
+from pytorch_lightning import Trainer
+from pytorch_lightning.callbacks.timer import Timer
+from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
+from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
+from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
+
+from nemo.collections.nlp.models.language_modeling.megatron_xnli_model import MegatronXNlIModel
+from nemo.collections.nlp.parts.nlp_overrides import GradScaler, MegatronHalfPrecisionPlugin, NLPDDPPlugin
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from nemo.utils.exp_manager import StatelessTimer, exp_manager
+
+
+@hydra_runner(config_path="conf", config_name="megatron_t5_config_finetune_glue_xnli_eval")
+def main(cfg) -> None:
+    """Restore a fine-tuned Megatron T5 checkpoint and run the XNLI test loop.
+
+    Builds the trainer plugins (DDP, optional mixed precision, optional
+    TorchElastic environment) from ``cfg``, creates the trainer, restores the
+    model from ``cfg.model.restore_from_finetuned_path``, overrides its
+    ``eval_languages`` with the values from ``cfg`` and calls ``trainer.test``.
+
+    Args:
+        cfg: Hydra/OmegaConf config with ``trainer``, ``exp_manager`` and
+            ``model`` sections.
+    """
+    logging.info("\n\n************** Experiment configuration ***********")
+    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+
+    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
+    plugins = [
+        NLPDDPPlugin(
+            num_nodes=cfg.trainer.num_nodes,
+            no_ddp_communication_hook=(
+                megatron_amp_o2 and cfg.trainer.precision == 'bf16'
+            ),  # Only bf16 uses fp32_grad_accum.
+            gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
+            find_unused_parameters=False,
+        )
+    ]
+    if cfg.trainer.precision in [16, 'bf16']:
+        # A grad scaler is only created for fp16; bf16 runs with scaler=None.
+        scaler = None
+        if cfg.trainer.precision == 16:
+            scaler = GradScaler(
+                init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
+                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
+                hysteresis=cfg.model.get('hysteresis', 2),
+            )
+        if megatron_amp_o2:
+            plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler))
+        else:
+            plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler))
+
+    if cfg.get('cluster_type', None) == 'BCP':
+        plugins.append(TorchElasticEnvironment())
+
+    trainer = Trainer(plugins=plugins, **cfg.trainer)
+
+    exp_manager(trainer, cfg.exp_manager)
+
+    # Override timer callback to a stateless one
+    for idx, callback in enumerate(trainer.callbacks):
+        if isinstance(callback, Timer):
+            trainer.callbacks[idx] = StatelessTimer(cfg.trainer.max_time)
+
+    model = MegatronXNlIModel.restore_from(restore_path=cfg.model.restore_from_finetuned_path, trainer=trainer)
+    model.freeze()
+
+    # Hard overwrite eval languages.
+    # The restored config may be in struct mode and may not contain this key
+    # yet, so open it for the duration of the assignment.
+    # TODO: find better way to do it
+    with open_dict(model.cfg):
+        model.cfg.eval_languages = cfg.model.eval_languages
+
+    trainer.test(model)
+
+
+# Script entry point: main() is called without arguments, so ``cfg`` is supplied
+# by the hydra_runner decorator.
+if __name__ == '__main__':
+    main()