Commit b3db907
added ci test
Signed-off-by: Yi Dong <[email protected]>
yidong72 committed Jan 26, 2022
1 parent d4e2cdd commit b3db907
Showing 2 changed files with 40 additions and 3 deletions.
27 changes: 27 additions & 0 deletions Jenkinsfile
@@ -1863,6 +1863,33 @@ pipeline {
         sh "rm -rf examples/nlp/language_modeling/bert_pretrain_results"
       }
     }
+    stage('L2: Megatron P-Tuning GPT LM') {
+      when {
+        anyOf {
+          branch 'main'
+          changeRequest target: 'main'
+        }
+      }
+      failFast true
+      steps {
+        sh "python examples/nlp/text_classification/ptune_text_classification.py \
+        trainer.gpus=2 \
+        trainer.max_epochs=1 \
+        +trainer.limit_val_batches=10 \
+        +trainer.limit_train_batches=10 \
+        +trainer.limit_test_batches=10 \
+        exp_manager.exp_dir=examples/nlp/language_modeling/ptune_results \
+        model.tokenizer.vocab_file=/home/TestData/nlp/ptune/gpt2-vocab.json \
+        model.tensor_model_parallel_size=2 \
+        model.tokenizer.merge_file=/home/TestData/nlp/ptune/gpt2-merges.txt \
+        model.language_model.nemo_file=/home/TestData/nlp/ptune/small_gpt.nemo \
+        model.dataset.classes=[positive,neutral,negative] \
+        model.train_ds.file_path=/home/TestData/nlp/ptune/data/train_0.txt \
+        model.validation_ds.file_path=/home/TestData/nlp/ptune/data/validation_0.txt \
+        model.test_ds.file_path=/home/TestData/nlp/ptune/data/test_0.txt"
+        sh "rm -rf examples/nlp/language_modeling/ptune_results"
+      }
+    }
     stage('L2: Megatron GPT Pretraining and Resume Training') {
       when {
         anyOf {
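
The new stage drives ptune_text_classification.py through Hydra-style command-line overrides: a plain dotted key such as trainer.gpus=2 replaces a value already present in the YAML config, while a key prefixed with + (e.g. +trainer.limit_val_batches=10) adds an entry the base config does not define. A minimal sketch of that distinction using OmegaConf, the config library Hydra builds on; the base config below is a made-up stand-in, not NeMo's actual YAML:

# Illustrative sketch only (not part of this commit): how the Hydra-style
# overrides in the CI command act on a config. Plain dotted keys override
# existing entries; a leading '+' adds a key absent from the base config.
from omegaconf import OmegaConf

base = OmegaConf.create({"trainer": {"gpus": 1, "max_epochs": 3}})  # hypothetical base config
OmegaConf.update(base, "trainer.gpus", 2)                # trainer.gpus=2
OmegaConf.update(base, "trainer.limit_val_batches", 10)  # +trainer.limit_val_batches=10
print(OmegaConf.to_yaml(base))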
13 changes: 13 additions & 3 deletions
@@ -249,9 +249,19 @@ def forward_eval(self, sentences):
         encoder_input, new_atten, label_position = self.get_encoder_input(sentences)
         batch_size, _, seq_len, _ = new_atten.shape
 
-        output = self.model.model(
-            None, None, encoder_input=encoder_input.to(self.device), attention_mask=new_atten.to(self.device)
-        )
+        # workaround to do auto-cast
+        # get the LM dtype
+        dtype = self.model.model.language_model.encoder.layers[0].dtype
+
+        if dtype == torch.float32:
+            output = self.model.model(
+                None, None, encoder_input=encoder_input.to(self.device), attention_mask=new_atten.to(self.device)
+            )
+        else:
+            with torch.autocast(device_type="cuda", dtype=dtype):
+                output = self.model.model(
+                    None, None, encoder_input=encoder_input.to(self.device), attention_mask=new_atten.to(self.device)
+                )
         logits = output
 
         _, returned_pred = self.get_prediction(batch_size, label_position.to(self.device), logits)
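
The change works around autocast not being applied on this eval path: it reads the dtype the language model's transformer layers actually hold (fp32, fp16, or bf16) and wraps the forward pass in torch.autocast only when the weights are not full precision, so fp32 activations are cast on the fly to match a half-precision checkpoint. A self-contained sketch of the same dtype-dispatch pattern; the DummyLM module and run_forward helper are hypothetical names for illustration, not part of the commit:

# Standalone sketch of the dtype-dispatch pattern above; DummyLM and
# run_forward are hypothetical, not part of this commit.
import torch

class DummyLM(torch.nn.Module):
    def __init__(self, dtype=torch.float16):
        super().__init__()
        self.proj = torch.nn.Linear(8, 8).to(dtype)

    def forward(self, x):
        return self.proj(x)

def run_forward(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
    # Infer the compute dtype from the model's own parameters,
    # mirroring the layers[0].dtype lookup in the diff.
    dtype = next(model.parameters()).dtype
    if dtype == torch.float32:
        return model(x)  # full precision: no autocast needed
    # Half/bf16 weights: let autocast cast fp32 inputs at op boundaries.
    with torch.autocast(device_type="cuda", dtype=dtype):
        return model(x)

if torch.cuda.is_available():
    model = DummyLM().cuda()
    out = run_forward(model, torch.randn(4, 8, device="cuda"))
    print(out.dtype)  # torch.float16

Without the autocast context, the fp32 input would hit the fp16 weights directly and the matmul would fail with a dtype mismatch; with it, eligible ops cast their inputs to the target dtype automatically.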
