diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 06e56498d9254a..96649b953c914c 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1597,9 +1597,7 @@ def test_generate_with_head_masking(self): attn_weights = out[attn_name] if attn_name == attention_names[0] else out[attn_name][-1] self.assertEqual(sum([w.sum().item() for w in attn_weights]), 0.0) - # TODO (joao): this test is actually not slow :) However, it is not passing in some models (e.g. GPTNeoX) and the - # fix for some models is quite lengthy. Being slow means it doesn't block our push CI while we fix it. - @slow + @slow # TODO (Joao): fix GPTBigCode def test_left_padding_compatibility(self): # The check done in this test is fairly difficult -- depending on the model architecture, passing the right # position index for the position embeddings can still result in a different output, due to numerical masking. diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index a4b77e8431bf76..949e647e6fc384 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -1516,3 +1516,7 @@ def test_retain_grad_hidden_states_attentions(self): def test_save_load_fast_init_from_base(self): pass + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py index 32b7bbcbb24a76..5d345db3fc5dc0 100644 --- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py +++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py @@ -811,3 +811,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index 8762d11aaa082b..499c7aa5215561 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -561,3 +561,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index f8d247ba12e12f..257aa1699c3eae 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -566,3 +566,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/ctrl/test_modeling_ctrl.py b/tests/models/ctrl/test_modeling_ctrl.py index 87d719853ad7d7..8941927f173261 100644 --- a/tests/models/ctrl/test_modeling_ctrl.py +++ b/tests/models/ctrl/test_modeling_ctrl.py @@ -249,6 +249,10 @@ def test_model_from_pretrained(self): model = CTRLModel.from_pretrained(model_name) self.assertIsNotNone(model) + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass + @require_torch class CTRLModelLanguageGenerationTest(unittest.TestCase): diff --git a/tests/models/imagegpt/test_modeling_imagegpt.py b/tests/models/imagegpt/test_modeling_imagegpt.py index e6c8524c6d6f2b..27d83f3eb8c1e9 100644 --- a/tests/models/imagegpt/test_modeling_imagegpt.py +++ b/tests/models/imagegpt/test_modeling_imagegpt.py @@ -520,6 +520,10 @@ def _create_and_check_torchscript(self, config, inputs_dict): self.assertTrue(models_equal) + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass + # We will verify our results on an image of cute cats def prepare_img(): diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index fe5f8606521e8c..877f5273c9bc58 100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -862,3 +862,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index d7094ca6e4796d..ec3d36f33d0e8c 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -692,3 +692,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/mvp/test_modeling_mvp.py b/tests/models/mvp/test_modeling_mvp.py index e996a998a80d7f..cc3986a3701a32 100644 --- a/tests/models/mvp/test_modeling_mvp.py +++ b/tests/models/mvp/test_modeling_mvp.py @@ -818,3 +818,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index cb8b36c9af31d1..bde7477f945040 100644 --- a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -573,3 +573,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/plbart/test_modeling_plbart.py b/tests/models/plbart/test_modeling_plbart.py index 6ad226747a3391..05dbac6a2c80e9 100644 --- a/tests/models/plbart/test_modeling_plbart.py +++ b/tests/models/plbart/test_modeling_plbart.py @@ -658,3 +658,7 @@ def test_decoder_model_attn_mask_past(self): def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass diff --git a/tests/models/prophetnet/test_modeling_prophetnet.py b/tests/models/prophetnet/test_modeling_prophetnet.py index baf7351bf5ab3b..fa717b27430681 100644 --- a/tests/models/prophetnet/test_modeling_prophetnet.py +++ b/tests/models/prophetnet/test_modeling_prophetnet.py @@ -1146,6 +1146,10 @@ def test_retain_grad_hidden_states_attentions(self): # decoder cannot keep gradients return + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass + @require_torch class ProphetNetStandaloneEncoderModelTest(ModelTesterMixin, unittest.TestCase): diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 39e1389477b844..c84f729633cc8a 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ b/tests/models/reformer/test_modeling_reformer.py @@ -682,6 +682,10 @@ def _check_hidden_states_for_generate( [expected_shape] * len(iter_hidden_states), ) + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass + @require_torch class ReformerLSHAttnModelTest( @@ -839,6 +843,10 @@ def test_problem_types(self): def test_past_key_values_format(self): pass + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass + @require_torch @require_sentencepiece diff --git a/tests/models/transfo_xl/test_modeling_transfo_xl.py b/tests/models/transfo_xl/test_modeling_transfo_xl.py index c86cd704f1593a..970f87bf1063ac 100644 --- a/tests/models/transfo_xl/test_modeling_transfo_xl.py +++ b/tests/models/transfo_xl/test_modeling_transfo_xl.py @@ -486,6 +486,10 @@ def _mock_init_weights(self, module): weight = getattr(module, param) weight.data.fill_(3) + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass + @require_torch class TransfoXLModelLanguageGenerationTest(unittest.TestCase): diff --git a/tests/models/trocr/test_modeling_trocr.py b/tests/models/trocr/test_modeling_trocr.py index f670f4e6d95e3e..0033f339ae58fb 100644 --- a/tests/models/trocr/test_modeling_trocr.py +++ b/tests/models/trocr/test_modeling_trocr.py @@ -192,3 +192,7 @@ def test_decoder_model_past(self): # decoder cannot keep gradients def test_retain_grad_hidden_states_attentions(self): return + + @unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) + def test_left_padding_compatibility(self): + pass