diff --git a/tests/quantization/aqlm_integration/test_aqlm.py b/tests/quantization/aqlm_integration/test_aqlm.py
index b79eae54c0c3..8195d975711a 100644
--- a/tests/quantization/aqlm_integration/test_aqlm.py
+++ b/tests/quantization/aqlm_integration/test_aqlm.py
@@ -17,6 +17,7 @@
 import importlib
 import tempfile
 import unittest
+from unittest import skip
 
 from packaging import version
 
@@ -142,6 +143,9 @@ def test_quantized_model_conversion(self):
 
         self.assertEqual(nb_linears - 1, nb_aqlm_linear)
 
+    @skip(
+        "inference doesn't work with quantized aqlm models using torch.Any type with recent torch versions. Waiting for the fix from AQLM side"
+    )
     def test_quantized_model(self):
         """
         Simple test that checks if the quantized model is working properly
@@ -158,6 +162,9 @@ def test_raise_if_non_quantized(self):
         with self.assertRaises(ValueError):
             _ = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
 
+    @skip(
+        "inference doesn't work with quantized aqlm models using torch.Any type with recent torch versions. Waiting for the fix from AQLM side"
+    )
     def test_save_pretrained(self):
         """
         Simple test that checks if the quantized model is working properly after being saved and loaded
@@ -171,6 +178,9 @@ def test_save_pretrained(self):
         output = model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
 
+    @skip(
+        "inference doesn't work with quantized aqlm models using torch.Any type with recent torch versions. Waiting for the fix from AQLM side"
+    )
     @require_torch_multi_gpu
     def test_quantized_model_multi_gpu(self):
         """
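
For context on the pattern this patch uses: decorating a test method with `unittest.skip` marks it as skipped at collection time, so the test body never executes and the runner reports it as skipped rather than failed. Below is a minimal, self-contained sketch of that behavior; the `ExampleTest` class and its skip message are illustrative only and are not part of the patch above.

```python
import unittest
from unittest import skip


class ExampleTest(unittest.TestCase):
    # Skipped tests are reported as "s"/SKIPPED with the given reason;
    # the body is never run, so the raise below cannot fire.
    @skip("skipped pending an upstream fix")
    def test_skipped(self):
        raise RuntimeError("never reached")

    def test_runs(self):
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()
```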