From 8696fbb43a5ab52796cea68f50aaab300b453d72 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 10:04:00 +0530 Subject: [PATCH 01/27] allow big lora tests to run on the CI. --- tests/lora/test_lora_layers_flux.py | 11 +++++++++-- tests/lora/test_lora_layers_sd3.py | 12 +++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index b58525cc7a6f..760b33ddb2e8 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -31,9 +31,12 @@ numpy_cosine_similarity_distance, require_peft_backend, require_torch_gpu, + require_big_gpu_with_torch_cuda, slow, torch_device, + print_tensor_test ) +import pytest if is_peft_available(): @@ -169,8 +172,8 @@ def test_modify_padding_mode(self): @nightly @require_torch_gpu @require_peft_backend -@unittest.skip("We cannot run inference on this model with the current CI hardware") -# TODO (DN6, sayakpaul): move these tests to a beefier GPU +@require_big_gpu_with_torch_cuda +@pytest.mark.big_gpu_with_torch_cuda class FluxLoRAIntegrationTests(unittest.TestCase): """internal note: The integration slices were obtained on audace. @@ -211,6 +214,7 @@ def test_flux_the_last_ben(self): generator=torch.manual_seed(self.seed), ).images out_slice = out[0, -3:, -3:, -1].flatten() + print_tensor_test(out_slice) expected_slice = np.array([0.1855, 0.1855, 0.1836, 0.1855, 0.1836, 0.1875, 0.1777, 0.1758, 0.2246]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) @@ -233,6 +237,7 @@ def test_flux_kohya(self): ).images out_slice = out[0, -3:, -3:, -1].flatten() + print_tensor_test(out_slice) expected_slice = np.array([0.6367, 0.6367, 0.6328, 0.6367, 0.6328, 0.6289, 0.6367, 0.6328, 0.6484]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) @@ -255,6 +260,7 @@ def test_flux_kohya_with_text_encoder(self): ).images out_slice = out[0, -3:, -3:, -1].flatten() + print_tensor_test(out_slice) expected_slice = np.array([0.4023, 0.4023, 0.4023, 0.3965, 0.3984, 0.3965, 0.3926, 0.3906, 0.4219]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) @@ -277,6 +283,7 @@ def test_flux_xlabs(self): generator=torch.manual_seed(self.seed), ).images out_slice = out[0, -3:, -3:, -1].flatten() + print_tensor_test(out_slice) expected_slice = np.array([0.3965, 0.4180, 0.4434, 0.4082, 0.4375, 0.4590, 0.4141, 0.4375, 0.4980]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index 78d4b786d21b..d86d731774ff 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -34,7 +34,12 @@ require_peft_backend, require_torch_gpu, torch_device, + slow, + nightly, + require_big_gpu_with_torch_cuda, + print_tensor_test ) +import pytest if is_peft_available(): @@ -130,9 +135,13 @@ def test_modify_padding_mode(self): pass +@slow +@nightly @require_torch_gpu @require_peft_backend -class LoraSD3IntegrationTests(unittest.TestCase): +@require_big_gpu_with_torch_cuda +@pytest.mark.big_gpu_with_torch_cuda +class SD3LoraIntegrationTests(unittest.TestCase): pipeline_class = StableDiffusion3Img2ImgPipeline repo_id = "stabilityai/stable-diffusion-3-medium-diffusers" @@ -173,6 +182,7 @@ def test_sd3_img2img_lora(self): image = pipe(**inputs).images[0] image_slice = image[0, :10, :10] + print_tensor_test(image[0, -3:, -3:, -1].flatten()) expected_slice = 
np.array( [ 0.47827148, From 06b3919c37dedace8f812b988741d3e425fd3aec Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 14:56:13 +0530 Subject: [PATCH 02/27] print --- src/diffusers/pipelines/flux/pipeline_flux.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index 040d935f1b88..19175b56cadf 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -776,7 +776,9 @@ def __call__( image = self.image_processor.postprocess(image, output_type=output_type) # Offload all models + print("before maybe") self.maybe_free_model_hooks() + print("after maybe") if not return_dict: return (image,) From b062bd9b9161230f5655a9928125da85425791fa Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:04:00 +0530 Subject: [PATCH 03/27] print. --- src/diffusers/pipelines/pipeline_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 2e1858b16148..aa2c7d18fb4d 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -983,6 +983,7 @@ def remove_all_hooks(self): r""" Removes all hooks that were added when using `enable_sequential_cpu_offload` or `enable_model_cpu_offload`. """ + print("Within remove_all_hooks().") for _, model in self.components.items(): if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"): accelerate.hooks.remove_hook_from_module(model, recurse=True) From 360935c04ff80757a5cdd637301f8181538ed006 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:10:47 +0530 Subject: [PATCH 04/27] print --- src/diffusers/pipelines/pipeline_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index aa2c7d18fb4d..9e2dab68fa60 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -986,6 +986,7 @@ def remove_all_hooks(self): print("Within remove_all_hooks().") for _, model in self.components.items(): if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"): + print(f"{model.__class__.__name__=}") accelerate.hooks.remove_hook_from_module(model, recurse=True) self._all_hooks = [] From 1d1248ac1783fc519c1da04980585dcb2557371b Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:19:53 +0530 Subject: [PATCH 05/27] print --- src/diffusers/pipelines/pipeline_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 9e2dab68fa60..007870d81fa1 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -988,6 +988,7 @@ def remove_all_hooks(self): if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"): print(f"{model.__class__.__name__=}") accelerate.hooks.remove_hook_from_module(model, recurse=True) + print("Done removing from the current model.") self._all_hooks = [] def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"): From f5550e35a2503c3f9ad764aa8ee45b99cd49a1b0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:28:59 +0530 Subject: [PATCH 06/27] print --- src/diffusers/pipelines/pipeline_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 007870d81fa1..9084cb635f17 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -990,6 +990,7 @@ def remove_all_hooks(self): accelerate.hooks.remove_hook_from_module(model, recurse=True) print("Done removing from the current model.") self._all_hooks = [] + print("Done in remove.") def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"): r""" @@ -1064,6 +1065,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook) self._all_hooks.append(hook) + print("Initial hooks appended.") # CPU offload models that are not in the seq chain unless they are explicitly excluded # these models will stay on CPU until maybe_free_model_hooks is called @@ -1077,6 +1079,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t else: _, hook = cpu_offload_with_hook(model, device) self._all_hooks.append(hook) + print("Done second time.") def maybe_free_model_hooks(self): r""" From 4cd5a3cd629143143a162831193c07e22dcb027b Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:37:27 +0530 Subject: [PATCH 07/27] print --- src/diffusers/pipelines/pipeline_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 9084cb635f17..c8e193c2cf37 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -1050,6 +1050,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t self._all_hooks = [] hook = None for model_str in self.model_cpu_offload_seq.split("->"): + print(f"Entering with {model_str}") model = all_model_components.pop(model_str, None) if not isinstance(model, torch.nn.Module): @@ -1079,8 +1080,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t else: _, hook = cpu_offload_with_hook(model, device) self._all_hooks.append(hook) - print("Done second time.") - + def maybe_free_model_hooks(self): r""" Function that offloads all components, removes all model hooks that were added when using From a901420bedfb8b6a1988a901e62f7229ef747bbe Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:45:48 +0530 Subject: [PATCH 08/27] more --- src/diffusers/pipelines/pipeline_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index c8e193c2cf37..64d74a3bd286 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -1044,7 +1044,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t device_mod = getattr(torch, device.type, None) if hasattr(device_mod, "empty_cache") and device_mod.is_available(): device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - + print("Empty cache called.") all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} self._all_hooks = [] From d659f1ccf0a61887579e642d8fc12f653b447ed6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 15:52:31 +0530 Subject: [PATCH 09/27] print --- src/diffusers/pipelines/pipeline_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff 
--git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 64d74a3bd286..0de84c8e02fd 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -1039,9 +1039,12 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t device_type = torch_device.type device = torch.device(f"{device_type}:{self._offload_gpu_id}") self._offload_device = device + print("Initial assignments done.") self.to("cpu", silence_dtype_warnings=True) + print("placed on CPU.") device_mod = getattr(torch, device.type, None) + print(f"{device=}") if hasattr(device_mod, "empty_cache") and device_mod.is_available(): device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) print("Empty cache called.") From 96d27ff0ec600f44050c0b8ca6b74640b2c76f60 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:10:49 +0530 Subject: [PATCH 10/27] remove print. --- src/diffusers/pipelines/pipeline_utils.py | 13 ++----------- tests/lora/test_lora_layers_flux.py | 6 +++--- tests/lora/test_lora_layers_sd3.py | 10 +++++----- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 0de84c8e02fd..b56f9f472f01 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -983,14 +983,11 @@ def remove_all_hooks(self): r""" Removes all hooks that were added when using `enable_sequential_cpu_offload` or `enable_model_cpu_offload`. """ - print("Within remove_all_hooks().") for _, model in self.components.items(): if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"): - print(f"{model.__class__.__name__=}") accelerate.hooks.remove_hook_from_module(model, recurse=True) - print("Done removing from the current model.") + self._all_hooks = [] - print("Done in remove.") def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"): r""" @@ -1039,21 +1036,16 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t device_type = torch_device.type device = torch.device(f"{device_type}:{self._offload_gpu_id}") self._offload_device = device - print("Initial assignments done.") self.to("cpu", silence_dtype_warnings=True) - print("placed on CPU.") device_mod = getattr(torch, device.type, None) - print(f"{device=}") if hasattr(device_mod, "empty_cache") and device_mod.is_available(): device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - print("Empty cache called.") all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} self._all_hooks = [] hook = None for model_str in self.model_cpu_offload_seq.split("->"): - print(f"Entering with {model_str}") model = all_model_components.pop(model_str, None) if not isinstance(model, torch.nn.Module): @@ -1069,7 +1061,6 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook) self._all_hooks.append(hook) - print("Initial hooks appended.") # CPU offload models that are not in the seq chain unless they are explicitly excluded # these models will stay on CPU until maybe_free_model_hooks is called @@ -1083,7 +1074,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t else: _, hook = cpu_offload_with_hook(model, device) 
self._all_hooks.append(hook) - + def maybe_free_model_hooks(self): r""" Function that offloads all components, removes all model hooks that were added when using diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 760b33ddb2e8..fa7136601c69 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -19,6 +19,7 @@ import unittest import numpy as np +import pytest import safetensors.torch import torch from transformers import AutoTokenizer, CLIPTextModel, CLIPTokenizer, T5EncoderModel @@ -29,14 +30,13 @@ is_peft_available, nightly, numpy_cosine_similarity_distance, + print_tensor_test, + require_big_gpu_with_torch_cuda, require_peft_backend, require_torch_gpu, - require_big_gpu_with_torch_cuda, slow, torch_device, - print_tensor_test ) -import pytest if is_peft_available(): diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index d86d731774ff..d718c236d69d 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -17,6 +17,7 @@ import unittest import numpy as np +import pytest import torch from transformers import AutoTokenizer, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel @@ -30,16 +31,15 @@ from diffusers.utils.import_utils import is_accelerate_available from diffusers.utils.testing_utils import ( is_peft_available, + nightly, numpy_cosine_similarity_distance, + print_tensor_test, + require_big_gpu_with_torch_cuda, require_peft_backend, require_torch_gpu, - torch_device, slow, - nightly, - require_big_gpu_with_torch_cuda, - print_tensor_test + torch_device, ) -import pytest if is_peft_available(): From 8510f98b8d175c310b26134447121b802e91a99e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:11:21 +0530 Subject: [PATCH 11/27] remove print --- src/diffusers/pipelines/flux/pipeline_flux.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index 19175b56cadf..ebaa6bbdc359 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -776,10 +776,8 @@ def __call__( image = self.image_processor.postprocess(image, output_type=output_type) # Offload all models - print("before maybe") self.maybe_free_model_hooks() - print("after maybe") - + if not return_dict: return (image,) From 9fe7b91b42bd06b1716f19c19c596cc5cce82f48 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:16:36 +0530 Subject: [PATCH 12/27] directly place on cuda. 
--- src/diffusers/pipelines/flux/pipeline_flux.py | 2 +- tests/lora/test_lora_layers_flux.py | 11 +++++++---- tests/lora/test_lora_layers_sd3.py | 4 +++- tests/pipelines/flux/test_pipeline_flux.py | 3 +-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index ebaa6bbdc359..040d935f1b88 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -777,7 +777,7 @@ def __call__( # Offload all models self.maybe_free_model_hooks() - + if not return_dict: return (image,) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index fa7136601c69..789db2b5c582 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -202,7 +202,7 @@ def test_flux_the_last_ben(self): self.pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline.enable_model_cpu_offload() + self.pipeline = self.pipeline.to("cuda") prompt = "jon snow eating pizza with ketchup" @@ -225,7 +225,10 @@ def test_flux_kohya(self): self.pipeline.load_lora_weights("Norod78/brain-slug-flux") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline.enable_model_cpu_offload() + # Instead of calling `enable_model_cpu_offload()`, we do a cuda placement here because the CI + # run supports it. We have about 34GB RAM in the CI runner which kills the test when run with + # `enable_model_cpu_offload()`. + self.pipeline = self.pipeline.to("cuda") prompt = "The cat with a brain slug earring" out = self.pipeline( @@ -248,7 +251,7 @@ def test_flux_kohya_with_text_encoder(self): self.pipeline.load_lora_weights("cocktailpeanut/optimus", weight_name="optimus.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline.enable_model_cpu_offload() + self.pipeline = self.pipeline.to("cuda") prompt = "optimus is cleaning the house with broomstick" out = self.pipeline( @@ -271,7 +274,7 @@ def test_flux_xlabs(self): self.pipeline.load_lora_weights("XLabs-AI/flux-lora-collection", weight_name="disney_lora.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline.enable_model_cpu_offload() + self.pipeline = self.pipeline.to("cuda") prompt = "A blue jay standing on a large basket of rainbow macarons, disney style" diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index d718c236d69d..19fe8cbb732b 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -176,7 +176,9 @@ def get_inputs(self, device, seed=0): def test_sd3_img2img_lora(self): pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16) pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors") - pipe.enable_sequential_cpu_offload() + pipe.fuse_lora() + pipe.unload_lora_weights() + pipe = pipe.to("cuda") inputs = self.get_inputs(torch_device) diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py index 3ccf3f80ba3c..88997de5f5fb 100644 --- a/tests/pipelines/flux/test_pipeline_flux.py +++ b/tests/pipelines/flux/test_pipeline_flux.py @@ -236,8 +236,7 @@ def get_inputs(self, device, seed=0): def test_flux_inference(self): pipe = self.pipeline_class.from_pretrained( self.repo_id, torch_dtype=torch.bfloat16, 
text_encoder=None, text_encoder_2=None - ) - pipe.enable_model_cpu_offload() + ).to("cuda") inputs = self.get_inputs(torch_device) From 286af0e87d44388417725622dd5c01f2edd245e7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:24:31 +0530 Subject: [PATCH 13/27] remove pipeline. --- tests/lora/test_lora_layers_flux.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 789db2b5c582..390a3deed491 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -195,6 +195,7 @@ def setUp(self): def tearDown(self): super().tearDown() + del self.pipeline gc.collect() torch.cuda.empty_cache() From 741a44fb5dc822421ebdcbe7d25e65577bf1505f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:36:49 +0530 Subject: [PATCH 14/27] remove --- tests/lora/test_lora_layers_flux.py | 4 ---- tests/lora/test_lora_layers_sd3.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 390a3deed491..b3b2e9d222d7 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -215,7 +215,6 @@ def test_flux_the_last_ben(self): generator=torch.manual_seed(self.seed), ).images out_slice = out[0, -3:, -3:, -1].flatten() - print_tensor_test(out_slice) expected_slice = np.array([0.1855, 0.1855, 0.1836, 0.1855, 0.1836, 0.1875, 0.1777, 0.1758, 0.2246]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) @@ -241,7 +240,6 @@ def test_flux_kohya(self): ).images out_slice = out[0, -3:, -3:, -1].flatten() - print_tensor_test(out_slice) expected_slice = np.array([0.6367, 0.6367, 0.6328, 0.6367, 0.6328, 0.6289, 0.6367, 0.6328, 0.6484]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) @@ -264,7 +262,6 @@ def test_flux_kohya_with_text_encoder(self): ).images out_slice = out[0, -3:, -3:, -1].flatten() - print_tensor_test(out_slice) expected_slice = np.array([0.4023, 0.4023, 0.4023, 0.3965, 0.3984, 0.3965, 0.3926, 0.3906, 0.4219]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) @@ -287,7 +284,6 @@ def test_flux_xlabs(self): generator=torch.manual_seed(self.seed), ).images out_slice = out[0, -3:, -3:, -1].flatten() - print_tensor_test(out_slice) expected_slice = np.array([0.3965, 0.4180, 0.4434, 0.4082, 0.4375, 0.4590, 0.4141, 0.4375, 0.4980]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index 19fe8cbb732b..04ba923a9d8e 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -184,7 +184,7 @@ def test_sd3_img2img_lora(self): image = pipe(**inputs).images[0] image_slice = image[0, :10, :10] - print_tensor_test(image[0, -3:, -3:, -1].flatten()) + print_tensor_test(image[0, :10, :10].flatten()) expected_slice = np.array( [ 0.47827148, From e818907ece93960cb54ba727abd39e4ab69d6a75 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:39:02 +0530 Subject: [PATCH 15/27] fix --- tests/lora/test_lora_layers_flux.py | 3 +-- tests/lora/test_lora_layers_sd3.py | 2 -- tests/pipelines/flux/test_pipeline_flux.py | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index b3b2e9d222d7..880fe70e8531 100644 --- a/tests/lora/test_lora_layers_flux.py +++ 
b/tests/lora/test_lora_layers_flux.py @@ -30,7 +30,6 @@ is_peft_available, nightly, numpy_cosine_similarity_distance, - print_tensor_test, require_big_gpu_with_torch_cuda, require_peft_backend, require_torch_gpu, @@ -195,7 +194,7 @@ def setUp(self): def tearDown(self): super().tearDown() - del self.pipeline + del self.pipeline gc.collect() torch.cuda.empty_cache() diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index 04ba923a9d8e..584a42fc7879 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -33,7 +33,6 @@ is_peft_available, nightly, numpy_cosine_similarity_distance, - print_tensor_test, require_big_gpu_with_torch_cuda, require_peft_backend, require_torch_gpu, @@ -184,7 +183,6 @@ def test_sd3_img2img_lora(self): image = pipe(**inputs).images[0] image_slice = image[0, :10, :10] - print_tensor_test(image[0, :10, :10].flatten()) expected_slice = np.array( [ 0.47827148, diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py index 88997de5f5fb..23e0f18c0009 100644 --- a/tests/pipelines/flux/test_pipeline_flux.py +++ b/tests/pipelines/flux/test_pipeline_flux.py @@ -9,6 +9,7 @@ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers.utils.testing_utils import ( + nightly, numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, slow, @@ -193,6 +194,7 @@ def test_fused_qkv_projections(self): @slow +@nightly @require_big_gpu_with_torch_cuda @pytest.mark.big_gpu_with_torch_cuda class FluxPipelineSlowTests(unittest.TestCase): From 3ed98a12994267866572b2f56751509f9151fa72 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 16:45:11 +0530 Subject: [PATCH 16/27] fix --- tests/lora/test_lora_layers_flux.py | 8 ++++---- tests/lora/test_lora_layers_sd3.py | 2 +- tests/pipelines/flux/test_pipeline_flux.py | 11 ++++------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 880fe70e8531..63aed8bbce2a 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -202,7 +202,7 @@ def test_flux_the_last_ben(self): self.pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline = self.pipeline.to("cuda") + self.pipeline = self.pipeline.to(torch_device) prompt = "jon snow eating pizza with ketchup" @@ -227,7 +227,7 @@ def test_flux_kohya(self): # Instead of calling `enable_model_cpu_offload()`, we do a cuda placement here because the CI # run supports it. We have about 34GB RAM in the CI runner which kills the test when run with # `enable_model_cpu_offload()`. 
- self.pipeline = self.pipeline.to("cuda") + self.pipeline = self.pipeline.to(torch_device) prompt = "The cat with a brain slug earring" out = self.pipeline( @@ -249,7 +249,7 @@ def test_flux_kohya_with_text_encoder(self): self.pipeline.load_lora_weights("cocktailpeanut/optimus", weight_name="optimus.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline = self.pipeline.to("cuda") + self.pipeline = self.pipeline.to(torch_device) prompt = "optimus is cleaning the house with broomstick" out = self.pipeline( @@ -271,7 +271,7 @@ def test_flux_xlabs(self): self.pipeline.load_lora_weights("XLabs-AI/flux-lora-collection", weight_name="disney_lora.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - self.pipeline = self.pipeline.to("cuda") + self.pipeline = self.pipeline.to(torch_device) prompt = "A blue jay standing on a large basket of rainbow macarons, disney style" diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index 584a42fc7879..87e9ded3f1c4 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -177,7 +177,7 @@ def test_sd3_img2img_lora(self): pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors") pipe.fuse_lora() pipe.unload_lora_weights() - pipe = pipe.to("cuda") + pipe = pipe.to(torch_device) inputs = self.get_inputs(torch_device) diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py index 23e0f18c0009..cb8146160f36 100644 --- a/tests/pipelines/flux/test_pipeline_flux.py +++ b/tests/pipelines/flux/test_pipeline_flux.py @@ -212,19 +212,16 @@ def tearDown(self): torch.cuda.empty_cache() def get_inputs(self, device, seed=0): - if str(device).startswith("mps"): - generator = torch.manual_seed(seed) - else: - generator = torch.Generator(device="cpu").manual_seed(seed) + generator = torch.Generator(device="cpu").manual_seed(seed) prompt_embeds = torch.load( hf_hub_download(repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt") - ) + ).to(torch_device) pooled_prompt_embeds = torch.load( hf_hub_download( repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt" ) - ) + ).to(torch_device) return { "prompt_embeds": prompt_embeds, "pooled_prompt_embeds": pooled_prompt_embeds, @@ -238,7 +235,7 @@ def get_inputs(self, device, seed=0): def test_flux_inference(self): pipe = self.pipeline_class.from_pretrained( self.repo_id, torch_dtype=torch.bfloat16, text_encoder=None, text_encoder_2=None - ).to("cuda") + ).to(torch_device) inputs = self.get_inputs(torch_device) From 9124f28bd10fbb8811fc050e8428fe829fc2cddc Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 17:14:57 +0530 Subject: [PATCH 17/27] spaces --- src/diffusers/pipelines/pipeline_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index b56f9f472f01..a275a7e92969 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -986,7 +986,6 @@ def remove_all_hooks(self): for _, model in self.components.items(): if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"): accelerate.hooks.remove_hook_from_module(model, recurse=True) - self._all_hooks = [] def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"): @@ -1041,6 +1040,7 @@ def 
enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t device_mod = getattr(torch, device.type, None) if hasattr(device_mod, "empty_cache") and device_mod.is_available(): device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) + all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} self._all_hooks = [] From a938831dcb3186622a7041a60699b708a97d2bd1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 2 Nov 2024 17:15:26 +0530 Subject: [PATCH 18/27] quality --- src/diffusers/pipelines/pipeline_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index a275a7e92969..2e1858b16148 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -1040,7 +1040,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t device_mod = getattr(torch, device.type, None) if hasattr(device_mod, "empty_cache") and device_mod.is_available(): device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - + all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} self._all_hooks = [] From bd94852dde5fbb10191833cc9e641a301904986b Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 3 Nov 2024 06:56:48 +0530 Subject: [PATCH 19/27] updates --- tests/lora/test_lora_layers_flux.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 63aed8bbce2a..cc7913cd611e 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -202,6 +202,9 @@ def test_flux_the_last_ben(self): self.pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() + # Instead of calling `enable_model_cpu_offload()`, we do a cuda placement here because the CI + # run supports it. We have about 34GB RAM in the CI runner which kills the test when run with + # `enable_model_cpu_offload()`. We repeat this for the other tests, too. self.pipeline = self.pipeline.to(torch_device) prompt = "jon snow eating pizza with ketchup" @@ -224,9 +227,6 @@ def test_flux_kohya(self): self.pipeline.load_lora_weights("Norod78/brain-slug-flux") self.pipeline.fuse_lora() self.pipeline.unload_lora_weights() - # Instead of calling `enable_model_cpu_offload()`, we do a cuda placement here because the CI - # run supports it. We have about 34GB RAM in the CI runner which kills the test when run with - # `enable_model_cpu_offload()`. self.pipeline = self.pipeline.to(torch_device) prompt = "The cat with a brain slug earring" From 176041904e07ab2451748f08abf3af01205f782f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 3 Nov 2024 06:58:34 +0530 Subject: [PATCH 20/27] directly place flux controlnet pipeline on cuda. 
--- tests/pipelines/controlnet_flux/test_controlnet_flux.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py index 89540232f9cf..06144ce67b42 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py @@ -35,6 +35,7 @@ numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, slow, + nightly, torch_device, ) from diffusers.utils.torch_utils import randn_tensor @@ -183,6 +184,7 @@ def test_xformers_attention_forwardGenerator_pass(self): @slow +@nightly @require_big_gpu_with_torch_cuda @pytest.mark.big_gpu_with_torch_cuda class FluxControlNetPipelineSlowTests(unittest.TestCase): @@ -208,8 +210,7 @@ def test_canny(self): text_encoder_2=None, controlnet=controlnet, torch_dtype=torch.bfloat16, - ) - pipe.enable_model_cpu_offload() + ).to("cuda") pipe.set_progress_bar_config(disable=None) generator = torch.Generator(device="cpu").manual_seed(0) From 021f0deb9b4ecd5ed4b50b3cc3b78c48a3f759ce Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 3 Nov 2024 06:58:58 +0530 Subject: [PATCH 21/27] torch_device instead of cuda. --- tests/pipelines/controlnet_flux/test_controlnet_flux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py index 06144ce67b42..eb67b5f41b78 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py @@ -210,7 +210,7 @@ def test_canny(self): text_encoder_2=None, controlnet=controlnet, torch_dtype=torch.bfloat16, - ).to("cuda") + ).to(torch_device) pipe.set_progress_bar_config(disable=None) generator = torch.Generator(device="cpu").manual_seed(0) From ee662cfd3917d1fef092e92bd55ac0311829ceb3 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 3 Nov 2024 07:06:25 +0530 Subject: [PATCH 22/27] style --- tests/pipelines/controlnet_flux/test_controlnet_flux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py index eb67b5f41b78..f7e1df9f8ca2 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py @@ -32,10 +32,10 @@ from diffusers.utils import load_image from diffusers.utils.testing_utils import ( enable_full_determinism, + nightly, numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, slow, - nightly, torch_device, ) from diffusers.utils.torch_utils import randn_tensor From c46331fe13a0e72dcf60d859449bbe25afa218a7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 3 Nov 2024 07:20:41 +0530 Subject: [PATCH 23/27] device placement. 
--- tests/pipelines/controlnet_flux/test_controlnet_flux.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py index f7e1df9f8ca2..fe1f192623c5 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py @@ -220,12 +220,12 @@ def test_canny(self): prompt_embeds = torch.load( hf_hub_download(repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt") - ) + ).to(torch_device) pooled_prompt_embeds = torch.load( hf_hub_download( repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt" ) - ) + ).to(torch_device) output = pipe( prompt_embeds=prompt_embeds, From 207579b529df93718f5b7fdcc7c1f84b1539b9e3 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 5 Nov 2024 23:35:27 +0100 Subject: [PATCH 24/27] fixes --- tests/lora/test_lora_layers_sd3.py | 39 +++--------------------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index 87e9ded3f1c4..c8728c8f7fe8 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -174,7 +174,7 @@ def get_inputs(self, device, seed=0): def test_sd3_img2img_lora(self): pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16) - pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors") + pipe.load_lora_weights("zwloong/sd3-lora-training-rank16-v2") pipe.fuse_lora() pipe.unload_lora_weights() pipe = pipe.to(torch_device) @@ -182,41 +182,8 @@ def test_sd3_img2img_lora(self): inputs = self.get_inputs(torch_device) image = pipe(**inputs).images[0] - image_slice = image[0, :10, :10] - expected_slice = np.array( - [ - 0.47827148, - 0.5, - 0.71972656, - 0.3955078, - 0.4194336, - 0.69628906, - 0.37036133, - 0.40820312, - 0.6923828, - 0.36450195, - 0.40429688, - 0.6904297, - 0.35595703, - 0.39257812, - 0.68652344, - 0.35498047, - 0.3984375, - 0.68310547, - 0.34716797, - 0.3996582, - 0.6855469, - 0.3388672, - 0.3959961, - 0.6816406, - 0.34033203, - 0.40429688, - 0.6845703, - 0.34228516, - 0.4086914, - 0.6870117, - ] - ) + image_slice = image[0, -3:, -3:] + expected_slice = np.array([0.5396, 0.5776, 0.7432, 0.5151, 0.5586, 0.7383, 0.5537, 0.5933, 0.7153]) max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten()) From 295315c6435d36a33588e91d42321b27db8dbf12 Mon Sep 17 00:00:00 2001 From: Aryan Date: Wed, 20 Nov 2024 04:53:23 +0100 Subject: [PATCH 25/27] add big gpu marker for mochi; rename test correctly --- tests/pipelines/mochi/test_mochi.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/mochi/test_mochi.py b/tests/pipelines/mochi/test_mochi.py index 2192c171aa22..dfbe77f04eb7 100644 --- a/tests/pipelines/mochi/test_mochi.py +++ b/tests/pipelines/mochi/test_mochi.py @@ -17,13 +17,16 @@ import unittest import numpy as np +import pytest import torch from transformers import AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel from diffusers.utils.testing_utils import ( enable_full_determinism, + nightly, numpy_cosine_similarity_distance, + require_big_gpu_with_torch_cuda, require_torch_gpu, slow, torch_device, @@ -261,7 +264,10 @@ def test_vae_tiling(self, 
expected_diff_max: float = 0.2): @slow +@nightly @require_torch_gpu +@require_big_gpu_with_torch_cuda +@pytest.mark.big_gpu_with_torch_cuda class MochiPipelineIntegrationTests(unittest.TestCase): prompt = "A painting of a squirrel eating a burger." @@ -275,7 +281,7 @@ def tearDown(self): gc.collect() torch.cuda.empty_cache() - def test_cogvideox(self): + def test_mochi(self): generator = torch.Generator("cpu").manual_seed(0) pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", torch_dtype=torch.float16) @@ -293,7 +299,7 @@ def test_cogvideox(self): ).frames video = videos[0] - expected_video = torch.randn(1, 16, 480, 848, 3).numpy() + expected_video = torch.randn(1, 19, 480, 848, 3).numpy() max_diff = numpy_cosine_similarity_distance(video, expected_video) assert max_diff < 1e-3, f"Max diff is too high. got {video}" From 58b79f2c12cfb4ea06d14d7425564eea54cf0c9c Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 2 Dec 2024 14:24:22 +0530 Subject: [PATCH 26/27] address feedback --- tests/lora/test_lora_layers_sd3.py | 2 -- tests/pipelines/controlnet_flux/test_controlnet_flux.py | 2 -- tests/pipelines/flux/test_pipeline_flux.py | 2 -- tests/pipelines/mochi/test_mochi.py | 2 -- 4 files changed, 8 deletions(-) diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py index c8728c8f7fe8..992a13b22604 100644 --- a/tests/lora/test_lora_layers_sd3.py +++ b/tests/lora/test_lora_layers_sd3.py @@ -36,7 +36,6 @@ require_big_gpu_with_torch_cuda, require_peft_backend, require_torch_gpu, - slow, torch_device, ) @@ -134,7 +133,6 @@ def test_modify_padding_mode(self): pass -@slow @nightly @require_torch_gpu @require_peft_backend diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py index 9de7b4b0fa9b..5e856b125f32 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py @@ -35,7 +35,6 @@ nightly, numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, - slow, torch_device, ) from diffusers.utils.torch_utils import randn_tensor @@ -205,7 +204,6 @@ def test_flux_image_output_shape(self): assert (output_height, output_width) == (expected_height, expected_width) -@slow @nightly @require_big_gpu_with_torch_cuda @pytest.mark.big_gpu_with_torch_cuda diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py index 100c8afb211c..c6905d8a7f22 100644 --- a/tests/pipelines/flux/test_pipeline_flux.py +++ b/tests/pipelines/flux/test_pipeline_flux.py @@ -12,7 +12,6 @@ nightly, numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, - slow, torch_device, ) @@ -207,7 +206,6 @@ def test_flux_image_output_shape(self): assert (output_height, output_width) == (expected_height, expected_width) -@slow @nightly @require_big_gpu_with_torch_cuda @pytest.mark.big_gpu_with_torch_cuda diff --git a/tests/pipelines/mochi/test_mochi.py b/tests/pipelines/mochi/test_mochi.py index dfbe77f04eb7..c9df5785897c 100644 --- a/tests/pipelines/mochi/test_mochi.py +++ b/tests/pipelines/mochi/test_mochi.py @@ -28,7 +28,6 @@ numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, require_torch_gpu, - slow, torch_device, ) @@ -263,7 +262,6 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2): ) -@slow @nightly @require_torch_gpu @require_big_gpu_with_torch_cuda From 44db42338af37c06ff83344488ab8ca2cc57e63a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 25 Dec 2024 16:58:15 +0530 
Subject: [PATCH 27/27] fix --- tests/pipelines/flux/test_pipeline_flux.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py index a0b2274adcff..ab36333c4056 100644 --- a/tests/pipelines/flux/test_pipeline_flux.py +++ b/tests/pipelines/flux/test_pipeline_flux.py @@ -12,6 +12,7 @@ nightly, numpy_cosine_similarity_distance, require_big_gpu_with_torch_cuda, + slow, torch_device, )
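For reference, a minimal sketch of the test-gating pattern this series converges on. The decorators and helpers (`nightly`, `require_big_gpu_with_torch_cuda`, `numpy_cosine_similarity_distance`, `torch_device`) are the ones imported from `diffusers.utils.testing_utils` in the hunks above, and the `pytest.mark.big_gpu_with_torch_cuda` marker mirrors what the patches add; the class name, checkpoint id, prompt, and expected slice below are illustrative placeholders, not values taken from these patches.

import unittest

import numpy as np
import pytest
import torch

from diffusers import FluxPipeline
from diffusers.utils.testing_utils import (
    nightly,
    numpy_cosine_similarity_distance,
    require_big_gpu_with_torch_cuda,
    torch_device,
)


@nightly
@require_big_gpu_with_torch_cuda
@pytest.mark.big_gpu_with_torch_cuda  # lets `pytest -m big_gpu_with_torch_cuda` select these tests
class ExampleBigGPUIntegrationTests(unittest.TestCase):
    def test_example_inference(self):
        # Illustrative checkpoint; the patches use repo-specific checkpoints per test file.
        pipe = FluxPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
        )
        # Direct device placement instead of enable_model_cpu_offload(): the big-GPU
        # runner has enough VRAM, while offloading exhausted host RAM on the CI machine.
        pipe = pipe.to(torch_device)

        out = pipe(
            "jon snow eating pizza with ketchup",
            num_inference_steps=2,
            guidance_scale=4.0,
            output_type="np",
            generator=torch.manual_seed(0),
        ).images
        out_slice = out[0, -3:, -3:, -1].flatten()

        # Placeholder values; the real tests pin slices obtained on the CI runner.
        expected_slice = np.full(9, 0.5)
        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
        assert max_diff < 1e-3

The design choice the later patches settle on is direct `.to(torch_device)` placement rather than `enable_model_cpu_offload()`: the big-GPU runner has enough VRAM for full placement, whereas model offloading killed the tests given the roughly 34GB of host RAM available on the runner, as noted in the comments added to the Flux LoRA tests.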