From d66b8583ac8bdc617de25ce0019b15f9bb45bfe6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 24 Sep 2024 12:15:40 +0530 Subject: [PATCH 01/24] allow loras to be loaded with low_cpu_mem_usage. --- src/diffusers/loaders/lora_pipeline.py | 31 +++++++++++- src/diffusers/loaders/unet.py | 19 ++++++-- tests/lora/utils.py | 66 +++++++++++++++++++++++++- 3 files changed, 110 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index ba1435a8cbdc..050daeb4abbc 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -88,10 +88,17 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`str`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -109,6 +116,7 @@ def load_lora_weights( unet=getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=kwargs.pop("low_cpu_mem_usage", False), ) self.load_lora_into_text_encoder( state_dict, @@ -232,7 +240,9 @@ def lora_state_dict( return state_dict, network_alphas @classmethod - def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None): + def load_lora_into_unet( + cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -250,10 +260,16 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), # then the `state_dict` keys should have `cls.unet_name` and/or `cls.text_encoder_name` as # their prefixes. @@ -263,7 +279,11 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None # Load the layers corresponding to UNet. 
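A quick sense of what the new kwarg buys at the call site: `load_lora_weights` now accepts `low_cpu_mem_usage=True` and forwards it down through `load_lora_into_unet`. A minimal usage sketch — the checkpoint and adapter ids below are illustrative placeholders, not taken from this patch, and a `peft` release newer than the guard above is assumed:

```py
import torch
from diffusers import DiffusionPipeline

# Placeholder model and LoRA ids, for illustration only.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# With a new-enough `peft`, adapter modules are first created on the `meta`
# device and materialized straight from the checkpoint, skipping one full
# CPU-side copy of the LoRA weights.
pipe.load_lora_weights("some-user/some-lora", low_cpu_mem_usage=True)
```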
logger.info(f"Loading {cls.unet_name}.") unet.load_attn_procs( - state_dict, network_alphas=network_alphas, adapter_name=adapter_name, _pipeline=_pipeline + state_dict, + network_alphas=network_alphas, + adapter_name=adapter_name, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -276,6 +296,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -298,10 +319,16 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index 32ace77b6224..e5c0600d0119 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -115,6 +115,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict `default_{i}` where i is the total number of adapters being loaded. weight_name (`str`, *optional*, defaults to None): Name of the serialized state dict file. + low_cpu_mem_usage (`str`, *optional*, defaults to False): TODO Example: @@ -142,8 +143,14 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict adapter_name = kwargs.pop("adapter_name", None) _pipeline = kwargs.pop("_pipeline", None) network_alphas = kwargs.pop("network_alphas", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) allow_pickle = False + if low_cpu_mem_usage and is_peft_version("<=", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if use_safetensors is None: use_safetensors = True allow_pickle = True @@ -209,6 +216,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict network_alphas=network_alphas, adapter_name=adapter_name, _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, ) else: raise ValueError( @@ -268,7 +276,9 @@ def _process_custom_diffusion(self, state_dict): return attn_processors - def _process_lora(self, state_dict, unet_identifier_key, network_alphas, adapter_name, _pipeline): + def _process_lora( + self, state_dict, unet_identifier_key, network_alphas, adapter_name, _pipeline, low_cpu_mem_usage + ): # This method does the following things: # 1. Filters the `state_dict` with keys matching `unet_identifier_key` when using the non-legacy # format. For legacy format no filtering is applied. 
@@ -335,9 +345,12 @@ def _process_lora(self, state_dict, unet_identifier_key, network_alphas, adapter # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline) + peft_kwargs = {} + if is_peft_version(">=", "0.12.1.dev0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage - inject_adapter_in_model(lora_config, self, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name) + inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys diff --git a/tests/lora/utils.py b/tests/lora/utils.py index 939b749c286a..edcff7566aa9 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -38,7 +38,7 @@ if is_peft_available(): - from peft import LoraConfig + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict from peft.tuners.tuners_utils import BaseTunerLayer from peft.utils import get_peft_model_state_dict @@ -65,6 +65,12 @@ def check_if_lora_correctly_set(model) -> bool: return False +def populate_meta_state_dict_with_dummy(state_dict): + if not all(v.device.type == "meta" for _, v in state_dict.items()): + raise ValueError("`state_dict` has non-meta values.") + return {k: torch.randn(v.shape, device=torch_device, dtype=v.dtype) for k, v in state_dict.items()} + + @require_peft_backend class PeftLoraLoaderMixinTests: pipeline_class = None @@ -201,6 +207,64 @@ def get_dummy_tokens(self): prepared_inputs["input_ids"] = inputs return prepared_inputs + @require_peft_version_greater("0.12.0") + def test_low_cpu_mem_usage(self): + for scheduler_cls in self.scheduler_classes: + components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + inject_adapter_in_model(text_lora_config, pipe.text_encoder, low_cpu_mem_usage=True) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder." + ) + self.assertTrue( + "meta" in {p.device.type for p in pipe.text_encoder.parameters()}, + "The LoRA params should be on 'meta' device.", + ) + + te_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(pipe.text_encoder)) + set_peft_model_state_dict(pipe.text_encoder, te_state_dict, low_cpu_mem_usage=True) + self.assertTrue( + "meta" not in {p.device.type for p in pipe.text_encoder.parameters()}, + "No param should be on 'meta' device.", + ) + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + inject_adapter_in_model(denoiser_lora_config, denoiser, low_cpu_mem_usage=True) + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + self.assertTrue( + "meta" in {p.device.type for p in denoiser.parameters()}, "The LoRA params should be on 'meta' device." 
+ ) + + denoiser_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(denoiser)) + set_peft_model_state_dict(denoiser, denoiser_state_dict, low_cpu_mem_usage=True) + self.assertTrue( + "meta" not in {p.device.type for p in denoiser.parameters()}, "No param should be on 'meta' device." + ) + + if self.has_two_text_encoders or self.has_three_text_encoders: + if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: + inject_adapter_in_model(text_lora_config, pipe.text_encoder_2, low_cpu_mem_usage=True) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + self.assertTrue( + "meta" in {p.device.type for p in pipe.text_encoder.parameters()}, + "The LoRA params should be on 'meta' device.", + ) + + te2_state_dict = populate_meta_state_dict_with_dummy( + get_peft_model_state_dict(pipe.text_encoder_2) + ) + set_peft_model_state_dict(pipe.text_encoder, te2_state_dict, low_cpu_mem_usage=True) + self.assertTrue( + "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()}, + "No param should be on 'meta' device.", + ) + def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected From 9a22fc89fcd07d8b437c92656edcbde7bc8be9a8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Sep 2024 08:36:19 +0530 Subject: [PATCH 02/24] add flux support but note https://github.com/huggingface/diffusers/pull/9510\#issuecomment-2378316687 --- src/diffusers/loaders/lora_pipeline.py | 36 +++++++++++++++++++++----- src/diffusers/loaders/unet.py | 4 +-- tests/lora/utils.py | 4 +++ 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 050daeb4abbc..52fa059e4751 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -94,7 +94,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -116,7 +116,7 @@ def load_lora_weights( unet=getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet, adapter_name=adapter_name, _pipeline=self, - low_cpu_mem_usage=kwargs.pop("low_cpu_mem_usage", False), + low_cpu_mem_usage=low_cpu_mem_usage, ) self.load_lora_into_text_encoder( state_dict, @@ -127,6 +127,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + # TODO: need to add here once `transformers` integration is ready: https://github.com/huggingface/transformers/pull/33725/ ) @classmethod @@ -265,7 +266,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) @@ -324,7 +325,7 @@ def load_lora_into_text_encoder( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1673,10 +1674,17 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`str`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -1696,6 +1704,7 @@ def load_lora_weights( transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} @@ -1708,10 +1717,13 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + # TODO: add `low_cpu_mem_usage` once `transformers` integration is ready. ) @classmethod - def load_lora_into_transformer(cls, state_dict, network_alphas, transformer, adapter_name=None, _pipeline=None): + def load_lora_into_transformer( + cls, state_dict, network_alphas, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `transformer`. @@ -1729,7 +1741,13 @@ def load_lora_into_transformer(cls, state_dict, network_alphas, transformer, ada adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
+ ) + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict keys = list(state_dict.keys()) @@ -1778,8 +1796,12 @@ def load_lora_into_transformer(cls, state_dict, network_alphas, transformer, ada # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) - inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name) + peft_kwargs = {} + if is_peft_version(">=", "0.13.0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index e5c0600d0119..7f767730ad42 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -146,7 +146,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) allow_pickle = False - if low_cpu_mem_usage and is_peft_version("<=", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -346,7 +346,7 @@ def _process_lora( # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.12.1.dev0"): + if is_peft_version(">=", "0.13.0"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index edcff7566aa9..09075645518c 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -265,6 +265,10 @@ def test_low_cpu_mem_usage(self): "No param should be on 'meta' device.", ) + _, _, inputs = self.get_dummy_inputs() + output_lora = pipe(**inputs)[0] + self.assertTrue(output_lora.shape == self.output_shape) + def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected From b87eec855ac81c8ad14577d69760da97451b22e4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 18:54:40 +0530 Subject: [PATCH 03/24] low_cpu_mem_usage. --- src/diffusers/loaders/lora_pipeline.py | 116 +++++++++++++++++++------ 1 file changed, 91 insertions(+), 25 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 52fa059e4751..1b9dda53c1bf 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -83,18 +83,18 @@ def load_lora_weights( Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. - kwargs (`dict`, *optional*): - See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
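For context on what these loaders are enabling: with `low_cpu_mem_usage=True`, adapter injection and weight materialization become two separate phases, which is exactly what PATCH 01's test exercises. The same two-phase behavior in isolation (a sketch assuming `peft` >= 0.13 and a plain `torch.nn.Module`; random tensors stand in for real checkpoint weights):

```py
import torch
from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict
from peft.utils import get_peft_model_state_dict


class Tiny(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(16, 16)


model = Tiny()
inject_adapter_in_model(LoraConfig(r=4, target_modules=["proj"]), model, low_cpu_mem_usage=True)
# Phase 1: adapter weights exist only as shapes/dtypes on the `meta` device.
assert "meta" in {p.device.type for p in model.parameters()}

# Phase 2: assigning a state dict materializes them
# (random values here; loaded from a checkpoint in practice).
meta_sd = get_peft_model_state_dict(model)
dummy_sd = {k: torch.randn(v.shape, dtype=v.dtype) for k, v in meta_sd.items()}
set_peft_model_state_dict(model, dummy_sd, low_cpu_mem_usage=True)
assert "meta" not in {p.device.type for p in model.parameters()}
```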
- low_cpu_mem_usage (`str`, *optional*): TODO + low_cpu_mem_usage (`bool`, *optional*): TODO + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -127,7 +127,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, - # TODO: need to add here once `transformers` integration is ready: https://github.com/huggingface/transformers/pull/33725/ + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -266,7 +266,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -325,10 +325,19 @@ def load_lora_into_text_encoder( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): - raise ValueError( - "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." - ) + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage from peft import LoraConfig @@ -395,11 +404,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -570,12 +580,19 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. 
Please update it with `pip install -U peft`." + ) + # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. @@ -595,7 +612,12 @@ def load_lora_weights( raise ValueError("Invalid LoRA checkpoint.") self.load_lora_into_unet( - state_dict, network_alphas=network_alphas, unet=self.unet, adapter_name=adapter_name, _pipeline=self + state_dict, + network_alphas=network_alphas, + unet=self.unet, + adapter_name=adapter_name, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} if len(text_encoder_state_dict) > 0: @@ -607,6 +629,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k} @@ -619,6 +642,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -734,7 +758,9 @@ def lora_state_dict( @classmethod # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_unet - def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None): + def load_lora_into_unet( + cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -752,10 +778,16 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), # then the `state_dict` keys should have `cls.unet_name` and/or `cls.text_encoder_name` as # their prefixes. @@ -765,7 +797,11 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None # Load the layers corresponding to UNet. logger.info(f"Loading {cls.unet_name}.") unet.load_attn_procs( - state_dict, network_alphas=network_alphas, adapter_name=adapter_name, _pipeline=_pipeline + state_dict, + network_alphas=network_alphas, + adapter_name=adapter_name, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -1137,15 +1173,22 @@ def load_lora_weights( Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. - kwargs (`dict`, *optional*): - See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. 
""" if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -1162,6 +1205,7 @@ def load_lora_weights( transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} @@ -1174,6 +1218,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k} @@ -1186,10 +1231,13 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod - def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, _pipeline=None): + def load_lora_into_transformer( + cls, state_dict, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `transformer`. @@ -1203,7 +1251,13 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict keys = list(state_dict.keys()) @@ -1247,8 +1301,12 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) - inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name) + peft_kwargs = {} + if is_peft_version(">=", "0.13.0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys @@ -1674,13 +1732,13 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
- low_cpu_mem_usage (`str`, *optional*): TODO + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1717,7 +1775,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, - # TODO: add `low_cpu_mem_usage` once `transformers` integration is ready. + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -1743,7 +1801,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -2438,15 +2496,22 @@ def load_lora_weights( Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. - kwargs (`dict`, *optional*): - See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
+ ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -2463,6 +2528,7 @@ def load_lora_weights( transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod From d4a1fbfe61b3520752986a642f68dc6ead1b1e00 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 18:58:46 +0530 Subject: [PATCH 04/24] fix-copies --- src/diffusers/loaders/lora_pipeline.py | 96 +++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 1b9dda53c1bf..ffe4ed6758f2 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -404,7 +404,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -815,6 +815,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -837,10 +838,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." 
+ ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -906,11 +922,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -1335,6 +1352,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -1357,10 +1375,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -1426,11 +1459,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -1888,6 +1922,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -1910,10 +1945,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
+ ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -1979,11 +2029,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -2218,6 +2269,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -2240,10 +2292,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -2309,11 +2376,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -2533,7 +2601,9 @@ def load_lora_weights( @classmethod # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer - def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, _pipeline=None): + def load_lora_into_transformer( + cls, state_dict, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `transformer`. 
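Unlike the UNet/transformer path, the text-encoder path goes through `transformers`' own `load_adapter`, which is why these copies add a second gate on the `transformers` version (support landed via huggingface/transformers#33725 and was not yet in a stable release when this series was written). The branch, distilled into a sketch (helper name ours):

```py
from diffusers.utils import is_peft_version, is_transformers_version


def _load_text_encoder_lora(text_encoder, lora_config, state_dict, adapter_name, low_cpu_mem_usage=False):
    peft_kwargs = {}
    if low_cpu_mem_usage:
        if not is_peft_version(">", "0.13.0") or not is_transformers_version(">", "4.45.1"):
            raise ValueError(
                "`low_cpu_mem_usage=True` requires newer `peft` and `transformers` releases."
            )
        peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage

    # `transformers` checks internally whether the adapter name is already in use.
    text_encoder.load_adapter(
        adapter_name=adapter_name,
        adapter_state_dict=state_dict,
        peft_config=lora_config,
        **peft_kwargs,
    )
```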
@@ -2547,7 +2617,13 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict keys = list(state_dict.keys()) @@ -2591,8 +2667,12 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) - inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name) + peft_kwargs = {} + if is_peft_version(">=", "0.13.0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys From 5c831cc9ebb0b76c8c06065c9586438364611b21 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 18:59:30 +0530 Subject: [PATCH 05/24] fix-copies again --- src/diffusers/loaders/lora_pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index ffe4ed6758f2..6ca18f4fbf84 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -922,7 +922,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -1459,7 +1459,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -2029,7 +2029,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -2376,7 +2376,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject 
LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, From 1131e3d04e3131f4c24565257665d75364d696d9 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 19:36:22 +0530 Subject: [PATCH 06/24] tests --- src/diffusers/loaders/lora_pipeline.py | 33 +++++------ src/diffusers/utils/testing_utils.py | 18 ++++++ tests/lora/utils.py | 79 ++++++++++++++++++++++++-- 3 files changed, 110 insertions(+), 20 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 6ca18f4fbf84..408a724cd89d 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -27,6 +27,7 @@ get_peft_kwargs, is_peft_version, is_transformers_available, + is_transformers_version, logging, scale_lora_layers, ) @@ -94,7 +95,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -266,7 +267,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -327,11 +328,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -588,7 +589,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -783,7 +784,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -845,11 +846,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1382,11 +1383,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1772,7 +1773,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1835,7 +1836,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1952,11 +1953,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2299,11 +2300,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py index 7dc3f414d55c..a2f283d0c4f5 100644 --- a/src/diffusers/utils/testing_utils.py +++ b/src/diffusers/utils/testing_utils.py @@ -388,6 +388,24 @@ def decorator(test_case): return decorator +def require_transformers_version_greater(transformers_version): + """ + Decorator marking a test that requires transformers with a specific version, this would require some specific + versions of PEFT and transformers. 
+ """ + + def decorator(test_case): + correct_transformers_version = is_transformers_available() and version.parse( + version.parse(importlib.metadata.version("transformers")).base_version + ) > version.parse(transformers_version) + return unittest.skipUnless( + correct_transformers_version, + f"test requires transformers with the version greater than {transformers_version}", + )(test_case) + + return decorator + + def require_accelerate_version_greater(accelerate_version): def decorator(test_case): correct_accelerate_version = is_peft_available() and version.parse( diff --git a/tests/lora/utils.py b/tests/lora/utils.py index d9a061589c83..d3df40380b21 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -32,6 +32,7 @@ floats_tensor, require_peft_backend, require_peft_version_greater, + require_transformers_version_greater, skip_mps, torch_device, ) @@ -278,8 +279,9 @@ def test_simple_inference_with_text_lora(self): not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" ) - @require_peft_version_greater("0.12.0") - def test_low_cpu_mem_usage(self): + @require_peft_version_greater("0.13.0") + def test_low_cpu_mem_usage_with_injection(self): + """Tests if we can inject LoRA state dict with low_cpu_mem_usage.""" for scheduler_cls in self.scheduler_classes: components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) pipe = self.pipeline_class(**components) @@ -323,14 +325,14 @@ def test_low_cpu_mem_usage(self): check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) self.assertTrue( - "meta" in {p.device.type for p in pipe.text_encoder.parameters()}, + "meta" in {p.device.type for p in pipe.text_encoder_2.parameters()}, "The LoRA params should be on 'meta' device.", ) te2_state_dict = populate_meta_state_dict_with_dummy( get_peft_model_state_dict(pipe.text_encoder_2) ) - set_peft_model_state_dict(pipe.text_encoder, te2_state_dict, low_cpu_mem_usage=True) + set_peft_model_state_dict(pipe.text_encoder_2, te2_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()}, "No param should be on 'meta' device.", @@ -340,6 +342,75 @@ def test_low_cpu_mem_usage(self): output_lora = pipe(**inputs)[0] self.assertTrue(output_lora.shape == self.output_shape) + @require_peft_version_greater("0.13.0") + @require_transformers_version_greater("4.45.1") + def test_low_cpu_mem_usage_with_loading(self): + """Tests if we can load LoRA state dict with low_cpu_mem_usage.""" + + for scheduler_cls in self.scheduler_classes: + components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue(output_no_lora.shape == self.output_shape) + + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" + ) + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config) + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + + if self.has_two_text_encoders or self.has_three_text_encoders: 
+ if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + + with tempfile.TemporaryDirectory() as tmpdirname: + modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True) + lora_state_dicts = self._get_lora_state_dicts(modules_to_save) + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts + ) + + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) + pipe.unload_lora_weights() + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + + for module_name, module in modules_to_save.items(): + self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") + + images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue( + np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results.", + ) + + # Now, check for `low_cpu_mem_usage.` + pipe.unload_lora_weights() + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=True) + + for module_name, module in modules_to_save.items(): + self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") + + images_lora_from_pretrained_low_cpu = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue( + np.allclose( + images_lora_from_pretrained_low_cpu, images_lora_from_pretrained, atol=1e-3, rtol=1e-3 + ), + "Loading from saved checkpoints with `low_cpu_mem_usage` should give same results.", + ) + def test_simple_inference_with_text_lora_and_scale(self): """ Tests a simple inference with lora attached on the text encoder + scale argument From 3c1acf06fe6548274940c584419158950eb08972 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:04:41 +0530 Subject: [PATCH 07/24] _LOW_CPU_MEM_USAGE_DEFAULT_LORA --- src/diffusers/loaders/lora_pipeline.py | 11 ++++++----- src/diffusers/loaders/utils.py | 8 ++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 408a724cd89d..6c19b131289a 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -38,6 +38,7 @@ _convert_xlabs_flux_lora_to_diffusers, _maybe_map_sgm_blocks_to_diffusers, ) +from .utils import _LOW_CPU_MEM_USAGE_DEFAULT_LORA if is_transformers_available(): @@ -94,7 +95,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
@@ -588,7 +589,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." @@ -1201,7 +1202,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." @@ -1772,7 +1773,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." @@ -2575,7 +2576,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." diff --git a/src/diffusers/loaders/utils.py b/src/diffusers/loaders/utils.py index 142d72bf6b77..e16eaab4ae05 100644 --- a/src/diffusers/loaders/utils.py +++ b/src/diffusers/loaders/utils.py @@ -16,6 +16,14 @@ import torch +from ..utils import is_peft_version, is_torch_version, is_transformers_version + + +if is_torch_version(">=", "1.9.0") and is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False + class AttnProcsLayers(torch.nn.Module): def __init__(self, state_dict: Dict[str, torch.Tensor]): From c45c6649f18dfac00019edc1b1563809c3d65d3a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:09:00 +0530 Subject: [PATCH 08/24] _peft_version default. --- src/diffusers/utils/import_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index daecec4aa258..9bfc93f9c4ce 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -280,6 +280,7 @@ _invisible_watermark_available = False +_peft_version = "N/A" _peft_available = importlib.util.find_spec("peft") is not None try: _peft_version = importlib_metadata.version("peft") From 23fe79ed5a223c4e1d86b663e84142488be55f19 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:14:23 +0530 Subject: [PATCH 09/24] version checks. 
--- src/diffusers/loaders/utils.py | 14 +++++++++++--- src/diffusers/utils/import_utils.py | 1 - 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders/utils.py b/src/diffusers/loaders/utils.py index e16eaab4ae05..cc1347f66ef9 100644 --- a/src/diffusers/loaders/utils.py +++ b/src/diffusers/loaders/utils.py @@ -16,11 +16,19 @@ import torch -from ..utils import is_peft_version, is_torch_version, is_transformers_version +from ..utils import ( + is_peft_available, + is_peft_version, + is_torch_version, + is_transformers_available, + is_transformers_version, +) -if is_torch_version(">=", "1.9.0") and is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True +if is_torch_version(">=", "1.9.0"): + if is_peft_available() and is_transformers_available(): + if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True else: _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index 9bfc93f9c4ce..daecec4aa258 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -280,7 +280,6 @@ _invisible_watermark_available = False -_peft_version = "N/A" _peft_available = importlib.util.find_spec("peft") is not None try: _peft_version = importlib_metadata.version("peft") From b57bc3a0bcc578b7c2e18d88e0d88d9337f997c7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:22:12 +0530 Subject: [PATCH 10/24] version check. --- src/diffusers/loaders/lora_pipeline.py | 9 ++++++++- src/diffusers/loaders/utils.py | 16 ---------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 6c19b131289a..d3eef0259c51 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -26,6 +26,7 @@ get_adapter_name, get_peft_kwargs, is_peft_version, + is_torch_version, is_transformers_available, is_transformers_version, logging, @@ -38,7 +39,13 @@ _convert_xlabs_flux_lora_to_diffusers, _maybe_map_sgm_blocks_to_diffusers, ) -from .utils import _LOW_CPU_MEM_USAGE_DEFAULT_LORA + + +if is_torch_version(">=", "1.9.0"): + if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False if is_transformers_available(): diff --git a/src/diffusers/loaders/utils.py b/src/diffusers/loaders/utils.py index cc1347f66ef9..142d72bf6b77 100644 --- a/src/diffusers/loaders/utils.py +++ b/src/diffusers/loaders/utils.py @@ -16,22 +16,6 @@ import torch -from ..utils import ( - is_peft_available, - is_peft_version, - is_torch_version, - is_transformers_available, - is_transformers_version, -) - - -if is_torch_version(">=", "1.9.0"): - if is_peft_available() and is_transformers_available(): - if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True -else: - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False - class AttnProcsLayers(torch.nn.Module): def __init__(self, state_dict: Dict[str, torch.Tensor]): From c18e2ebaedd8aab9e2fb675432c1c9afdc52c152 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:26:24 +0530 Subject: [PATCH 11/24] version check. 
--- src/diffusers/loaders/lora_pipeline.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index d3eef0259c51..804491fd4f26 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -25,6 +25,7 @@ deprecate, get_adapter_name, get_peft_kwargs, + is_peft_available, is_peft_version, is_torch_version, is_transformers_available, @@ -42,7 +43,12 @@ if is_torch_version(">=", "1.9.0"): - if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + if ( + is_peft_available() + and is_peft_version(">", "0.13.1") + and is_transformers_available() + and is_transformers_version(">", "4.45.1") + ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True else: _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False From 2e13836ca2c04399cfeccde4d6e84dec6ee41d00 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 10:07:21 +0530 Subject: [PATCH 12/24] version check. --- src/diffusers/loaders/lora_pipeline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 804491fd4f26..c7f187a10b55 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -42,6 +42,7 @@ ) +_LOW_CPU_MEM_USAGE_DEFAULT_LORA = False if is_torch_version(">=", "1.9.0"): if ( is_peft_available() @@ -50,8 +51,6 @@ and is_transformers_version(">", "4.45.1") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True -else: - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False if is_transformers_available(): From efa33e37c600a8e97d88852e953bc809eff35397 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 19:47:38 +0530 Subject: [PATCH 13/24] require peft 0.13.1. --- src/diffusers/loaders/lora_pipeline.py | 28 +++++++++++++------------- src/diffusers/loaders/unet.py | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index c7f187a10b55..47a9a4c7a17e 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -108,7 +108,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -280,7 +280,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -341,7 +341,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) @@ -602,7 +602,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -797,7 +797,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -859,7 +859,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1333,7 +1333,7 @@ def load_lora_into_transformer( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) @@ -1396,7 +1396,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1786,7 +1786,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1849,7 +1849,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1903,7 +1903,7 @@ def load_lora_into_transformer( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) @@ -1966,7 +1966,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) @@ -2313,7 +2313,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -2682,7 +2682,7 @@ def load_lora_into_transformer( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index 7f767730ad42..3bd3597bdd48 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -346,7 +346,7 @@ def _process_lora( # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) From 83701deb97b5b9664331f29ca43c973c3ab55de6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 19:49:22 +0530 Subject: [PATCH 14/24] explicitly specify low_cpu_mem_usage=False. --- tests/lora/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index d3df40380b21..23efdf9e096c 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -385,7 +385,7 @@ def test_low_cpu_mem_usage_with_loading(self): self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) pipe.unload_lora_weights() - pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=False) for module_name, module in modules_to_save.items(): self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") From e88fb06f1f48e54cf15ad8a51eba48589091589f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 19:55:13 +0530 Subject: [PATCH 15/24] docs. --- .../en/tutorials/using_peft_for_inference.md | 6 ++++ src/diffusers/loaders/lora_pipeline.py | 36 +++++++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md index 907f93d573a0..d08f14afbb62 100644 --- a/docs/source/en/tutorials/using_peft_for_inference.md +++ b/docs/source/en/tutorials/using_peft_for_inference.md @@ -75,6 +75,12 @@ image ![pixel-art](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/peft_integration/diffusers_peft_lora_inference_12_1.png) + + +By default, if we detect the most up-to-date versions of `peft` (0.13.1) and `transformers` (4.46.0), we set `low_cpu_mem_usage=True` to speed up the loading time of LoRA checkpoints. + + + ## Merge adapters You can also merge different adapter checkpoints for inference to blend their styles together. 
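For the documentation tip added above, usage amounts to passing the flag through `load_lora_weights`. A hedged sketch — the checkpoint ids mirror the tutorial's illustrative examples and are not prescribed by this patch:

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Explicitly opting in; with recent enough peft/transformers this is the default.
pipe.load_lora_weights(
    "nerijs/pixel-art-xl",
    weight_name="pixel-art-xl.safetensors",
    adapter_name="pixel",
    low_cpu_mem_usage=True,
)
```

If the installed `peft` or `transformers` predates the versions gated in this series, passing `low_cpu_mem_usage=True` explicitly raises the `ValueError` shown in the diffs above.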
diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 47a9a4c7a17e..c83e146fe4a7 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -46,9 +46,9 @@ if is_torch_version(">=", "1.9.0"): if ( is_peft_available() - and is_peft_version(">", "0.13.1") + and is_peft_version(">=", "0.13.1") and is_transformers_available() - and is_transformers_version(">", "4.45.1") + and is_transformers_version(">=", "4.45.1") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True @@ -108,7 +108,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -280,7 +280,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -341,11 +341,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -602,7 +602,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -797,7 +797,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -859,11 +859,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. 
# https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1396,11 +1396,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1786,7 +1786,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1849,7 +1849,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1966,11 +1966,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2313,11 +2313,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( From 28007f45d2cac13d020912bc26c1d8a117a045dc Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:04:53 +0530 Subject: [PATCH 16/24] transformers version 4.45.2. --- src/diffusers/loaders/lora_pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index c83e146fe4a7..6c33fca8e450 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -48,7 +48,7 @@ is_peft_available() and is_peft_version(">=", "0.13.1") and is_transformers_available() - and is_transformers_version(">=", "4.45.1") + and is_transformers_version(">=", "4.45.2") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True @@ -345,7 +345,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. 
Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -863,7 +863,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1400,7 +1400,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1970,7 +1970,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2317,7 +2317,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. 
# https://github.com/huggingface/transformers/pull/33725/ raise ValueError( From ba5576cf852830469a52b614b1fbece0e79cb4d2 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:05:40 +0530 Subject: [PATCH 17/24] update --- tests/lora/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index 23efdf9e096c..ab6cad01a64f 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -279,7 +279,7 @@ def test_simple_inference_with_text_lora(self): not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" ) - @require_peft_version_greater("0.13.0") + @require_peft_version_greater("0.13.1") def test_low_cpu_mem_usage_with_injection(self): """Tests if we can inject LoRA state dict with low_cpu_mem_usage.""" for scheduler_cls in self.scheduler_classes: @@ -342,7 +342,7 @@ def test_low_cpu_mem_usage_with_injection(self): output_lora = pipe(**inputs)[0] self.assertTrue(output_lora.shape == self.output_shape) - @require_peft_version_greater("0.13.0") + @require_peft_version_greater("0.13.1") @require_transformers_version_greater("4.45.1") def test_low_cpu_mem_usage_with_loading(self): """Tests if we can load LoRA state dict with low_cpu_mem_usage.""" From 48641dc701bcebb6c9fd9dddcc3e16978311a841 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:19:02 +0530 Subject: [PATCH 18/24] fix --- src/diffusers/loaders/lora_pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 6c33fca8e450..046f66a9cc27 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -48,7 +48,7 @@ is_peft_available() and is_peft_version(">=", "0.13.1") and is_transformers_available() - and is_transformers_version(">=", "4.45.2") + and is_transformers_version(">", "4.45.2") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True @@ -345,7 +345,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -863,7 +863,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1400,7 +1400,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1970,7 +1970,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2317,7 +2317,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( From 0ab1d443a35be3845d67bd592201f966d19cb95e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:27:30 +0530 Subject: [PATCH 19/24] empty From ca5a1d5194058f92c6b7524dcb4d2d49f6cc7885 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 01:27:58 +0530 Subject: [PATCH 20/24] better name initialize_dummy_state_dict. --- tests/lora/utils.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index ab6cad01a64f..9c982e8de37f 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -66,7 +66,7 @@ def check_if_lora_correctly_set(model) -> bool: return False -def populate_meta_state_dict_with_dummy(state_dict): +def initialize_dummy_state_dict(state_dict): if not all(v.device.type == "meta" for _, v in state_dict.items()): raise ValueError("`state_dict` has non-meta values.") return {k: torch.randn(v.shape, device=torch_device, dtype=v.dtype) for k, v in state_dict.items()} @@ -298,7 +298,7 @@ def test_low_cpu_mem_usage_with_injection(self): "The LoRA params should be on 'meta' device.", ) - te_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(pipe.text_encoder)) + te_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder)) set_peft_model_state_dict(pipe.text_encoder, te_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in pipe.text_encoder.parameters()}, @@ -312,7 +312,7 @@ def test_low_cpu_mem_usage_with_injection(self): "meta" in {p.device.type for p in denoiser.parameters()}, "The LoRA params should be on 'meta' device." ) - denoiser_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(denoiser)) + denoiser_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(denoiser)) set_peft_model_state_dict(denoiser, denoiser_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in denoiser.parameters()}, "No param should be on 'meta' device." @@ -329,9 +329,7 @@ def test_low_cpu_mem_usage_with_injection(self): "The LoRA params should be on 'meta' device.", ) - te2_state_dict = populate_meta_state_dict_with_dummy( - get_peft_model_state_dict(pipe.text_encoder_2) - ) + te2_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder_2)) set_peft_model_state_dict(pipe.text_encoder_2, te2_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()}, From 95534e6792fcb034ed32c9cd538424e7f096ac7e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 01:32:15 +0530 Subject: [PATCH 21/24] doc todos. 
--- src/diffusers/loaders/lora_pipeline.py | 32 ++++++++++++++------------ src/diffusers/loaders/unet.py | 4 +++- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index f08e6e293007..f3033fdf81a3 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -100,7 +100,9 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading only loading the pretrained LoRA weights and not initializing the random + weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -280,7 +282,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -339,7 +341,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -599,7 +601,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -803,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -863,7 +865,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1224,7 +1226,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. 
If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -1299,7 +1301,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( @@ -1406,7 +1408,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1801,7 +1803,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1868,7 +1870,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( @@ -1980,7 +1982,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2327,7 +2329,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2608,7 +2610,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
- low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -2658,7 +2660,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index 3bd3597bdd48..fe7778c4d27f 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -115,7 +115,9 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict `default_{i}` where i is the total number of adapters being loaded. weight_name (`str`, *optional*, defaults to None): Name of the serialized state dict file. - low_cpu_mem_usage (`str`, *optional*, defaults to False): TODO + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading only loading the pretrained LoRA weights and not initializing the random + weights. Example: From cf4917c9798c61e8a94a1c429038e5eaba677645 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Wed, 9 Oct 2024 10:38:44 +0530 Subject: [PATCH 22/24] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- .../en/tutorials/using_peft_for_inference.md | 2 +- src/diffusers/loaders/lora_pipeline.py | 28 +++++++++---------- src/diffusers/loaders/unet.py | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md index d08f14afbb62..615af55ef5b5 100644 --- a/docs/source/en/tutorials/using_peft_for_inference.md +++ b/docs/source/en/tutorials/using_peft_for_inference.md @@ -77,7 +77,7 @@ image -By default, if we detect the most up-to-date versions of `peft` (0.13.1) and `transformers` (4.46.0), we set `low_cpu_mem_usage=True` to speed up the loading time of LoRA checkpoints. +By default, if the most up-to-date versions of PEFT and Transformers are detected, `low_cpu_mem_usage` is set to `True` to speed up the loading time of LoRA checkpoints. diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index f3033fdf81a3..98e8d2ffdb88 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -101,7 +101,7 @@ def load_lora_weights( Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): - Speed up model loading only loading the pretrained LoRA weights and not initializing the random + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. @@ -341,7 +341,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
- Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -601,7 +601,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -805,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -865,7 +865,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1226,7 +1226,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -1301,7 +1301,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( @@ -1408,7 +1408,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
""" if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1803,7 +1803,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1870,7 +1870,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( @@ -1982,7 +1982,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2329,7 +2329,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2610,7 +2610,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -2660,7 +2660,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
""" if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index fe7778c4d27f..eaac52df6202 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -116,7 +116,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict weight_name (`str`, *optional*, defaults to None): Name of the serialized state dict file. low_cpu_mem_usage (`bool`, *optional*): - Speed up model loading only loading the pretrained LoRA weights and not initializing the random + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. Example: From 8ffe3bec5be634f3d95be9d4aac6394950752bb0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 10:39:59 +0530 Subject: [PATCH 23/24] style --- src/diffusers/loaders/lora_pipeline.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 98e8d2ffdb88..d70f3780c835 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -341,7 +341,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -601,7 +601,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -805,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -865,7 +865,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
+ Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1226,7 +1226,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -1301,7 +1301,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( @@ -1408,7 +1408,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1803,7 +1803,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1870,7 +1870,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( @@ -1982,7 +1982,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
+ Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2329,7 +2329,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2610,7 +2610,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -2660,7 +2660,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( From a4eaa426020414e06e25823f563965e39f91c58f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 10:43:33 +0530 Subject: [PATCH 24/24] fix-copies --- src/diffusers/loaders/lora_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index d70f3780c835..2037bd787433 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -805,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.")
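Taken together, the series makes `low_cpu_mem_usage` default to `True` only when recent enough `torch`, `peft`, and `transformers` are detected, and keeps it opt-in (with a hard version check) otherwise. A rough way to observe the effect end to end — the checkpoint ids are the same illustrative ones as above, and actual savings depend on checkpoint size and hardware:

```python
import time

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)

# Compare LoRA load time with and without the low-CPU-memory path.
for flag in (False, True):
    start = time.perf_counter()
    pipe.load_lora_weights(
        "nerijs/pixel-art-xl",
        weight_name="pixel-art-xl.safetensors",
        low_cpu_mem_usage=flag,
    )
    print(f"low_cpu_mem_usage={flag}: loaded in {time.perf_counter() - start:.2f}s")
    pipe.unload_lora_weights()
```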