From d66b8583ac8bdc617de25ce0019b15f9bb45bfe6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 24 Sep 2024 12:15:40 +0530 Subject: [PATCH 01/24] allow loras to be loaded with low_cpu_mem_usage. --- src/diffusers/loaders/lora_pipeline.py | 31 +++++++++++- src/diffusers/loaders/unet.py | 19 ++++++-- tests/lora/utils.py | 66 +++++++++++++++++++++++++- 3 files changed, 110 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index ba1435a8cbdc..050daeb4abbc 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -88,10 +88,17 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`str`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -109,6 +116,7 @@ def load_lora_weights( unet=getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=kwargs.pop("low_cpu_mem_usage", False), ) self.load_lora_into_text_encoder( state_dict, @@ -232,7 +240,9 @@ def lora_state_dict( return state_dict, network_alphas @classmethod - def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None): + def load_lora_into_unet( + cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -250,10 +260,16 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), # then the `state_dict` keys should have `cls.unet_name` and/or `cls.text_encoder_name` as # their prefixes. @@ -263,7 +279,11 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None # Load the layers corresponding to UNet. 
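A quick sense of what the new kwarg buys at the call site: `load_lora_weights` now accepts `low_cpu_mem_usage=True` and forwards it down through `load_lora_into_unet`. A minimal usage sketch — the checkpoint and adapter ids below are illustrative placeholders, not taken from this patch, and a `peft` release newer than the guard above is assumed:

```py
import torch
from diffusers import DiffusionPipeline

# Placeholder model and LoRA ids, for illustration only.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# With a new-enough `peft`, adapter modules are first created on the `meta`
# device and materialized straight from the checkpoint, skipping one full
# CPU-side copy of the LoRA weights.
pipe.load_lora_weights("some-user/some-lora", low_cpu_mem_usage=True)
```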
logger.info(f"Loading {cls.unet_name}.") unet.load_attn_procs( - state_dict, network_alphas=network_alphas, adapter_name=adapter_name, _pipeline=_pipeline + state_dict, + network_alphas=network_alphas, + adapter_name=adapter_name, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -276,6 +296,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -298,10 +319,16 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index 32ace77b6224..e5c0600d0119 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -115,6 +115,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict `default_{i}` where i is the total number of adapters being loaded. weight_name (`str`, *optional*, defaults to None): Name of the serialized state dict file. + low_cpu_mem_usage (`str`, *optional*, defaults to False): TODO Example: @@ -142,8 +143,14 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict adapter_name = kwargs.pop("adapter_name", None) _pipeline = kwargs.pop("_pipeline", None) network_alphas = kwargs.pop("network_alphas", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) allow_pickle = False + if low_cpu_mem_usage and is_peft_version("<=", "0.12.1.dev0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if use_safetensors is None: use_safetensors = True allow_pickle = True @@ -209,6 +216,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict network_alphas=network_alphas, adapter_name=adapter_name, _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, ) else: raise ValueError( @@ -268,7 +276,9 @@ def _process_custom_diffusion(self, state_dict): return attn_processors - def _process_lora(self, state_dict, unet_identifier_key, network_alphas, adapter_name, _pipeline): + def _process_lora( + self, state_dict, unet_identifier_key, network_alphas, adapter_name, _pipeline, low_cpu_mem_usage + ): # This method does the following things: # 1. Filters the `state_dict` with keys matching `unet_identifier_key` when using the non-legacy # format. For legacy format no filtering is applied. 
@@ -335,9 +345,12 @@ def _process_lora(self, state_dict, unet_identifier_key, network_alphas, adapter # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline) + peft_kwargs = {} + if is_peft_version(">=", "0.12.1.dev0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage - inject_adapter_in_model(lora_config, self, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name) + inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys diff --git a/tests/lora/utils.py b/tests/lora/utils.py index 939b749c286a..edcff7566aa9 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -38,7 +38,7 @@ if is_peft_available(): - from peft import LoraConfig + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict from peft.tuners.tuners_utils import BaseTunerLayer from peft.utils import get_peft_model_state_dict @@ -65,6 +65,12 @@ def check_if_lora_correctly_set(model) -> bool: return False +def populate_meta_state_dict_with_dummy(state_dict): + if not all(v.device.type == "meta" for _, v in state_dict.items()): + raise ValueError("`state_dict` has non-meta values.") + return {k: torch.randn(v.shape, device=torch_device, dtype=v.dtype) for k, v in state_dict.items()} + + @require_peft_backend class PeftLoraLoaderMixinTests: pipeline_class = None @@ -201,6 +207,64 @@ def get_dummy_tokens(self): prepared_inputs["input_ids"] = inputs return prepared_inputs + @require_peft_version_greater("0.12.0") + def test_low_cpu_mem_usage(self): + for scheduler_cls in self.scheduler_classes: + components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + inject_adapter_in_model(text_lora_config, pipe.text_encoder, low_cpu_mem_usage=True) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder." + ) + self.assertTrue( + "meta" in {p.device.type for p in pipe.text_encoder.parameters()}, + "The LoRA params should be on 'meta' device.", + ) + + te_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(pipe.text_encoder)) + set_peft_model_state_dict(pipe.text_encoder, te_state_dict, low_cpu_mem_usage=True) + self.assertTrue( + "meta" not in {p.device.type for p in pipe.text_encoder.parameters()}, + "No param should be on 'meta' device.", + ) + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + inject_adapter_in_model(denoiser_lora_config, denoiser, low_cpu_mem_usage=True) + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + self.assertTrue( + "meta" in {p.device.type for p in denoiser.parameters()}, "The LoRA params should be on 'meta' device." 
+ ) + + denoiser_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(denoiser)) + set_peft_model_state_dict(denoiser, denoiser_state_dict, low_cpu_mem_usage=True) + self.assertTrue( + "meta" not in {p.device.type for p in denoiser.parameters()}, "No param should be on 'meta' device." + ) + + if self.has_two_text_encoders or self.has_three_text_encoders: + if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: + inject_adapter_in_model(text_lora_config, pipe.text_encoder_2, low_cpu_mem_usage=True) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + self.assertTrue( + "meta" in {p.device.type for p in pipe.text_encoder.parameters()}, + "The LoRA params should be on 'meta' device.", + ) + + te2_state_dict = populate_meta_state_dict_with_dummy( + get_peft_model_state_dict(pipe.text_encoder_2) + ) + set_peft_model_state_dict(pipe.text_encoder, te2_state_dict, low_cpu_mem_usage=True) + self.assertTrue( + "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()}, + "No param should be on 'meta' device.", + ) + def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected From 9a22fc89fcd07d8b437c92656edcbde7bc8be9a8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Sep 2024 08:36:19 +0530 Subject: [PATCH 02/24] add flux support but note https://github.com/huggingface/diffusers/pull/9510\#issuecomment-2378316687 --- src/diffusers/loaders/lora_pipeline.py | 36 +++++++++++++++++++++----- src/diffusers/loaders/unet.py | 4 +-- tests/lora/utils.py | 4 +++ 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 050daeb4abbc..52fa059e4751 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -94,7 +94,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -116,7 +116,7 @@ def load_lora_weights( unet=getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet, adapter_name=adapter_name, _pipeline=self, - low_cpu_mem_usage=kwargs.pop("low_cpu_mem_usage", False), + low_cpu_mem_usage=low_cpu_mem_usage, ) self.load_lora_into_text_encoder( state_dict, @@ -127,6 +127,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + # TODO: need to add here once `transformers` integration is ready: https://github.com/huggingface/transformers/pull/33725/ ) @classmethod @@ -265,7 +266,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) @@ -324,7 +325,7 @@ def load_lora_into_text_encoder( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1673,10 +1674,17 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`str`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -1696,6 +1704,7 @@ def load_lora_weights( transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} @@ -1708,10 +1717,13 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + # TODO: add `low_cpu_mem_usage` once `transformers` integration is ready. ) @classmethod - def load_lora_into_transformer(cls, state_dict, network_alphas, transformer, adapter_name=None, _pipeline=None): + def load_lora_into_transformer( + cls, state_dict, network_alphas, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `transformer`. @@ -1729,7 +1741,13 @@ def load_lora_into_transformer(cls, state_dict, network_alphas, transformer, ada adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
+ ) + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict keys = list(state_dict.keys()) @@ -1778,8 +1796,12 @@ def load_lora_into_transformer(cls, state_dict, network_alphas, transformer, ada # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) - inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name) + peft_kwargs = {} + if is_peft_version(">=", "0.13.0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index e5c0600d0119..7f767730ad42 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -146,7 +146,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) allow_pickle = False - if low_cpu_mem_usage and is_peft_version("<=", "0.12.1.dev0"): + if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -346,7 +346,7 @@ def _process_lora( # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.12.1.dev0"): + if is_peft_version(">=", "0.13.0"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index edcff7566aa9..09075645518c 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -265,6 +265,10 @@ def test_low_cpu_mem_usage(self): "No param should be on 'meta' device.", ) + _, _, inputs = self.get_dummy_inputs() + output_lora = pipe(**inputs)[0] + self.assertTrue(output_lora.shape == self.output_shape) + def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected From b87eec855ac81c8ad14577d69760da97451b22e4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 18:54:40 +0530 Subject: [PATCH 03/24] low_cpu_mem_usage. --- src/diffusers/loaders/lora_pipeline.py | 116 +++++++++++++++++++------ 1 file changed, 91 insertions(+), 25 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 52fa059e4751..1b9dda53c1bf 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -83,18 +83,18 @@ def load_lora_weights( Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. - kwargs (`dict`, *optional*): - See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
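For context on what these loaders are enabling: with `low_cpu_mem_usage=True`, adapter injection and weight materialization become two separate phases, which is exactly what PATCH 01's test exercises. The same two-phase behavior in isolation (a sketch assuming `peft` >= 0.13 and a plain `torch.nn.Module`; random tensors stand in for real checkpoint weights):

```py
import torch
from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict
from peft.utils import get_peft_model_state_dict


class Tiny(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(16, 16)


model = Tiny()
inject_adapter_in_model(LoraConfig(r=4, target_modules=["proj"]), model, low_cpu_mem_usage=True)
# Phase 1: adapter weights exist only as shapes/dtypes on the `meta` device.
assert "meta" in {p.device.type for p in model.parameters()}

# Phase 2: assigning a state dict materializes them
# (random values here; loaded from a checkpoint in practice).
meta_sd = get_peft_model_state_dict(model)
dummy_sd = {k: torch.randn(v.shape, dtype=v.dtype) for k, v in meta_sd.items()}
set_peft_model_state_dict(model, dummy_sd, low_cpu_mem_usage=True)
assert "meta" not in {p.device.type for p in model.parameters()}
```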
- low_cpu_mem_usage (`str`, *optional*): TODO + low_cpu_mem_usage (`bool`, *optional*): TODO + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -127,7 +127,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, - # TODO: need to add here once `transformers` integration is ready: https://github.com/huggingface/transformers/pull/33725/ + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -266,7 +266,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -325,10 +325,19 @@ def load_lora_into_text_encoder( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): - raise ValueError( - "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." - ) + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage from peft import LoraConfig @@ -395,11 +404,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -570,12 +580,19 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. 
Please update it with `pip install -U peft`." + ) + # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. @@ -595,7 +612,12 @@ def load_lora_weights( raise ValueError("Invalid LoRA checkpoint.") self.load_lora_into_unet( - state_dict, network_alphas=network_alphas, unet=self.unet, adapter_name=adapter_name, _pipeline=self + state_dict, + network_alphas=network_alphas, + unet=self.unet, + adapter_name=adapter_name, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} if len(text_encoder_state_dict) > 0: @@ -607,6 +629,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k} @@ -619,6 +642,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -734,7 +758,9 @@ def lora_state_dict( @classmethod # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_unet - def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None): + def load_lora_into_unet( + cls, state_dict, network_alphas, unet, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -752,10 +778,16 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), # then the `state_dict` keys should have `cls.unet_name` and/or `cls.text_encoder_name` as # their prefixes. @@ -765,7 +797,11 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, adapter_name=None # Load the layers corresponding to UNet. logger.info(f"Loading {cls.unet_name}.") unet.load_attn_procs( - state_dict, network_alphas=network_alphas, adapter_name=adapter_name, _pipeline=_pipeline + state_dict, + network_alphas=network_alphas, + adapter_name=adapter_name, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -1137,15 +1173,22 @@ def load_lora_weights( Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. - kwargs (`dict`, *optional*): - See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. 
""" if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -1162,6 +1205,7 @@ def load_lora_weights( transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} @@ -1174,6 +1218,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k} @@ -1186,10 +1231,13 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod - def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, _pipeline=None): + def load_lora_into_transformer( + cls, state_dict, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `transformer`. @@ -1203,7 +1251,13 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict keys = list(state_dict.keys()) @@ -1247,8 +1301,12 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) - inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name) + peft_kwargs = {} + if is_peft_version(">=", "0.13.0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys @@ -1674,13 +1732,13 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
- low_cpu_mem_usage (`str`, *optional*): TODO + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1717,7 +1775,7 @@ def load_lora_weights( lora_scale=self.lora_scale, adapter_name=adapter_name, _pipeline=self, - # TODO: add `low_cpu_mem_usage` once `transformers` integration is ready. + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod @@ -1743,7 +1801,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -2438,15 +2496,22 @@ def load_lora_weights( Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. - kwargs (`dict`, *optional*): - See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
+ ) + # if a dict is passed, copy it instead of modifying it inplace if isinstance(pretrained_model_name_or_path_or_dict, dict): pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() @@ -2463,6 +2528,7 @@ def load_lora_weights( transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, adapter_name=adapter_name, _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, ) @classmethod From d4a1fbfe61b3520752986a642f68dc6ead1b1e00 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 18:58:46 +0530 Subject: [PATCH 04/24] fix-copies --- src/diffusers/loaders/lora_pipeline.py | 96 +++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 1b9dda53c1bf..ffe4ed6758f2 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -404,7 +404,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -815,6 +815,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -837,10 +838,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." 
+ ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -906,11 +922,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -1335,6 +1352,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -1357,10 +1375,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -1426,11 +1459,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -1888,6 +1922,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -1910,10 +1945,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
+ ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -1979,11 +2029,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -2218,6 +2269,7 @@ def load_lora_into_text_encoder( lora_scale=1.0, adapter_name=None, _pipeline=None, + low_cpu_mem_usage=False, ): """ This will load the LoRA layers specified in `state_dict` into `text_encoder` @@ -2240,10 +2292,25 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") + peft_kwargs = {} + if low_cpu_mem_usage: + if is_peft_version(">", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_available(">", "4.45.1"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + from peft import LoraConfig # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -2309,11 +2376,12 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or not + # in transformers we automatically check whether the adapter name is already in use or text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, + **peft_kwargs, ) # scale LoRA layers with `lora_scale` @@ -2533,7 +2601,9 @@ def load_lora_weights( @classmethod # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer - def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, _pipeline=None): + def load_lora_into_transformer( + cls, state_dict, transformer, adapter_name=None, _pipeline=None, low_cpu_mem_usage=False + ): """ This will load the LoRA layers specified in `state_dict` into `transformer`. 
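Unlike the UNet/transformer path, the text-encoder path goes through `transformers`' own `load_adapter`, which is why these copies add a second gate on the `transformers` version (support landed via huggingface/transformers#33725 and was not yet in a stable release when this series was written). The branch, distilled into a sketch (helper name ours):

```py
from diffusers.utils import is_peft_version, is_transformers_version


def _load_text_encoder_lora(text_encoder, lora_config, state_dict, adapter_name, low_cpu_mem_usage=False):
    peft_kwargs = {}
    if low_cpu_mem_usage:
        if not is_peft_version(">", "0.13.0") or not is_transformers_version(">", "4.45.1"):
            raise ValueError(
                "`low_cpu_mem_usage=True` requires newer `peft` and `transformers` releases."
            )
        peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage

    # `transformers` checks internally whether the adapter name is already in use.
    text_encoder.load_adapter(
        adapter_name=adapter_name,
        adapter_state_dict=state_dict,
        peft_config=lora_config,
        **peft_kwargs,
    )
```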
@@ -2547,7 +2617,13 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): TODO """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict keys = list(state_dict.keys()) @@ -2591,8 +2667,12 @@ def load_lora_into_transformer(cls, state_dict, transformer, adapter_name=None, # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) - inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name) + peft_kwargs = {} + if is_peft_version(">=", "0.13.0"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(transformer, state_dict, adapter_name, **peft_kwargs) if incompatible_keys is not None: # check only for unexpected keys From 5c831cc9ebb0b76c8c06065c9586438364611b21 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 18:59:30 +0530 Subject: [PATCH 05/24] fix-copies again --- src/diffusers/loaders/lora_pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index ffe4ed6758f2..6ca18f4fbf84 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -922,7 +922,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -1459,7 +1459,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -2029,7 +2029,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, @@ -2376,7 +2376,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject 
LoRA layers and load the state dict - # in transformers we automatically check whether the adapter name is already in use or + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, From 1131e3d04e3131f4c24565257665d75364d696d9 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 7 Oct 2024 19:36:22 +0530 Subject: [PATCH 06/24] tests --- src/diffusers/loaders/lora_pipeline.py | 33 +++++------ src/diffusers/utils/testing_utils.py | 18 ++++++ tests/lora/utils.py | 79 ++++++++++++++++++++++++-- 3 files changed, 110 insertions(+), 20 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 6ca18f4fbf84..408a724cd89d 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -27,6 +27,7 @@ get_peft_kwargs, is_peft_version, is_transformers_available, + is_transformers_version, logging, scale_lora_layers, ) @@ -94,7 +95,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -266,7 +267,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -327,11 +328,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -588,7 +589,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -783,7 +784,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -845,11 +846,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1382,11 +1383,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1772,7 +1773,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1835,7 +1836,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1952,11 +1953,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2299,11 +2300,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_available(">", "4.45.1"): + if not is_transformers_version(">", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py index 7dc3f414d55c..a2f283d0c4f5 100644 --- a/src/diffusers/utils/testing_utils.py +++ b/src/diffusers/utils/testing_utils.py @@ -388,6 +388,24 @@ def decorator(test_case): return decorator +def require_transformers_version_greater(transformers_version): + """ + Decorator marking a test that requires transformers with a specific version, this would require some specific + versions of PEFT and transformers. 
+ """ + + def decorator(test_case): + correct_transformers_version = is_transformers_available() and version.parse( + version.parse(importlib.metadata.version("transformers")).base_version + ) > version.parse(transformers_version) + return unittest.skipUnless( + correct_transformers_version, + f"test requires transformers with the version greater than {transformers_version}", + )(test_case) + + return decorator + + def require_accelerate_version_greater(accelerate_version): def decorator(test_case): correct_accelerate_version = is_peft_available() and version.parse( diff --git a/tests/lora/utils.py b/tests/lora/utils.py index d9a061589c83..d3df40380b21 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -32,6 +32,7 @@ floats_tensor, require_peft_backend, require_peft_version_greater, + require_transformers_version_greater, skip_mps, torch_device, ) @@ -278,8 +279,9 @@ def test_simple_inference_with_text_lora(self): not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" ) - @require_peft_version_greater("0.12.0") - def test_low_cpu_mem_usage(self): + @require_peft_version_greater("0.13.0") + def test_low_cpu_mem_usage_with_injection(self): + """Tests if we can inject LoRA state dict with low_cpu_mem_usage.""" for scheduler_cls in self.scheduler_classes: components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) pipe = self.pipeline_class(**components) @@ -323,14 +325,14 @@ def test_low_cpu_mem_usage(self): check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) self.assertTrue( - "meta" in {p.device.type for p in pipe.text_encoder.parameters()}, + "meta" in {p.device.type for p in pipe.text_encoder_2.parameters()}, "The LoRA params should be on 'meta' device.", ) te2_state_dict = populate_meta_state_dict_with_dummy( get_peft_model_state_dict(pipe.text_encoder_2) ) - set_peft_model_state_dict(pipe.text_encoder, te2_state_dict, low_cpu_mem_usage=True) + set_peft_model_state_dict(pipe.text_encoder_2, te2_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()}, "No param should be on 'meta' device.", @@ -340,6 +342,75 @@ def test_low_cpu_mem_usage(self): output_lora = pipe(**inputs)[0] self.assertTrue(output_lora.shape == self.output_shape) + @require_peft_version_greater("0.13.0") + @require_transformers_version_greater("4.45.1") + def test_low_cpu_mem_usage_with_loading(self): + """Tests if we can load LoRA state dict with low_cpu_mem_usage.""" + + for scheduler_cls in self.scheduler_classes: + components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue(output_no_lora.shape == self.output_shape) + + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" + ) + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config) + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + + if self.has_two_text_encoders or self.has_three_text_encoders: 
+ if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + + with tempfile.TemporaryDirectory() as tmpdirname: + modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True) + lora_state_dicts = self._get_lora_state_dicts(modules_to_save) + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts + ) + + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) + pipe.unload_lora_weights() + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + + for module_name, module in modules_to_save.items(): + self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") + + images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue( + np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results.", + ) + + # Now, check for `low_cpu_mem_usage.` + pipe.unload_lora_weights() + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=True) + + for module_name, module in modules_to_save.items(): + self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") + + images_lora_from_pretrained_low_cpu = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue( + np.allclose( + images_lora_from_pretrained_low_cpu, images_lora_from_pretrained, atol=1e-3, rtol=1e-3 + ), + "Loading from saved checkpoints with `low_cpu_mem_usage` should give same results.", + ) + def test_simple_inference_with_text_lora_and_scale(self): """ Tests a simple inference with lora attached on the text encoder + scale argument From 3c1acf06fe6548274940c584419158950eb08972 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:04:41 +0530 Subject: [PATCH 07/24] _LOW_CPU_MEM_USAGE_DEFAULT_LORA --- src/diffusers/loaders/lora_pipeline.py | 11 ++++++----- src/diffusers/loaders/utils.py | 8 ++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 408a724cd89d..6c19b131289a 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -38,6 +38,7 @@ _convert_xlabs_flux_lora_to_diffusers, _maybe_map_sgm_blocks_to_diffusers, ) +from .utils import _LOW_CPU_MEM_USAGE_DEFAULT_LORA if is_transformers_available(): @@ -94,7 +95,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
@@ -588,7 +589,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." @@ -1201,7 +1202,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." @@ -1772,7 +1773,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." @@ -2575,7 +2576,7 @@ def load_lora_weights( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." diff --git a/src/diffusers/loaders/utils.py b/src/diffusers/loaders/utils.py index 142d72bf6b77..e16eaab4ae05 100644 --- a/src/diffusers/loaders/utils.py +++ b/src/diffusers/loaders/utils.py @@ -16,6 +16,14 @@ import torch +from ..utils import is_peft_version, is_torch_version, is_transformers_version + + +if is_torch_version(">=", "1.9.0") and is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False + class AttnProcsLayers(torch.nn.Module): def __init__(self, state_dict: Dict[str, torch.Tensor]): From c45c6649f18dfac00019edc1b1563809c3d65d3a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:09:00 +0530 Subject: [PATCH 08/24] _peft_version default. --- src/diffusers/utils/import_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index daecec4aa258..9bfc93f9c4ce 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -280,6 +280,7 @@ _invisible_watermark_available = False +_peft_version = "N/A" _peft_available = importlib.util.find_spec("peft") is not None try: _peft_version = importlib_metadata.version("peft") From 23fe79ed5a223c4e1d86b663e84142488be55f19 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:14:23 +0530 Subject: [PATCH 09/24] version checks. 
--- src/diffusers/loaders/utils.py | 14 +++++++++++--- src/diffusers/utils/import_utils.py | 1 - 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders/utils.py b/src/diffusers/loaders/utils.py index e16eaab4ae05..cc1347f66ef9 100644 --- a/src/diffusers/loaders/utils.py +++ b/src/diffusers/loaders/utils.py @@ -16,11 +16,19 @@ import torch -from ..utils import is_peft_version, is_torch_version, is_transformers_version +from ..utils import ( + is_peft_available, + is_peft_version, + is_torch_version, + is_transformers_available, + is_transformers_version, +) -if is_torch_version(">=", "1.9.0") and is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True +if is_torch_version(">=", "1.9.0"): + if is_peft_available() and is_transformers_available(): + if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True else: _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index 9bfc93f9c4ce..daecec4aa258 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -280,7 +280,6 @@ _invisible_watermark_available = False -_peft_version = "N/A" _peft_available = importlib.util.find_spec("peft") is not None try: _peft_version = importlib_metadata.version("peft") From b57bc3a0bcc578b7c2e18d88e0d88d9337f997c7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:22:12 +0530 Subject: [PATCH 10/24] version check. --- src/diffusers/loaders/lora_pipeline.py | 9 ++++++++- src/diffusers/loaders/utils.py | 16 ---------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 6c19b131289a..d3eef0259c51 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -26,6 +26,7 @@ get_adapter_name, get_peft_kwargs, is_peft_version, + is_torch_version, is_transformers_available, is_transformers_version, logging, @@ -38,7 +39,13 @@ _convert_xlabs_flux_lora_to_diffusers, _maybe_map_sgm_blocks_to_diffusers, ) -from .utils import _LOW_CPU_MEM_USAGE_DEFAULT_LORA + + +if is_torch_version(">=", "1.9.0"): + if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False if is_transformers_available(): diff --git a/src/diffusers/loaders/utils.py b/src/diffusers/loaders/utils.py index cc1347f66ef9..142d72bf6b77 100644 --- a/src/diffusers/loaders/utils.py +++ b/src/diffusers/loaders/utils.py @@ -16,22 +16,6 @@ import torch -from ..utils import ( - is_peft_available, - is_peft_version, - is_torch_version, - is_transformers_available, - is_transformers_version, -) - - -if is_torch_version(">=", "1.9.0"): - if is_peft_available() and is_transformers_available(): - if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True -else: - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False - class AttnProcsLayers(torch.nn.Module): def __init__(self, state_dict: Dict[str, torch.Tensor]): From c18e2ebaedd8aab9e2fb675432c1c9afdc52c152 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 09:26:24 +0530 Subject: [PATCH 11/24] version check. 
--- src/diffusers/loaders/lora_pipeline.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index d3eef0259c51..804491fd4f26 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -25,6 +25,7 @@ deprecate, get_adapter_name, get_peft_kwargs, + is_peft_available, is_peft_version, is_torch_version, is_transformers_available, @@ -42,7 +43,12 @@ if is_torch_version(">=", "1.9.0"): - if is_peft_version(">", "0.13.1") and is_transformers_version(">", "4.45.1"): + if ( + is_peft_available() + and is_peft_version(">", "0.13.1") + and is_transformers_available() + and is_transformers_version(">", "4.45.1") + ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True else: _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False From 2e13836ca2c04399cfeccde4d6e84dec6ee41d00 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 10:07:21 +0530 Subject: [PATCH 12/24] version check. --- src/diffusers/loaders/lora_pipeline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 804491fd4f26..c7f187a10b55 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -42,6 +42,7 @@ ) +_LOW_CPU_MEM_USAGE_DEFAULT_LORA = False if is_torch_version(">=", "1.9.0"): if ( is_peft_available() @@ -50,8 +51,6 @@ and is_transformers_version(">", "4.45.1") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True -else: - _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False if is_transformers_available(): From efa33e37c600a8e97d88852e953bc809eff35397 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 19:47:38 +0530 Subject: [PATCH 13/24] require peft 0.13.1. --- src/diffusers/loaders/lora_pipeline.py | 28 +++++++++++++------------- src/diffusers/loaders/unet.py | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index c7f187a10b55..47a9a4c7a17e 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -108,7 +108,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -280,7 +280,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -341,7 +341,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) @@ -602,7 +602,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -797,7 +797,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -859,7 +859,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1333,7 +1333,7 @@ def load_lora_into_transformer( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) @@ -1396,7 +1396,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1786,7 +1786,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1849,7 +1849,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and not is_peft_version(">", "0.13.0"): + if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1903,7 +1903,7 @@ def load_lora_into_transformer( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) @@ -1966,7 +1966,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) @@ -2313,7 +2313,7 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.0"): + if not is_peft_version(">", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -2682,7 +2682,7 @@ def load_lora_into_transformer( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, transformer, adapter_name=adapter_name, **peft_kwargs) diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index 7f767730ad42..3bd3597bdd48 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -346,7 +346,7 @@ def _process_lora( # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline) peft_kwargs = {} - if is_peft_version(">=", "0.13.0"): + if is_peft_version(">=", "0.13.1"): peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) From 83701deb97b5b9664331f29ca43c973c3ab55de6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 19:49:22 +0530 Subject: [PATCH 14/24] explicitly specify low_cpu_mem_usage=False. --- tests/lora/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index d3df40380b21..23efdf9e096c 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -385,7 +385,7 @@ def test_low_cpu_mem_usage_with_loading(self): self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) pipe.unload_lora_weights() - pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=False) for module_name, module in modules_to_save.items(): self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") From e88fb06f1f48e54cf15ad8a51eba48589091589f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 19:55:13 +0530 Subject: [PATCH 15/24] docs. --- .../en/tutorials/using_peft_for_inference.md | 6 ++++ src/diffusers/loaders/lora_pipeline.py | 36 +++++++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md index 907f93d573a0..d08f14afbb62 100644 --- a/docs/source/en/tutorials/using_peft_for_inference.md +++ b/docs/source/en/tutorials/using_peft_for_inference.md @@ -75,6 +75,12 @@ image ![pixel-art](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/peft_integration/diffusers_peft_lora_inference_12_1.png) + + +By default, if we detect the most up-to-date versions of `peft` (0.13.1) and `transformers` (4.46.0), we set `low_cpu_mem_usage=True` to speed up the loading time of LoRA checkpoints. + + + ## Merge adapters You can also merge different adapter checkpoints for inference to blend their styles together. 
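For the documentation tip added above, usage amounts to passing the flag through `load_lora_weights`. A hedged sketch — the checkpoint ids mirror the tutorial's illustrative examples and are not prescribed by this patch:

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Explicitly opting in; with recent enough peft/transformers this is the default.
pipe.load_lora_weights(
    "nerijs/pixel-art-xl",
    weight_name="pixel-art-xl.safetensors",
    adapter_name="pixel",
    low_cpu_mem_usage=True,
)
```

If the installed `peft` or `transformers` predates the versions gated in this series, passing `low_cpu_mem_usage=True` explicitly raises the `ValueError` shown in the diffs above.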
diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 47a9a4c7a17e..c83e146fe4a7 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -46,9 +46,9 @@ if is_torch_version(">=", "1.9.0"): if ( is_peft_available() - and is_peft_version(">", "0.13.1") + and is_peft_version(">=", "0.13.1") and is_transformers_available() - and is_transformers_version(">", "4.45.1") + and is_transformers_version(">=", "4.45.1") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True @@ -108,7 +108,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -280,7 +280,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -341,11 +341,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -602,7 +602,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -797,7 +797,7 @@ def load_lora_into_unet( if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -859,11 +859,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. 
# https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1396,11 +1396,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1786,7 +1786,7 @@ def load_lora_weights( raise ValueError("PEFT backend is required for this method.") low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1849,7 +1849,7 @@ def load_lora_into_transformer( `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): TODO """ - if low_cpu_mem_usage and not is_peft_version(">", "0.13.1"): + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) @@ -1966,11 +1966,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2313,11 +2313,11 @@ def load_lora_into_text_encoder( peft_kwargs = {} if low_cpu_mem_usage: - if not is_peft_version(">", "0.13.1"): + if not is_peft_version(">=", "0.13.1"): raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">", "4.45.1"): + if not is_transformers_version(">=", "4.45.1"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( From 28007f45d2cac13d020912bc26c1d8a117a045dc Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:04:53 +0530 Subject: [PATCH 16/24] transformers version 4.45.2. --- src/diffusers/loaders/lora_pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index c83e146fe4a7..6c33fca8e450 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -48,7 +48,7 @@ is_peft_available() and is_peft_version(">=", "0.13.1") and is_transformers_available() - and is_transformers_version(">=", "4.45.1") + and is_transformers_version(">=", "4.45.2") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True @@ -345,7 +345,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. 
Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -863,7 +863,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1400,7 +1400,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1970,7 +1970,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2317,7 +2317,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.1"): + if not is_transformers_version(">=", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. 
# https://github.com/huggingface/transformers/pull/33725/ raise ValueError( From ba5576cf852830469a52b614b1fbece0e79cb4d2 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:05:40 +0530 Subject: [PATCH 17/24] update --- tests/lora/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index 23efdf9e096c..ab6cad01a64f 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -279,7 +279,7 @@ def test_simple_inference_with_text_lora(self): not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" ) - @require_peft_version_greater("0.13.0") + @require_peft_version_greater("0.13.1") def test_low_cpu_mem_usage_with_injection(self): """Tests if we can inject LoRA state dict with low_cpu_mem_usage.""" for scheduler_cls in self.scheduler_classes: @@ -342,7 +342,7 @@ def test_low_cpu_mem_usage_with_injection(self): output_lora = pipe(**inputs)[0] self.assertTrue(output_lora.shape == self.output_shape) - @require_peft_version_greater("0.13.0") + @require_peft_version_greater("0.13.1") @require_transformers_version_greater("4.45.1") def test_low_cpu_mem_usage_with_loading(self): """Tests if we can load LoRA state dict with low_cpu_mem_usage.""" From 48641dc701bcebb6c9fd9dddcc3e16978311a841 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:19:02 +0530 Subject: [PATCH 18/24] fix --- src/diffusers/loaders/lora_pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 6c33fca8e450..046f66a9cc27 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -48,7 +48,7 @@ is_peft_available() and is_peft_version(">=", "0.13.1") and is_transformers_available() - and is_transformers_version(">=", "4.45.2") + and is_transformers_version(">", "4.45.2") ): _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True @@ -345,7 +345,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -863,7 +863,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1400,7 +1400,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -1970,7 +1970,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." 
) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( @@ -2317,7 +2317,7 @@ def load_lora_into_text_encoder( raise ValueError( "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." ) - if not is_transformers_version(">=", "4.45.2"): + if not is_transformers_version(">", "4.45.2"): # Note from sayakpaul: It's not in `transformers` stable yet. # https://github.com/huggingface/transformers/pull/33725/ raise ValueError( From 0ab1d443a35be3845d67bd592201f966d19cb95e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 8 Oct 2024 20:27:30 +0530 Subject: [PATCH 19/24] empty From ca5a1d5194058f92c6b7524dcb4d2d49f6cc7885 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 01:27:58 +0530 Subject: [PATCH 20/24] better name initialize_dummy_state_dict. --- tests/lora/utils.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index ab6cad01a64f..9c982e8de37f 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -66,7 +66,7 @@ def check_if_lora_correctly_set(model) -> bool: return False -def populate_meta_state_dict_with_dummy(state_dict): +def initialize_dummy_state_dict(state_dict): if not all(v.device.type == "meta" for _, v in state_dict.items()): raise ValueError("`state_dict` has non-meta values.") return {k: torch.randn(v.shape, device=torch_device, dtype=v.dtype) for k, v in state_dict.items()} @@ -298,7 +298,7 @@ def test_low_cpu_mem_usage_with_injection(self): "The LoRA params should be on 'meta' device.", ) - te_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(pipe.text_encoder)) + te_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder)) set_peft_model_state_dict(pipe.text_encoder, te_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in pipe.text_encoder.parameters()}, @@ -312,7 +312,7 @@ def test_low_cpu_mem_usage_with_injection(self): "meta" in {p.device.type for p in denoiser.parameters()}, "The LoRA params should be on 'meta' device." ) - denoiser_state_dict = populate_meta_state_dict_with_dummy(get_peft_model_state_dict(denoiser)) + denoiser_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(denoiser)) set_peft_model_state_dict(denoiser, denoiser_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in denoiser.parameters()}, "No param should be on 'meta' device." @@ -329,9 +329,7 @@ def test_low_cpu_mem_usage_with_injection(self): "The LoRA params should be on 'meta' device.", ) - te2_state_dict = populate_meta_state_dict_with_dummy( - get_peft_model_state_dict(pipe.text_encoder_2) - ) + te2_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder_2)) set_peft_model_state_dict(pipe.text_encoder_2, te2_state_dict, low_cpu_mem_usage=True) self.assertTrue( "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()}, From 95534e6792fcb034ed32c9cd538424e7f096ac7e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 01:32:15 +0530 Subject: [PATCH 21/24] doc todos. 
--- src/diffusers/loaders/lora_pipeline.py | 32 ++++++++++++++------------ src/diffusers/loaders/unet.py | 4 +++- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index f08e6e293007..f3033fdf81a3 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -100,7 +100,9 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading only loading the pretrained LoRA weights and not initializing the random + weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -280,7 +282,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -339,7 +341,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -599,7 +601,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -803,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -863,7 +865,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1224,7 +1226,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. 
If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -1299,7 +1301,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( @@ -1406,7 +1408,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1801,7 +1803,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1868,7 +1870,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( @@ -1980,7 +1982,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2327,7 +2329,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2608,7 +2610,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
- low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -2658,7 +2660,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - low_cpu_mem_usage (`bool`, *optional*): TODO + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index 3bd3597bdd48..fe7778c4d27f 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -115,7 +115,9 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict `default_{i}` where i is the total number of adapters being loaded. weight_name (`str`, *optional*, defaults to None): Name of the serialized state dict file. - low_cpu_mem_usage (`str`, *optional*, defaults to False): TODO + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading only loading the pretrained LoRA weights and not initializing the random + weights. Example: From cf4917c9798c61e8a94a1c429038e5eaba677645 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Wed, 9 Oct 2024 10:38:44 +0530 Subject: [PATCH 22/24] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- .../en/tutorials/using_peft_for_inference.md | 2 +- src/diffusers/loaders/lora_pipeline.py | 28 +++++++++---------- src/diffusers/loaders/unet.py | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md index d08f14afbb62..615af55ef5b5 100644 --- a/docs/source/en/tutorials/using_peft_for_inference.md +++ b/docs/source/en/tutorials/using_peft_for_inference.md @@ -77,7 +77,7 @@ image -By default, if we detect the most up-to-date versions of `peft` (0.13.1) and `transformers` (4.46.0), we set `low_cpu_mem_usage=True` to speed up the loading time of LoRA checkpoints. +By default, if the most up-to-date versions of PEFT and Transformers are detected, `low_cpu_mem_usage` is set to `True` to speed up the loading time of LoRA checkpoints. diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index f3033fdf81a3..98e8d2ffdb88 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -101,7 +101,7 @@ def load_lora_weights( Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. low_cpu_mem_usage (`bool`, *optional*): - Speed up model loading only loading the pretrained LoRA weights and not initializing the random + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. @@ -341,7 +341,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. 
- Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -601,7 +601,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -805,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -865,7 +865,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1226,7 +1226,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -1301,7 +1301,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( @@ -1408,7 +1408,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
""" if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1803,7 +1803,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1870,7 +1870,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( @@ -1982,7 +1982,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2329,7 +2329,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2610,7 +2610,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -2660,7 +2660,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
""" if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py index fe7778c4d27f..eaac52df6202 100644 --- a/src/diffusers/loaders/unet.py +++ b/src/diffusers/loaders/unet.py @@ -116,7 +116,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict weight_name (`str`, *optional*, defaults to None): Name of the serialized state dict file. low_cpu_mem_usage (`bool`, *optional*): - Speed up model loading only loading the pretrained LoRA weights and not initializing the random + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. Example: From 8ffe3bec5be634f3d95be9d4aac6394950752bb0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 10:39:59 +0530 Subject: [PATCH 23/24] style --- src/diffusers/loaders/lora_pipeline.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 98e8d2ffdb88..d70f3780c835 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -341,7 +341,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -601,7 +601,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -805,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -865,7 +865,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
+ Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1226,7 +1226,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -1301,7 +1301,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( @@ -1408,7 +1408,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1803,7 +1803,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -1870,7 +1870,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): raise ValueError( @@ -1982,7 +1982,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. 
+ Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2329,7 +2329,7 @@ def load_lora_into_text_encoder( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") @@ -2610,7 +2610,7 @@ def load_lora_weights( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: kwargs (`dict`, *optional*): See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. """ @@ -2660,7 +2660,7 @@ def load_lora_into_transformer( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights. + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: """ if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): raise ValueError( From a4eaa426020414e06e25823f563965e39f91c58f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 9 Oct 2024 10:43:33 +0530 Subject: [PATCH 24/24] fix-copies --- src/diffusers/loaders/lora_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index d70f3780c835..2037bd787433 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -805,7 +805,7 @@ def load_lora_into_unet( adapter_name (`str`, *optional*): Adapter name to be used for referencing the loaded adapter model. If not specified, it will use `default_{i}` where i is the total number of adapters being loaded. - Speed up model loading by only loading the pretrained LoRA weights and not initializing the random weights.: + Speed up model loading only loading the pretrained LoRA weights and not initializing the random weights. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.")
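Taken together, the series makes `low_cpu_mem_usage` default to `True` only when recent enough `torch`, `peft`, and `transformers` are detected, and keeps it opt-in (with a hard version check) otherwise. A rough way to observe the effect end to end — the checkpoint ids are the same illustrative ones as above, and actual savings depend on checkpoint size and hardware:

```python
import time

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)

# Compare LoRA load time with and without the low-CPU-memory path.
for flag in (False, True):
    start = time.perf_counter()
    pipe.load_lora_weights(
        "nerijs/pixel-art-xl",
        weight_name="pixel-art-xl.safetensors",
        low_cpu_mem_usage=flag,
    )
    print(f"low_cpu_mem_usage={flag}: loaded in {time.perf_counter() - start:.2f}s")
    pipe.unload_lora_weights()
```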