From 9ddc9f1e1e5bbd6b169c1f5289d9e91b7c7a0757 Mon Sep 17 00:00:00 2001 From: JINO ROHIT Date: Thu, 3 Oct 2024 16:38:08 +0530 Subject: [PATCH] FEAT: Adding exclude modules param(#2044) (#2102) Allows to exclude target modules. --- src/peft/tuners/adalora/config.py | 3 ++ src/peft/tuners/boft/config.py | 21 ++++++-- src/peft/tuners/fourierft/config.py | 11 ++++ src/peft/tuners/hra/config.py | 21 ++++++-- src/peft/tuners/ia3/config.py | 21 ++++++-- src/peft/tuners/ln_tuning/config.py | 8 +++ src/peft/tuners/loha/config.py | 20 +++++-- src/peft/tuners/lokr/config.py | 23 ++++++-- src/peft/tuners/lora/config.py | 11 ++++ src/peft/tuners/oft/config.py | 11 ++++ src/peft/tuners/poly/config.py | 19 +++++-- src/peft/tuners/tuners_utils.py | 82 ++++++++++++++++++++++------- src/peft/tuners/vblora/config.py | 23 ++++++-- tests/test_tuners_utils.py | 73 +++++++++++++++++++++++++ 14 files changed, 300 insertions(+), 47 deletions(-) diff --git a/src/peft/tuners/adalora/config.py b/src/peft/tuners/adalora/config.py index b508588a17..5419159397 100644 --- a/src/peft/tuners/adalora/config.py +++ b/src/peft/tuners/adalora/config.py @@ -61,6 +61,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) # if target_modules is a regex expression, then layers_to_transform should be None if isinstance(self.target_modules, str) and self.layers_to_transform is not None: raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.") diff --git a/src/peft/tuners/boft/config.py b/src/peft/tuners/boft/config.py index ecd6a2c13c..dcae4c0841 100644 --- a/src/peft/tuners/boft/config.py +++ b/src/peft/tuners/boft/config.py @@ -15,8 +15,10 @@ # The implementation is based on "Parameter-Efficient Orthogonal Finetuning # via Butterfly Factorization" (https://arxiv.org/abs/2311.06243) in ICLR 2024. +from __future__ import annotations + from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Optional, Union from peft.config import PeftConfig from peft.utils import PeftType @@ -32,6 +34,10 @@ class BOFTConfig(PeftConfig): boft_block_num (`int`): Number of BOFT blocks per injected layer. boft_n_butterfly_factor (`int`): Number of butterfly factors across different layers. target_modules (`Union[List[str],str]`): The names of the modules to apply the adapter to. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. boft_dropout (`float`): The multiplicative dropout probability, by setting OFT blocks to identity during training, similar to the dropout layer in LoRA. 
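(Reviewer note, not part of the patch: the exclusion semantics documented in the new docstrings above can be summarized with the following standalone sketch. It mirrors the check_target_module_exists change later in this patch; the helper name is_excluded is illustrative only.)

import re

def is_excluded(key, exclude_modules):
    # A string is treated as a regex that must match the full module name.
    if isinstance(exclude_modules, str):
        return re.fullmatch(exclude_modules, key) is not None
    # A list/set matches on the exact module name or on a ".<name>" suffix.
    return key in exclude_modules or any(key.endswith(f".{name}") for name in exclude_modules)

assert is_excluded("transformer.h.2.self_attention.query_key_value", ["query_key_value"])
assert not is_excluded("lin1", "lin0")  # the regex "lin0" does not fully match "lin1"
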
@@ -76,13 +82,17 @@ class BOFTConfig(PeftConfig): ), }, ) - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "List of module names or regex expression of the module names to replace with BOFT.", "example": "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' ", }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from BOFT."}, + ) boft_dropout: float = field( default=0.0, metadata={ @@ -94,7 +104,7 @@ class BOFTConfig(PeftConfig): metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"}, ) bias: str = field(default="none", metadata={"help": "Bias type for BOFT. Can be 'none', 'all' or 'boft_only'"}) - modules_to_save: Optional[List[str]] = field( + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": "List of modules apart from BOFT layers to be set as trainable and saved in the final checkpoint. ", @@ -113,7 +123,7 @@ class BOFTConfig(PeftConfig): ), }, ) - layers_to_transform: Optional[Union[List[int], int]] = field( + layers_to_transform: Optional[Union[list[int], int]] = field( default=None, metadata={ "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index." @@ -131,6 +141,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) if self.boft_block_size == 0 and self.boft_block_num == 0: raise ValueError( f"Either `boft_block_size` or `boft_block_num` must be non-zero. Currently, boft_block_size = {self.boft_block_size} and boft_block_num = {self.boft_block_num}." diff --git a/src/peft/tuners/fourierft/config.py b/src/peft/tuners/fourierft/config.py index 1816dc4b0b..1efaa22f37 100644 --- a/src/peft/tuners/fourierft/config.py +++ b/src/peft/tuners/fourierft/config.py @@ -52,6 +52,10 @@ class FourierFTConfig(PeftConfig): target_modules (`Union[list[str],str]`): List of module names or regex expression of the module names to replace with FourierFT. For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. fan_in_fan_out (`bool`): Set this to True if the layer to replace stores weight like (fan_in, fan_out). bias (`str`): @@ -123,6 +127,10 @@ class FourierFTConfig(PeftConfig): ) }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from fourierft."}, + ) bias: str = field( default="none", metadata={"help": "Bias type for FourierFT. 
Can be 'none', 'all' or 'fourier_only'."} ) @@ -179,6 +187,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) # if target_modules is a regex expression, then layers_to_transform should be None if isinstance(self.target_modules, str) and self.layers_to_transform is not None: raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.") diff --git a/src/peft/tuners/hra/config.py b/src/peft/tuners/hra/config.py index 1b5457d9af..01e90471a5 100644 --- a/src/peft/tuners/hra/config.py +++ b/src/peft/tuners/hra/config.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Optional, Union from peft.config import PeftConfig from peft.utils import PeftType @@ -38,6 +40,10 @@ class HRAConfig(PeftConfig): the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. init_weights (`bool`): Whether to perform initialization of HRA weights. layers_to_transform (`Union[List[int], int]`): @@ -64,13 +70,17 @@ class HRAConfig(PeftConfig): default=False, metadata={"help": "Whether to apply Gram-Schmidt orthogonalization or not."}, ) - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "List of module names or regex expression of the module names to replace with HRA.", "example": "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' ", }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from HRA."}, + ) init_weights: bool = field( default=True, metadata={ @@ -80,7 +90,7 @@ class HRAConfig(PeftConfig): ), }, ) - layers_to_transform: Optional[Union[List[int], int]] = field( + layers_to_transform: Optional[Union[list[int], int]] = field( default=None, metadata={ "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index." @@ -93,7 +103,7 @@ class HRAConfig(PeftConfig): }, ) bias: str = field(default="none", metadata={"help": "Bias type for HRA. Can be 'none', 'all' or 'hra_only'"}) - modules_to_save: Optional[List[str]] = field( + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": "List of modules apart from HRA layers to be set as trainable and saved in the final checkpoint. 
" @@ -107,6 +117,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) # if target_modules is a regex expression, then layers_to_transform should be None if isinstance(self.target_modules, str) and self.layers_to_transform is not None: raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.") diff --git a/src/peft/tuners/ia3/config.py b/src/peft/tuners/ia3/config.py index 322ea068d3..8d103f99d7 100644 --- a/src/peft/tuners/ia3/config.py +++ b/src/peft/tuners/ia3/config.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Optional, Union from peft.config import PeftConfig from peft.utils import PeftType @@ -33,6 +35,10 @@ class IA3Config(PeftConfig): excluding the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. feedforward_modules (`Optional[Union[List[str], str]]`): The names of the modules to be treated as feedforward modules, as in the original paper. These modules will have (IA)³ vectors multiplied to the input, instead of the output. `feedforward_modules` must be a name or @@ -47,7 +53,7 @@ class IA3Config(PeftConfig): discouraged. """ - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": ( @@ -59,7 +65,11 @@ class IA3Config(PeftConfig): ), }, ) - feedforward_modules: Optional[Union[List[str], str]] = field( + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from (IA)³."}, + ) + feedforward_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "List of module names or a regex expression of module names which are feedforward" @@ -70,7 +80,7 @@ class IA3Config(PeftConfig): default=False, metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"}, ) - modules_to_save: Optional[List[str]] = field( + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": "List of modules apart from (IA)^3 layers to be set as trainable and saved in the final checkpoint. 
" @@ -88,6 +98,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) self.feedforward_modules = ( set(self.feedforward_modules) if isinstance(self.feedforward_modules, list) else self.feedforward_modules ) diff --git a/src/peft/tuners/ln_tuning/config.py b/src/peft/tuners/ln_tuning/config.py index fac6a633c6..a47429484e 100644 --- a/src/peft/tuners/ln_tuning/config.py +++ b/src/peft/tuners/ln_tuning/config.py @@ -31,6 +31,10 @@ class LNTuningConfig(PeftConfig): '.*decoder.*' or '.*encoder.*'. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. modules_to_save (`Optional[Union[List[str], str]]`): List of modules to be set as trainable and saved in the final checkpoint. For example, in Sequence Classification or Token Classification tasks, the final layer `classifier/score` are randomly initialized @@ -48,6 +52,10 @@ class LNTuningConfig(PeftConfig): ), }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from LNTuning."}, + ) modules_to_save: Optional[Union[list[str], str]] = field( default=None, metadata={ diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index c38ba7828b..3f47444eff 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Optional, Union from peft.tuners.lycoris_utils import LycorisConfig from peft.utils import PeftType @@ -43,6 +44,10 @@ class LoHaConfig(LycorisConfig): excluding the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. init_weights (`bool`): Whether to perform initialization of adapter weights. This defaults to `True`, passing `False` is discouraged. 
@@ -76,7 +81,7 @@ class LoHaConfig(LycorisConfig): "help": 'Use parameter effective decomposition for Conv2d 3x3 with ksize > 1 ("Proposition 3" from FedPara paper)' }, ) - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "List of module names or regex expression of the module names to replace with LoHa." @@ -84,6 +89,10 @@ class LoHaConfig(LycorisConfig): "This can also be a wildcard 'all-linear' which matches all linear/Conv1D layers except the output layer." }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from LoHa."}, + ) init_weights: bool = field( default=True, metadata={ @@ -93,7 +102,7 @@ class LoHaConfig(LycorisConfig): ), }, ) - layers_to_transform: Optional[Union[List[int], int]] = field( + layers_to_transform: Optional[Union[list[int], int]] = field( default=None, metadata={ "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index." @@ -105,7 +114,7 @@ class LoHaConfig(LycorisConfig): "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." }, ) - modules_to_save: Optional[List[str]] = field( + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": "List of modules apart from LoHA layers to be set as trainable and saved in the final checkpoint. " @@ -119,3 +128,6 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index c8d60a7463..0f8e991556 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Optional, Union from peft.tuners.lycoris_utils import LycorisConfig from peft.utils import PeftType @@ -47,6 +48,10 @@ class LoKrConfig(LycorisConfig): excluding the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. init_weights (`bool`): Whether to perform initialization of adapter weights. This defaults to `True`, passing `False` is discouraged. 
@@ -85,7 +90,7 @@ class LoKrConfig(LycorisConfig): metadata={"help": "Perform rank decomposition of left kronecker product matrix."}, ) decompose_factor: int = field(default=-1, metadata={"help": "Kronecker product decomposition factor."}) - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "List of module names or regex expression of the module names to replace with LoKr." @@ -93,6 +98,10 @@ class LoKrConfig(LycorisConfig): "This can also be a wildcard 'all-linear' which matches all linear/Conv1D layers except the output layer." }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from LoKr."}, + ) init_weights: bool = field( default=True, metadata={ @@ -102,7 +111,7 @@ class LoKrConfig(LycorisConfig): ), }, ) - layers_to_transform: Optional[Union[List[int], int]] = field( + layers_to_transform: Optional[Union[list[int], int]] = field( default=None, metadata={ "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index." @@ -114,7 +123,7 @@ class LoKrConfig(LycorisConfig): "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." }, ) - modules_to_save: Optional[List[str]] = field( + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": "List of modules apart from LoKr layers to be set as trainable and saved in the final checkpoint. " @@ -125,3 +134,9 @@ class LoKrConfig(LycorisConfig): def __post_init__(self): self.peft_type = PeftType.LOKR + self.target_modules = ( + set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules + ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) diff --git a/src/peft/tuners/lora/config.py b/src/peft/tuners/lora/config.py index 941582fe89..6fb383a274 100644 --- a/src/peft/tuners/lora/config.py +++ b/src/peft/tuners/lora/config.py @@ -85,6 +85,10 @@ class LoraConfig(PeftConfig): excluding the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. lora_alpha (`int`): The alpha parameter for Lora scaling. 
lora_dropout (`float`): @@ -166,6 +170,10 @@ class LoraConfig(PeftConfig): ), }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from Lora."}, + ) lora_alpha: int = field(default=8, metadata={"help": "Lora alpha"}) lora_dropout: float = field(default=0.0, metadata={"help": "Lora dropout"}) fan_in_fan_out: bool = field( @@ -327,6 +335,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) # if target_modules is a regex expression, then layers_to_transform should be None if isinstance(self.target_modules, str) and self.layers_to_transform is not None: raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.") diff --git a/src/peft/tuners/oft/config.py b/src/peft/tuners/oft/config.py index 13a6b5d7ce..85bfc9cc24 100644 --- a/src/peft/tuners/oft/config.py +++ b/src/peft/tuners/oft/config.py @@ -44,6 +44,10 @@ class OFTConfig(PeftConfig): bias (`str`): Bias type for OFT. Can be 'none', 'all' or 'oft_only'. If 'all' or 'oft_only', the corresponding biases will be updated during training. Be aware that this means that, even when disabling the adapters, the model will not produce the same output as the base model would have without adaptation. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. init_weights (`bool`): Whether to perform initialization of OFT weights. layers_to_transform (`Union[List[int], int]`): @@ -94,6 +98,10 @@ class OFTConfig(PeftConfig): bias: Literal["none", "all", "oft_only"] = field( default="none", metadata={"help": "Bias type for OFT. Can be 'none', 'all' or 'oft_only'"} ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from OFT."}, + ) init_weights: bool = field( default=True, metadata={ @@ -163,6 +171,9 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) if self.r == 0 and self.oft_block_size == 0: raise ValueError( f"Either `r` or `oft_block_size` must be non-zero. Currently, r = {self.r} and oft_block_size = {self.oft_block_size}." diff --git a/src/peft/tuners/poly/config.py b/src/peft/tuners/poly/config.py index 3abbc93b02..fea09ce0bd 100644 --- a/src/peft/tuners/poly/config.py +++ b/src/peft/tuners/poly/config.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from dataclasses import dataclass, field -from typing import List, Literal, Optional, Union +from typing import Literal, Optional, Union from peft.config import PeftConfig from peft.utils import PeftType @@ -29,6 +31,10 @@ class PolyConfig(PeftConfig): Args: r (`int`): Attention dimension of each Lora in Poly. 
target_modules (`Union[List[str],str]`): The names of the modules to apply Poly to. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. modules_to_save (`List[str]`): List of modules apart from Poly layers to be set as trainable and saved in the final checkpoint. init_weights (bool): Whether to perform initialization of Poly weights. @@ -41,14 +47,18 @@ class PolyConfig(PeftConfig): """ r: int = field(default=8, metadata={"help": "Lora attention dimension"}) - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "List of module names or regex expression of the module names to replace with Poly." "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' " }, ) - modules_to_save: Optional[List[str]] = field( + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from Poly."}, + ) + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": "List of modules apart from Poly layers to be set as trainable and saved in the final checkpoint. " @@ -87,3 +97,6 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 03b8531bfd..51994b456f 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -422,6 +422,8 @@ def inject_adapter( """ peft_config = self.peft_config[adapter_name] + excluded_modules = [] + unmatched_modules = [] # Note: If possible, all checks should be performed *at the start of this method*. # This way, we can raise early if something goes wrong, without leaving the model # in a bad (half-initialized) state. 
@@ -435,13 +437,12 @@ def inject_adapter( peft_config = self._prepare_adapter_config(peft_config, model_config) self._prepare_model(peft_config, model) - is_target_modules_in_base_model = False key_list = [key for key, _ in model.named_modules()] - if getattr(peft_config, "target_modules", None) == DUMMY_TARGET_MODULES: + uses_dummy_target_modules = getattr(peft_config, "target_modules", None) == DUMMY_TARGET_MODULES + if uses_dummy_target_modules: # dummy adapter, we allow not matching any module key_list = [] - is_target_modules_in_base_model = True # update peft_config.target_modules if required peft_config = _maybe_include_all_linear_layers(peft_config, model) @@ -467,6 +468,8 @@ def inject_adapter( peft_config.target_modules = new_target_modules for key in key_list: + if not key: + continue # Check for modules_to_save in case if _check_for_modules_to_save and any( key.endswith(f"{module_to_save}") for module_to_save in peft_config.modules_to_save @@ -483,15 +486,45 @@ def inject_adapter( _has_modules_to_save = True continue - if not self._check_target_module_exists(peft_config, key): - continue - - self.targeted_module_names.append(key) - is_target_modules_in_base_model = True - parent, target, target_name = _get_submodules(model, key) - ctx = init_empty_weights if low_cpu_mem_usage else nullcontext - with ctx(): - self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key) + result = self._check_target_module_exists(peft_config, key) + if isinstance(result, _ExcludedModule): + excluded_modules.append(key) + elif not result: + unmatched_modules.append(key) + else: + self.targeted_module_names.append(key) + parent, target, target_name = _get_submodules(model, key) + ctx = init_empty_weights if low_cpu_mem_usage else nullcontext + with ctx(): + self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key) + + if not self.targeted_module_names and not uses_dummy_target_modules: + if excluded_modules and not unmatched_modules: + # All targeted modules were excluded + raise ValueError( + "All modules were excluded. This is likely unintended. " + "Check your `target_modules` and `exclude_modules` configuration." + ) + elif not excluded_modules and unmatched_modules: + # None of the targeted modules matched + raise ValueError( + f"Target modules {peft_config.target_modules} not found in the base model. " + f"Please check the target modules and try again." + ) + else: + # Some modules did not match and some matched but were excluded + raise ValueError( + "No modules were targeted for adaptation. " + "This might be caused by a combination of mismatched target modules and excluded modules. " + "Please check your `target_modules` and `exclude_modules` configuration." + ) + + elif hasattr(peft_config, "exclude_modules") and peft_config.exclude_modules and not excluded_modules: + # exclude_modules was passed but was not used + warnings.warn( + f"You have passed exclude_modules={peft_config.exclude_modules} but no modules were excluded. " + "Please check that exclude_modules was set correctly." + ) tied_target_modules = self._get_tied_target_modules(model=model) if tied_target_modules: @@ -502,13 +535,6 @@ def inject_adapter( "See for example https://github.com/huggingface/peft/issues/2018." ) - # Handle X-LoRA case. - if not is_target_modules_in_base_model and hasattr(peft_config, "target_modules"): - raise ValueError( - f"Target modules {peft_config.target_modules} not found in the base model. 
" - f"Please check the target modules and try again." - ) - # It's important to set the adapter here (again), because otherwise it can happen that if a 2nd adapter is # added, and it targets different layer(s) than the first adapter (which is active), then those different # layers will be activated, which we don't want. @@ -903,6 +929,15 @@ def generate_suffixes(s): return required_suffixes +class _ExcludedModule: + """ + A private helper method used to represent excluded modules in the check_target_module_exists function. + """ + + def __bool__(self): + return False + + def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None: """A helper method to check if the passed module's key name matches any of the target modules in the adapter_config. @@ -914,6 +949,15 @@ def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None: `bool` | `re.Match[str]` | `None`: True of match object if key matches any target modules from config, False or None if no match found """ + if hasattr(config, "exclude_modules") and config.exclude_modules: + if isinstance(config.exclude_modules, str): + if re.fullmatch(config.exclude_modules, key): + return _ExcludedModule() + elif key in config.exclude_modules: + return _ExcludedModule() + elif any(key.endswith(f".{exclude_key}") for exclude_key in config.exclude_modules): + return _ExcludedModule() + if isinstance(config.target_modules, str): target_module_found = re.fullmatch(config.target_modules, key) elif key in config.target_modules: diff --git a/src/peft/tuners/vblora/config.py b/src/peft/tuners/vblora/config.py index ed2b4461d6..879f54ea11 100644 --- a/src/peft/tuners/vblora/config.py +++ b/src/peft/tuners/vblora/config.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Optional, Union from peft.config import PeftConfig from peft.utils import PeftType @@ -46,6 +48,10 @@ class VBLoRAConfig(PeftConfig): excluding the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually. + exclude_modules (`Optional[Union[List[str], str]]`): + The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. + When passing a list of strings, either an exact match will be performed or it is checked if the name of the + module ends with any of the passed strings. save_only_topk_weights (`bool`): Whether to only save the topk weights. Setting `save_only_topk_weights = True` significantly reduces storage space. However, models saved in this mode can be used for merging or inference only, not for @@ -97,7 +103,7 @@ class VBLoRAConfig(PeftConfig): "For more details, refer to the discussion in the paper." 
}, ) - target_modules: Optional[Union[List[str], str]] = field( + target_modules: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": ( @@ -109,6 +115,10 @@ class VBLoRAConfig(PeftConfig): ) }, ) + exclude_modules: Optional[Union[list[str], str]] = field( + default=None, + metadata={"help": "List of module names or regex expression of the module names to exclude from VBLoRA."}, + ) save_only_topk_weights: bool = field( default=False, metadata={ @@ -125,7 +135,7 @@ class VBLoRAConfig(PeftConfig): metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"}, ) bias: str = field(default="none", metadata={"help": "Bias type for VBLoRA. Can be 'none', 'all' or 'vblora_only'"}) - modules_to_save: Optional[List[str]] = field( + modules_to_save: Optional[list[str]] = field( default=None, metadata={ "help": ( @@ -155,14 +165,14 @@ class VBLoRAConfig(PeftConfig): ), }, ) - layers_to_transform: Optional[Union[List[int], int]] = field( + layers_to_transform: Optional[Union[list[int], int]] = field( default=None, metadata={ "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index. " "This only works when target_modules is a list of str." }, ) - layers_pattern: Optional[Union[List[str], str]] = field( + layers_pattern: Optional[Union[list[str], str]] = field( default=None, metadata={ "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." @@ -175,3 +185,6 @@ def __post_init__(self): self.target_modules = ( set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules ) + self.exclude_modules = ( + set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules + ) diff --git a/tests/test_tuners_utils.py b/tests/test_tuners_utils.py index 90dbea8d70..5e742f3c88 100644 --- a/tests/test_tuners_utils.py +++ b/tests/test_tuners_utils.py @@ -416,6 +416,79 @@ def test_realistic_example(self): assert model.targeted_module_names == expected +class TestExcludedModuleNames(unittest.TestCase): + """Check that the attribute exclude_module is correctly set. + + This checks LoRA and IA³, but this should be sufficient, testing all other tuners is not necessary. 
+ """ + + def test_two_excluded_module_regex(self): + model = MLP() + model = get_peft_model(model, LoraConfig(target_modules=("lin.*"), exclude_modules="lin0")) + assert model.targeted_module_names == ["lin1"] + + def test_two_excluded_module_list(self): + model = MLP() + model = get_peft_model(model, LoraConfig(target_modules=["lin0", "lin1"], exclude_modules="lin0")) + assert model.targeted_module_names == ["lin1"] + + def test_multiple_excluded_modules_list(self): + model = MLP() + model = get_peft_model(model, LoraConfig(target_modules=["lin0", "lin1"], exclude_modules=["lin0"])) + assert model.targeted_module_names == ["lin1"] + + def test_ia3_two_excluded_module_regex(self): + model = MLP() + model = get_peft_model( + model, IA3Config(target_modules=".*lin.*", feedforward_modules=".*lin.*", exclude_modules="lin0") + ) + assert model.targeted_module_names == ["lin1"] + + def test_ia3_multiple_excluded_modules_list(self): + model = MLP() + model = get_peft_model( + model, IA3Config(target_modules=["lin0", "lin1"], feedforward_modules=".*lin.*", exclude_modules=["lin1"]) + ) + assert model.targeted_module_names == ["lin0"] + + def test_all_modules_excluded(self): + model = MLP() + with pytest.raises(ValueError, match="All modules were excluded"): + get_peft_model( + model, + LoraConfig( + target_modules=["lin0", "lin1", "relu", "drop", "sm"], + exclude_modules=["lin0", "lin1", "relu", "drop", "sm"], + ), + ) + + def test_no_modules_matched(self): + model = MLP() + with pytest.raises(ValueError, match="Target modules .* not found in the base model"): + get_peft_model(model, LoraConfig(target_modules=["non_existent_module"])) + + def test_some_modules_excluded_some_unmatched(self): + model = MLP() + with pytest.raises(ValueError, match="No modules were targeted for adaptation"): + get_peft_model(model, LoraConfig(target_modules=["lin0", "non_existent_module"], exclude_modules=["lin0"])) + + def test_exclude_modules_not_used(self): + model = MLP() + with pytest.warns(UserWarning, match="You have passed exclude_modules=.* but no modules were excluded"): + get_peft_model(model, LoraConfig(target_modules=["lin1"], exclude_modules=["non_existent_module"])) + + def test_realistic_example(self): + model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-BloomForCausalLM") + config = LoraConfig(task_type="CAUSAL_LM", exclude_modules="transformer.h.2.self_attention.query_key_value") + model = get_peft_model(model, config) + expected = [ + f"transformer.h.{i}.self_attention.query_key_value" + for i in range(len(model.base_model.transformer.h)) + if i != 2 + ] + assert model.targeted_module_names == expected + + class TestModelAndLayerStatus: """Check the methods `get_layer_status` and `get_model_status`.`