
Commit

[WIP] ENH Add support for Qwen2 (#1906)
* [WIP] ENH Add support for Qwen2

Add Qwen2 to default target modules, use tiny Qwen2 in tests.

* Add target_modules for FourierFT

* Skip Qwen2 + weighted combination test

It fails when SVD is involved. See:
#1901 (comment)

---------

Co-authored-by: BenjaminBossan <[email protected]>
BenjaminBossan authored Jul 23, 2024
1 parent ba75bb1 commit ebcd079
Showing 3 changed files with 11 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/peft/utils/constants.py
@@ -65,6 +65,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "mistral": ["input_layernorm", "post_attention_layernorm", "norm"],
     "phi": ["input_layernorm", "final_layernorm"],
     "gemma": ["input_layernorm", "post_attention_layernorm", "norm"],
+    "qwen2": ["post_attention_layernorm"],
 }
 
 TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
@@ -99,6 +100,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "stablelm": ["q_proj", "v_proj"],
     "phi": ["q_proj", "v_proj", "fc1", "fc2"],
     "gemma": ["q_proj", "v_proj"],
+    "qwen2": ["q_proj", "v_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING = {
@@ -124,6 +126,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "falcon": ["query_key_value", "dense_4h_to_h"],
     "phi": ["q_proj", "v_proj", "fc2"],
     "gemma": ["q_proj", "v_proj", "down_proj"],
+    "qwen2": ["q_proj", "v_proj", "down_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING = {
@@ -149,6 +152,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "falcon": ["dense_4h_to_h"],
     "phi": ["fc2"],
     "gemma": ["down_proj"],
+    "qwen2": ["down_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING = {
@@ -170,6 +174,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "gpt_bigcode": ["c_attn"],
     "deberta": ["in_proj"],
     # "layoutlm": ["query", "value"],
+    "qwen2": ["q_proj", "v_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = {
@@ -205,6 +210,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     # "phi": ["q_proj", "v_proj", "fc1", "fc2"], # tested, does not work because of different shapes
     "phi": ["q_proj", "v_proj"],
     # "gemma": ["q_proj", "v_proj"], # tested, does not work because of different shapes
+    "qwen2": ["q_proj", "v_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING = {
@@ -238,6 +244,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "stablelm": ["q_proj", "v_proj"],
     "phi": ["q_proj", "v_proj", "fc1", "fc2"],
     "gemma": ["q_proj", "v_proj"],
+    "qwen2": ["q_proj", "v_proj"],
 }
 
 WEIGHTS_NAME = "adapter_model.bin"
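The mapping additions above mean a Qwen2 model can now be fine-tuned without specifying target_modules explicitly. As a minimal sketch (not part of this commit; the model id is illustrative), LoRA resolves its targets from the model type:

# Minimal sketch, assuming a Qwen2 checkpoint such as "Qwen/Qwen2-0.5B"
# (illustrative id). With the new "qwen2" entry in the LoRA mapping,
# LoraConfig needs no explicit target_modules.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B")
config = LoraConfig(r=8, lora_alpha=16)  # no target_modules given
peft_model = get_peft_model(model, config)  # q_proj/v_proj get LoRA layers
peft_model.print_trainable_parameters()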
1 change: 1 addition & 0 deletions tests/test_decoder_models.py
@@ -42,6 +42,7 @@
     "hf-internal-testing/tiny-random-GPTJForCausalLM",
     "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM",
     "trl-internal-testing/tiny-random-LlamaForCausalLM",
+    "peft-internal-testing/tiny-dummy-qwen2",
 ]
 
 FULL_GRID = {
3 changes: 3 additions & 0 deletions tests/testing_common.py
@@ -1419,6 +1419,9 @@ def _test_weighted_combination_of_adapters(self, model_id, config_cls, config_kwargs):
         if issubclass(config_cls, AdaLoraConfig):
             # AdaLora does not support adding more than 1 adapter
             return pytest.skip(f"Test not applicable for {config_cls}")
+        if model_id.endswith("qwen2"):
+            # Qwen2 fails with weighted adapter combinations using SVD
+            return pytest.skip(f"Test does not work with model {model_id}")
 
         adapter_list = ["adapter1", "adapter_2", "adapter_3"]
         weight_list = [0.5, 1.5, 1.5]
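For reference, the operation the new skip avoids is weighted adapter merging, which for LoRA defaults to an SVD-based combination. A sketch continuing the example above (adapter names and weights mirror the test; this is the kind of call reported to fail for Qwen2 in #1901):

# Sketch of the skipped operation, continuing the earlier example.
# add_weighted_adapter merges several LoRA adapters into a new one;
# combination_type="svd" is the path that fails for Qwen2.
peft_model.add_adapter("adapter_2", LoraConfig(r=8, lora_alpha=16))
peft_model.add_adapter("adapter_3", LoraConfig(r=8, lora_alpha=16))
peft_model.add_weighted_adapter(
    adapters=["default", "adapter_2", "adapter_3"],
    weights=[0.5, 1.5, 1.5],
    adapter_name="combined",
    combination_type="svd",
)
peft_model.set_adapter("combined")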
