ENH Update VeRA preconfigured models (#1941)

Some pre-configured models, such as mistral, previously did not work with VeRA because their weight shapes were not identical. Since #1817, identical shapes are no longer a requirement, so the commented-out entries can now be uncommented. I have tested mistral and gemma, and both worked. I haven't tested btlm and mixtral, but with that update I'm fairly confident they will work too.
huggingface · Jul 30, 2024 · 1b16753 · 1b16753
1 parent 27833a2
commit 1b16753
Showing 1 changed file with 4 additions and 5 deletions.
diff --git a/src/peft/utils/constants.py b/src/peft/utils/constants.py
@@ -209,14 +209,13 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "RefinedWebModel": ["query_key_value"],
     "RefinedWeb": ["query_key_value"],
     "falcon": ["query_key_value"],
-    # "btlm": ["c_proj", "c_attn"],  # tested, does not work because of different shapes
+    "btlm": ["c_proj", "c_attn"],
     "codegen": ["qkv_proj"],
-    # "mistral": ["q_proj", "v_proj"],  # tested, does not work because of different shapes
-    # "mixtral": ["q_proj", "v_proj"],  # tested, does not work because of different shapes
+    "mistral": ["q_proj", "v_proj"],
+    "mixtral": ["q_proj", "v_proj"],
     "stablelm": ["q_proj", "v_proj"],
-    # "phi": ["q_proj", "v_proj", "fc1", "fc2"],  # tested, does not work because of different shapes
     "phi": ["q_proj", "v_proj"],
-    # "gemma": ["q_proj", "v_proj"],  # tested, does not work because of different shapes
+    "gemma": ["q_proj", "v_proj"],
     "qwen2": ["q_proj", "v_proj"],
 }