ENH Update VeRA preconfigured models
Some pre-configured models, such as mistral, used to not work with VeRA because the shapes of the targeted weights were not identical. Since huggingface#1817, identical shapes are no longer a requirement, so the previously commented-out entries can now be enabled.

I have tested mistral and gemma and they work. I haven't tested btlm and mixtral, but with this change they should work as well.
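
As an illustration (not part of this commit), here is a minimal sketch of applying VeRA to one of the newly enabled architectures via PEFT. The checkpoint name and rank are assumptions, and target_modules is deliberately left unset so that PEFT falls back to the preconfigured mapping updated below:

# Minimal usage sketch; checkpoint and rank are illustrative assumptions.
from transformers import AutoModelForCausalLM
from peft import VeraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
# Leaving target_modules unset lets PEFT pick the preconfigured entries
# for mistral, i.e. ["q_proj", "v_proj"] from the mapping changed in this commit.
config = VeraConfig(r=256)
model = get_peft_model(model, config)
model.print_trainable_parameters()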
BenjaminBossan committed Jul 22, 2024
1 parent 6472061 commit 505ac7d
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions src/peft/utils/constants.py
@@ -197,14 +197,13 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
     "RefinedWebModel": ["query_key_value"],
     "RefinedWeb": ["query_key_value"],
     "falcon": ["query_key_value"],
-    # "btlm": ["c_proj", "c_attn"], # tested, does not work because of different shapes
+    "btlm": ["c_proj", "c_attn"],
     "codegen": ["qkv_proj"],
-    # "mistral": ["q_proj", "v_proj"], # tested, does not work because of different shapes
-    # "mixtral": ["q_proj", "v_proj"], # tested, does not work because of different shapes
+    "mistral": ["q_proj", "v_proj"],
+    "mixtral": ["q_proj", "v_proj"],
     "stablelm": ["q_proj", "v_proj"],
-    # "phi": ["q_proj", "v_proj", "fc1", "fc2"], # tested, does not work because of different shapes
     "phi": ["q_proj", "v_proj"],
-    # "gemma": ["q_proj", "v_proj"], # tested, does not work because of different shapes
+    "gemma": ["q_proj", "v_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING = {
