
Merge branch 'main' into feat-add-lora-multihead-attention
BenjaminBossan committed Sep 12, 2024
2 parents cc3ac3d + 7868d03 commit 03c466f
Showing 4 changed files with 26 additions and 20 deletions.
.github/workflows/stale.yml: 4 changes (3 additions, 1 deletion)
@@ -9,6 +9,8 @@ jobs:
     name: Close Stale Issues
     if: github.repository == 'huggingface/peft'
     runs-on: ubuntu-latest
+    permissions:
+      issues: write
     env:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
@@ -24,4 +26,4 @@ jobs:
         pip install PyGithub
       - name: Close stale issues
         run: |
-          python scripts/stale.py
+          python scripts/stale.py
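
For context on the permissions block added above: the workflow's final step runs scripts/stale.py with the job's GITHUB_TOKEN, and commenting on or closing issues is a write operation on the Issues API, hence issues: write. The sketch below is hypothetical, not the repository's actual scripts/stale.py; it only illustrates the shape of the PyGithub calls such a script makes, with the cutoff, label name, and message invented for illustration.

# Hypothetical sketch only -- not the repository's scripts/stale.py.
import os
from datetime import datetime, timedelta, timezone

from github import Github  # provided by `pip install PyGithub`

g = Github(os.environ["GITHUB_TOKEN"])
repo = g.get_repo("huggingface/peft")
cutoff = datetime.now(timezone.utc) - timedelta(days=30)  # illustrative threshold

for issue in repo.get_issues(state="open"):
    labels = {label.name for label in issue.labels}
    # Skip issues exempt from the stale policy (label name is illustrative).
    if "wip" in labels:
        continue
    if issue.updated_at.replace(tzinfo=timezone.utc) < cutoff:
        # Both calls below require `issues: write` on the GITHUB_TOKEN.
        issue.create_comment("This issue has been marked as stale due to inactivity.")
        issue.edit(state="closed")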
tests/test_stablediffusion.py: 2 changes (1 addition, 1 deletion)
@@ -145,7 +145,7 @@ def instantiate_sd_peft(self, model_id, config_cls, config_kwargs):
     def prepare_inputs_for_testing(self):
         return {
             "prompt": "a high quality digital photo of a cute corgi",
-            "num_inference_steps": 20,
+            "num_inference_steps": 3,
         }

     @parameterized.expand(
tests/test_vision_models.py: 6 changes (3 additions, 3 deletions)
@@ -76,7 +76,7 @@ def test_past_kv(self):


 class TestResnet:
-    model_id = "microsoft/resnet-18"
+    model_id = "hf-internal-testing/tiny-random-ResNetForImageClassification"

     @pytest.fixture(autouse=True)
     def teardown(self):
@@ -117,8 +117,8 @@ def test_model_with_batchnorm_reproducibility(self, config, tmp_path, data):
         optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
         batch_size = 4
         max_steps = 5 * batch_size
-        labels = torch.zeros(1, 1000)
-        labels[0, 283] = 1
+        labels = torch.zeros(1, 3)
+        labels[0, 1] = 1
         for i in range(0, max_steps, batch_size):
             optimizer.zero_grad()
             outputs = model(**data, labels=labels)
tests/test_xlora.py: 34 changes (19 additions, 15 deletions)
@@ -15,8 +15,10 @@
 import os

 import huggingface_hub
+import packaging
 import pytest
 import torch
+import transformers
 from safetensors.torch import load_file
 from transformers import AutoModelForCausalLM, AutoTokenizer

@@ -25,21 +27,24 @@
 from peft.utils import infer_device


+uses_transformers_4_45 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.45.0")
+
+
 class TestXlora:
     torch_device = infer_device()

     model_id = "facebook/opt-125m"
     num_loras = 4

-    @pytest.fixture(scope="function")
+    @pytest.fixture(scope="class")
     def lora_dir(self, tmp_path_factory):
         return tmp_path_factory.mktemp("lora")

-    @pytest.fixture(scope="function")
+    @pytest.fixture(scope="class")
     def lora_embedding_dir(self, tmp_path_factory):
         return tmp_path_factory.mktemp("lora_embedding")

-    @pytest.fixture(scope="function")
+    @pytest.fixture(scope="class")
     def saved_lora_adapters(self, lora_dir):
         file_names = []
         for i in range(1, self.num_loras + 1):
@@ -52,7 +57,7 @@ def saved_lora_adapters(self, lora_dir):
             file_names.append(file_name)
         return file_names

-    @pytest.fixture(scope="function")
+    @pytest.fixture(scope="class")
     def saved_lora_embedding_adapters(self, lora_embedding_dir):
         file_names = []
         for i in range(1, self.num_loras + 1):
@@ -65,7 +70,7 @@ def saved_lora_embedding_adapters(self, lora_embedding_dir):
             file_names.append(file_name)
         return file_names

-    @pytest.fixture(scope="function")
+    @pytest.fixture(scope="class")
     def tokenizer(self):
         tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.torch_device)
         return tokenizer
@@ -128,6 +133,8 @@ def test_functional(self, tokenizer, model):
         )
         assert torch.isfinite(outputs[: inputs.shape[1] :]).all()

+    # TODO: remove the skip when 4.45 is released!
+    @pytest.mark.skipif(not uses_transformers_4_45, reason="Requires transformers >= 4.45")
     def test_scalings_logging_methods(self, tokenizer, model):
         model.enable_scalings_logging()

@@ -155,16 +162,13 @@ def test_scalings_logging_methods(self, tokenizer, model):

         bucketed = model.get_bucketed_scalings_log()
         keys = bucketed.keys()
-        # One bucket for prompt (seqlen=...) and one for the completion (seqlen=1)
-        assert len(bucketed) == 2
-        # One bucket for prompt (which has 1 elem)
-        assert len(bucketed[max(keys)][0]) == 1
-        assert len(bucketed[max(keys)][1]) == 1
-        assert bucketed[max(keys)][0][0] == 0
-        # One bucket for completions with bucket name 1
-        assert len(bucketed[1][0]) > 1
-        assert len(bucketed[1][1]) > 1
-        assert bucketed[1][0][0] > 0
+        # One bucket for each token as we aren't using cache
+        assert len(bucketed) == 32 == len(keys)
+        seq_len = inputs.shape[1]
+        for key in keys:
+            assert len(bucketed[key][0]) == 1
+            assert len(bucketed[key][1]) == 1
+            assert bucketed[key][0][0] == key - seq_len

         model.clear_scalings_log()
         assert len(model.get_scalings_log()) == 0
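
Two test-suite patterns recur in the tests/test_xlora.py changes above: fixtures promoted from scope="function" to scope="class", so the LoRA adapters are written to a temporary directory once per test class instead of once per test, and a module-level flag combined with pytest.mark.skipif to gate a test on the installed transformers version. The sketch below is a standalone illustration of those two patterns only, with placeholder names; it is not taken from the PEFT test suite.

# Standalone illustration of the two patterns used above; names are placeholders.
import packaging.version
import pytest
import transformers

# Evaluated once at import time; used to gate tests that need newer transformers.
uses_transformers_4_45 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.45.0")


class TestExample:
    @pytest.fixture(scope="class")
    def shared_dir(self, tmp_path_factory):
        # Created once per test class and reused by every test in the class,
        # instead of being recreated for each test function.
        return tmp_path_factory.mktemp("shared")

    def test_uses_shared_dir(self, shared_dir):
        assert shared_dir.exists()

    @pytest.mark.skipif(not uses_transformers_4_45, reason="Requires transformers >= 4.45")
    def test_needs_recent_transformers(self):
        assert hasattr(transformers, "__version__")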

