add more clarifications

huggingface · Sep 13, 2023 · ba1b10f · ba1b10f
1 parent 6572b76
commit ba1b10f
Showing 1 changed file with 2 additions and 0 deletions.
diff --git a/src/transformers/models/rwkv/modeling_rwkv.py b/src/transformers/models/rwkv/modeling_rwkv.py
@@ -760,6 +760,8 @@ def _bnb_4bit_dequantize_and_rescale(self, target_layer, block_id):
         # re-quantize the model:
         # we need to put it first on CPU then back to the device
         # this will create an overhead :/
+        # We set requires_grad=False because gradients cannot be computed on 4-bit parameters anyway, and
+        # doing so also avoids bugs with bitsandbytes (bnb).
         quant_weight = bnb.nn.Params4bit(dequant_weights.to("cpu"), requires_grad=False).to(dequant_weights.device)
         setattr(target_layer, "weight", quant_weight)