From 38fb6f6e8c801f7a5a84e2deda8345ae1dddaa40 Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Wed, 17 Apr 2024 18:11:44 +0800
Subject: [PATCH] Update src/transformers/models/llama/modeling_llama.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
---
 src/transformers/models/llama/modeling_llama.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py
index 5bf3f4230bfe35..d54d031845f0f0 100644
--- a/src/transformers/models/llama/modeling_llama.py
+++ b/src/transformers/models/llama/modeling_llama.py
@@ -1070,7 +1070,6 @@ def _update_causal_mask(
                 return attention_mask
             return None
 
-        ignore_causal_mask = False
         if self.config._attn_implementation == "sdpa":
             # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument,
             # in order to dispatch on Flash Attention 2.
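
Note (not part of the patch): the comment retained in the hunk above refers to SDPA's `is_causal` argument being used instead of an explicit `attn_mask` so that the fused kernel path can be taken. Below is a minimal, standalone sketch of that distinction using plain `torch.nn.functional.scaled_dot_product_attention`; the tensor names and shapes are illustrative assumptions and this is not the model's actual code path.

import torch
import torch.nn.functional as F

# Hypothetical shapes, for illustration only.
batch, heads, seq_len, head_dim = 1, 8, 16, 64
q = torch.randn(batch, heads, seq_len, head_dim)
k = torch.randn(batch, heads, seq_len, head_dim)
v = torch.randn(batch, heads, seq_len, head_dim)

# Passing an explicit additive causal bias as `attn_mask` forces SDPA onto a
# generic (non-fused) attention path.
causal_bias = torch.full((seq_len, seq_len), float("-inf")).triu(1)
out_masked = F.scaled_dot_product_attention(q, k, v, attn_mask=causal_bias)

# Omitting the mask and setting `is_causal=True` instead lets SDPA dispatch to
# its fused (Flash Attention 2) kernel when hardware and dtypes allow it.
out_causal = F.scaled_dot_product_attention(q, k, v, is_causal=True)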