huggingface · ArthurZucker · Nov 25, 2024 · Nov 7, 2024 · Nov 25, 2024 · Nov 25, 2024
@@ -1101,7 +1101,8 @@ def forward(
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
         all_hidden_states = () if output_hidden_states else None
-        for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
+        for i in range(len(self.h)):
+            block, layer_past = self.h[i], past_key_values[i]
             # Model parallel
             if self.model_parallel:
                 torch.cuda.set_device(hidden_states.device)