diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index f11cfb9f0b7b..14487f128f58 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -3193,7 +3193,11 @@ def evaluation_loop(
             self.control = self.callback_handler.on_prediction_step(args, self.state, self.control)
 
             # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
-            if args.eval_accumulation_steps is not None and self.accelerator.sync_gradients:
+            if (
+                args.eval_accumulation_steps is not None
+                and (step + 1) % args.eval_accumulation_steps == 0
+                and self.accelerator.sync_gradients
+            ):
                 if losses_host is not None:
                     losses = nested_numpify(losses_host)
                     all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)