diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index f11cfb9f0b7b..14487f128f58 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -3193,7 +3193,11 @@ def evaluation_loop( self.control = self.callback_handler.on_prediction_step(args, self.state, self.control) # Gather all tensors and put them back on the CPU if we have done enough accumulation steps. - if args.eval_accumulation_steps is not None and self.accelerator.sync_gradients: + if ( + args.eval_accumulation_steps is not None + and (step + 1) % args.eval_accumulation_steps == 0 + and self.accelerator.sync_gradients + ): if losses_host is not None: losses = nested_numpify(losses_host) all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)