huggingface · amyeroberts · Sep 7, 2023 · Aug 30, 2023 · Aug 30, 2023 · Aug 30, 2023
@@ -1782,8 +1782,33 @@ def _inner_training_loop(
         # Skip the first epochs_trained epochs to get the random state of the dataloader at the right point.
         if not args.ignore_data_skip:
             for epoch in range(epochs_trained):
-                for _ in train_dataloader:
-                    break
+                is_random_sampler = (
+                    (hasattr(train_dataloader, "sampler") and isinstance(train_dataloader.sampler, RandomSampler))
+                    or (
+                        hasattr(train_dataloader, "batch_sampler")
+                        and isinstance(train_dataloader.batch_sampler.sampler, RandomSampler)
+                    )
+                    or (
+                        hasattr(train_dataloader, "batch_sampler")
+                        and hasattr(train_dataloader.batch_sampler, "batch_sampler")
+                        and isinstance(train_dataloader.batch_sampler.batch_sampler.sampler, RandomSampler)
+                    )
+                )
+                if not is_random_sampler:
+                    # We just need to begin an iteration to create the randomization of the sampler.
+                    for _ in train_dataloader:
+                        break
+                else:
+                    # Otherwise we need to iterate over the whole sampler, because a random operation is performed
+                    # at the very end of its iteration.
+                    sampler = []
+
+                    if hasattr(train_dataloader, "sampler") and isinstance(train_dataloader.sampler, RandomSampler):
+                        sampler = train_dataloader.sampler
+                    else:
+                        sampler = train_dataloader.batch_sampler
+
+                    _ = list(sampler)
 
         total_batched_samples = 0
         for epoch in range(epochs_trained, num_train_epochs):