From 766725f6e852b97661e1f3bac315a87e0a92a297 Mon Sep 17 00:00:00 2001
From: Steven Liu
Date: Wed, 11 Sep 2024 09:31:37 -0700
Subject: [PATCH] generate docstrings

---
 src/accelerate/utils/dataclasses.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/accelerate/utils/dataclasses.py b/src/accelerate/utils/dataclasses.py
index 6fd36db3315..07a37dcbae9 100644
--- a/src/accelerate/utils/dataclasses.py
+++ b/src/accelerate/utils/dataclasses.py
@@ -733,6 +733,34 @@ class TensorInformation:
 class DataLoaderConfiguration:
     """
     Configuration for dataloader-related items when calling `accelerator.prepare`.
+
+    Args:
+        split_batches (`bool`, defaults to `False`):
+            Whether or not the accelerator should split the batches yielded by the dataloaders across the devices. If
+            `True`, the actual batch size used will be the same on all kinds of distributed processes, but it must be
+            a round multiple of the `num_processes` you are using. If `False`, the actual batch size used will be the
+            one set in your script multiplied by the number of processes.
+        dispatch_batches (`bool`, defaults to `None`):
+            If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process
+            and then the batches are split and broadcast to each process. Will default to `True` for a `DataLoader`
+            whose underlying dataset is an `IterableDataset`, and `False` otherwise.
+        even_batches (`bool`, defaults to `True`):
+            If set to `True`, in cases where the total batch size across all processes does not exactly divide the
+            dataset, samples at the start of the dataset will be duplicated so the batch can be divided equally among
+            all workers.
+        use_seedable_sampler (`bool`, defaults to `False`):
+            Whether or not to use a fully seedable random sampler ([`data_loader.SeedableRandomSampler`]). Ensures
+            training results are fully reproducible when using a different sampling technique. While seed-to-seed
+            results may differ, on average the differences are negligible when using multiple different seeds to
+            compare. Should also be run with [`~utils.set_seed`] for the best results.
+        non_blocking (`bool`, defaults to `False`):
+            If set to `True`, the dataloader prepared by the Accelerator will utilize non-blocking host-to-device
+            transfers, allowing for better overlap between dataloader communication and computation. It is
+            recommended that the prepared dataloader has `pin_memory` set to `True` to work properly.
+        use_stateful_dataloader (`bool`, defaults to `False`):
+            If set to `True`, the dataloader prepared by the Accelerator will be backed by
+            [torchdata.StatefulDataLoader](https://github.com/pytorch/data/tree/main/torchdata/stateful_dataloader).
+            This requires `torchdata` version 0.8.0 or higher, which supports `StatefulDataLoader`, to be installed.
     """

     split_batches: bool = field(
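
A minimal usage sketch (editor's addition, not part of the patch): assuming a recent
`accelerate` release in which `Accelerator` accepts a `dataloader_config` argument, the
options documented above would typically be grouped into a single `DataLoaderConfiguration`
and handed to `Accelerator`:

    from accelerate import Accelerator
    from accelerate.utils import DataLoaderConfiguration

    # Collect all dataloader-related options in one place rather than passing
    # individual flags to Accelerator.
    dataloader_config = DataLoaderConfiguration(
        split_batches=False,        # keep the per-process batch size set in the script
        even_batches=True,          # duplicate leading samples so every process gets a full batch
        use_seedable_sampler=True,  # pair with utils.set_seed(...) for reproducible sampling
        non_blocking=True,          # assumes pin_memory=True on the underlying DataLoader
    )

    accelerator = Accelerator(dataloader_config=dataloader_config)
    # Dataloaders passed to accelerator.prepare(...) now follow this configuration.

Leaving `dispatch_batches` unset (`None`) lets Accelerate choose per dataset type, as
described in the docstring above.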