Skip to content

Commit

Permalink
Expose num_shards flag to tfds build
Browse files Browse the repository at this point in the history
This is especially handy in combination with setting nondeterministic order to true.

PiperOrigin-RevId: 702686971
  • Loading branch information
tomvdw authored and The TensorFlow Datasets Authors committed Dec 4, 2024
1 parent eaefd56 commit 3700745
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 0 deletions.
2 changes: 2 additions & 0 deletions tensorflow_datasets/scripts/cli/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ def _make_download_config(
kwargs = {}
if args.max_shard_size_mb:
kwargs['max_shard_size'] = args.max_shard_size_mb << 20
if args.num_shards:
kwargs['num_shards'] = args.num_shards
if args.download_config:
kwargs.update(json.loads(args.download_config))

Expand Down
3 changes: 3 additions & 0 deletions tensorflow_datasets/scripts/cli/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ def add_generation_argument_group(parser: argparse.ArgumentParser):
generation_group.add_argument(
'--max_shard_size_mb', type=int, help='The max shard size in megabytes.'
)
generation_group.add_argument(
'--num_shards', type=int, help='The number of shards to write to.'
)
generation_group.add_argument(
'--num-processes',
type=int,
Expand Down

0 comments on commit 3700745

Please sign in to comment.