Remove deprecated distributed_backend from Trainer #10017

Merged 5 commits on Oct 19, 2021
Changes from 4 commits
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -496,6 +496,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed `should_rank_save_checkpoint` property from Trainer ([#9433](https://github.com/PyTorchLightning/pytorch-lightning/pull/9433))


- Removed deprecated `distributed_backend` from `Trainer` ([#10017](https://github.com/PyTorchLightning/pytorch-lightning/pull/10017))


### Fixed


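For anyone landing here from the removal, a minimal before/after sketch (the strategy value is illustrative; any string previously accepted by `distributed_backend` maps over the same way):

```python
from pytorch_lightning import Trainer

# Before (flag removed by this PR):
# trainer = Trainer(distributed_backend="ddp_spawn", num_processes=2)

# After: pass the training strategy explicitly via `strategy`.
trainer = Trainer(strategy="ddp_spawn", num_processes=2)
```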
6 changes: 1 addition & 5 deletions docs/source/common/trainer.rst
@@ -216,7 +216,7 @@ accelerator

|

The accelerator backend to use (previously known as distributed_backend).
The accelerator backend to use:

- (``'dp'``) is DataParallel (split batch among GPUs of same machine)
- (``'ddp'``) is DistributedDataParallel (each gpu on each node trains, and syncs grads)
@@ -553,10 +553,6 @@ will need to be set up to use remote filepaths.
# default used by the Trainer
trainer = Trainer(default_root_dir=os.getcwd())

distributed_backend
^^^^^^^^^^^^^^^^^^^
Deprecated: This has been renamed ``accelerator``.

enable_checkpointing
^^^^^^^^^^^^^^^^^^^^

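A quick sketch of the two documented values above (assumes a machine with at least two GPUs; from v1.5 the same strings are preferably passed via `strategy`):

```python
from pytorch_lightning import Trainer

# DataParallel: a single process splits each batch across the GPUs of one machine.
dp_trainer = Trainer(accelerator="dp", gpus=2)

# DistributedDataParallel: one process per GPU, gradients synchronized between them.
ddp_trainer = Trainer(accelerator="ddp", gpus=2)
```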
47 changes: 14 additions & 33 deletions pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -92,7 +92,6 @@ def __init__(
devices,
tpu_cores,
ipus,
distributed_backend,
accelerator,
strategy: Optional[Union[str, TrainingTypePlugin]],
gpus,
@@ -113,7 +112,8 @@ def __init__(
self._accelerator_type = None

self.strategy = strategy.lower() if isinstance(strategy, str) else strategy
self.distributed_backend = distributed_backend or accelerator
# TODO: Rename this to something else once all the distributed flags are moved to strategy
self.distributed_backend = accelerator

self._init_deterministic(deterministic)

@@ -152,7 +152,7 @@ def __init__(

self.plugins = plugins

self._handle_accelerator_and_distributed_backend(distributed_backend, accelerator)
self._handle_accelerator_and_strategy()

self._validate_accelerator_and_devices()

@@ -176,10 +176,6 @@ def __init__(
self._training_type_plugin_resolved = False
self.accelerator = self.select_accelerator()

# override dist backend when using tpus
if self.use_tpu:
self.distributed_backend = "tpu"

# init flags for SLURM+DDP to work
self.world_size = 1
self.interactive_ddp_procs = []
@@ -285,31 +281,16 @@ def _set_devices_if_none(self) -> None:
elif self._accelerator_type == DeviceType.CPU:
self.devices = self.num_processes

def _handle_accelerator_and_distributed_backend(
self, distributed_backend: Optional[str], accelerator: Optional[Union[str, Accelerator]]
) -> None:
if distributed_backend is not None:
rank_zero_deprecation(
f"`Trainer(distributed_backend={distributed_backend!r})` "
"has been deprecated and will be removed in v1.5."
f" Use `Trainer(strategy={distributed_backend!r})` instead."
)
if self.strategy is not None:
raise MisconfigurationException(
f"You have passed `Trainer(strategy={self.strategy!r})` but have"
f" also passed `Trainer(distributed_backend={distributed_backend!r})`."
f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
)

if accelerator is not None and accelerator in list(DistributedType):
def _handle_accelerator_and_strategy(self) -> None:
if self.distributed_backend is not None and self.distributed_backend in list(DistributedType):
rank_zero_deprecation(
f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated"
f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead."
f"Passing `Trainer(accelerator={self.distributed_backend!r})` has been deprecated"
f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={self.distributed_backend!r})` instead."
)
if self.strategy is not None:
raise MisconfigurationException(
f"You have passed `Trainer(strategy={self.strategy!r})` but have"
f" also passed `Trainer(accelerator={accelerator!r})`."
f" also passed `Trainer(accelerator={self.distributed_backend!r})`."
f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
)
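The renamed handler keeps the v1.5 behaviour: a distributed type passed through `accelerator` still works but warns, and combining it with `strategy` is rejected. A pytest-style sketch of both paths (constructing on CPU):

```python
import pytest

from pytorch_lightning import Trainer
from pytorch_lightning.utilities.exceptions import MisconfigurationException

# Deprecated spelling: accepted for now, but emits the deprecation message above.
with pytest.deprecated_call(match="Use `Trainer"):
    Trainer(accelerator="ddp_spawn", num_processes=2)

# Passing both spellings is ambiguous and raises immediately.
with pytest.raises(MisconfigurationException, match="but have also passed"):
    Trainer(accelerator="ddp", strategy="ddp_spawn")
```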

@@ -783,15 +764,15 @@ def select_cluster_environment(self) -> ClusterEnvironment:
env = LightningEnvironment()
return env

def set_distributed_mode(self, distributed_backend: Optional[str] = None):
def set_distributed_mode(self, strategy: Optional[str] = None):

if distributed_backend is None and self.is_training_type_in_plugins:
if strategy is None and self.is_training_type_in_plugins:
return

if distributed_backend is not None and distributed_backend in TrainingTypePluginsRegistry:
self.distributed_backend = TrainingTypePluginsRegistry[distributed_backend]["distributed_backend"]
elif distributed_backend is not None:
self.distributed_backend = distributed_backend
if strategy is not None and strategy in TrainingTypePluginsRegistry:
self.distributed_backend = TrainingTypePluginsRegistry[strategy]["distributed_backend"]
elif strategy is not None:
self.distributed_backend = strategy

if isinstance(self.distributed_backend, Accelerator):
return
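Roughly what the renamed `set_distributed_mode` argument now does: registered strategy names resolve to their backend entry, anything else is used verbatim. A simplified stand-in (the dict below is hypothetical, not the real `TrainingTypePluginsRegistry`):

```python
# Hypothetical registry contents for illustration only.
FAKE_REGISTRY = {"deepspeed_stage_2": {"distributed_backend": "deepspeed"}}


def resolve_distributed_backend(strategy: str) -> str:
    """Mimics the lookup in set_distributed_mode for a non-None strategy string."""
    if strategy in FAKE_REGISTRY:
        return FAKE_REGISTRY[strategy]["distributed_backend"]
    return strategy


assert resolve_distributed_backend("ddp") == "ddp"
assert resolve_distributed_backend("deepspeed_stage_2") == "deepspeed"
```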
11 changes: 1 addition & 10 deletions pytorch_lightning/trainer/trainer.py
@@ -176,7 +176,6 @@ def __init__(
plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None,
amp_backend: str = "native",
amp_level: Optional[str] = None,
distributed_backend: Optional[str] = None,
move_metrics_to_cpu: bool = False,
multiple_trainloader_mode: str = "max_size_cycle",
stochastic_weight_avg: bool = False,
@@ -187,7 +186,7 @@

Args:

accelerator: Previously known as distributed_backend (dp, ddp, ddp2, etc...).
accelerator: (dp, ddp, ddp2, etc...).
Can also take in an accelerator object for custom hardware.

accumulate_grad_batches: Accumulates grads every k batches or as set up in the dict.
@@ -241,8 +240,6 @@ def __init__(
devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`,
based on the accelerator type.

distributed_backend: Deprecated. Please use ``accelerator``.

fast_dev_run: Runs n if set to ``n`` (int) else 1 if set to ``True`` batch(es)
of train, val and test to find any bugs (ie: a sort of unit test).

@@ -430,7 +427,6 @@ def __init__(
devices,
tpu_cores,
ipus,
distributed_backend,
accelerator,
strategy,
gpus,
@@ -1513,11 +1509,6 @@ def _on_exception(self):
def accelerator(self) -> Accelerator:
return self.accelerator_connector.accelerator

@property
def distributed_backend(self) -> Optional[str]:
# for backward compatibility
return self.accelerator_connector.distributed_backend

@property
def training_type_plugin(self) -> TrainingTypePlugin:
return self.accelerator.training_type_plugin
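With the backward-compatibility `Trainer.distributed_backend` property gone, callers that introspected it can check what the strategy resolved to instead; a sketch (plugin class names as of the v1.4/v1.5 API):

```python
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DDPSpawnPlugin

trainer = Trainer(strategy="ddp_spawn", num_processes=2)

# No drop-in replacement for the removed string property; inspect the resolved
# training type plugin (or `trainer.accelerator`) instead.
assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin)
```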
5 changes: 0 additions & 5 deletions tests/accelerators/test_accelerator_connector.py
@@ -634,11 +634,6 @@ def test_accelerator_ddp_for_cpu(tmpdir):
assert isinstance(trainer.training_type_plugin, DDPPlugin)


def test_exception_when_strategy_used_with_distributed_backend():
with pytest.raises(MisconfigurationException, match="but have also passed"):
Trainer(distributed_backend="ddp_cpu", strategy="ddp_spawn")


def test_exception_when_strategy_used_with_accelerator():
with pytest.raises(MisconfigurationException, match="but have also passed"):
Trainer(accelerator="ddp", strategy="ddp_spawn")
22 changes: 0 additions & 22 deletions tests/deprecated_api/test_remove_1-5.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/models/test_gpu.py
@@ -39,7 +39,7 @@

@RunIf(min_gpus=2)
def test_multi_gpu_none_backend(tmpdir):
"""Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
"""Make sure when using multiple GPUs the user can't use `accelerator = None`."""
tutils.set_random_master_port()
trainer_options = dict(
default_root_dir=tmpdir,
11 changes: 2 additions & 9 deletions tests/models/test_tpu.py
@@ -39,13 +39,6 @@

SERIAL_EXEC = xmp.MpSerialExecutor()

_LARGER_DATASET = RandomDataset(32, 2000)


# 8 cores needs a big dataset
def _serial_train_loader():
return DataLoader(_LARGER_DATASET, batch_size=32)


class SerialLoaderBoringModel(BoringModel):
def train_dataloader(self):
@@ -277,9 +270,9 @@ def test_exception_when_no_tpu_found(tmpdir):

@pytest.mark.parametrize("tpu_cores", [1, 8, [1]])
@RunIf(tpu=True)
def test_distributed_backend_set_when_using_tpu(tmpdir, tpu_cores):
def test_accelerator_set_when_using_tpu(tmpdir, tpu_cores):
"""Test if distributed_backend is set to `tpu` when tpu_cores is not None."""
assert Trainer(tpu_cores=tpu_cores).distributed_backend == "tpu"
assert isinstance(Trainer(tpu_cores=tpu_cores).accelerator, TPUAccelerator)


@RunIf(tpu=True)
2 changes: 1 addition & 1 deletion tests/utilities/test_cli.py
@@ -134,7 +134,7 @@ def _raise():
# interface.
min_steps=None,
max_steps=None,
distributed_backend=None,
accelerator=None,
weights_save_path=None,
resume_from_checkpoint=None,
profiler=None,