From 98c8830cfe6fbbeca0d57873c5ad5d51568778f9 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Wed, 28 Jul 2021 17:05:12 +0530 Subject: [PATCH 01/29] Add training_type argument to Trainer --- .../trainer/connectors/accelerator_connector.py | 9 +++++++-- pytorch_lightning/trainer/trainer.py | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index c50590905631b..4520a01eb86b6 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -89,6 +89,7 @@ def __init__( ipus, distributed_backend, accelerator, + training_type, gpus, gpu_ids, num_nodes, @@ -214,11 +215,11 @@ def select_accelerator_type(self) -> None: self._set_devices_to_cpu_num_processes() self._accelerator_type = DeviceType.CPU - if self.distributed_backend in ["auto"] + list(DeviceType): + if self.distributed_backend in self.accelerator_types: self.distributed_backend = None def _validate_accelerator_and_devices(self) -> None: - if self.distributed_backend not in ["auto"] + list(DeviceType) and self.devices is not None: + if self.distributed_backend not in self.accelerator_types and self.devices is not None: raise MisconfigurationException( f"You passed `devices={self.devices}` but haven't specified" " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping," @@ -327,6 +328,10 @@ def handle_given_plugins(self) -> None: self._precision_plugin = precision self._cluster_environment = cluster_environment or self.select_cluster_environment() + @property + def accelerator_types(self) -> List[str]: + return ["auto"] + list(DeviceType) + @property def precision_plugin(self) -> PrecisionPlugin: if self._precision_plugin is None: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 5f3d18ebc4e66..0bc6b69f3309c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -33,7 +33,7 @@ from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop from pytorch_lightning.loops.dataloader.prediction_loop import PredictionLoop from pytorch_lightning.loops.fit_loop import FitLoop -from pytorch_lightning.plugins import Plugin +from pytorch_lightning.plugins import Plugin, TrainingTypePlugin from pytorch_lightning.plugins.environments import ClusterEnvironment from pytorch_lightning.profiler import ( AdvancedProfiler, @@ -139,6 +139,7 @@ def __init__( flush_logs_every_n_steps: int = 100, log_every_n_steps: int = 50, accelerator: Optional[Union[str, Accelerator]] = None, + training_type: Optional[Union[str, TrainingTypePlugin]] = None, sync_batchnorm: bool = False, precision: int = 32, weights_summary: Optional[str] = "top", @@ -358,6 +359,7 @@ def __init__( ipus, distributed_backend, accelerator, + training_type, gpus, gpu_ids, num_nodes, From 8b2f6c4044fcca41fc1c855af9a1efa9db352699 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Wed, 28 Jul 2021 17:42:30 +0530 Subject: [PATCH 02/29] Add deprecation warning --- .../trainer/connectors/accelerator_connector.py | 16 +++++++++++++--- tests/deprecated_api/test_remove_1-6.py | 5 +++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4520a01eb86b6..eb6a95f4377af 100644 --- 
a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -110,9 +110,16 @@ def __init__( if distributed_backend is not None: rank_zero_deprecation( f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." - f" Use `Trainer(accelerator={distributed_backend})` instead." + f" Use `Trainer(training_type={distributed_backend})` instead." ) - distributed_backend = distributed_backend or accelerator + + if accelerator is not None and accelerator in self.training_types: + rank_zero_deprecation( + f"Passing {accelerator} `training_type` to the `accelerator` flag in Trainer has been deprecated" + f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={accelerator})` instead." + ) + + self.distributed_backend = training_type or distributed_backend or accelerator self.num_processes = num_processes self.devices = devices @@ -121,7 +128,6 @@ def __init__( self.parallel_device_ids = gpu_ids self.tpu_cores = tpu_cores self.ipus = ipus - self.distributed_backend = distributed_backend self.num_nodes = num_nodes self.sync_batchnorm = sync_batchnorm self.benchmark = benchmark @@ -332,6 +338,10 @@ def handle_given_plugins(self) -> None: def accelerator_types(self) -> List[str]: return ["auto"] + list(DeviceType) + @property + def training_types(self) -> List[str]: + return ["single"] + list(DistributedType) + @property def precision_plugin(self) -> PrecisionPlugin: if self._precision_plugin is None: diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index a363c29456fdc..bca2f59937176 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -318,3 +318,8 @@ def test_v1_6_0_deprecated_device_dtype_mixin_import(): _soft_unimport_module("pytorch_lightning.utilities.device_dtype_mixin") with pytest.deprecated_call(match="will be removed in v1.6"): from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin # noqa: F811 F401 + + +def test_v1_6_0_passing_training_type_to_accelerator_trainer_flag(): + with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): + Trainer(accelerator="ddp_spawn") From 73120fabcabcc2c81b1a2b99aa10ebc516994023 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Wed, 28 Jul 2021 18:19:10 +0530 Subject: [PATCH 03/29] Update training_types --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index eb6a95f4377af..7797e066555a0 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -340,7 +340,7 @@ def accelerator_types(self) -> List[str]: @property def training_types(self) -> List[str]: - return ["single"] + list(DistributedType) + return ["single"] + list(DistributedType) + TrainingTypePluginsRegistry.available_plugins() @property def precision_plugin(self) -> PrecisionPlugin: From e856db10c9b1d4706c92781deefaa4b38ce2f3e3 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 30 Jul 2021 12:57:55 +0530 Subject: [PATCH 04/29] Add set training type plugin --- .../connectors/accelerator_connector.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git 
a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 7797e066555a0..9e9e73619f9c5 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -113,13 +113,14 @@ def __init__( f" Use `Trainer(training_type={distributed_backend})` instead." ) - if accelerator is not None and accelerator in self.training_types: + if accelerator is not None and accelerator in list(DistributedType): rank_zero_deprecation( f"Passing {accelerator} `training_type` to the `accelerator` flag in Trainer has been deprecated" f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={accelerator})` instead." ) - self.distributed_backend = training_type or distributed_backend or accelerator + self.training_type = training_type + self.distributed_backend = distributed_backend or accelerator self.num_processes = num_processes self.devices = devices @@ -156,7 +157,11 @@ def __init__( self._warn_if_devices_flag_ignored() self.select_accelerator_type() - self.set_distributed_mode() + + if self.training_type is not None: + self._set_training_type_plugin(self.training_type) + else: + self.set_distributed_mode() self.configure_slurm_ddp() self.handle_given_plugins() @@ -278,9 +283,17 @@ def _set_devices_if_none(self) -> None: elif self._accelerator_type == DeviceType.CPU: self.devices = self.num_processes + def _set_training_type_plugin(self, training_type: Union[str, TrainingTypePlugin]) -> None: + if isinstance(self.training_type, str) and self.training_type in TrainingTypePluginsRegistry: + self._training_type_plugin = TrainingTypePluginsRegistry.get(self.training_type) + if isinstance(self.training_type, str): + self.set_distributed_mode(self.training_type) + elif isinstance(self.training_type, TrainingTypePlugin): + self._training_type_plugin = self.training_type + def handle_given_plugins(self) -> None: - training_type = None + training_type = self._training_type_plugin precision = None cluster_environment = None From 59ebe70245cf23ddee6af0c5f575eeab070c0f6e Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 30 Jul 2021 13:42:45 +0530 Subject: [PATCH 05/29] Add deprecation_and_warn_for_accelerator_and_distributed_backend --- .../connectors/accelerator_connector.py | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 9e9e73619f9c5..e40a9f2e3443c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -107,18 +107,6 @@ def __init__( self._distrib_type = None self._accelerator_type = None - if distributed_backend is not None: - rank_zero_deprecation( - f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." - f" Use `Trainer(training_type={distributed_backend})` instead." - ) - - if accelerator is not None and accelerator in list(DistributedType): - rank_zero_deprecation( - f"Passing {accelerator} `training_type` to the `accelerator` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={accelerator})` instead." 
- ) - self.training_type = training_type self.distributed_backend = distributed_backend or accelerator @@ -153,6 +141,8 @@ def __init__( self.plugins = plugins + self._deprecation_and_warn_for_accelerator_and_distributed_backend(distributed_backend, accelerator) + self._validate_accelerator_and_devices() self._warn_if_devices_flag_ignored() @@ -283,6 +273,29 @@ def _set_devices_if_none(self) -> None: elif self._accelerator_type == DeviceType.CPU: self.devices = self.num_processes + def _deprecation_and_warn_for_accelerator_and_distributed_backend(self, distributed_backend, accelerator) -> None: + if distributed_backend is not None: + rank_zero_deprecation( + f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." + f" Use `Trainer(training_type={distributed_backend})` instead." + ) + if self.training_type is not None: + rank_zero_warn( + f"`Trainer(distributed_backend={distributed_backend})` will be ignored, as you have set" + f" `Trainer(training_type={self.training_type})`" + ) + + if accelerator is not None and accelerator in list(DistributedType): + rank_zero_deprecation( + f"Passing {accelerator} `training_type` to the `accelerator` flag in Trainer has been deprecated" + f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={accelerator})` instead." + ) + if self.training_type is not None: + rank_zero_warn( + f"`Trainer(accelerator={accelerator})` will be ignored, as you have set" + f" `Trainer(training_type={self.training_type})`" + ) + def _set_training_type_plugin(self, training_type: Union[str, TrainingTypePlugin]) -> None: if isinstance(self.training_type, str) and self.training_type in TrainingTypePluginsRegistry: self._training_type_plugin = TrainingTypePluginsRegistry.get(self.training_type) From b5094ec1030111b4e7e302b79c6199f4dc5d6970 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 30 Jul 2021 15:01:50 +0530 Subject: [PATCH 06/29] Add plugins warning --- .../trainer/connectors/accelerator_connector.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index e40a9f2e3443c..a0ad4bfd73aaf 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -306,6 +306,17 @@ def _set_training_type_plugin(self, training_type: Union[str, TrainingTypePlugin def handle_given_plugins(self) -> None: + if self.training_type is not None: + for plug in self.plugins: + exception_msg = ( + f"You have passed `Trainer(training_type={self.training_type})`" + f" and you can only specify one training type plugin, but you have passed {plug} as a plugin." 
+ ) + if isinstance(plug, str) and (plug in TrainingTypePluginsRegistry or plug in list(DistributedType)): + raise MisconfigurationException(exception_msg) + elif isinstance(plug, TrainingTypePlugin): + raise MisconfigurationException(exception_msg) + training_type = self._training_type_plugin precision = None cluster_environment = None @@ -557,7 +568,10 @@ def root_gpu(self) -> Optional[int]: @property def is_training_type_in_plugins(self) -> bool: - return any(isinstance(plug, str) and plug in TrainingTypePluginsRegistry for plug in self.plugins) + return any( + (isinstance(plug, str) and plug in TrainingTypePluginsRegistry) or isinstance(plug, TrainingTypePlugin) + for plug in self.plugins + ) @property def is_using_torchelastic(self) -> bool: From 4d11ced23e02c6fbf82c890cd0d2203f2c90d623 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 30 Jul 2021 15:23:22 +0530 Subject: [PATCH 07/29] Add is_plugin_training_type --- .../connectors/accelerator_connector.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index a0ad4bfd73aaf..abe6cc9d26d32 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -306,16 +306,17 @@ def _set_training_type_plugin(self, training_type: Union[str, TrainingTypePlugin def handle_given_plugins(self) -> None: - if self.training_type is not None: - for plug in self.plugins: - exception_msg = ( + for plug in self.plugins: + if self.training_type is not None and self._is_plugin_training_type(plug): + raise MisconfigurationException( f"You have passed `Trainer(training_type={self.training_type})`" f" and you can only specify one training type plugin, but you have passed {plug} as a plugin." ) - if isinstance(plug, str) and (plug in TrainingTypePluginsRegistry or plug in list(DistributedType)): - raise MisconfigurationException(exception_msg) - elif isinstance(plug, TrainingTypePlugin): - raise MisconfigurationException(exception_msg) + elif self._is_plugin_training_type(plug): + rank_zero_deprecation( + f"Passing {plug} `training_type` to the `plugins` flag in Trainer has been deprecated" + f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={plug})` instead." 
+ ) training_type = self._training_type_plugin precision = None @@ -566,6 +567,11 @@ def root_gpu(self) -> Optional[int]: else None ) + def _is_plugin_training_type(self, plugin: Union[str, TrainingTypePlugin]) -> bool: + if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(DistributedType)): + return True + return isinstance(plugin, TrainingTypePlugin) + @property def is_training_type_in_plugins(self) -> bool: return any( From c878f1cd33ed6befe4fcb16e6c85ed0e7d285ad3 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 30 Jul 2021 15:55:06 +0530 Subject: [PATCH 08/29] Update tests --- .../connectors/accelerator_connector.py | 22 +++++++++---------- .../test_accelerator_connector.py | 18 +++++++++++++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index abe6cc9d26d32..4a15a43f5eab0 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -141,7 +141,7 @@ def __init__( self.plugins = plugins - self._deprecation_and_warn_for_accelerator_and_distributed_backend(distributed_backend, accelerator) + self._handle_accelerator_and_distributed_backend(distributed_backend, accelerator) self._validate_accelerator_and_devices() self._warn_if_devices_flag_ignored() @@ -273,16 +273,17 @@ def _set_devices_if_none(self) -> None: elif self._accelerator_type == DeviceType.CPU: self.devices = self.num_processes - def _deprecation_and_warn_for_accelerator_and_distributed_backend(self, distributed_backend, accelerator) -> None: + def _handle_accelerator_and_distributed_backend(self, distributed_backend, accelerator) -> None: if distributed_backend is not None: rank_zero_deprecation( f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." f" Use `Trainer(training_type={distributed_backend})` instead." ) if self.training_type is not None: - rank_zero_warn( - f"`Trainer(distributed_backend={distributed_backend})` will be ignored, as you have set" - f" `Trainer(training_type={self.training_type})`" + raise MisconfigurationException( + f"You have passed `Trainer(training_type={self.training_type})` but have" + f" also passed `Trainer(distributed_backend={distributed_backend})`." + f"HINT: Use just `Trainer(training_type={self.training_type})` instead." ) if accelerator is not None and accelerator in list(DistributedType): @@ -291,9 +292,10 @@ def _deprecation_and_warn_for_accelerator_and_distributed_backend(self, distribu f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={accelerator})` instead." ) if self.training_type is not None: - rank_zero_warn( - f"`Trainer(accelerator={accelerator})` will be ignored, as you have set" - f" `Trainer(training_type={self.training_type})`" + raise MisconfigurationException( + f"You have passed `Trainer(training_type={self.training_type})` but have" + f" also passed `Trainer(accelerator={accelerator})`." + f"HINT: Use just `Trainer(training_type={self.training_type})` instead." 
) def _set_training_type_plugin(self, training_type: Union[str, TrainingTypePlugin]) -> None: @@ -376,10 +378,6 @@ def handle_given_plugins(self) -> None: def accelerator_types(self) -> List[str]: return ["auto"] + list(DeviceType) - @property - def training_types(self) -> List[str]: - return ["single"] + list(DistributedType) + TrainingTypePluginsRegistry.available_plugins() - @property def precision_plugin(self) -> PrecisionPlugin: if self._precision_plugin is None: diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index bae20d5181fa4..48527ddecce55 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -613,3 +613,21 @@ def test_devices_with_cpu_only_supports_integer(): with pytest.raises(MisconfigurationException, match="The flag `devices` only supports integer"): Trainer(accelerator="cpu", devices="1,3") + + +def test_exception_when_training_type_used_with_distributed_backend(): + + with pytest.raises(MisconfigurationException, match="but have also passed"): + Trainer(distributed_backend="ddp_cpu", training_type="ddp_spawn") + + +def test_exception_when_training_type_used_with_accelerator(): + + with pytest.raises(MisconfigurationException, match="but have also passed"): + Trainer(accelerator="ddp", training_type="ddp_spawn") + + +def test_exception_when_training_type_used_with_plugins(): + + with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): + Trainer(plugins="ddp_find_unused_parameters_false", training_type="ddp_spawn") From f9a1a632a3a4b68a7a569fd5e4341df8ea274913 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 30 Jul 2021 15:59:38 +0530 Subject: [PATCH 09/29] Add deprecation test --- tests/accelerators/test_accelerator_connector.py | 3 --- tests/deprecated_api/test_remove_1-6.py | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 48527ddecce55..f8b7d138f61f2 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -616,18 +616,15 @@ def test_devices_with_cpu_only_supports_integer(): def test_exception_when_training_type_used_with_distributed_backend(): - with pytest.raises(MisconfigurationException, match="but have also passed"): Trainer(distributed_backend="ddp_cpu", training_type="ddp_spawn") def test_exception_when_training_type_used_with_accelerator(): - with pytest.raises(MisconfigurationException, match="but have also passed"): Trainer(accelerator="ddp", training_type="ddp_spawn") def test_exception_when_training_type_used_with_plugins(): - with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): Trainer(plugins="ddp_find_unused_parameters_false", training_type="ddp_spawn") diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index bca2f59937176..725f461e90937 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -323,3 +323,8 @@ def test_v1_6_0_deprecated_device_dtype_mixin_import(): def test_v1_6_0_passing_training_type_to_accelerator_trainer_flag(): with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): Trainer(accelerator="ddp_spawn") + + +def test_v1_6_0_passing_training_type_to_plugins_flag(): + with 
pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): + Trainer(plugins="ddp_spawn") From ba6bc88f29eac9dce67e54be64ede8a55aa24ea8 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Sat, 31 Jul 2021 18:54:47 +0530 Subject: [PATCH 10/29] Add tests for cpus training type --- .../test_accelerator_connector.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index f8b7d138f61f2..306fbc041cb6d 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -628,3 +628,23 @@ def test_exception_when_training_type_used_with_accelerator(): def test_exception_when_training_type_used_with_plugins(): with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): Trainer(plugins="ddp_find_unused_parameters_false", training_type="ddp_spawn") + + +@pytest.mark.parametrize( + ["training_type", "plugin"], + [ + ("ddp_spawn", DDPSpawnPlugin), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp", DDPPlugin), + ("ddp_find_unused_parameters_false", DDPPlugin), + ], +) +def test_training_type_choice_cpu_str(tmpdir, training_type, plugin): + trainer = Trainer(training_type=training_type, num_processes=2) + assert isinstance(trainer.training_type_plugin, plugin) + + +@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) +def test_training_type_choice_cpu_plugin(tmpdir, plugin): + trainer = Trainer(training_type=plugin(), num_processes=2) + assert isinstance(trainer.training_type_plugin, plugin) From 3fe1e6799afb8fa008425715f99aa996f1908360 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Sat, 31 Jul 2021 23:18:57 +0530 Subject: [PATCH 11/29] Add tests for gpus & tpus training type --- .../test_accelerator_connector.py | 30 ++++++++++++++++--- tests/accelerators/test_tpu_backend.py | 13 ++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 306fbc041cb6d..e84dd33f1418d 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -635,16 +635,38 @@ def test_exception_when_training_type_used_with_plugins(): [ ("ddp_spawn", DDPSpawnPlugin), ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), - ("ddp", DDPPlugin), - ("ddp_find_unused_parameters_false", DDPPlugin), + # ("ddp", DDPPlugin), + # ("ddp_find_unused_parameters_false", DDPPlugin), ], ) def test_training_type_choice_cpu_str(tmpdir, training_type, plugin): - trainer = Trainer(training_type=training_type, num_processes=2) + trainer = Trainer(training_type=training_type, accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) def test_training_type_choice_cpu_plugin(tmpdir, plugin): - trainer = Trainer(training_type=plugin(), num_processes=2) + trainer = Trainer(training_type=plugin(), accelerator="cpu", devices=2) + assert isinstance(trainer.training_type_plugin, plugin) + + +@RunIf(min_gpus=2) +@pytest.mark.parametrize( + ["training_type", "plugin"], + [ + ("ddp_spawn", DDPSpawnPlugin), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp", DDPPlugin), + ("ddp_find_unused_parameters_false", DDPPlugin), + ], +) +def test_training_type_choice_gpu_str(tmpdir, training_type, plugin): + 
trainer = Trainer(training_type=training_type, accelerator="gpu", devices=2) + assert isinstance(trainer.training_type_plugin, plugin) + + +@RunIf(min_gpus=2) +@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) +def test_training_type_choice_gpu_plugin(tmpdir, plugin): + trainer = Trainer(training_type=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py index 99ac579eb99b0..28cde72f2eb4a 100644 --- a/tests/accelerators/test_tpu_backend.py +++ b/tests/accelerators/test_tpu_backend.py @@ -260,3 +260,16 @@ def test_ddp_cpu_not_supported_on_tpus(): with pytest.raises(MisconfigurationException, match="`accelerator='ddp_cpu'` is not supported on TPU machines"): Trainer(accelerator="ddp_cpu") + + +@RunIf(tpu=True) +@pytest.mark.parametrize("training_type", ["tpu_spawn", "tpu_spawn_debug"]) +def test_training_type_choice_tpu_str(tmpdir, training_type): + trainer = Trainer(training_type=training_type, accelerator="tpu", devices=8) + assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) + + +@RunIf(tpu=True) +def test_training_type_choice_tpu_plugin(tmpdir): + trainer = Trainer(training_type=TPUSpawnPlugin(), accelerator="tpu", devices=8) + assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) From ae1395ccbeeb967ec39d6c945d9a8f5e89356146 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Sat, 31 Jul 2021 23:24:11 +0530 Subject: [PATCH 12/29] Add tests for ipus training type --- tests/accelerators/test_ipu.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 2c1e7d553b34f..8136344da53dc 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -538,3 +538,9 @@ def test_set_devices_if_none_ipu(): trainer = Trainer(accelerator="ipu", ipus=8) assert trainer.devices == 8 + + +@RunIf(ipu=True) +def test_training_type_choice_ipu_plugin(tmpdir): + trainer = Trainer(training_type=IPUPlugin(), accelerator="ipu", devices=8) + assert isinstance(trainer.training_type_plugin, IPUPlugin) From fff3385754a06db34ac914f1b5cbc4e8aae88614 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Sun, 1 Aug 2021 21:55:12 +0530 Subject: [PATCH 13/29] Add update_device_type_if_training_type_plugin_passed --- .../trainer/connectors/accelerator_connector.py | 12 ++++++++++++ tests/accelerators/test_accelerator_connector.py | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4a15a43f5eab0..d83fc9d53545c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -156,6 +156,7 @@ def __init__( self.handle_given_plugins() self.update_device_type_if_ipu_plugin() + self.update_device_type_if_training_type_plugin_passed() self._validate_accelerator_type() self._set_devices_if_none() @@ -924,6 +925,17 @@ def update_device_type_if_ipu_plugin(self) -> None: if isinstance(self._training_type_plugin, IPUPlugin) and self._device_type != DeviceType.IPU: self._device_type = DeviceType.IPU + def update_device_type_if_training_type_plugin_passed(self) -> None: + if isinstance(self.training_type, TrainingTypePlugin) or any( + isinstance(plug, TrainingTypePlugin) for plug in self.plugins + ): + if self.use_ipu: + self._device_type = DeviceType.IPU + elif self.use_tpu: + 
self._device_type = DeviceType.TPU + elif self.use_gpu: + self._device_type = DeviceType.GPU + def configure_slurm_ddp(self): # extract SLURM flag vars # whenever we have the correct number of tasks, we let slurm manage processes diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index e84dd33f1418d..4c6a47deb86bb 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -26,6 +26,7 @@ from pytorch_lightning.accelerators.gpu import GPUAccelerator from pytorch_lightning.callbacks import Callback from pytorch_lightning.plugins import ( + DataParallelPlugin, DDP2Plugin, DDPPlugin, DDPShardedPlugin, @@ -658,6 +659,11 @@ def test_training_type_choice_cpu_plugin(tmpdir, plugin): ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), ("ddp", DDPPlugin), ("ddp_find_unused_parameters_false", DDPPlugin), + ("ddp2", DDP2Plugin), + ("dp", DataParallelPlugin), + ("ddp_sharded", DDPShardedPlugin), + ("ddp_sharded_spawn", DDPSpawnShardedPlugin), + pytest.param("deepspeed", DeepSpeedPlugin, marks=RunIf(deepspeed=True)), ], ) def test_training_type_choice_gpu_str(tmpdir, training_type, plugin): From bf7b9cb472d054dd23d2dc8856694c84170ed48e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Aug 2021 05:25:58 +0000 Subject: [PATCH 14/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/deprecated_api/test_remove_1-6.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 725f461e90937..5bbf185947053 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -317,7 +317,7 @@ def test_v1_6_0_deprecated_device_dtype_mixin_import(): _soft_unimport_module("pytorch_lightning.utilities.device_dtype_mixin") with pytest.deprecated_call(match="will be removed in v1.6"): - from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin # noqa: F811 F401 + from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin # noqa: F401 def test_v1_6_0_passing_training_type_to_accelerator_trainer_flag(): From aeca5aafce98bb5fadc334027403c4c3bb18fbec Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 3 Aug 2021 11:14:56 +0530 Subject: [PATCH 15/29] Update test --- pytorch_lightning/trainer/trainer.py | 2 ++ .../test_accelerator_connector.py | 34 +++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a0008e1f7f1a2..ef3d8ca2ccf43 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -308,6 +308,8 @@ def __init__( ipus: How many IPUs to train on. + training_type: Supports different training strategies with aliases as well custom training type plugins. + track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm. truncated_bptt_steps: Deprecated in v1.3 to be removed in 1.5. 
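For reference, the `training_type` flag documented above accepts either a registered alias or a `TrainingTypePlugin` instance, mirroring the tests updated below. A minimal sketch, assuming the patched Trainer from this series (later patches in the series rename the flag to `accelerator_strategy`):

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import DDPSpawnPlugin

    # Select the training type by alias ...
    trainer = Trainer(training_type="ddp_spawn", accelerator="cpu", devices=2)
    assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin)

    # ... or pass a plugin instance directly.
    trainer = Trainer(training_type=DDPSpawnPlugin(), accelerator="cpu", devices=2)
    assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin)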
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 013dffe35e4a2..d3c0330d60f29 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -617,6 +617,21 @@ def test_devices_with_cpu_only_supports_integer(): Trainer(accelerator="cpu", devices="1,3") +@pytest.mark.parametrize("training_type", ["ddp2", "dp"]) +def test_unsupported_distrib_types_on_cpu(training_type): + + with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting the distributed type to `ddp`."): + trainer = Trainer(accelerator=training_type, num_processes=2) + + assert trainer._distrib_type == DistributedType.DDP + + +def test_accelerator_ddp_for_cpu(tmpdir): + trainer = Trainer(accelerator="ddp", num_processes=2) + assert isinstance(trainer.accelerator, CPUAccelerator) + assert isinstance(trainer.training_type_plugin, DDPPlugin) + + def test_exception_when_training_type_used_with_distributed_backend(): with pytest.raises(MisconfigurationException, match="but have also passed"): Trainer(distributed_backend="ddp_cpu", training_type="ddp_spawn") @@ -637,8 +652,8 @@ def test_exception_when_training_type_used_with_plugins(): [ ("ddp_spawn", DDPSpawnPlugin), ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), - # ("ddp", DDPPlugin), - # ("ddp_find_unused_parameters_false", DDPPlugin), + ("ddp", DDPPlugin), + ("ddp_find_unused_parameters_false", DDPPlugin), ], ) def test_training_type_choice_cpu_str(tmpdir, training_type, plugin): @@ -677,18 +692,3 @@ def test_training_type_choice_gpu_str(tmpdir, training_type, plugin): def test_training_type_choice_gpu_plugin(tmpdir, plugin): trainer = Trainer(training_type=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) - - -@pytest.mark.parametrize("training_type", ["ddp2", "dp"]) -def test_unsupported_distrib_types_on_cpu(training_type): - - with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting the distributed type to `ddp`."): - trainer = Trainer(accelerator=training_type, num_processes=2) - - assert trainer._distrib_type == DistributedType.DDP - - -def test_accelerator_ddp_for_cpu(tmpdir): - trainer = Trainer(accelerator="ddp", num_processes=2) - assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPPlugin) From 91caf4b952f58acbe44300b529c09e6a5b94a66c Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 3 Aug 2021 11:34:30 +0530 Subject: [PATCH 16/29] Code improvements --- .../trainer/connectors/accelerator_connector.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4a32f18c8f120..9e5d021040a4a 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -149,7 +149,7 @@ def __init__( self.select_accelerator_type() if self.training_type is not None: - self._set_training_type_plugin(self.training_type) + self._set_training_type_plugin() else: self.set_distributed_mode() self.configure_slurm_ddp() @@ -299,7 +299,7 @@ def _handle_accelerator_and_distributed_backend(self, distributed_backend, accel f"HINT: Use just `Trainer(training_type={self.training_type})` instead." 
) - def _set_training_type_plugin(self, training_type: Union[str, TrainingTypePlugin]) -> None: + def _set_training_type_plugin(self) -> None: if isinstance(self.training_type, str) and self.training_type in TrainingTypePluginsRegistry: self._training_type_plugin = TrainingTypePluginsRegistry.get(self.training_type) if isinstance(self.training_type, str): @@ -315,7 +315,7 @@ def handle_given_plugins(self) -> None: f"You have passed `Trainer(training_type={self.training_type})`" f" and you can only specify one training type plugin, but you have passed {plug} as a plugin." ) - elif self._is_plugin_training_type(plug): + if self._is_plugin_training_type(plug): rank_zero_deprecation( f"Passing {plug} `training_type` to the `plugins` flag in Trainer has been deprecated" f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={plug})` instead." @@ -566,7 +566,8 @@ def root_gpu(self) -> Optional[int]: else None ) - def _is_plugin_training_type(self, plugin: Union[str, TrainingTypePlugin]) -> bool: + @staticmethod + def _is_plugin_training_type(plugin: Union[str, TrainingTypePlugin]) -> bool: if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(DistributedType)): return True return isinstance(plugin, TrainingTypePlugin) From 3110ce96790737f3c4bbaffda3fb9a7ecf566f66 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 3 Aug 2021 11:56:14 +0530 Subject: [PATCH 17/29] Add trainer kwargs test --- .../connectors/accelerator_connector.py | 4 +- tests/trainer/test_trainer.py | 87 +++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 9e5d021040a4a..505de90cd9ddd 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -274,7 +274,9 @@ def _set_devices_if_none(self) -> None: elif self._accelerator_type == DeviceType.CPU: self.devices = self.num_processes - def _handle_accelerator_and_distributed_backend(self, distributed_backend, accelerator) -> None: + def _handle_accelerator_and_distributed_backend( + self, distributed_backend: Optional[str], accelerator: Optional[Union[str, Accelerator]] + ) -> None: if distributed_backend is not None: rank_zero_deprecation( f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." 
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 62bd0214b6d22..27825fa53f1bd 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1913,3 +1913,90 @@ def on_epoch_start(self, trainer, *_): gc.collect() memory_3 = torch.cuda.memory_allocated(0) assert memory_3 == initial + + +@pytest.mark.parametrize( + "trainer_kwargs,expected", + [ + ( + dict(training_type=None, gpus=None), + dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + ), + ( + dict(training_type="dp", gpus=None), + dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + ), + ( + dict(training_type="ddp", gpus=None), + dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + ), + ( + dict(training_type="ddp", num_processes=2, gpus=None), + dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + ), + ( + dict(training_type="ddp", num_nodes=2, gpus=None), + dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + ), + ( + dict(training_type="ddp_cpu", num_processes=2, gpus=None), + dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + ), + ( + dict(training_type="ddp2", gpus=None), + dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + ), + ( + dict(training_type=None, gpus=1), + dict(_distrib_type=None, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + ), + ( + dict(training_type="dp", gpus=1), + dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + ), + ( + dict(training_type="ddp", gpus=1), + dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + ), + ( + dict(training_type="ddp_cpu", num_processes=2, gpus=1), + dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + ), + ( + dict(training_type="ddp2", gpus=1), + dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + ), + ( + dict(training_type=None, gpus=2), + dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + ), + ( + dict(training_type="dp", gpus=2), + dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + ), + ( + dict(training_type="ddp", gpus=2), + dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + ), + ( + dict(training_type="ddp2", gpus=2), + dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + ), + ( + dict(training_type="ddp2", num_processes=2, gpus=None), + dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + ), + ( + dict(training_type="dp", num_processes=2, gpus=None), + dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + ), + ], +) +def test_trainer_config_training_type(trainer_kwargs, expected, monkeypatch): + if trainer_kwargs["gpus"] is not None: + monkeypatch.setattr(torch.cuda, "is_available", lambda: True) + monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"]) + trainer = Trainer(**trainer_kwargs) + assert len(expected) == 4 + for k, v in expected.items(): + assert getattr(trainer, k) == v, f"Failed {k}: {v}" From 
ddbeab40a3e3ebc7d664c7df33b61d057627127f Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 3 Aug 2021 14:27:17 +0530 Subject: [PATCH 18/29] Add update_device_type_if_training_type_plugin_passed --- .../connectors/accelerator_connector.py | 20 +++++++++++++------ .../test_accelerator_connector.py | 12 ++++++++++- tests/accelerators/test_ipu.py | 11 +++++++++- tests/models/test_tpu.py | 12 ++++++++++- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 505de90cd9ddd..4afd0d01cacd3 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -935,12 +935,20 @@ def update_device_type_if_training_type_plugin_passed(self) -> None: if isinstance(self.training_type, TrainingTypePlugin) or any( isinstance(plug, TrainingTypePlugin) for plug in self.plugins ): - if self.use_ipu: - self._device_type = DeviceType.IPU - elif self.use_tpu: - self._device_type = DeviceType.TPU - elif self.use_gpu: - self._device_type = DeviceType.GPU + if self._accelerator_type is not None: + if self.use_ipu: + self._device_type = DeviceType.IPU + elif self.use_tpu: + self._device_type = DeviceType.TPU + elif self.use_gpu: + self._device_type = DeviceType.GPU + else: + if self.has_ipu: + self._device_type = DeviceType.IPU + elif self.has_tpu: + self._device_type = DeviceType.TPU + elif self.has_gpu: + self._device_type = DeviceType.GPU def configure_slurm_ddp(self): # extract SLURM flag vars diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index d3c0330d60f29..2a5337e9a8bef 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -43,7 +43,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import DeviceType, DistributedType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.runif import RunIf @@ -692,3 +692,13 @@ def test_training_type_choice_gpu_str(tmpdir, training_type, plugin): def test_training_type_choice_gpu_plugin(tmpdir, plugin): trainer = Trainer(training_type=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) + + +@RunIf(min_gpus=2) +@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) +def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin): + + trainer = Trainer(training_type=plugin(), gpus=2) + assert isinstance(trainer.training_type_plugin, plugin) + assert trainer._device_type == DeviceType.GPU + assert isinstance(trainer.accelerator, GPUAccelerator) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 8136344da53dc..0d8e4bb6e242b 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.trainer.supporters import CombinedLoader -from pytorch_lightning.utilities import _IPU_AVAILABLE +from pytorch_lightning.utilities import _IPU_AVAILABLE, DeviceType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from 
tests.helpers.datamodules import ClassifDataModule @@ -544,3 +544,12 @@ def test_set_devices_if_none_ipu(): def test_training_type_choice_ipu_plugin(tmpdir): trainer = Trainer(training_type=IPUPlugin(), accelerator="ipu", devices=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) + + +@RunIf(ipu=True) +def test_device_type_when_training_plugin_ipu_passed(tmpdir): + + trainer = Trainer(training_type=IPUPlugin(), ipus=8) + assert isinstance(trainer.training_type_plugin, IPUPlugin) + assert trainer._device_type == DeviceType.IPU + assert isinstance(trainer.accelerator, IPUAccelerator) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 5aa605cdf38bb..8046672211e75 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -26,7 +26,7 @@ from pytorch_lightning.callbacks import EarlyStopping from pytorch_lightning.plugins import TPUSpawnPlugin from pytorch_lightning.trainer.connectors.logger_connector.result import _Sync -from pytorch_lightning.utilities import _TPU_AVAILABLE +from pytorch_lightning.utilities import _TPU_AVAILABLE, DeviceType from pytorch_lightning.utilities.distributed import ReduceOp from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel, RandomDataset @@ -473,3 +473,13 @@ def teardown(self, stage): model = DebugModel() tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) + + +@RunIf(tpu=True) +@pl_multi_process_test +def test_device_type_when_training_plugin_tpu_passed(tmpdir): + + trainer = Trainer(training_type=TPUSpawnPlugin(), tpu_cores=8) + assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) + assert trainer._device_type == DeviceType.TPU + assert isinstance(trainer.accelerator, TPUAccelerator) From e6eb8a64416267fe41670e350240fb5df50af2ad Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 3 Aug 2021 19:30:04 +0530 Subject: [PATCH 19/29] Update to accelerator strategy --- CHANGELOG.md | 2 +- .../connectors/accelerator_connector.py | 42 +++++++++---------- pytorch_lightning/trainer/trainer.py | 9 ++-- .../test_accelerator_connector.py | 16 +++---- tests/accelerators/test_ipu.py | 6 +-- tests/accelerators/test_tpu_backend.py | 4 +- tests/models/test_tpu.py | 2 +- tests/trainer/test_trainer.py | 38 ++++++++--------- 8 files changed, 60 insertions(+), 59 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0d5ac7caf287..bce68d8c7f6a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `ResultCollection` state_dict to Loop `state_dict` and support for distributed reload. 
([#8641](https://github.com/PyTorchLightning/pytorch-lightning/pull/8641)) -- +- Added `accelerator_strategy` argument to Trainer ([#8597](https://github.com/PyTorchLightning/pytorch-lightning/pull/8597)) - diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4afd0d01cacd3..1831680aac2a1 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -89,7 +89,7 @@ def __init__( ipus, distributed_backend, accelerator, - training_type, + accelerator_strategy, gpus, gpu_ids, num_nodes, @@ -107,7 +107,7 @@ def __init__( self._distrib_type = None self._accelerator_type = None - self.training_type = training_type + self.accelerator_strategy = accelerator_strategy self.distributed_backend = distributed_backend or accelerator self.num_processes = num_processes @@ -148,7 +148,7 @@ def __init__( self.select_accelerator_type() - if self.training_type is not None: + if self.accelerator_strategy is not None: self._set_training_type_plugin() else: self.set_distributed_mode() @@ -280,47 +280,47 @@ def _handle_accelerator_and_distributed_backend( if distributed_backend is not None: rank_zero_deprecation( f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." - f" Use `Trainer(training_type={distributed_backend})` instead." + f" Use `Trainer(accelerator_strategy={distributed_backend})` instead." ) - if self.training_type is not None: + if self.accelerator_strategy is not None: raise MisconfigurationException( - f"You have passed `Trainer(training_type={self.training_type})` but have" + f"You have passed `Trainer(accelerator_strategy={self.accelerator_strategy})` but have" f" also passed `Trainer(distributed_backend={distributed_backend})`." - f"HINT: Use just `Trainer(training_type={self.training_type})` instead." + f"HINT: Use just `Trainer(accelerator_strategy={self.accelerator_strategy})` instead." ) if accelerator is not None and accelerator in list(DistributedType): rank_zero_deprecation( f"Passing {accelerator} `training_type` to the `accelerator` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={accelerator})` instead." + f" in v1.5 and will be removed in v1.6. Use `Trainer(accelerator_strategy={accelerator})` instead." ) - if self.training_type is not None: + if self.accelerator_strategy is not None: raise MisconfigurationException( - f"You have passed `Trainer(training_type={self.training_type})` but have" + f"You have passed `Trainer(accelerator_strategy={self.accelerator_strategy})` but have" f" also passed `Trainer(accelerator={accelerator})`." - f"HINT: Use just `Trainer(training_type={self.training_type})` instead." + f"HINT: Use just `Trainer(accelerator_strategy={self.accelerator_strategy})` instead." 
) def _set_training_type_plugin(self) -> None: - if isinstance(self.training_type, str) and self.training_type in TrainingTypePluginsRegistry: - self._training_type_plugin = TrainingTypePluginsRegistry.get(self.training_type) - if isinstance(self.training_type, str): - self.set_distributed_mode(self.training_type) - elif isinstance(self.training_type, TrainingTypePlugin): - self._training_type_plugin = self.training_type + if isinstance(self.accelerator_strategy, str) and self.accelerator_strategy in TrainingTypePluginsRegistry: + self._training_type_plugin = TrainingTypePluginsRegistry.get(self.accelerator_strategy) + if isinstance(self.accelerator_strategy, str): + self.set_distributed_mode(self.accelerator_strategy) + elif isinstance(self.accelerator_strategy, TrainingTypePlugin): + self._training_type_plugin = self.accelerator_strategy def handle_given_plugins(self) -> None: for plug in self.plugins: - if self.training_type is not None and self._is_plugin_training_type(plug): + if self.accelerator_strategy is not None and self._is_plugin_training_type(plug): raise MisconfigurationException( - f"You have passed `Trainer(training_type={self.training_type})`" + f"You have passed `Trainer(accelerator_strategy={self.accelerator_strategy})`" f" and you can only specify one training type plugin, but you have passed {plug} as a plugin." ) if self._is_plugin_training_type(plug): rank_zero_deprecation( f"Passing {plug} `training_type` to the `plugins` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(training_type={plug})` instead." + f" in v1.5 and will be removed in v1.6. Use `Trainer(accelerator_strategy={plug})` instead." ) training_type = self._training_type_plugin @@ -932,7 +932,7 @@ def update_device_type_if_ipu_plugin(self) -> None: self._device_type = DeviceType.IPU def update_device_type_if_training_type_plugin_passed(self) -> None: - if isinstance(self.training_type, TrainingTypePlugin) or any( + if isinstance(self.accelerator_strategy, TrainingTypePlugin) or any( isinstance(plug, TrainingTypePlugin) for plug in self.plugins ): if self._accelerator_type is not None: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ef3d8ca2ccf43..3c786956acc23 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -135,7 +135,7 @@ def __init__( flush_logs_every_n_steps: int = 100, log_every_n_steps: int = 50, accelerator: Optional[Union[str, Accelerator]] = None, - training_type: Optional[Union[str, TrainingTypePlugin]] = None, + accelerator_strategy: Optional[Union[str, TrainingTypePlugin]] = None, sync_batchnorm: bool = False, precision: int = 32, weights_summary: Optional[str] = "top", @@ -169,6 +169,9 @@ def __init__( accelerator: Previously known as distributed_backend (dp, ddp, ddp2, etc...). Can also take in an accelerator object for custom hardware. + accelerator_strategy: Supports different accelerator strategies with aliases + as well custom training type plugins. + accumulate_grad_batches: Accumulates grads every k batches or as set up in the dict. amp_backend: The mixed precision backend to use ("native" or "apex") @@ -308,8 +311,6 @@ def __init__( ipus: How many IPUs to train on. - training_type: Supports different training strategies with aliases as well custom training type plugins. - track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm. truncated_bptt_steps: Deprecated in v1.3 to be removed in 1.5. 
@@ -357,7 +358,7 @@ def __init__( ipus, distributed_backend, accelerator, - training_type, + accelerator_strategy, gpus, gpu_ids, num_nodes, diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 2a5337e9a8bef..91fc12b5b1fe6 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -634,17 +634,17 @@ def test_accelerator_ddp_for_cpu(tmpdir): def test_exception_when_training_type_used_with_distributed_backend(): with pytest.raises(MisconfigurationException, match="but have also passed"): - Trainer(distributed_backend="ddp_cpu", training_type="ddp_spawn") + Trainer(distributed_backend="ddp_cpu", accelerator_strategy="ddp_spawn") def test_exception_when_training_type_used_with_accelerator(): with pytest.raises(MisconfigurationException, match="but have also passed"): - Trainer(accelerator="ddp", training_type="ddp_spawn") + Trainer(accelerator="ddp", accelerator_strategy="ddp_spawn") def test_exception_when_training_type_used_with_plugins(): with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): - Trainer(plugins="ddp_find_unused_parameters_false", training_type="ddp_spawn") + Trainer(plugins="ddp_find_unused_parameters_false", accelerator_strategy="ddp_spawn") @pytest.mark.parametrize( @@ -657,13 +657,13 @@ def test_exception_when_training_type_used_with_plugins(): ], ) def test_training_type_choice_cpu_str(tmpdir, training_type, plugin): - trainer = Trainer(training_type=training_type, accelerator="cpu", devices=2) + trainer = Trainer(accelerator_strategy=training_type, accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) def test_training_type_choice_cpu_plugin(tmpdir, plugin): - trainer = Trainer(training_type=plugin(), accelerator="cpu", devices=2) + trainer = Trainer(accelerator_strategy=plugin(), accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @@ -683,14 +683,14 @@ def test_training_type_choice_cpu_plugin(tmpdir, plugin): ], ) def test_training_type_choice_gpu_str(tmpdir, training_type, plugin): - trainer = Trainer(training_type=training_type, accelerator="gpu", devices=2) + trainer = Trainer(accelerator_strategy=training_type, accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) def test_training_type_choice_gpu_plugin(tmpdir, plugin): - trainer = Trainer(training_type=plugin(), accelerator="gpu", devices=2) + trainer = Trainer(accelerator_strategy=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @@ -698,7 +698,7 @@ def test_training_type_choice_gpu_plugin(tmpdir, plugin): @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin): - trainer = Trainer(training_type=plugin(), gpus=2) + trainer = Trainer(accelerator_strategy=plugin(), gpus=2) assert isinstance(trainer.training_type_plugin, plugin) assert trainer._device_type == DeviceType.GPU assert isinstance(trainer.accelerator, GPUAccelerator) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 0d8e4bb6e242b..1354f03f04e7a 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -542,14 +542,14 @@ def 
test_set_devices_if_none_ipu(): @RunIf(ipu=True) def test_training_type_choice_ipu_plugin(tmpdir): - trainer = Trainer(training_type=IPUPlugin(), accelerator="ipu", devices=8) + trainer = Trainer(accelerator_strategy=IPUPlugin(), accelerator="ipu", devices=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) @RunIf(ipu=True) def test_device_type_when_training_plugin_ipu_passed(tmpdir): - trainer = Trainer(training_type=IPUPlugin(), ipus=8) + trainer = Trainer(accelerator_strategy=IPUPlugin(), ipus=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) - assert trainer._device_type == DeviceType.IPU + assert trainer._device_type == DeviceType.IPUgleDevice assert isinstance(trainer.accelerator, IPUAccelerator) diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py index 28cde72f2eb4a..21888913a2570 100644 --- a/tests/accelerators/test_tpu_backend.py +++ b/tests/accelerators/test_tpu_backend.py @@ -265,11 +265,11 @@ def test_ddp_cpu_not_supported_on_tpus(): @RunIf(tpu=True) @pytest.mark.parametrize("training_type", ["tpu_spawn", "tpu_spawn_debug"]) def test_training_type_choice_tpu_str(tmpdir, training_type): - trainer = Trainer(training_type=training_type, accelerator="tpu", devices=8) + trainer = Trainer(accelerator_strategy=training_type, accelerator="tpu", devices=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) @RunIf(tpu=True) def test_training_type_choice_tpu_plugin(tmpdir): - trainer = Trainer(training_type=TPUSpawnPlugin(), accelerator="tpu", devices=8) + trainer = Trainer(accelerator_strategy=TPUSpawnPlugin(), accelerator="tpu", devices=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 8046672211e75..66c81c7dfe7eb 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -479,7 +479,7 @@ def teardown(self, stage): @pl_multi_process_test def test_device_type_when_training_plugin_tpu_passed(tmpdir): - trainer = Trainer(training_type=TPUSpawnPlugin(), tpu_cores=8) + trainer = Trainer(accelerator_strategy=TPUSpawnPlugin(), tpu_cores=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) assert trainer._device_type == DeviceType.TPU assert isinstance(trainer.accelerator, TPUAccelerator) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 27825fa53f1bd..d4e5716024c93 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1919,80 +1919,80 @@ def on_epoch_start(self, trainer, *_): "trainer_kwargs,expected", [ ( - dict(training_type=None, gpus=None), + dict(accelerator_strategy=None, gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(training_type="dp", gpus=None), + dict(accelerator_strategy="dp", gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(training_type="ddp", gpus=None), + dict(accelerator_strategy="ddp", gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(training_type="ddp", num_processes=2, gpus=None), + dict(accelerator_strategy="ddp", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(training_type="ddp", num_nodes=2, gpus=None), + dict(accelerator_strategy="ddp", num_nodes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, 
num_processes=1), ), ( - dict(training_type="ddp_cpu", num_processes=2, gpus=None), + dict(accelerator_strategy="ddp_cpu", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(training_type="ddp2", gpus=None), + dict(accelerator_strategy="ddp2", gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(training_type=None, gpus=1), + dict(accelerator_strategy=None, gpus=1), dict(_distrib_type=None, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(training_type="dp", gpus=1), + dict(accelerator_strategy="dp", gpus=1), dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(training_type="ddp", gpus=1), + dict(accelerator_strategy="ddp", gpus=1), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(training_type="ddp_cpu", num_processes=2, gpus=1), + dict(accelerator_strategy="ddp_cpu", num_processes=2, gpus=1), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(training_type="ddp2", gpus=1), + dict(accelerator_strategy="ddp2", gpus=1), dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(training_type=None, gpus=2), + dict(accelerator_strategy=None, gpus=2), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( - dict(training_type="dp", gpus=2), + dict(accelerator_strategy="dp", gpus=2), dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( - dict(training_type="ddp", gpus=2), + dict(accelerator_strategy="ddp", gpus=2), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( - dict(training_type="ddp2", gpus=2), + dict(accelerator_strategy="ddp2", gpus=2), dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( - dict(training_type="ddp2", num_processes=2, gpus=None), + dict(accelerator_strategy="ddp2", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(training_type="dp", num_processes=2, gpus=None), + dict(accelerator_strategy="dp", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ], ) -def test_trainer_config_training_type(trainer_kwargs, expected, monkeypatch): +def test_trainer_config_accelerator_strategy(trainer_kwargs, expected, monkeypatch): if trainer_kwargs["gpus"] is not None: monkeypatch.setattr(torch.cuda, "is_available", lambda: True) monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"]) From a5c19786f866c539353320f93660e3256ee813fb Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 3 Aug 2021 19:44:01 +0530 Subject: [PATCH 20/29] Update to accelerator strategy --- .../connectors/accelerator_connector.py | 4 ++-- .../test_accelerator_connector.py | 22 +++++++++---------- tests/accelerators/test_ipu.py | 2 +- tests/accelerators/test_tpu_backend.py | 8 +++---- tests/deprecated_api/test_remove_1-6.py | 4 ++-- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py 
b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1831680aac2a1..64eacfdd2b8be 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -291,7 +291,7 @@ def _handle_accelerator_and_distributed_backend( if accelerator is not None and accelerator in list(DistributedType): rank_zero_deprecation( - f"Passing {accelerator} `training_type` to the `accelerator` flag in Trainer has been deprecated" + f"Passing {accelerator} `accelerator_strategy` to the `accelerator` flag in Trainer has been deprecated" f" in v1.5 and will be removed in v1.6. Use `Trainer(accelerator_strategy={accelerator})` instead." ) if self.accelerator_strategy is not None: @@ -319,7 +319,7 @@ def handle_given_plugins(self) -> None: ) if self._is_plugin_training_type(plug): rank_zero_deprecation( - f"Passing {plug} `training_type` to the `plugins` flag in Trainer has been deprecated" + f"Passing {plug} `accelerator_strategy` to the `plugins` flag in Trainer has been deprecated" f" in v1.5 and will be removed in v1.6. Use `Trainer(accelerator_strategy={plug})` instead." ) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 91fc12b5b1fe6..2971fc49d3e4d 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -632,23 +632,23 @@ def test_accelerator_ddp_for_cpu(tmpdir): assert isinstance(trainer.training_type_plugin, DDPPlugin) -def test_exception_when_training_type_used_with_distributed_backend(): +def test_exception_when_accelerator_strategy_used_with_distributed_backend(): with pytest.raises(MisconfigurationException, match="but have also passed"): Trainer(distributed_backend="ddp_cpu", accelerator_strategy="ddp_spawn") -def test_exception_when_training_type_used_with_accelerator(): +def test_exception_when_accelerator_strategy_used_with_accelerator(): with pytest.raises(MisconfigurationException, match="but have also passed"): Trainer(accelerator="ddp", accelerator_strategy="ddp_spawn") -def test_exception_when_training_type_used_with_plugins(): +def test_exception_when_accelerator_strategy_used_with_plugins(): with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): Trainer(plugins="ddp_find_unused_parameters_false", accelerator_strategy="ddp_spawn") @pytest.mark.parametrize( - ["training_type", "plugin"], + ["accelerator_strategy", "plugin"], [ ("ddp_spawn", DDPSpawnPlugin), ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), @@ -656,20 +656,20 @@ def test_exception_when_training_type_used_with_plugins(): ("ddp_find_unused_parameters_false", DDPPlugin), ], ) -def test_training_type_choice_cpu_str(tmpdir, training_type, plugin): - trainer = Trainer(accelerator_strategy=training_type, accelerator="cpu", devices=2) +def test_accelerator_strategy_choice_cpu_str(tmpdir, accelerator_strategy, plugin): + trainer = Trainer(accelerator_strategy=accelerator_strategy, accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) -def test_training_type_choice_cpu_plugin(tmpdir, plugin): +def test_accelerator_strategy_choice_cpu_plugin(tmpdir, plugin): trainer = Trainer(accelerator_strategy=plugin(), accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) @pytest.mark.parametrize( - ["training_type", 
"plugin"], + ["accelerator_strategy", "plugin"], [ ("ddp_spawn", DDPSpawnPlugin), ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), @@ -682,14 +682,14 @@ def test_training_type_choice_cpu_plugin(tmpdir, plugin): pytest.param("deepspeed", DeepSpeedPlugin, marks=RunIf(deepspeed=True)), ], ) -def test_training_type_choice_gpu_str(tmpdir, training_type, plugin): - trainer = Trainer(accelerator_strategy=training_type, accelerator="gpu", devices=2) +def test_accelerator_strategy_choice_gpu_str(tmpdir, accelerator_strategy, plugin): + trainer = Trainer(accelerator_strategy=accelerator_strategy, accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) -def test_training_type_choice_gpu_plugin(tmpdir, plugin): +def test_accelerator_strategy_choice_gpu_plugin(tmpdir, plugin): trainer = Trainer(accelerator_strategy=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 1354f03f04e7a..3eaa104842360 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -541,7 +541,7 @@ def test_set_devices_if_none_ipu(): @RunIf(ipu=True) -def test_training_type_choice_ipu_plugin(tmpdir): +def test_accelerator_strategy_choice_ipu_plugin(tmpdir): trainer = Trainer(accelerator_strategy=IPUPlugin(), accelerator="ipu", devices=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py index 21888913a2570..c26eac121ff6d 100644 --- a/tests/accelerators/test_tpu_backend.py +++ b/tests/accelerators/test_tpu_backend.py @@ -263,13 +263,13 @@ def test_ddp_cpu_not_supported_on_tpus(): @RunIf(tpu=True) -@pytest.mark.parametrize("training_type", ["tpu_spawn", "tpu_spawn_debug"]) -def test_training_type_choice_tpu_str(tmpdir, training_type): - trainer = Trainer(accelerator_strategy=training_type, accelerator="tpu", devices=8) +@pytest.mark.parametrize("accelerator_strategy", ["tpu_spawn", "tpu_spawn_debug"]) +def test_accelerator_strategy_choice_tpu_str(tmpdir, accelerator_strategy): + trainer = Trainer(accelerator_strategy=accelerator_strategy, accelerator="tpu", devices=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) @RunIf(tpu=True) -def test_training_type_choice_tpu_plugin(tmpdir): +def test_accelerator_strategy_choice_tpu_plugin(tmpdir): trainer = Trainer(accelerator_strategy=TPUSpawnPlugin(), accelerator="tpu", devices=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 5bbf185947053..c82f8d8519f55 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -320,11 +320,11 @@ def test_v1_6_0_deprecated_device_dtype_mixin_import(): from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin # noqa: F401 -def test_v1_6_0_passing_training_type_to_accelerator_trainer_flag(): +def test_v1_6_0_passing_accelerator_strategy_to_accelerator_trainer_flag(): with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): Trainer(accelerator="ddp_spawn") -def test_v1_6_0_passing_training_type_to_plugins_flag(): +def test_v1_6_0_passing_accelerator_strategy_to_plugins_flag(): with pytest.deprecated_call(match="has been deprecated in v1.5 and will be 
removed in v1.6."): Trainer(plugins="ddp_spawn") From c9cd9f483fdf24862abebf8788a4de308ba67334 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Oct 2021 17:07:53 +0000 Subject: [PATCH 21/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- tests/accelerators/test_tpu_backend.py | 2 +- tests/trainer/test_trainer.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 53a85bd4e2d43..fb297bf690451 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -114,7 +114,7 @@ def __init__( self.accelerator_strategy = accelerator_strategy self.distributed_backend = distributed_backend or accelerator - + self._init_deterministic(deterministic) self.num_processes = num_processes diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py index f67af8374cc8f..9d303b15ebc61 100644 --- a/tests/accelerators/test_tpu_backend.py +++ b/tests/accelerators/test_tpu_backend.py @@ -240,7 +240,7 @@ def test_accelerator_strategy_choice_tpu_str(tmpdir, accelerator_strategy): def test_accelerator_strategy_choice_tpu_plugin(tmpdir): trainer = Trainer(accelerator_strategy=TPUSpawnPlugin(), accelerator="tpu", devices=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) - + def test_auto_parameters_tying_tpus(tmpdir): diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 7dcaa46d83b6d..620ad36f90a80 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2095,8 +2095,8 @@ def training_step(self, batch, batch_idx): UserWarning, match=r".*Error detected in.* Traceback of forward call that caused the error.*" ): trainer.fit(model) - - + + @pytest.mark.parametrize( "trainer_kwargs,expected", [ From 0f00172505236684ab2db0640e07c300f1b8c2b9 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Mon, 11 Oct 2021 22:58:02 +0530 Subject: [PATCH 22/29] Fix tests --- tests/accelerators/test_ipu.py | 2 +- tests/accelerators/test_tpu_backend.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index f063912d64be6..7f0d447e253ac 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -541,5 +541,5 @@ def test_device_type_when_training_plugin_ipu_passed(tmpdir): trainer = Trainer(accelerator_strategy=IPUPlugin(), ipus=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) - assert trainer._device_type == DeviceType.IPUgleDevice + assert trainer._device_type == DeviceType.IPU assert isinstance(trainer.accelerator, IPUAccelerator) diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py index 9d303b15ebc61..dd8be51ce6fc5 100644 --- a/tests/accelerators/test_tpu_backend.py +++ b/tests/accelerators/test_tpu_backend.py @@ -242,6 +242,7 @@ def test_accelerator_strategy_choice_tpu_plugin(tmpdir): assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) +@RunIf(tpu=True) def test_auto_parameters_tying_tpus(tmpdir): model = WeightSharingModule() From 8b631578eff0692dd91fda3b253f68ae073017d0 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 12 Oct 2021 00:09:50 +0530 
Subject: [PATCH 23/29] Use strategy instead --- CHANGELOG.md | 2 +- .../connectors/accelerator_connector.py | 46 +++++++++---------- pytorch_lightning/trainer/trainer.py | 10 ++-- .../test_accelerator_connector.py | 34 +++++++------- tests/accelerators/test_ipu.py | 6 +-- tests/accelerators/test_tpu_backend.py | 10 ++-- tests/deprecated_api/test_remove_1-6.py | 4 +- tests/models/test_tpu.py | 2 +- tests/trainer/test_trainer.py | 38 +++++++-------- 9 files changed, 76 insertions(+), 76 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f67b1b52fdae..0fa7197434687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -178,7 +178,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added support for `torch.autograd.set_detect_anomaly` through `Trainer` constructor argument `detect_anomaly` ([#9848](https://github.com/PyTorchLightning/pytorch-lightning/pull/9848)) -- Added `accelerator_strategy` argument to Trainer ([#8597](https://github.com/PyTorchLightning/pytorch-lightning/pull/8597)) +- Added `strategy` argument to Trainer ([#8597](https://github.com/PyTorchLightning/pytorch-lightning/pull/8597)) ### Changed diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index fb297bf690451..13323767b641d 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -94,7 +94,7 @@ def __init__( ipus, distributed_backend, accelerator, - accelerator_strategy, + strategy, gpus, gpu_ids, num_nodes, @@ -112,7 +112,7 @@ def __init__( self._distrib_type = None self._accelerator_type = None - self.accelerator_strategy = accelerator_strategy + self.strategy = strategy self.distributed_backend = distributed_backend or accelerator self._init_deterministic(deterministic) @@ -156,7 +156,7 @@ def __init__( self.select_accelerator_type() - if self.accelerator_strategy is not None: + if self.strategy is not None: self._set_training_type_plugin() else: self.set_distributed_mode() @@ -295,47 +295,47 @@ def _handle_accelerator_and_distributed_backend( if distributed_backend is not None: rank_zero_deprecation( f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5." - f" Use `Trainer(accelerator_strategy={distributed_backend})` instead." + f" Use `Trainer(strategy={distributed_backend})` instead." ) - if self.accelerator_strategy is not None: + if self.strategy is not None: raise MisconfigurationException( - f"You have passed `Trainer(accelerator_strategy={self.accelerator_strategy})` but have" + f"You have passed `Trainer(strategy={self.strategy})` but have" f" also passed `Trainer(distributed_backend={distributed_backend})`." - f"HINT: Use just `Trainer(accelerator_strategy={self.accelerator_strategy})` instead." + f"HINT: Use just `Trainer(strategy={self.strategy})` instead." ) if accelerator is not None and accelerator in list(DistributedType): rank_zero_deprecation( - f"Passing {accelerator} `accelerator_strategy` to the `accelerator` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(accelerator_strategy={accelerator})` instead." + f"Passing {accelerator} `strategy` to the `accelerator` flag in Trainer has been deprecated" + f" in v1.5 and will be removed in v1.6. Use `Trainer(strategy={accelerator})` instead." 
) - if self.accelerator_strategy is not None: + if self.strategy is not None: raise MisconfigurationException( - f"You have passed `Trainer(accelerator_strategy={self.accelerator_strategy})` but have" + f"You have passed `Trainer(strategy={self.strategy})` but have" f" also passed `Trainer(accelerator={accelerator})`." - f"HINT: Use just `Trainer(accelerator_strategy={self.accelerator_strategy})` instead." + f"HINT: Use just `Trainer(strategy={self.strategy})` instead." ) def _set_training_type_plugin(self) -> None: - if isinstance(self.accelerator_strategy, str) and self.accelerator_strategy in TrainingTypePluginsRegistry: - self._training_type_plugin = TrainingTypePluginsRegistry.get(self.accelerator_strategy) - if isinstance(self.accelerator_strategy, str): - self.set_distributed_mode(self.accelerator_strategy) - elif isinstance(self.accelerator_strategy, TrainingTypePlugin): - self._training_type_plugin = self.accelerator_strategy + if isinstance(self.strategy, str) and self.strategy in TrainingTypePluginsRegistry: + self._training_type_plugin = TrainingTypePluginsRegistry.get(self.strategy) + if isinstance(self.strategy, str): + self.set_distributed_mode(self.strategy) + elif isinstance(self.strategy, TrainingTypePlugin): + self._training_type_plugin = self.strategy def handle_given_plugins(self) -> None: for plug in self.plugins: - if self.accelerator_strategy is not None and self._is_plugin_training_type(plug): + if self.strategy is not None and self._is_plugin_training_type(plug): raise MisconfigurationException( - f"You have passed `Trainer(accelerator_strategy={self.accelerator_strategy})`" + f"You have passed `Trainer(strategy={self.strategy})`" f" and you can only specify one training type plugin, but you have passed {plug} as a plugin." ) if self._is_plugin_training_type(plug): rank_zero_deprecation( - f"Passing {plug} `accelerator_strategy` to the `plugins` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(accelerator_strategy={plug})` instead." + f"Passing {plug} `strategy` to the `plugins` flag in Trainer has been deprecated" + f" in v1.5 and will be removed in v1.6. Use `Trainer(strategy={plug})` instead." ) training_type = self._training_type_plugin or None @@ -938,7 +938,7 @@ def update_device_type_if_ipu_plugin(self) -> None: self._device_type = DeviceType.IPU def update_device_type_if_training_type_plugin_passed(self) -> None: - if isinstance(self.accelerator_strategy, TrainingTypePlugin) or any( + if isinstance(self.strategy, TrainingTypePlugin) or any( isinstance(plug, TrainingTypePlugin) for plug in self.plugins ): if self._accelerator_type is not None: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a322e1400d7a7..b58dd2a7d8d26 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -154,7 +154,7 @@ def __init__( flush_logs_every_n_steps: Optional[int] = None, log_every_n_steps: int = 50, accelerator: Optional[Union[str, Accelerator]] = None, - accelerator_strategy: Optional[Union[str, TrainingTypePlugin]] = None, + strategy: Optional[Union[str, TrainingTypePlugin]] = None, sync_batchnorm: bool = False, precision: Union[int, str] = 32, weights_summary: Optional[str] = "top", @@ -188,9 +188,6 @@ def __init__( accelerator: Previously known as distributed_backend (dp, ddp, ddp2, etc...). Can also take in an accelerator object for custom hardware. 
- accelerator_strategy: Supports different accelerator strategies with aliases - as well custom training type plugins. - accumulate_grad_batches: Accumulates grads every k batches or as set up in the dict. amp_backend: The mixed precision backend to use ("native" or "apex"). @@ -350,6 +347,9 @@ def __init__( no checkpoint file at the path, start from scratch. If resuming from mid-epoch checkpoint, training will start from the beginning of the next epoch. + strategy: Supports different training strategies with aliases + as well custom training type plugins. + sync_batchnorm: Synchronize batch norm layers between process groups/whole world. terminate_on_nan: If set to True, will terminate training (by raising a `ValueError`) at the @@ -406,7 +406,7 @@ def __init__( ipus, distributed_backend, accelerator, - accelerator_strategy, + strategy, gpus, gpu_ids, num_nodes, diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 79ffbfab223a2..6a41e9032ea16 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -632,23 +632,23 @@ def test_accelerator_ddp_for_cpu(tmpdir): assert isinstance(trainer.training_type_plugin, DDPPlugin) -def test_exception_when_accelerator_strategy_used_with_distributed_backend(): +def test_exception_when_strategy_used_with_distributed_backend(): with pytest.raises(MisconfigurationException, match="but have also passed"): - Trainer(distributed_backend="ddp_cpu", accelerator_strategy="ddp_spawn") + Trainer(distributed_backend="ddp_cpu", strategy="ddp_spawn") -def test_exception_when_accelerator_strategy_used_with_accelerator(): +def test_exception_when_strategy_used_with_accelerator(): with pytest.raises(MisconfigurationException, match="but have also passed"): - Trainer(accelerator="ddp", accelerator_strategy="ddp_spawn") + Trainer(accelerator="ddp", strategy="ddp_spawn") -def test_exception_when_accelerator_strategy_used_with_plugins(): +def test_exception_when_strategy_used_with_plugins(): with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): - Trainer(plugins="ddp_find_unused_parameters_false", accelerator_strategy="ddp_spawn") + Trainer(plugins="ddp_find_unused_parameters_false", strategy="ddp_spawn") @pytest.mark.parametrize( - ["accelerator_strategy", "plugin"], + ["strategy", "plugin"], [ ("ddp_spawn", DDPSpawnPlugin), ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), @@ -656,20 +656,20 @@ def test_exception_when_accelerator_strategy_used_with_plugins(): ("ddp_find_unused_parameters_false", DDPPlugin), ], ) -def test_accelerator_strategy_choice_cpu_str(tmpdir, accelerator_strategy, plugin): - trainer = Trainer(accelerator_strategy=accelerator_strategy, accelerator="cpu", devices=2) +def test_strategy_choice_cpu_str(tmpdir, strategy, plugin): + trainer = Trainer(strategy=strategy, accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) -def test_accelerator_strategy_choice_cpu_plugin(tmpdir, plugin): - trainer = Trainer(accelerator_strategy=plugin(), accelerator="cpu", devices=2) +def test_strategy_choice_cpu_plugin(tmpdir, plugin): + trainer = Trainer(strategy=plugin(), accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) @pytest.mark.parametrize( - ["accelerator_strategy", "plugin"], + ["strategy", "plugin"], [ ("ddp_spawn", 
DDPSpawnPlugin), ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), @@ -682,15 +682,15 @@ def test_accelerator_strategy_choice_cpu_plugin(tmpdir, plugin): pytest.param("deepspeed", DeepSpeedPlugin, marks=RunIf(deepspeed=True)), ], ) -def test_accelerator_strategy_choice_gpu_str(tmpdir, accelerator_strategy, plugin): - trainer = Trainer(accelerator_strategy=accelerator_strategy, accelerator="gpu", devices=2) +def test_strategy_choice_gpu_str(tmpdir, strategy, plugin): + trainer = Trainer(strategy=strategy, accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) -def test_accelerator_strategy_choice_gpu_plugin(tmpdir, plugin): - trainer = Trainer(accelerator_strategy=plugin(), accelerator="gpu", devices=2) +def test_strategy_choice_gpu_plugin(tmpdir, plugin): + trainer = Trainer(strategy=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @@ -698,7 +698,7 @@ def test_accelerator_strategy_choice_gpu_plugin(tmpdir, plugin): @pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin): - trainer = Trainer(accelerator_strategy=plugin(), gpus=2) + trainer = Trainer(strategy=plugin(), gpus=2) assert isinstance(trainer.training_type_plugin, plugin) assert trainer._device_type == DeviceType.GPU assert isinstance(trainer.accelerator, GPUAccelerator) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 7f0d447e253ac..7e0eae5a13f3d 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -531,15 +531,15 @@ def test_set_devices_if_none_ipu(): @RunIf(ipu=True) -def test_accelerator_strategy_choice_ipu_plugin(tmpdir): - trainer = Trainer(accelerator_strategy=IPUPlugin(), accelerator="ipu", devices=8) +def test_strategy_choice_ipu_plugin(tmpdir): + trainer = Trainer(strategy=IPUPlugin(), accelerator="ipu", devices=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) @RunIf(ipu=True) def test_device_type_when_training_plugin_ipu_passed(tmpdir): - trainer = Trainer(accelerator_strategy=IPUPlugin(), ipus=8) + trainer = Trainer(strategy=IPUPlugin(), ipus=8) assert isinstance(trainer.training_type_plugin, IPUPlugin) assert trainer._device_type == DeviceType.IPU assert isinstance(trainer.accelerator, IPUAccelerator) diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py index dd8be51ce6fc5..97b50ee30899f 100644 --- a/tests/accelerators/test_tpu_backend.py +++ b/tests/accelerators/test_tpu_backend.py @@ -230,15 +230,15 @@ def test_ddp_cpu_not_supported_on_tpus(): @RunIf(tpu=True) -@pytest.mark.parametrize("accelerator_strategy", ["tpu_spawn", "tpu_spawn_debug"]) -def test_accelerator_strategy_choice_tpu_str(tmpdir, accelerator_strategy): - trainer = Trainer(accelerator_strategy=accelerator_strategy, accelerator="tpu", devices=8) +@pytest.mark.parametrize("strategy", ["tpu_spawn", "tpu_spawn_debug"]) +def test_strategy_choice_tpu_str(tmpdir, strategy): + trainer = Trainer(strategy=strategy, accelerator="tpu", devices=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) @RunIf(tpu=True) -def test_accelerator_strategy_choice_tpu_plugin(tmpdir): - trainer = Trainer(accelerator_strategy=TPUSpawnPlugin(), accelerator="tpu", devices=8) +def test_strategy_choice_tpu_plugin(tmpdir): + trainer = Trainer(strategy=TPUSpawnPlugin(), accelerator="tpu", devices=8) assert 
isinstance(trainer.training_type_plugin, TPUSpawnPlugin) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index c7e8c65884cfd..4445c27c4aed4 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -330,12 +330,12 @@ def test_v1_6_0_deprecated_device_dtype_mixin_import(): from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin # noqa: F401 -def test_v1_6_0_passing_accelerator_strategy_to_accelerator_trainer_flag(): +def test_v1_6_0_passing_strategy_to_accelerator_trainer_flag(): with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): Trainer(accelerator="ddp_spawn") -def test_v1_6_0_passing_accelerator_strategy_to_plugins_flag(): +def test_v1_6_0_passing_strategy_to_plugins_flag(): with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): Trainer(plugins="ddp_spawn") diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index fad6f62ef51a9..1538169f98d9e 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -479,7 +479,7 @@ def teardown(self, stage): @pl_multi_process_test def test_device_type_when_training_plugin_tpu_passed(tmpdir): - trainer = Trainer(accelerator_strategy=TPUSpawnPlugin(), tpu_cores=8) + trainer = Trainer(strategy=TPUSpawnPlugin(), tpu_cores=8) assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) assert trainer._device_type == DeviceType.TPU assert isinstance(trainer.accelerator, TPUAccelerator) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 620ad36f90a80..feb37c59ebed6 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2101,80 +2101,80 @@ def training_step(self, batch, batch_idx): "trainer_kwargs,expected", [ ( - dict(accelerator_strategy=None, gpus=None), + dict(strategy=None, gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(accelerator_strategy="dp", gpus=None), + dict(strategy="dp", gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(accelerator_strategy="ddp", gpus=None), + dict(strategy="ddp", gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(accelerator_strategy="ddp", num_processes=2, gpus=None), + dict(strategy="ddp", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(accelerator_strategy="ddp", num_nodes=2, gpus=None), + dict(strategy="ddp", num_nodes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(accelerator_strategy="ddp_cpu", num_processes=2, gpus=None), + dict(strategy="ddp_cpu", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(accelerator_strategy="ddp2", gpus=None), + dict(strategy="ddp2", gpus=None), dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( - dict(accelerator_strategy=None, gpus=1), + dict(strategy=None, gpus=1), dict(_distrib_type=None, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(accelerator_strategy="dp", gpus=1), + dict(strategy="dp", gpus=1), dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - 
dict(accelerator_strategy="ddp", gpus=1), + dict(strategy="ddp", gpus=1), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(accelerator_strategy="ddp_cpu", num_processes=2, gpus=1), + dict(strategy="ddp_cpu", num_processes=2, gpus=1), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(accelerator_strategy="ddp2", gpus=1), + dict(strategy="ddp2", gpus=1), dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( - dict(accelerator_strategy=None, gpus=2), + dict(strategy=None, gpus=2), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( - dict(accelerator_strategy="dp", gpus=2), + dict(strategy="dp", gpus=2), dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( - dict(accelerator_strategy="ddp", gpus=2), + dict(strategy="ddp", gpus=2), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( - dict(accelerator_strategy="ddp2", gpus=2), + dict(strategy="ddp2", gpus=2), dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( - dict(accelerator_strategy="ddp2", num_processes=2, gpus=None), + dict(strategy="ddp2", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( - dict(accelerator_strategy="dp", num_processes=2, gpus=None), + dict(strategy="dp", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ], ) -def test_trainer_config_accelerator_strategy(trainer_kwargs, expected, monkeypatch): +def test_trainer_config_strategy(trainer_kwargs, expected, monkeypatch): if trainer_kwargs["gpus"] is not None: monkeypatch.setattr(torch.cuda, "is_available", lambda: True) monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"]) From 353434d72396d03d725daf9d9505668c8ece1e33 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Tue, 12 Oct 2021 01:03:43 +0530 Subject: [PATCH 24/29] Update ipu test --- tests/accelerators/test_ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 7e0eae5a13f3d..c8c557eab4ebf 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -120,7 +120,7 @@ def test_warning_if_ipus_not_used(tmpdir): @RunIf(ipu=True) def test_no_warning_plugin(tmpdir): with pytest.warns(None) as record: - Trainer(default_root_dir=tmpdir, plugins=IPUPlugin(training_opts=poptorch.Options())) + Trainer(default_root_dir=tmpdir, strategy=IPUPlugin(training_opts=poptorch.Options())) assert len(record) == 0 From aee59fd4ed60c072842e86e04ef9775fcb7d5dbd Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Wed, 13 Oct 2021 14:31:51 +0530 Subject: [PATCH 25/29] Update typing --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 13323767b641d..c52a484d93129 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -94,7 +94,7 @@ def __init__( ipus, distributed_backend, 
accelerator, - strategy, + strategy: Optional[Union[str, TrainingTypePlugin]], gpus, gpu_ids, num_nodes, From 9ba9c13c505f29dba1a2fe44bb397ca77868c217 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Wed, 13 Oct 2021 16:15:51 +0530 Subject: [PATCH 26/29] Update tests --- .../trainer/connectors/accelerator_connector.py | 4 ++-- tests/deprecated_api/test_remove_1-6.py | 10 ---------- tests/deprecated_api/test_remove_1-7.py | 10 ++++++++++ tests/trainer/test_trainer.py | 4 ++++ 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index c52a484d93129..c10ace93e58b4 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -307,7 +307,7 @@ def _handle_accelerator_and_distributed_backend( if accelerator is not None and accelerator in list(DistributedType): rank_zero_deprecation( f"Passing {accelerator} `strategy` to the `accelerator` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(strategy={accelerator})` instead." + f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator})` instead." ) if self.strategy is not None: raise MisconfigurationException( @@ -335,7 +335,7 @@ def handle_given_plugins(self) -> None: if self._is_plugin_training_type(plug): rank_zero_deprecation( f"Passing {plug} `strategy` to the `plugins` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.6. Use `Trainer(strategy={plug})` instead." + f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={plug})` instead." ) training_type = self._training_type_plugin or None diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 383634c954a00..a05f5935a33f5 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -330,16 +330,6 @@ def test_v1_6_0_deprecated_device_dtype_mixin_import(): from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin # noqa: F401 -def test_v1_6_0_passing_strategy_to_accelerator_trainer_flag(): - with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): - Trainer(accelerator="ddp_spawn") - - -def test_v1_6_0_passing_strategy_to_plugins_flag(): - with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.6."): - Trainer(plugins="ddp_spawn") - - def test_v1_6_0_deprecated_accelerator_collective(): from pytorch_lightning.plugins.precision import PrecisionPlugin from pytorch_lightning.plugins.training_type import SingleDevicePlugin diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 995fa0f2c61b1..639ada6008abe 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -329,3 +329,13 @@ def test_v1_7_0_deprecate_parameter_validation(): match="Using `pytorch_lightning.core.decorators.parameter_validation` is deprecated in v1.5" ): from pytorch_lightning.core.decorators import parameter_validation # noqa: F401 + + +def test_v1_7_0_passing_strategy_to_accelerator_trainer_flag(): + with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.7."): + Trainer(accelerator="ddp_spawn") + + +def test_v1_7_0_passing_strategy_to_plugins_flag(): + with pytest.deprecated_call(match="has been deprecated in v1.5 and will be 
removed in v1.7."): + Trainer(plugins="ddp_spawn") diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index feb37c59ebed6..4ee7295f17d97 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2172,6 +2172,10 @@ def training_step(self, batch, batch_idx): dict(strategy="dp", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), + ( + dict(strategy="ddp_spawn", num_processes=2, gpus=None), + dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + ), ], ) def test_trainer_config_strategy(trainer_kwargs, expected, monkeypatch): From 52a04ab3e822afca878af2760a9988f5712df836 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Wed, 13 Oct 2021 16:34:43 +0530 Subject: [PATCH 27/29] Update pytorch_lightning/trainer/connectors/accelerator_connector.py Co-authored-by: Rohit Gupta --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index c10ace93e58b4..13c3d59279d18 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -112,7 +112,7 @@ def __init__( self._distrib_type = None self._accelerator_type = None - self.strategy = strategy + self.strategy = strategy.lower() if isinstance(strategy, str) else strategy self.distributed_backend = distributed_backend or accelerator self._init_deterministic(deterministic) From 8df86eca8e567a3ae54ed6ef881d862047ebfc35 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Wed, 13 Oct 2021 16:42:05 +0530 Subject: [PATCH 28/29] Update tests --- tests/trainer/test_trainer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 4ee7295f17d97..837ed1a24d25d 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2176,6 +2176,10 @@ def training_step(self, batch, batch_idx): dict(strategy="ddp_spawn", num_processes=2, gpus=None), dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), + ( + dict(strategy="ddp_spawn", num_processes=1, gpus=None), + dict(_distrib_type=None, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + ), ], ) def test_trainer_config_strategy(trainer_kwargs, expected, monkeypatch): From dfecb4fcb612511231f18befd93d0c23c856ef4c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Oct 2021 12:01:43 +0000 Subject: [PATCH 29/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/deprecated_api/test_remove_1-7.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 8393897a911bd..08449f6fbbcff 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -351,8 +351,8 @@ def test_v1_7_0_passing_strategy_to_accelerator_trainer_flag(): def test_v1_7_0_passing_strategy_to_plugins_flag(): with pytest.deprecated_call(match="has been deprecated in v1.5 and will be removed in v1.7."): Trainer(plugins="ddp_spawn") - - + + def 
test_v1_7_0_weights_summary_trainer(tmpdir): with pytest.deprecated_call(match=r"Setting `Trainer\(weights_summary=full\)` is deprecated in v1.5"): t = Trainer(weights_summary="full")
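For readers following the series, a minimal usage sketch of the `strategy` flag as it stands after the final patch. This is an illustration drawn from the tests above, not part of the patches themselves; the exact aliases and plugin classes available depend on the installed pytorch-lightning version (these commits target v1.5).

    # Sketch: selecting the training strategy separately from the hardware.
    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import DDPPlugin

    # By string alias: `strategy` picks how training is distributed,
    # `accelerator`/`devices` pick what hardware it runs on.
    trainer = Trainer(strategy="ddp_spawn", accelerator="cpu", devices=2)

    # By plugin instance: a configured training type plugin is accepted directly.
    # (Assumes a machine with at least 2 GPUs, as in the @RunIf(min_gpus=2) tests.)
    trainer = Trainer(strategy=DDPPlugin(find_unused_parameters=False), accelerator="gpu", devices=2)

    # The older spellings keep working during the transition but warn:
    #   Trainer(accelerator="ddp_spawn")  # deprecated in v1.5, removed in v1.7
    #   Trainer(plugins="ddp_spawn")      # deprecated in v1.5, removed in v1.7
    # and combining either of them (or distributed_backend) with `strategy`
    # raises a MisconfigurationException, per the tests above.

The design intent visible in the diffs is to split "which hardware" (accelerator, devices) from "how training is parallelized" (strategy), with string aliases resolved through the training type plugin registry and plugin instances passed through unchanged.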