diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 3a94ef67589108..729d258cfcd63b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,14 +1,14 @@ --- name: Bug report -about: Create a report to help us improve +about: Create a bug report to help us improve title: '' -labels: bug / fix, help wanted +labels: bug assignees: '' --- ## 🐛 Bug - + ### To Reproduce diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index 75919587387a9f..f5ff43d6f093a2 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -1,8 +1,8 @@ --- name: Typos and doc fixes -about: Typos and doc fixes +about: Tell us about how we can improve our documentation title: '' -labels: documentation +labels: docs assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index ab95a714e6dd71..11da695decfe01 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request -about: Suggest an idea for this project +about: Propose a feature for this project title: '' -labels: enhancement +labels: feature assignees: '' --- @@ -12,7 +12,7 @@ assignees: '' ### Motivation - + ### Pitch diff --git a/.github/ISSUE_TEMPLATE/code_improvement.md b/.github/ISSUE_TEMPLATE/refactor.md similarity index 83% rename from .github/ISSUE_TEMPLATE/code_improvement.md rename to .github/ISSUE_TEMPLATE/refactor.md index 7608b604e611b7..5e07b0aae2df13 100644 --- a/.github/ISSUE_TEMPLATE/code_improvement.md +++ b/.github/ISSUE_TEMPLATE/refactor.md @@ -1,18 +1,18 @@ --- -name: Code improvement -about: Suggest a code improvement, i.e. refactoring, deprecation, etc. +name: Refactor +about: Suggest a code refactor or deprecation title: '' -labels: refactors / code health +labels: refactor assignees: '' --- -## Proposed refactoring or deprecation +## Proposed refactor - + ### Motivation - + ### Pitch diff --git a/.github/lightning-probot.yml b/.github/lightning-probot.yml new file mode 100644 index 00000000000000..bd6a330a448a4c --- /dev/null +++ b/.github/lightning-probot.yml @@ -0,0 +1 @@ +tracking_issue: 10530 diff --git a/.github/mergify.yml b/.github/mergify.yml index a2b1e8aede6de9..53ec106873dfe0 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -45,7 +45,7 @@ pull_request_rules: - "#changes-requested-reviews-by=0" # no requested changes actions: label: - add: [ "0:] Ready-To-Go" ] + add: [ "ready" ] - name: Not ready yet conditions: @@ -54,13 +54,13 @@ pull_request_rules: - "#changes-requested-reviews-by>=1" # no requested changes actions: label: - remove: [ "0:] Ready-To-Go" ] + remove: [ "ready" ] - name: add core reviewer conditions: - -conflict # skip if conflict - -draft # filter-out GH draft PRs - - label="0:] Ready-To-Go" + - label="ready" - "#approved-reviews-by<3" # number of review approvals - "#review-requested<3" # number of requested reviews actions: diff --git a/.github/stale.yml b/.github/stale.yml index 84049394d3aab5..1ac5e7448c9ff9 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -8,8 +8,8 @@ issues: daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - - Important - - Priority + - p0 + - p1 # Comment to post when marking an issue as stale. 
Set to `false` to disable markComment: > This issue has been automatically marked as stale because it hasn't had any recent activity. diff --git a/.github/workflows/probot-auto-cc.yml b/.github/workflows/probot-auto-cc.yml new file mode 100644 index 00000000000000..0595c4eee65f7f --- /dev/null +++ b/.github/workflows/probot-auto-cc.yml @@ -0,0 +1,18 @@ +name: Probot + +on: + issues: + types: + - labeled + pull_request: + types: + - labeled + +jobs: + auto-cc: + if: ${{ github.repository_owner == 'PyTorchLightning' }} + runs-on: ubuntu-latest + steps: + - uses: carmocca/probot@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 24e68bb24f16e6..712141aaffa61e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,7 +44,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) -- +- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505)) - @@ -57,6 +57,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507)) + + - Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/PyTorchLightning/pytorch-lightning/issues/10339)) @@ -123,6 +126,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + +- Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/PyTorchLightning/pytorch-lightning/pull/10450)) + + +- Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525)) + + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) @@ -134,6 +145,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed sampler replacement logic with `overfit_batches` ([#10486](https://github.com/PyTorchLightning/pytorch-lightning/issues/10486)) +- Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/PyTorchLightning/metrics/pull/10493)) + + - Fixed an issue that prevented the Trainer to shutdown workers when execution is interrupted due to failure([#10463](https://github.com/PyTorchLightning/pytorch-lightning/issues/10463)) diff --git a/docs/source/advanced/training_tricks.rst b/docs/source/advanced/training_tricks.rst index 28f81d98dcbd31..a389b0db69a2e1 100644 --- a/docs/source/advanced/training_tricks.rst +++ b/docs/source/advanced/training_tricks.rst @@ -64,10 +64,7 @@ read `this post None: def apply_fn(module: Union["DeviceDtypeModuleMixin", Module]) -> None: - if not isinstance(module, DeviceDtypeModuleMixin): + # TODO: Find why `isinstance(module, DeviceDtypeModuleMixin)` doesn't + # work when using `init_meta_context`. + if not isinstance(module, (DeviceDtypeModuleMixin, pl.LightningModule)): return if device is not None: module._device = device diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index d36e874cbae7b9..2a2ed9586b420e 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -41,7 +41,7 @@ ) from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin -from pytorch_lightning.utilities import DeviceType, DistributedType, move_data_to_device +from pytorch_lightning.utilities import _StrategyType, DeviceType, move_data_to_device from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors from pytorch_lightning.utilities.data import has_iterable_dataset from pytorch_lightning.utilities.device_parser import _parse_devices @@ -477,14 +477,14 @@ def _supported_device_types() -> Sequence[DeviceType]: ) @staticmethod - def _supported_strategy_types() -> Sequence[DistributedType]: + def _supported_strategy_types() -> Sequence[_StrategyType]: return ( - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DEEPSPEED, - DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DEEPSPEED, + _StrategyType.DDP_SHARDED, + _StrategyType.DDP_SHARDED_SPAWN, ) @staticmethod diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 84e9b55b9ee085..0285859a6714a4 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -55,7 +55,7 @@ ReduceOp, sync_ddp_if_available, ) -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -79,7 +79,7 @@ class DDPPlugin(ParallelPlugin): devices (e.g. GPU) per node. It is very similar to how :mod:`torch.distributed.launch` launches processes. 
""" - distributed_backend = DistributedType.DDP + distributed_backend = _StrategyType.DDP def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/ddp2.py b/pytorch_lightning/plugins/training_type/ddp2.py index ef623a794da42b..a142d518a0f2f0 100644 --- a/pytorch_lightning/plugins/training_type/ddp2.py +++ b/pytorch_lightning/plugins/training_type/ddp2.py @@ -15,14 +15,14 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.types import _METRIC_COLLECTION class DDP2Plugin(DDPPlugin): """DDP2 behaves like DP in one node, but synchronization across nodes behaves like in DDP.""" - distributed_backend = DistributedType.DDP2 + distributed_backend = _StrategyType.DDP2 @property def global_rank(self) -> int: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 677e031cd04af4..a77027adb6dcf6 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -43,7 +43,7 @@ ReduceOp, sync_ddp_if_available, ) -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -58,7 +58,7 @@ class DDPSpawnPlugin(ParallelPlugin): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" - distributed_backend = DistributedType.DDP_SPAWN + distributed_backend = _StrategyType.DDP_SPAWN def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 2464a8ba4eecad..94235f361d9458 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -36,7 +36,7 @@ from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import log, rank_zero_info, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE from pytorch_lightning.utilities.model_helpers import is_overridden @@ -82,7 +82,7 @@ def _move_float_tensors_to_half(self, batch: Any): class DeepSpeedPlugin(DDPPlugin): - distributed_backend = DistributedType.DEEPSPEED + distributed_backend = _StrategyType.DEEPSPEED DEEPSPEED_ENV_VAR = "PL_DEEPSPEED_CONFIG_PATH" def __init__( diff --git a/pytorch_lightning/plugins/training_type/dp.py b/pytorch_lightning/plugins/training_type/dp.py index a0f53791bc373d..83328e8c472717 100644 --- a/pytorch_lightning/plugins/training_type/dp.py +++ b/pytorch_lightning/plugins/training_type/dp.py @@ -20,7 +20,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -from 
pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.types import _METRIC_COLLECTION @@ -29,7 +29,7 @@ class DataParallelPlugin(ParallelPlugin): """Implements data-parallel training in a single process, i.e., the model gets replicated to each device and each gets a split of the data.""" - distributed_backend = DistributedType.DP + distributed_backend = _StrategyType.DP def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py index 704afa1a91aaac..c9601a905df1c0 100644 --- a/pytorch_lightning/plugins/training_type/fully_sharded.py +++ b/pytorch_lightning/plugins/training_type/fully_sharded.py @@ -20,7 +20,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_FULLY_SHARDED_AVAILABLE: @@ -30,7 +30,7 @@ class DDPFullyShardedPlugin(DDPPlugin): - distributed_backend = DistributedType.DDP_FULLY_SHARDED + distributed_backend = _StrategyType.DDP_FULLY_SHARDED def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/horovod.py b/pytorch_lightning/plugins/training_type/horovod.py index 30360e1ab458f3..51558189a3d35f 100644 --- a/pytorch_lightning/plugins/training_type/horovod.py +++ b/pytorch_lightning/plugins/training_type/horovod.py @@ -26,7 +26,7 @@ from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.distributed import group as dist_group from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType if _HOROVOD_AVAILABLE: import horovod.torch as hvd @@ -35,7 +35,7 @@ class HorovodPlugin(ParallelPlugin): """Plugin for Horovod distributed training integration.""" - distributed_backend = DistributedType.HOROVOD + distributed_backend = _StrategyType.HOROVOD def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/sharded.py b/pytorch_lightning/plugins/training_type/sharded.py index 5955f3a46f38e6..d7563437bd16b1 100644 --- a/pytorch_lightning/plugins/training_type/sharded.py +++ b/pytorch_lightning/plugins/training_type/sharded.py @@ -23,7 +23,7 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -36,7 +36,7 @@ class DDPShardedPlugin(DDPPlugin): """Optimizer and gradient sharded training provided by FairScale.""" - distributed_backend = DistributedType.DDP_SHARDED + distributed_backend = _StrategyType.DDP_SHARDED _REDUCE_BUFFER_SIZE_DEFAULT: int = 2 ** 23 # 8M def __init__(self, *args, **kwargs): diff --git 
a/pytorch_lightning/plugins/training_type/sharded_spawn.py b/pytorch_lightning/plugins/training_type/sharded_spawn.py index e0ae5c7bba1879..12e627edbe5cbd 100644 --- a/pytorch_lightning/plugins/training_type/sharded_spawn.py +++ b/pytorch_lightning/plugins/training_type/sharded_spawn.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -38,7 +38,7 @@ class DDPSpawnShardedPlugin(DDPSpawnPlugin): """Optimizer sharded training provided by FairScale.""" - distributed_backend = DistributedType.DDP_SHARDED_SPAWN + distributed_backend = _StrategyType.DDP_SHARDED_SPAWN def configure_ddp(self) -> None: trainer = self.lightning_module.trainer diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 43eb65ce21a22b..47deeed2dca1d5 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -61,10 +61,10 @@ TorchElasticEnvironment, ) from pytorch_lightning.utilities import ( + _StrategyType, AMPType, device_parser, DeviceType, - DistributedType, rank_zero_deprecation, rank_zero_info, rank_zero_warn, @@ -278,7 +278,7 @@ def _set_devices_if_none(self) -> None: self.devices = self.num_processes def _handle_accelerator_and_strategy(self) -> None: - deprecated_types = [t for t in DistributedType if t not in (DistributedType.TPU_SPAWN, DistributedType.DDP_CPU)] + deprecated_types = [t for t in _StrategyType if t not in (_StrategyType.TPU_SPAWN, _StrategyType.DDP_CPU)] if self.distributed_backend is not None and self.distributed_backend in deprecated_types: rank_zero_deprecation( f"Passing `Trainer(accelerator={self.distributed_backend!r})` has been deprecated" @@ -290,12 +290,12 @@ def _handle_accelerator_and_strategy(self) -> None: f" also passed `Trainer(accelerator={self.distributed_backend!r})`." f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead." ) - if self.strategy == DistributedType.TPU_SPAWN: + if self.strategy == _StrategyType.TPU_SPAWN: raise MisconfigurationException( "`Trainer(strategy='tpu_spawn')` is not a valid strategy," " you can use `Trainer(strategy='ddp_spawn', accelerator='tpu')` instead." ) - if self.strategy == DistributedType.DDP_CPU: + if self.strategy == _StrategyType.DDP_CPU: raise MisconfigurationException( "`Trainer(strategy='ddp_cpu')` is not a valid strategy," " you can use `Trainer(strategy='ddp'|'ddp_spawn', accelerator='cpu')` instead." 
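For illustration, a short usage sketch of the replacement spelling that the two `MisconfigurationException` messages above point to. The specific argument values (`num_processes=2`, `tpu_cores=8`) are illustrative assumptions, not taken from this diff.

```python
from pytorch_lightning import Trainer

# Spawn-based DDP on CPU, instead of the rejected `strategy="ddp_cpu"` spelling.
trainer = Trainer(strategy="ddp_spawn", accelerator="cpu", num_processes=2)

# Likewise for TPUs, instead of `strategy="tpu_spawn"`:
# trainer = Trainer(strategy="ddp_spawn", accelerator="tpu", tpu_cores=8)
```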
@@ -505,31 +505,31 @@ def _map_devices_to_accelerator(self, accelerator: str) -> bool: @property def use_dp(self) -> bool: - return self._distrib_type == DistributedType.DP + return self._distrib_type == _StrategyType.DP @property def use_ddp(self) -> bool: return self._distrib_type in ( - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, - DistributedType.DDP_FULLY_SHARDED, - DistributedType.DEEPSPEED, - DistributedType.TPU_SPAWN, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP_SHARDED, + _StrategyType.DDP_SHARDED_SPAWN, + _StrategyType.DDP_FULLY_SHARDED, + _StrategyType.DEEPSPEED, + _StrategyType.TPU_SPAWN, ) @property def use_ddp2(self) -> bool: - return self._distrib_type == DistributedType.DDP2 + return self._distrib_type == _StrategyType.DDP2 @property def use_horovod(self) -> bool: - return self._distrib_type == DistributedType.HOROVOD + return self._distrib_type == _StrategyType.HOROVOD @property def use_deepspeed(self) -> bool: - return self._distrib_type == DistributedType.DEEPSPEED + return self._distrib_type == _StrategyType.DEEPSPEED @property def _is_sharded_training_type(self) -> bool: @@ -590,7 +590,7 @@ def root_gpu(self) -> Optional[int]: @staticmethod def _is_plugin_training_type(plugin: Union[str, TrainingTypePlugin]) -> bool: - if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(DistributedType)): + if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(_StrategyType)): return True return isinstance(plugin, TrainingTypePlugin) @@ -635,7 +635,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: ) return TPUBf16PrecisionPlugin() - if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): + if self._distrib_type == _StrategyType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) if self.precision == 32: @@ -706,15 +706,15 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.is_using_torchelastic() use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.is_using_kubeflow() - use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN + use_ddp_spawn = self._distrib_type == _StrategyType.DDP_SPAWN use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu - use_tpu_spawn = self.use_tpu and self._distrib_type == DistributedType.TPU_SPAWN + use_tpu_spawn = self.use_tpu and self._distrib_type == _StrategyType.TPU_SPAWN use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.is_using_torchelastic() use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.is_using_kubeflow() use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks - use_ddp_sharded = self._distrib_type == DistributedType.DDP_SHARDED - use_ddp_sharded_spawn = self._distrib_type == DistributedType.DDP_SHARDED_SPAWN - use_ddp_fully_sharded = self._distrib_type == DistributedType.DDP_FULLY_SHARDED + use_ddp_sharded = self._distrib_type == _StrategyType.DDP_SHARDED + use_ddp_sharded_spawn = self._distrib_type == _StrategyType.DDP_SHARDED_SPAWN + use_ddp_fully_sharded = self._distrib_type == _StrategyType.DDP_FULLY_SHARDED if use_tpu_spawn: ddp_plugin_cls = TPUSpawnPlugin @@ -839,27 +839,27 @@ def set_distributed_mode(self, strategy: Optional[str] = None): if 
self.has_horovodrun(): self._set_horovod_backend() elif self.num_gpus == 0 and self.num_nodes > 1: - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP elif self.num_gpus == 0 and self.num_processes > 1: - self.distributed_backend = DistributedType.DDP_SPAWN + self.distributed_backend = _StrategyType.DDP_SPAWN elif self.num_gpus > 1 and not _use_cpu: rank_zero_warn( "You requested multiple GPUs but did not specify a backend, e.g." ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.' ) - self.distributed_backend = DistributedType.DDP_SPAWN + self.distributed_backend = _StrategyType.DDP_SPAWN # special case with DDP on CPUs - if self.distributed_backend == DistributedType.DDP_CPU: + if self.distributed_backend == _StrategyType.DDP_CPU: if _TPU_AVAILABLE: raise MisconfigurationException( "`accelerator='ddp_cpu'` is not supported on TPU machines. " "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) if self.num_processes == 1 and self.num_nodes > 1: - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP else: - self._distrib_type = DistributedType.DDP_SPAWN + self._distrib_type = _StrategyType.DDP_SPAWN if self.num_gpus > 0: rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." @@ -872,25 +872,25 @@ def set_distributed_mode(self, strategy: Optional[str] = None): elif self.has_tpu and not _use_cpu: self._device_type = DeviceType.TPU if isinstance(self.tpu_cores, int): - self._distrib_type = DistributedType.TPU_SPAWN + self._distrib_type = _StrategyType.TPU_SPAWN elif self.has_ipu and not _use_cpu: self._device_type = DeviceType.IPU elif self.distributed_backend and self._distrib_type is None: - self._distrib_type = DistributedType(self.distributed_backend) + self._distrib_type = _StrategyType(self.distributed_backend) if self.num_gpus > 0 and not _use_cpu: self._device_type = DeviceType.GPU - _gpu_distrib_types = (DistributedType.DP, DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2) + _gpu_distrib_types = (_StrategyType.DP, _StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2) # DP and DDP2 cannot run without GPU if self.num_gpus == 0 and self._distrib_type in _gpu_distrib_types and not _use_cpu: if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1): - if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): + if self._distrib_type in (_StrategyType.DP, _StrategyType.DDP2): rank_zero_warn( f"{self._distrib_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`." ) - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP else: rank_zero_warn("You are running on single node with no parallelization, so distributed has no effect.") self._distrib_type = None @@ -900,28 +900,28 @@ def set_distributed_mode(self, strategy: Optional[str] = None): # for DDP overwrite nb processes by requested GPUs if self._device_type == DeviceType.GPU and self._distrib_type in ( - DistributedType.DDP, - DistributedType.DDP_SPAWN, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, ): self.num_processes = self.num_gpus - if self._device_type == DeviceType.GPU and self._distrib_type == DistributedType.DDP2: + if self._device_type == DeviceType.GPU and self._distrib_type == _StrategyType.DDP2: self.num_processes = self.num_nodes # Horovod is an extra case... 
- if self.distributed_backend == DistributedType.HOROVOD: + if self.distributed_backend == _StrategyType.HOROVOD: self._set_horovod_backend() using_valid_distributed = self.use_ddp or self.use_ddp2 if self.num_nodes > 1 and not using_valid_distributed: - # throw error to force user to choose a supported distributed type such as ddp or ddp2 + # throw error to force user to choose a supported strategy type such as ddp or ddp2 raise MisconfigurationException( "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`." ) def _set_horovod_backend(self): self.check_horovod() - self._distrib_type = DistributedType.HOROVOD + self._distrib_type = _StrategyType.HOROVOD # Initialize Horovod to get rank / size info hvd.init() @@ -941,7 +941,7 @@ def check_interactive_compatibility(self): f"`Trainer(strategy={self._distrib_type.value!r})` or" f" `Trainer(accelerator={self._distrib_type.value!r})` is not compatible with an interactive" " environment. Run your code as a script, or choose one of the compatible backends:" - f" {', '.join(DistributedType.interactive_compatible_types())}." + f" {', '.join(_StrategyType.interactive_compatible_types())}." " In case you are spawning processes yourself, make sure to include the Trainer" " creation inside the worker function." ) diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py index 921c2e0a7e160b..ab0d3aa4288fa8 100644 --- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -413,17 +413,6 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict: return checkpoint - def hpc_load(self, checkpoint_path: _PATH) -> None: - """Attempts to restore the full training and model state from a HPC checkpoint file. - - .. deprecated:: v1.4 Will be removed in v1.6. Use :meth:`restore` instead. - """ - rank_zero_deprecation( - "`CheckpointConnector.hpc_load()` was deprecated in v1.4 and will be removed in v1.6." - " Use `CheckpointConnector.restore()` instead." - ) - self.restore(checkpoint_path) - def max_ckpt_version_in_folder(self, dir_path: _PATH, name_key: str = "ckpt_") -> Optional[int]: """List up files in `dir_path` with `name_key`, then yield maximum suffix number. diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 9c40e728391c1c..bdc051091b50c7 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -38,7 +38,7 @@ FastForwardSampler, ) from pytorch_lightning.utilities.data import get_len, has_iterable_dataset, has_len_all_ranks -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _fault_tolerant_training from pytorch_lightning.utilities.model_helpers import is_overridden @@ -70,7 +70,7 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None: if not isinstance(dataloader, DataLoader): return - using_spawn = self._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN + using_spawn = self._accelerator_connector._distrib_type == _StrategyType.DDP_SPAWN num_cpus = multiprocessing.cpu_count() # ddp_spawn + num_workers > 0 don't mix! 
tell the user diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b84f03393309b3..19efdce8e35492 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -64,10 +64,10 @@ from pytorch_lightning.tuner.tuning import Tuner from pytorch_lightning.utilities import ( _IPU_AVAILABLE, + _StrategyType, _TPU_AVAILABLE, device_parser, DeviceType, - DistributedType, GradClipAlgorithmType, parsing, rank_zero_deprecation, @@ -84,7 +84,7 @@ from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.exceptions import ExitGracefullyException, MisconfigurationException from pytorch_lightning.utilities.imports import _fault_tolerant_training -from pytorch_lightning.utilities.meta import materialize_module +from pytorch_lightning.utilities.meta import is_on_meta_device, materialize_module from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import ( @@ -1406,10 +1406,21 @@ def _call_setup_hook(self) -> None: def _call_configure_sharded_model(self) -> None: with self.accelerator.model_sharded_context(): - materialize_module(self.lightning_module) + self._handle_meta_model() self.call_hook("configure_sharded_model") self.call_hook("on_configure_sharded_model") + def _handle_meta_model(self) -> None: + if not is_on_meta_device(self.lightning_module): + return + + if isinstance(self.training_type_plugin, DDPSpawnPlugin): + raise MisconfigurationException("LightningModule on meta device isn't supported with spawn.") + + materialize_module(self.lightning_module) + # the trainer reference is lost during materialization + self.lightning_module.trainer = proxy(self) + def _call_teardown_hook(self) -> None: fn = self.state.fn._setup_fn @@ -1591,7 +1602,7 @@ def should_rank_save_checkpoint(self) -> bool: return self.training_type_plugin.should_rank_save_checkpoint @property - def _distrib_type(self) -> DistributedType: + def _distrib_type(self) -> _StrategyType: return self._accelerator_connector._distrib_type @property @@ -1754,10 +1765,10 @@ def distributed_sampler_kwargs(self) -> Optional[dict]: @property def data_parallel(self) -> bool: return self._distrib_type in ( - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DDP2, + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP2, ) @property @@ -1783,15 +1794,6 @@ def _should_reload_dl_epoch(self) -> bool: n_epochs = self.reload_dataloaders_every_n_epochs return n_epochs and (not self.current_epoch % n_epochs) - @property - def disable_validation(self) -> bool: - """Check if validation is disabled during training.""" - rank_zero_deprecation( - "`trainer.disable_validation` is deprecated in v1.4 and will be removed in v1.6." - " Use `not trainer.enable_validation` instead." 
- ) - return not self.enable_validation - @property def enable_validation(self) -> bool: """Check if we should run validation during training.""" diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 7343e28d6d8112..22164908a3e3f1 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -18,6 +18,7 @@ from pytorch_lightning.utilities.apply_func import move_data_to_device # noqa: F401 from pytorch_lightning.utilities.distributed import AllGatherGrad, rank_zero_info, rank_zero_only # noqa: F401 from pytorch_lightning.utilities.enums import ( # noqa: F401 + _StrategyType, AMPType, DeviceType, DistributedType, diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 436c675c382c26..18b0336b82d5f6 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """Enumerated utilities.""" -from enum import Enum -from typing import List, Optional, Union +from enum import Enum, EnumMeta +from typing import Any, List, Optional, Union + +from pytorch_lightning.utilities.warnings import rank_zero_deprecation class LightningEnum(str, Enum): @@ -37,6 +39,31 @@ def __hash__(self) -> int: return hash(self.value.lower()) +class _OnAccessEnumMeta(EnumMeta): + """Enum with a hook to run a function whenever a member is accessed. + + Adapted from: + https://www.buzzphp.com/posts/how-do-i-detect-and-invoke-a-function-when-a-python-enum-member-is-accessed + """ + + def __getattribute__(cls, name: str) -> Any: + obj = super().__getattribute__(name) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + def __getitem__(cls, name: str) -> Any: + member = super().__getitem__(name) + member.deprecate() + return member + + def __call__(cls, value: str, *args: Any, **kwargs: Any) -> Any: + obj = super().__call__(value, *args, **kwargs) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + class AMPType(LightningEnum): """Type of Automatic Mixed Precission used for training. @@ -73,8 +100,8 @@ def supported_types() -> List[str]: return [x.value for x in PrecisionType] -class DistributedType(LightningEnum): - """Define type of distributed computing. +class DistributedType(LightningEnum, metaclass=_OnAccessEnumMeta): + """Define type of training strategy. >>> # you can match the type with string >>> DistributedType.DDP == 'ddp' @@ -82,8 +109,24 @@ class DistributedType(LightningEnum): >>> # which is case invariant >>> DistributedType.DDP2 in ('ddp2', ) True + + Deprecated since v1.6.0 and will be removed in v1.8.0. + + Use `_StrategyType` instead. 
""" + DP = "dp" + DDP = "ddp" + DDP2 = "ddp2" + DDP_CPU = "ddp_cpu" + DDP_SPAWN = "ddp_spawn" + TPU_SPAWN = "tpu_spawn" + DEEPSPEED = "deepspeed" + HOROVOD = "horovod" + DDP_SHARDED = "ddp_sharded" + DDP_SHARDED_SPAWN = "ddp_sharded_spawn" + DDP_FULLY_SHARDED = "ddp_fully_sharded" + @staticmethod def interactive_compatible_types() -> List["DistributedType"]: """Returns a list containing interactive compatible DistributeTypes.""" @@ -98,17 +141,11 @@ def is_interactive_compatible(self) -> bool: """Returns whether self is interactive compatible.""" return self in DistributedType.interactive_compatible_types() - DP = "dp" - DDP = "ddp" - DDP2 = "ddp2" - DDP_CPU = "ddp_cpu" - DDP_SPAWN = "ddp_spawn" - TPU_SPAWN = "tpu_spawn" - DEEPSPEED = "deepspeed" - HOROVOD = "horovod" - DDP_SHARDED = "ddp_sharded" - DDP_SHARDED_SPAWN = "ddp_sharded_spawn" - DDP_FULLY_SHARDED = "ddp_fully_sharded" + def deprecate(self) -> None: + rank_zero_deprecation( + "`DistributedType` Enum has been deprecated in v1.6 and will be removed in v1.8." + " Use the string value `{self.value!r}` instead." + ) class DeviceType(LightningEnum): @@ -188,3 +225,41 @@ def get_max_depth(mode: str) -> int: @staticmethod def supported_types() -> List[str]: return [x.value for x in ModelSummaryMode] + + +class _StrategyType(LightningEnum): + """Define type of training strategy. + + >>> # you can match the type with string + >>> _StrategyType.DDP == 'ddp' + True + >>> # which is case invariant + >>> _StrategyType.DDP2 in ('ddp2', ) + True + """ + + DP = "dp" + DDP = "ddp" + DDP2 = "ddp2" + DDP_CPU = "ddp_cpu" + DDP_SPAWN = "ddp_spawn" + TPU_SPAWN = "tpu_spawn" + DEEPSPEED = "deepspeed" + HOROVOD = "horovod" + DDP_SHARDED = "ddp_sharded" + DDP_SHARDED_SPAWN = "ddp_sharded_spawn" + DDP_FULLY_SHARDED = "ddp_fully_sharded" + + @staticmethod + def interactive_compatible_types() -> List["_StrategyType"]: + """Returns a list containing interactive compatible _StrategyTypes.""" + return [ + _StrategyType.DP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP_SHARDED_SPAWN, + _StrategyType.TPU_SPAWN, + ] + + def is_interactive_compatible(self) -> bool: + """Returns whether self is interactive compatible.""" + return self in _StrategyType.interactive_compatible_types() diff --git a/pytorch_lightning/utilities/meta.py b/pytorch_lightning/utilities/meta.py index 60e6cc791b7aee..6d3c1d6b5f11bf 100644 --- a/pytorch_lightning/utilities/meta.py +++ b/pytorch_lightning/utilities/meta.py @@ -18,13 +18,14 @@ from functools import partial from itertools import chain from types import ModuleType -from typing import Callable, Dict, Generator, Iterator, List, Optional, Set, Type +from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Type import torch from torch import nn, Tensor from torch.nn import Module from torch.nn.modules.container import ModuleDict, ModuleList, Sequential +import pytorch_lightning as pl from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_10 @@ -191,7 +192,6 @@ def materialize_module(root_module: nn.Module) -> nn.Module: # cache subclasses to optimize the search when resetting the meta device later on. 
__STORAGE_META__ = {} - __CREATED_MODULES__ = set() @@ -237,45 +237,52 @@ def _set_meta_device() -> None: for subclass in get_all_subclasses(torch.nn.modules.module.Module): - if isinstance(subclass, (Sequential, ModuleList, ModuleDict)): + if subclass in (Sequential, ModuleList, ModuleDict, pl.LightningModule): continue # if a subclass has already been stored, we should use the cache if str(subclass) in __STORAGE_META__: - # reset the class import package to its rightfull state. + # reset the class import package to its rightful state. mods, subclass, meta_class = __STORAGE_META__[subclass] for mod in mods: setattr(mod, subclass.__name__, meta_class) continue + class _IsinstanceMetaclass(type(subclass)): + def __instancecheck__(self, instance: Any) -> bool: + """Overrides the ``isinstance`` check on ``_MaterializerModule`` objects.""" + return isinstance(instance, self.__bases__[0]) + # Create a class subclassing current `subclass` overriding its new method. # this will enable use to use `torch.distributed.nn.utils.init_meta` to create a `meta` # version of the current subclass module - class _MetaClass(subclass): + class _MaterializerModule(subclass, metaclass=_IsinstanceMetaclass): @classmethod @contextmanager - def instantiation_context(cls, materialize: bool): + def instantiation_context(cls): _unset_meta_device(from_created=True) yield _set_meta_device_populated(from_created=True) @classmethod def materialize(cls, materialize_fn: Callable): - with cls.instantiation_context(materialize=True): + with cls.instantiation_context(): obj = materialize_fn() return obj @staticmethod def add_subclasses(subclass): - """This is used to unrol the instantion tree while creating the modules.""" - __CREATED_MODULES__.add(subclass) + """This is used to unroll the instantiation tree while creating the modules.""" + # Don't store the LightningModule as skipped from the Meta process. + if subclass != pl.LightningModule: + __CREATED_MODULES__.add(subclass) if subclass.__bases__[0] != torch.nn.modules.module.Module: - _MetaClass.add_subclasses(subclass.__bases__[0]) + _MaterializerModule.add_subclasses(subclass.__bases__[0]) def __new__(cls, *args, **kwargs): subclass = cls.__bases__[0] cls.add_subclasses(subclass) - with cls.instantiation_context(materialize=False): + with cls.instantiation_context(): obj = init_meta(subclass, *args, **kwargs) obj.materialize = partial(cls.materialize, materialize_fn=obj.materialize) @@ -294,9 +301,8 @@ def search(mod: ModuleType) -> List[ModuleType]: # nn.Module class can be imported at different level and they all need to be mocked. 
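The `_IsinstanceMetaclass` above relies on a standard metaclass hook. Here is a minimal, self-contained sketch of that mechanism with generic names (not from the codebase): a wrapper class whose `isinstance` checks are deferred to the wrapped base.

```python
class _IsinstanceMeta(type):
    def __instancecheck__(cls, instance):
        # Defer the check to the first (wrapped) base class.
        return isinstance(instance, cls.__bases__[0])


class Base:
    pass


class Proxy(Base, metaclass=_IsinstanceMeta):
    pass


assert isinstance(Base(), Proxy)   # a plain Base instance passes the Proxy check
assert isinstance(Proxy(), Proxy)  # Proxy instances are Base instances too
```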
# Example: torch.nn.Linear is actually torch.nn.modules.linear.Linear # Therefore, torch.nn.Linear, torch.nn.modules.Linear, torch.nn.modules.linear.Linear - # needs to be replaced by the torch.nn.linear.modules.Linear _MetaClass - out = [] - out.append(search(mod)) + # needs to be replaced by the torch.nn.linear.modules.Linear _MaterializerModule + out = [search(mod)] for name in submodules[1:]: mod = getattr(mod, name) out.append(search(mod)) @@ -305,11 +311,11 @@ def search(mod: ModuleType) -> List[ModuleType]: mods = [mod for mod in chain(*out) if mod] # store the modules search so it doesn't have to be performed again for this class - __STORAGE_META__[subclass] = (mods, subclass, _MetaClass) + __STORAGE_META__[subclass] = (mods, subclass, _MaterializerModule) # replace all subclass by its meta form for mod in mods: - setattr(mod, subclass.__name__, _MetaClass) + setattr(mod, subclass.__name__, _MaterializerModule) @contextmanager @@ -321,3 +327,11 @@ def init_meta_context() -> Generator: _set_meta_device() yield _unset_meta_device() + + +def is_on_meta_device(module: nn.Module) -> bool: + try: + param = next(module.parameters()) + return param.device.type == "meta" + except StopIteration: + return False diff --git a/pytorch_lightning/utilities/model_helpers.py b/pytorch_lightning/utilities/model_helpers.py index 3146b33fe153dc..bb48b481e625f1 100644 --- a/pytorch_lightning/utilities/model_helpers.py +++ b/pytorch_lightning/utilities/model_helpers.py @@ -12,26 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import partial -from typing import Optional, Type, Union +from typing import Optional, Type from unittest.mock import Mock import pytorch_lightning as pl -from pytorch_lightning.utilities import rank_zero_deprecation -def is_overridden( - method_name: str, - instance: Optional[object] = None, - parent: Optional[Type[object]] = None, - model: Optional[Union["pl.LightningModule", "pl.LightningDataModule"]] = None, -) -> bool: - if model is not None and instance is None: - rank_zero_deprecation( - "`is_overriden(model=...)` has been deprecated and will be removed in v1.6." 
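A hedged usage sketch of the meta-device helpers touched above (`init_meta_context`, `materialize_module`, and the new `is_on_meta_device`). It assumes PyTorch >= 1.10, per the `_TORCH_GREATER_EQUAL_1_10` guard in this file, and the materialization semantics (children replaced, materialized module returned) are inferred from the code above rather than documented behaviour.

```python
from torch import nn

from pytorch_lightning.utilities.meta import init_meta_context, is_on_meta_device, materialize_module

# Build the module under the meta context: parameters are allocated on the
# "meta" device, so no real storage is created yet.
with init_meta_context():
    module = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 1))

assert is_on_meta_device(module)

# Materialize real parameters before the module is moved to a device or trained.
module = materialize_module(module)
assert not is_on_meta_device(module)
```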
- "Please use `is_overriden(instance=...)`" - ) - instance = model - +def is_overridden(method_name: str, instance: Optional[object] = None, parent: Optional[Type[object]] = None) -> bool: if instance is None: # if `self.lightning_module` was passed as instance, it can be `None` return False diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index d95f5c8e6f9ea6..e70d862b048e0a 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -43,7 +43,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from pytorch_lightning.utilities import DeviceType, DistributedType +from pytorch_lightning.utilities import _StrategyType, DeviceType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.runif import RunIf @@ -636,7 +636,7 @@ def test_unsupported_distrib_types_on_cpu(training_type): with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting `strategy='ddp"): trainer = Trainer(accelerator=training_type, num_processes=2) - assert trainer._distrib_type == DistributedType.DDP + assert trainer._distrib_type == _StrategyType.DDP def test_accelerator_ddp_for_cpu(tmpdir): diff --git a/tests/base/model_test_epoch_ends.py b/tests/base/model_test_epoch_ends.py index 746ceb94a5de07..b001298e93dd0c 100644 --- a/tests/base/model_test_epoch_ends.py +++ b/tests/base/model_test_epoch_ends.py @@ -15,7 +15,7 @@ import torch -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType class TestEpochEndVariations(ABC): @@ -34,13 +34,13 @@ def test_epoch_end(self, outputs): test_loss = self.get_output_metric(output, "test_loss") # reduce manually when using dp - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_loss = torch.mean(test_loss) test_loss_mean += test_loss # reduce manually when using dp test_acc = self.get_output_metric(output, "test_acc") - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_acc = torch.mean(test_acc) test_acc_mean += test_acc @@ -69,13 +69,13 @@ def test_epoch_end__multiple_dataloaders(self, outputs): test_loss = output["test_loss"] # reduce manually when using dp - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_loss = torch.mean(test_loss) test_loss_mean += test_loss # reduce manually when using dp test_acc = output["test_acc"] - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_acc = torch.mean(test_acc) test_acc_mean += test_acc diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index d2f3cec5cba4f7..1ded07734a7de2 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -17,7 +17,6 @@ import pytest from pytorch_lightning import Trainer -from pytorch_lightning.utilities.model_helpers import is_overridden from tests.helpers import BoringModel @@ -48,27 +47,3 @@ def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): [call.val_dataloader()] + [call.train_dataloader(), call.val_dataloader()] * 3 + [call.test_dataloader()] ) assert tracker.mock_calls == expected_sequence - - -def test_v1_6_0_is_overridden_model(): - model = BoringModel() - with pytest.deprecated_call(match="and 
will be removed in v1.6"): - assert is_overridden("validation_step", model=model) - with pytest.deprecated_call(match="and will be removed in v1.6"): - assert not is_overridden("foo", model=model) - - -def test_v1_6_0_deprecated_disable_validation(): - trainer = Trainer() - with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): - _ = trainer.disable_validation - - -def test_v1_6_0_deprecated_hpc_load(tmpdir): - model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, max_steps=1) - trainer.fit(model) - trainer.checkpoint_connector.hpc_save(tmpdir, trainer.logger) - checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(str(tmpdir)) - with pytest.deprecated_call(match=r"`CheckpointConnector.hpc_load\(\)` was deprecated in v1.4"): - trainer.checkpoint_connector.hpc_load(checkpoint_path) diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py new file mode 100644 index 00000000000000..f668f63b9f4503 --- /dev/null +++ b/tests/deprecated_api/test_remove_1-8.py @@ -0,0 +1,23 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test deprecated functionality which will be removed in v1.8.0.""" +import pytest + +from pytorch_lightning.utilities.enums import DistributedType + + +def test_v1_8_0_deprecated_distributed_type_enum(): + + with pytest.deprecated_call(match="has been deprecated in v1.6 and will be removed in v1.8."): + _ = DistributedType.DDP diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py index 643d3e50cb8940..6fa3bbb5dc9433 100644 --- a/tests/helpers/pipelines.py +++ b/tests/helpers/pipelines.py @@ -15,7 +15,7 @@ from torchmetrics.functional import accuracy from pytorch_lightning import LightningDataModule, LightningModule, Trainer -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType from tests.helpers import BoringModel from tests.helpers.utils import get_default_logger, load_model_from_checkpoint, reset_seed @@ -82,7 +82,7 @@ def run_model_test( run_prediction_eval_model_template(model, dataloader, min_acc=min_acc) if with_hpc: - if trainer._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2): + if trainer._distrib_type in (_StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2): # on hpc this would work fine... 
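To show the simplified `is_overridden` signature in use, a small sketch mirroring the deleted v1.6 deprecation test above: the instance is now passed directly instead of through the removed `model=` keyword.

```python
from pytorch_lightning.utilities.model_helpers import is_overridden
from tests.helpers import BoringModel

model = BoringModel()
# Pass the instance as the second positional argument (or via `instance=`).
assert is_overridden("validation_step", model)
assert not is_overridden("foo", model)
```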
but need to hack it for the purpose of the test trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = trainer.init_optimizers( pretrained_model diff --git a/tests/lite/test_lite.py b/tests/lite/test_lite.py index bd69cf359473e0..7c79cb7f2e709c 100644 --- a/tests/lite/test_lite.py +++ b/tests/lite/test_lite.py @@ -31,7 +31,7 @@ _replace_dataloader_init_method, ) from pytorch_lightning.plugins import DeepSpeedPlugin, PrecisionPlugin, TrainingTypePlugin -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.seed import pl_worker_init_function from tests.helpers.runif import RunIf @@ -251,12 +251,12 @@ def test_seed_everything(): @pytest.mark.parametrize( "strategy", [ - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - pytest.param(DistributedType.DEEPSPEED, marks=RunIf(deepspeed=True)), - pytest.param(DistributedType.DDP_SHARDED, marks=RunIf(fairscale=True)), - pytest.param(DistributedType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)), + pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)), + pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), ], ) def test_setup_dataloaders_replace_custom_sampler(strategy): @@ -279,12 +279,12 @@ def test_setup_dataloaders_replace_custom_sampler(strategy): @pytest.mark.parametrize( "strategy", [ - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - pytest.param(DistributedType.DEEPSPEED, marks=RunIf(deepspeed=True)), - pytest.param(DistributedType.DDP_SHARDED, marks=RunIf(fairscale=True)), - pytest.param(DistributedType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)), + pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)), + pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), ], ) @pytest.mark.parametrize("shuffle", [True, False]) diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py index 97097b2074ca16..4f3a482e37ac47 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -20,7 +20,7 @@ from torch.utils.data.sampler import BatchSampler, Sampler, SequentialSampler from pytorch_lightning import Trainer -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel, RandomDataset from tests.helpers.runif import RunIf @@ -137,7 +137,7 @@ def _get_warning_msg(): @pytest.mark.parametrize("num_workers", [0, 1]) def test_dataloader_warnings(tmpdir, num_workers): trainer = Trainer(default_root_dir=tmpdir, strategy="ddp_spawn", num_processes=2, fast_dev_run=4) - assert trainer._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN + assert trainer._accelerator_connector._distrib_type == _StrategyType.DDP_SPAWN trainer.fit(TestSpawnBoringModel(num_workers)) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index d2e5f771a9c408..dc0ce2b68452c8 100644 --- a/tests/trainer/test_trainer.py +++ 
b/tests/trainer/test_trainer.py @@ -48,7 +48,7 @@ DDPSpawnShardedPlugin, ) from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import DeviceType, DistributedType +from pytorch_lightning.utilities import _StrategyType, DeviceType from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import seed_everything @@ -1154,15 +1154,15 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): ), ( dict(accelerator="ddp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp", num_nodes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp2", gpus=None), @@ -1174,43 +1174,43 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): ), ( dict(accelerator="dp", gpus=1), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator="ddp", gpus=1), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=1), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp2", gpus=1), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator=None, gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(accelerator="dp", gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(accelerator="ddp", gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(accelerator="ddp2", gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(accelerator="ddp2", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, 
@@ -2096,11 +2096,11 @@ def training_step(self, batch, batch_idx):
         ),
         (
             dict(strategy="ddp", num_processes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(strategy="ddp", num_nodes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1),
         ),
         (
             dict(strategy="ddp2", gpus=None),
@@ -2112,47 +2112,47 @@ def training_step(self, batch, batch_idx):
         ),
         (
             dict(strategy="dp", gpus=1),
-            dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
+            dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(strategy="ddp", gpus=1),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(strategy="ddp_spawn", gpus=1),
-            dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(strategy="ddp2", gpus=1),
-            dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1),
         ),
         (
             dict(strategy=None, gpus=2),
-            dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2),
         ),
         (
             dict(strategy="dp", gpus=2),
-            dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(strategy="ddp", gpus=2),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2),
         ),
         (
             dict(strategy="ddp2", gpus=2),
-            dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(strategy="ddp2", num_processes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(strategy="dp", num_processes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(strategy="ddp_spawn", num_processes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(strategy="ddp_spawn", num_processes=1, gpus=None),
@@ -2161,7 +2161,7 @@ def training_step(self, batch, batch_idx):
         (
             dict(strategy="ddp_fully_sharded", gpus=1),
             dict(
-                _distrib_type=DistributedType.DDP_FULLY_SHARDED,
+                _distrib_type=_StrategyType.DDP_FULLY_SHARDED,
                 _device_type=DeviceType.GPU,
                 num_gpus=1,
                 num_processes=1,
@@ -2169,32 +2169,32 @@ def training_step(self, batch, batch_idx):
         ),
         (
             dict(strategy=DDPSpawnPlugin(), num_processes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(strategy=DDPSpawnPlugin(), gpus=2),
-            dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(strategy=DDPPlugin(), num_processes=2, gpus=None),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2),
         ),
         (
             dict(strategy=DDPPlugin(), gpus=2),
-            dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(strategy=DDP2Plugin(), gpus=2),
-            dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(strategy=DataParallelPlugin(), gpus=2),
-            dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
         (
             dict(strategy=DDPFullyShardedPlugin(), gpus=2),
             dict(
-                _distrib_type=DistributedType.DDP_FULLY_SHARDED,
+                _distrib_type=_StrategyType.DDP_FULLY_SHARDED,
                 _device_type=DeviceType.GPU,
                 num_gpus=2,
                 num_processes=1,
@@ -2203,7 +2203,7 @@ def training_step(self, batch, batch_idx):
         (
             dict(strategy=DDPSpawnShardedPlugin(), gpus=2),
             dict(
-                _distrib_type=DistributedType.DDP_SHARDED_SPAWN,
+                _distrib_type=_StrategyType.DDP_SHARDED_SPAWN,
                 _device_type=DeviceType.GPU,
                 num_gpus=2,
                 num_processes=1,
@@ -2211,7 +2211,7 @@ def training_step(self, batch, batch_idx):
         ),
         (
             dict(strategy=DDPShardedPlugin(), gpus=2),
-            dict(_distrib_type=DistributedType.DDP_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
+            dict(_distrib_type=_StrategyType.DDP_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1),
         ),
     ],
 )
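The parametrizations above also cover passing training-type plugin instances rather than strategy strings, with both spellings expected to resolve to the same `_StrategyType` member. A minimal sketch of that mapping, assuming `DDPPlugin` is importable from `pytorch_lightning.plugins` in this release and using the same private connector attribute as the tests:

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning.utilities import _StrategyType

# A plugin instance selects the strategy directly; the connector still records it as an enum member.
trainer = Trainer(strategy=DDPPlugin(), num_processes=2, gpus=None)
assert trainer._accelerator_connector._distrib_type == _StrategyType.DDP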
diff --git a/tests/utilities/test_meta.py b/tests/utilities/test_meta.py
index 8e36a86c3beef0..581b949d9167f5 100644
--- a/tests/utilities/test_meta.py
+++ b/tests/utilities/test_meta.py
@@ -14,7 +14,7 @@
 from torch import nn

 from pytorch_lightning.core.lightning import LightningModule
-from pytorch_lightning.utilities.meta import init_meta_context, materialize_module
+from pytorch_lightning.utilities.meta import init_meta_context, is_on_meta_device, materialize_module
 from tests.helpers.runif import RunIf
@@ -31,18 +31,23 @@ def __init__(self, num_layers: int):
         self.layer = nn.Sequential(*[nn.Linear(1, 1) for _ in range(self.hparams.num_layers)])


-@RunIf(min_torch="1.10.0")
+@RunIf(special=True, min_torch="1.10.0")
 def test_init_meta_context():
     with init_meta_context():
         m = nn.Linear(in_features=1, out_features=1)
+        assert isinstance(m, nn.Linear)
         assert m.weight.device.type == "meta"
+        assert is_on_meta_device(m)
         mlp = MLP(4)
         assert mlp.layer[0].weight.device.type == "meta"

         mlp = materialize_module(mlp)
         assert mlp.layer[0].weight.device.type == "cpu"

+        assert not is_on_meta_device(mlp)
+        assert not is_on_meta_device(nn.Module())
+
         model = BoringModel(4)
         assert model.layer[0].weight.device.type == "meta"
         materialize_module(model)
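For readers unfamiliar with the meta-device utilities exercised by `test_init_meta_context`, here is a minimal sketch of the workflow, assuming torch >= 1.10 and the `pytorch_lightning.utilities.meta` helpers imported in the diff:

from torch import nn

from pytorch_lightning.utilities.meta import init_meta_context, is_on_meta_device, materialize_module

with init_meta_context():
    # Modules created inside the context live on the "meta" device: shapes and dtypes only, no storage.
    mlp = nn.Sequential(nn.Linear(1, 1), nn.Linear(1, 1))
    assert is_on_meta_device(mlp)
    assert mlp[0].weight.device.type == "meta"

    # materialize_module() replaces the meta tensors with real, CPU-backed parameters.
    mlp = materialize_module(mlp)
    assert mlp[0].weight.device.type == "cpu"
    assert not is_on_meta_device(mlp)

The `special=True` flag added to `@RunIf` changes how the test is scheduled in CI (it runs as a standalone "special" test); it does not affect the behaviour sketched here.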