diff --git a/.azure/gpu-pipeline.yml b/.azure/gpu-pipeline.yml index acb6022051f..cad42d8fe91 100644 --- a/.azure/gpu-pipeline.yml +++ b/.azure/gpu-pipeline.yml @@ -37,6 +37,7 @@ jobs: variables: DEVICES: $( python -c 'name = "$(Agent.Name)" ; gpus = name.split("_")[-1] if "_" in name else "0,1"; print(gpus)' ) + HF_CACHE_DIR: "$(Pipeline.Workspace)/ci-cache_huggingface" container: image: "$(docker-image)" @@ -95,6 +96,19 @@ jobs: python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'" displayName: 'Sanity check' + - task: Cache@2 + inputs: + key: transformers | "$(Agent.OS)" + restoreKeys: transformers + path: $(HF_CACHE_DIR) + cacheHitVar: HF_CACHE_RESTORED + - bash: | + printf "cache location: $(HF_CACHE_DIR)\n" + printf "hit the HF cache: $(HF_CACHE_RESTORED)\n" + mkdir -p $(HF_CACHE_DIR) # in case cache was empty + ls -lh $(HF_CACHE_DIR) # show what was restored + displayName: 'Show HF cache' + - bash: python -m pytest torchmetrics --cov=torchmetrics --timeout=120 --durations=50 workingDirectory: src displayName: 'DocTesting' @@ -120,6 +134,8 @@ jobs: python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,unittest --name="GPU-coverage" --env=linux,azure ls -l workingDirectory: tests + env: + TRANSFORMERS_CACHE: $(HF_CACHE_DIR) displayName: 'Statistics' - task: PublishTestResults@2 diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 0c72120e707..825a3836c72 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -73,7 +73,7 @@ jobs: # We do this, since failures on test.pypi aren't that bad - name: Publish to Test PyPI if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' - uses: pypa/gh-action-pypi-publish@v1.5.2 + uses: pypa/gh-action-pypi-publish@v1.6.4 with: user: __token__ password: ${{ secrets.test_pypi_password }} @@ -82,7 +82,7 @@ jobs: - name: Publish distribution 📦 to PyPI if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' - uses: pypa/gh-action-pypi-publish@v1.5.2 + uses: pypa/gh-action-pypi-publish@v1.6.4 with: user: __token__ password: ${{ secrets.pypi_password }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fdd47732e1c..de5db98005a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,13 +45,13 @@ repos: name: Upgrade code - repo: https://github.com/PyCQA/docformatter - rev: v1.5.0 + rev: v1.5.1 hooks: - id: docformatter args: [--in-place, --wrap-summaries=115, --wrap-descriptions=120] - repo: https://github.com/PyCQA/isort - rev: 5.11.2 + rev: 5.11.4 hooks: - id: isort name: imports diff --git a/docs/source/image/error_relative_global_dimensionless_synthesis.rst b/docs/source/image/error_relative_global_dimensionless_synthesis.rst index 9b5818e44dc..3df0be3a833 100644 --- a/docs/source/image/error_relative_global_dimensionless_synthesis.rst +++ b/docs/source/image/error_relative_global_dimensionless_synthesis.rst @@ -3,6 +3,8 @@ :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/image_classification.svg :tags: Image +.. include:: ../links.rst + ############################################ Error Relative Global Dim. Synthesis (ERGAS) ############################################ @@ -12,6 +14,8 @@ ________________ .. 
autoclass:: torchmetrics.image.ergas.ErrorRelativeGlobalDimensionlessSynthesis :noindex: + :exclude-members: update, compute + Functional Interface ____________________ diff --git a/docs/source/image/frechet_inception_distance.rst b/docs/source/image/frechet_inception_distance.rst index 2c817e2bd7e..88eae415427 100644 --- a/docs/source/image/frechet_inception_distance.rst +++ b/docs/source/image/frechet_inception_distance.rst @@ -14,3 +14,4 @@ ________________ .. autoclass:: torchmetrics.image.fid.FrechetInceptionDistance :noindex: + :exclude-members: update, compute diff --git a/docs/source/image/inception_score.rst b/docs/source/image/inception_score.rst index ca193c3d53f..292639cadfb 100644 --- a/docs/source/image/inception_score.rst +++ b/docs/source/image/inception_score.rst @@ -3,6 +3,8 @@ :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/image_classification.svg :tags: Image +.. include:: ../links.rst + ############### Inception Score ############### @@ -12,3 +14,4 @@ ________________ .. autoclass:: torchmetrics.image.inception.InceptionScore :noindex: + :exclude-members: update, compute diff --git a/docs/source/image/kernel_inception_distance.rst b/docs/source/image/kernel_inception_distance.rst index 29c3caedf6f..aa31b10afea 100644 --- a/docs/source/image/kernel_inception_distance.rst +++ b/docs/source/image/kernel_inception_distance.rst @@ -14,3 +14,4 @@ ________________ .. autoclass:: torchmetrics.image.kid.KernelInceptionDistance :noindex: + :exclude-members: update, compute diff --git a/docs/source/image/learned_perceptual_image_patch_similarity.rst b/docs/source/image/learned_perceptual_image_patch_similarity.rst index dddb3bb066c..a7fd8e5dea2 100644 --- a/docs/source/image/learned_perceptual_image_patch_similarity.rst +++ b/docs/source/image/learned_perceptual_image_patch_similarity.rst @@ -12,3 +12,4 @@ ________________ .. autoclass:: torchmetrics.image.lpip.LearnedPerceptualImagePatchSimilarity :noindex: + :exclude-members: update, compute diff --git a/docs/source/image/multi_scale_structural_similarity.rst b/docs/source/image/multi_scale_structural_similarity.rst index c96e3b2264f..69d3803c9c1 100644 --- a/docs/source/image/multi_scale_structural_similarity.rst +++ b/docs/source/image/multi_scale_structural_similarity.rst @@ -14,6 +14,7 @@ ________________ .. autoclass:: torchmetrics.MultiScaleStructuralSimilarityIndexMeasure :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/image/peak_signal_noise_ratio.rst b/docs/source/image/peak_signal_noise_ratio.rst index 2c08b3f87ff..29dbf348c7f 100644 --- a/docs/source/image/peak_signal_noise_ratio.rst +++ b/docs/source/image/peak_signal_noise_ratio.rst @@ -14,6 +14,7 @@ ________________ .. autoclass:: torchmetrics.PeakSignalNoiseRatio :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/image/spectral_angle_mapper.rst b/docs/source/image/spectral_angle_mapper.rst index 7eb75f0257e..cbbad6f5920 100644 --- a/docs/source/image/spectral_angle_mapper.rst +++ b/docs/source/image/spectral_angle_mapper.rst @@ -12,6 +12,7 @@ ________________ .. 
autoclass:: torchmetrics.SpectralAngleMapper :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/image/spectral_distortion_index.rst b/docs/source/image/spectral_distortion_index.rst index a75976984d4..46c303e31ae 100644 --- a/docs/source/image/spectral_distortion_index.rst +++ b/docs/source/image/spectral_distortion_index.rst @@ -14,6 +14,7 @@ ________________ .. autoclass:: torchmetrics.SpectralDistortionIndex :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/image/structural_similarity.rst b/docs/source/image/structural_similarity.rst index 6a6cd726d23..ea27befeee4 100644 --- a/docs/source/image/structural_similarity.rst +++ b/docs/source/image/structural_similarity.rst @@ -14,6 +14,7 @@ ________________ .. autoclass:: torchmetrics.StructuralSimilarityIndexMeasure :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/image/total_variation.rst b/docs/source/image/total_variation.rst index 0f0e7398d9b..70dd933052f 100644 --- a/docs/source/image/total_variation.rst +++ b/docs/source/image/total_variation.rst @@ -14,6 +14,7 @@ ________________ .. autoclass:: torchmetrics.TotalVariation :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/image/universal_image_quality_index.rst b/docs/source/image/universal_image_quality_index.rst index ba820e4d720..c5710fdf07b 100644 --- a/docs/source/image/universal_image_quality_index.rst +++ b/docs/source/image/universal_image_quality_index.rst @@ -14,6 +14,7 @@ ________________ .. autoclass:: torchmetrics.UniversalImageQualityIndex :noindex: + :exclude-members: update, compute Functional Interface ____________________ diff --git a/docs/source/links.rst b/docs/source/links.rst index ef610313980..aa3cb5e3d06 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ -115,5 +115,15 @@ .. _Scale-invariant signal-to-noise ratio: https://arxiv.org/abs/1711.00541 .. _Signal-to-noise ratio: https://arxiv.org/abs/1811.02508 .. _Permutation invariant training: https://arxiv.org/abs/1607.00325 +.. _ranking ref1: https://link.springer.com/chapter/10.1007/978-0-387-09823-4_34 +.. _Spectral Distortion Index: https://www.ingentaconnect.com/content/asprs/pers/2008/00000074/00000002/art00003;jsessionid=nzjnb3v9xxr1.x-ic-live-03 +.. _Relative dimensionless global error synthesis: https://ieeexplore.ieee.org/document/4317530 +.. _fid ref1: https://arxiv.org/abs/1512.00567 +.. _fid ref2: https://arxiv.org/abs/1706.08500 +.. _inception ref1: https://arxiv.org/abs/1606.03498 +.. _inception ref2: https://arxiv.org/abs/1706.08500 +.. _kid ref1: https://arxiv.org/abs/1801.01401 +.. _kid ref2: https://arxiv.org/abs/1706.08500 +.. _Spectral Angle Mapper: https://ntrs.nasa.gov/citations/19940012238 .. _Multilabel coverage error: https://link.springer.com/chapter/10.1007/978-0-387-09823-4_34 .. 
_Peak Signal to Noise Ratio with Blocked Effect: https://ieeexplore.ieee.org/abstract/document/5535179 \ No newline at end of file diff --git a/docs/source/pages/lightning.rst b/docs/source/pages/lightning.rst index 6f4b8a30ffa..f8065779c5c 100644 --- a/docs/source/pages/lightning.rst +++ b/docs/source/pages/lightning.rst @@ -32,9 +32,9 @@ The example below shows how to use a metric in your `LightningModule None: setattr(self, key, current_to_cpu) def _get_safe_item_values(self, item: Dict[str, Any]) -> Union[Tensor, Tuple]: - if self.iou_type == "bbox": boxes = _fix_empty_tensors(item["boxes"]) if boxes.numel() > 0: diff --git a/src/torchmetrics/functional/image/d_lambda.py b/src/torchmetrics/functional/image/d_lambda.py index 350e03bfbe8..e21e19591ef 100644 --- a/src/torchmetrics/functional/image/d_lambda.py +++ b/src/torchmetrics/functional/image/d_lambda.py @@ -94,8 +94,8 @@ def spectral_distortion_index( p: int = 1, reduction: Literal["elementwise_mean", "sum", "none"] = "elementwise_mean", ) -> Tensor: - """Spectral Distortion Index (SpectralDistortionIndex_) also now as D_lambda is used to compare the spectral - distortion between two images. + """Calculates `Spectral Distortion Index`_ (SpectralDistortionIndex_), also known as D_lambda, which is used to + compare the spectral distortion between two images. Args: preds: Low resolution multispectral image
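Note: to make the functional interface above concrete, a minimal usage sketch follows (shapes follow the docstring; the seed and random tensors are illustrative stand-ins, not values from this changeset):

import torch
from torchmetrics.functional import spectral_distortion_index

_ = torch.manual_seed(42)
preds = torch.rand([16, 3, 16, 16])   # low resolution multispectral image, shape (N, C, H, W)
target = torch.rand([16, 3, 16, 16])  # high resolution fused image, same shape
score = spectral_distortion_index(preds, target, p=1)  # scalar under the default reduction
per_sample = spectral_distortion_index(preds, target, reduction="none")  # tensor of shape (N,)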
diff --git a/src/torchmetrics/image/d_lambda.py b/src/torchmetrics/image/d_lambda.py index 56a8f95dec1..089f7d5010a 100644 --- a/src/torchmetrics/image/d_lambda.py +++ b/src/torchmetrics/image/d_lambda.py @@ -26,6 +26,16 @@ class SpectralDistortionIndex(Metric): """Computes Spectral Distortion Index (SpectralDistortionIndex_), also known as D_lambda, which is used to compare the spectral distortion between two images. + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Low resolution multispectral image of shape ``(N,C,H,W)`` + - ``target`` (:class:`~torch.Tensor`): High resolution fused image of shape ``(N,C,H,W)`` + + As output of `forward` and `compute` the metric returns the following output + + - ``sdi`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average SDI value + over samples else returns tensor of shape ``(N,)`` with SDI values per sample + Args: p: Large spectral differences reduction: a method to reduce metric score over labels. @@ -36,7 +46,6 @@ class SpectralDistortionIndex(Metric): kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - Example: >>> import torch >>> _ = torch.manual_seed(42) @@ -46,12 +55,6 @@ class SpectralDistortionIndex(Metric): >>> sdi = SpectralDistortionIndex() >>> sdi(preds, target) tensor(0.0234) - - References: - [1] Alparone, Luciano & Aiazzi, Bruno & Baronti, Stefano & Garzelli, Andrea & Nencini, - Filippo & Selva, Massimo. (2008). Multispectral and Panchromatic Data Fusion - Assessment Without Reference. ASPRS Journal of Photogrammetric Engineering - and Remote Sensing. 74. 193-200. 10.14358/PERS.74.2.193. """ higher_is_better: bool = True @@ -82,12 +85,7 @@ def __init__( self.add_state("target", default=[], dist_reduce_fx="cat") def update(self, preds: Tensor, target: Tensor) -> None: - """Update state with preds and target. - - Args: - preds: Low resolution multispectral image - target: High resolution fused image - """ + """Update state with preds and target.""" preds, target = _spectral_distortion_index_update(preds, target) self.preds.append(preds) self.target.append(target) diff --git a/src/torchmetrics/image/ergas.py b/src/torchmetrics/image/ergas.py index 5200ba99203..c7423b866ee 100644 --- a/src/torchmetrics/image/ergas.py +++ b/src/torchmetrics/image/ergas.py @@ -24,10 +24,20 @@ class ErrorRelativeGlobalDimensionlessSynthesis(Metric): - """Relative dimensionless global error synthesis (ERGAS) is used to calculate the accuracy of Pan sharpened - image considering normalized average error of each band of the result image + """Calculates `Relative dimensionless global error synthesis`_ (ERGAS), which is used to calculate the accuracy + of Pan sharpened image considering normalized average error of each band of the result image (ErrorRelativeGlobalDimensionlessSynthesis). + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model + - ``target`` (:class:`~torch.Tensor`): Ground truth values + + As output of `forward` and `compute` the metric returns the following output + + - ``ergas`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average ERGAS + value over samples else returns tensor of shape ``(N,)`` with ERGAS values per sample + Args: ratio: ratio of high resolution to low resolution reduction: a method to reduce metric score over labels. @@ -38,9 +48,6 @@ class ErrorRelativeGlobalDimensionlessSynthesis(Metric): kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - Return: - Tensor with ErrorRelativeGlobalDimensionlessSynthesis score - Example: >>> import torch >>> from torchmetrics import ErrorRelativeGlobalDimensionlessSynthesis >>> preds = torch.rand([16, 1, 16, 16], generator=torch.manual_seed(42)) >>> target = preds * 0.75 >>> ergas = ErrorRelativeGlobalDimensionlessSynthesis() >>> torch.round(ergas(preds, target)) tensor(154.) - - References: - [1] Qian Du; Nicholas H. Younan; Roger King; Vijay P. Shah, "On the Performance Evaluation of - Pan-Sharpening Techniques" in IEEE Geoscience and Remote Sensing Letters, vol. 4, no. 4, pp. 518-522, - 15 October 2007, doi: 10.1109/LGRS.2007.896328. """ higher_is_better: bool = False @@ -82,12 +84,7 @@ def __init__( self.reduction = reduction def update(self, preds: Tensor, target: Tensor) -> None: - """Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values - """ + """Update state with predictions and targets.""" preds, target = _ergas_update(preds, target) self.preds.append(preds) self.target.append(target)
diff --git a/src/torchmetrics/image/fid.py b/src/torchmetrics/image/fid.py index 397eabc4106..f59f4cd66be 100644 --- a/src/torchmetrics/image/fid.py +++ b/src/torchmetrics/image/fid.py @@ -131,13 +131,13 @@ class FrechetInceptionDistance(Metric): .. math:: FID = |\mu - \mu_w| + tr(\Sigma + \Sigma_w - 2(\Sigma \Sigma_w)^{\frac{1}{2}}) - where :math:`\mathcal{N}(\mu, \Sigma)` is the multivariate normal distribution estimated from Inception v3 [1] - features calculated on real life images and :math:`\mathcal{N}(\mu_w, \Sigma_w)` is the multivariate normal - distribution estimated from Inception v3 features calculated on generated (fake) images. The metric was - originally proposed in [1]. + where :math:`\mathcal{N}(\mu, \Sigma)` is the multivariate normal distribution estimated from Inception v3 + (`fid ref1`_) features calculated on real life images and :math:`\mathcal{N}(\mu_w, \Sigma_w)` is the + multivariate normal distribution estimated from Inception v3 features calculated on generated (fake) images. + The metric was originally proposed in `fid ref1`_. - Using the default feature extraction (Inception v3 using the original weights from [2]), the input is - expected to be mini-batches of 3-channel RGB images of shape (``3 x H x W``). If argument ``normalize`` + Using the default feature extraction (Inception v3 using the original weights from `fid ref2`_), the input is + expected to be mini-batches of 3-channel RGB images of shape ``(3 x H x W)``. If argument ``normalize`` is ``True`` images are expected to be dtype ``float`` and have values in the ``[0, 1]`` range, else if ``normalize`` is set to ``False`` images are expected to have dtype ``uint8`` and take values in the ``[0, 255]`` range. All images will be resized to 299 x 299 which is the size of the original training data. The boolean @@ -151,6 +151,15 @@ class FrechetInceptionDistance(Metric): is installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``imgs`` (:class:`~torch.Tensor`): tensor with images fed to the feature extractor + - ``real`` (:class:`~bool`): bool indicating if ``imgs`` belong to the real or the fake distribution + + As output of `forward` and `compute` the metric returns the following output + + - ``fid`` (:class:`~torch.Tensor`): float scalar tensor with mean FID value over samples + Args: feature: Either an integer or ``nn.Module``: @@ -158,22 +167,13 @@ - an integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: 64, 192, 768, 2048 - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns - an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + an ``(N,d)`` matrix where ``N`` is the batch size and ``d`` is the feature size. reset_real_features: Whether to also reset the real features. Since in many cases the real dataset does not change, the features can be cached to avoid recomputing them which is costly. Set this to ``False`` if your dataset does not change. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - References: - [1] Rethinking the Inception Architecture for Computer Vision - Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna - https://arxiv.org/abs/1512.00567 - - [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, - Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter - https://arxiv.org/abs/1706.08500 - Raises: ValueError: If ``feature`` is set to an ``int`` (default settings) and ``torch-fidelity`` is not installed @@ -259,12 +259,7 @@ def __init__( self.add_state("fake_features_num_samples", torch.tensor(0).long(), dist_reduce_fx="sum") def update(self, imgs: Tensor, real: bool) -> None: # type: ignore - """Update the state with extracted features. - - Args: - imgs: tensor with images feed to the feature extractor - real: bool indicating if ``imgs`` belong to the real or the fake distribution - """ + """Update the state with extracted features.""" imgs = (imgs * 255).byte() if self.normalize else imgs features = self.inception(imgs) self.orig_dtype = features.dtype
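Note: the input/output section added above pairs naturally with a usage example. This is a minimal sketch assuming ``torch-fidelity`` is installed and the default ``normalize=False`` (so images are ``uint8`` in ``[0, 255]``); the random tensors are placeholders:

import torch
from torchmetrics.image.fid import FrechetInceptionDistance

fid = FrechetInceptionDistance(feature=64)  # a small feature layer keeps the sketch cheap
real_imgs = torch.randint(0, 200, (8, 3, 299, 299), dtype=torch.uint8)
fake_imgs = torch.randint(100, 255, (8, 3, 299, 299), dtype=torch.uint8)
fid.update(real_imgs, real=True)   # accumulate features of the real distribution
fid.update(fake_imgs, real=False)  # accumulate features of the fake distribution
print(fid.compute())               # float scalar tensor with the FID value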
diff --git a/src/torchmetrics/image/inception.py b/src/torchmetrics/image/inception.py index 5022508567a..b95f2ea048d 100644 --- a/src/torchmetrics/image/inception.py +++ b/src/torchmetrics/image/inception.py @@ -36,10 +36,11 @@ class InceptionScore(Metric): where :math:`KL(p(y | x) || p(y))` is the KL divergence between the conditional distribution :math:`p(y|x)` and the marginal distribution :math:`p(y)`. Both the conditional and marginal distribution are calculated from features extracted from the images. The score is calculated on random splits of the images such that - both a mean and standard deviation of the score are returned. The metric was originally proposed in [1]. + both a mean and standard deviation of the score are returned. The metric was originally proposed in + `inception ref1`_. - Using the default feature extraction (Inception v3 using the original weights from [2]), the input is - expected to be mini-batches of 3-channel RGB images of shape (``3 x H x W``). If argument ``normalize`` + Using the default feature extraction (Inception v3 using the original weights from `inception ref2`_), the input + is expected to be mini-batches of 3-channel RGB images of shape ``(3 x H x W)``. If argument ``normalize`` is ``True`` images are expected to be dtype ``float`` and have values in the ``[0, 1]`` range, else if ``normalize`` is set to ``False`` images are expected to have dtype uint8 and take values in the ``[0, 255]`` range. All images will be resized to 299 x 299 which is the size of the original training data. @@ -48,6 +49,14 @@ class InceptionScore(Metric): is installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``imgs`` (:class:`~torch.Tensor`): tensor with images fed to the feature extractor + + As output of `forward` and `compute` the metric returns the following output + + - ``inception`` (:class:`~torch.Tensor`): tuple of scalar tensors with the mean and standard deviation of the inception score over samples + Args: feature: Either an str, integer or ``nn.Module``: @@ -55,25 +64,16 @@ - an str or integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: 'logits_unbiased', 64, 192, 768, 2048 - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns - an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + an ``(N,d)`` matrix where ``N`` is the batch size and ``d`` is the feature size. splits: integer determining how many splits the inception score calculation should be split among kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - References: - [1] Improved Techniques for Training GANs - Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, Xi Chen - https://arxiv.org/abs/1606.03498 - - [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, - Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter - https://arxiv.org/abs/1706.08500 - Raises: ValueError: If ``feature`` is set to an ``str`` or ``int`` and ``torch-fidelity`` is not installed ValueError: - If ``feature`` is set to an ``str`` or ``int`` and not one of ``['logits_unbiased', 64, 192, 768, 2048]`` + If ``feature`` is set to an ``str`` or ``int`` and not one of ``('logits_unbiased', 64, 192, 768, 2048)`` TypeError: If ``feature`` is not an ``str``, ``int`` or ``torch.nn.Module`` @@ -135,11 +135,7 @@ def __init__( self.add_state("features", [], dist_reduce_fx=None) def update(self, imgs: Tensor) -> None: # type: ignore - """Update the state with extracted features. - - Args: - imgs: tensor with images feed to the feature extractor - """ + """Update the state with extracted features.""" imgs = (imgs * 255).byte() if self.normalize else imgs features = self.inception(imgs) self.features.append(features)
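Note: since ``compute`` returns both a mean and a standard deviation (the output bullet above was corrected accordingly), a minimal sketch, again assuming ``torch-fidelity`` and ``uint8`` inputs:

import torch
from torchmetrics.image.inception import InceptionScore

inception = InceptionScore()  # defaults: feature='logits_unbiased', splits=10
imgs = torch.randint(0, 255, (16, 3, 299, 299), dtype=torch.uint8)
inception.update(imgs)
mean, std = inception.compute()  # mean and standard deviation over the random splits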
diff --git a/src/torchmetrics/image/kid.py b/src/torchmetrics/image/kid.py index fb7be620c5e..162493f0382 100644 --- a/src/torchmetrics/image/kid.py +++ b/src/torchmetrics/image/kid.py @@ -72,7 +72,7 @@ class KernelInceptionDistance(Metric): KID = MMD(f_{real}, f_{fake})^2 where :math:`MMD` is the maximum mean discrepancy and :math:`I_{real}, I_{fake}` are extracted features - from real and fake images, see [1] for more details. In particular, calculating the MMD requires the + from real and fake images, see `kid ref1`_ for more details. In particular, calculating the MMD requires the evaluation of a polynomial kernel function :math:`k` .. math:: @@ -81,8 +81,8 @@ which controls the distance between two features. In practice the MMD is calculated over a number of subsets to be able to both get the mean and standard deviation of KID. - Using the default feature extraction (Inception v3 using the original weights from [2]), the input is - expected to be mini-batches of 3-channel RGB images of shape (``3 x H x W``). If argument ``normalize`` + Using the default feature extraction (Inception v3 using the original weights from `kid ref2`_), the input is + expected to be mini-batches of 3-channel RGB images of shape ``(3 x H x W)``. If argument ``normalize`` is ``True`` images are expected to be dtype ``float`` and have values in the ``[0, 1]`` range, else if ``normalize`` is set to ``False`` images are expected to have dtype ``uint8`` and take values in the ``[0, 255]`` range. All images will be resized to 299 x 299 which is the size of the original training data. The boolean @@ -93,13 +93,23 @@ class KernelInceptionDistance(Metric): is installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``imgs`` (:class:`~torch.Tensor`): tensor with images fed to the feature extractor of shape ``(N,C,H,W)`` + - ``real`` (`bool`): bool indicating if ``imgs`` belong to the real or the fake distribution + + As output of `forward` and `compute` the metric returns the following output + + - ``kid_mean`` (:class:`~torch.Tensor`): float scalar tensor with mean KID value over subsets + - ``kid_std`` (:class:`~torch.Tensor`): float scalar tensor with standard deviation of KID over subsets + Args: feature: Either an str, integer or ``nn.Module``: - an str or integer will indicate the inceptionv3 feature layer to choose. Can be one of the following: 'logits_unbiased', 64, 192, 768, 2048 - an ``nn.Module`` for using a custom feature extractor. Expects that its forward method returns - an ``[N,d]`` matrix where ``N`` is the batch size and ``d`` is the feature size. + an ``(N,d)`` matrix where ``N`` is the batch size and ``d`` is the feature size. subsets: Number of subsets to calculate the mean and standard deviation scores over subset_size: Number of randomly picked samples in each subset @@ -111,20 +121,11 @@ your dataset does not change. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - References: - [1] Demystifying MMD GANs - Mikołaj Bińkowski, Danica J. Sutherland, Michael Arbel, Arthur Gretton - https://arxiv.org/abs/1801.01401 - - [2] GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium, - Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, Sepp Hochreiter - https://arxiv.org/abs/1706.08500 - Raises: ValueError: If ``feature`` is set to an ``int`` (default settings) and ``torch-fidelity`` is not installed ValueError: - If ``feature`` is set to an ``int`` not in ``[64, 192, 768, 2048]`` + If ``feature`` is set to an ``int`` not in ``(64, 192, 768, 2048)`` ValueError: If ``subsets`` is not an integer larger than 0 ValueError: @@ -230,12 +231,7 @@ def __init__( self.add_state("fake_features", [], dist_reduce_fx=None) def update(self, imgs: Tensor, real: bool) -> None: - """Update the state with extracted features. - - Args: - imgs: tensor with images feed to the feature extractor - real: bool indicating if ``imgs`` belong to the real or the fake distribution - """ + """Update the state with extracted features.""" imgs = (imgs * 255).byte() if self.normalize else imgs features = self.inception(imgs) @@ -245,8 +241,7 @@ def update(self, imgs: Tensor, real: bool) -> None: self.fake_features.append(features) def compute(self) -> Tuple[Tensor, Tensor]: - """Calculate KID score based on accumulated extracted features from the two distributions. Returns a tuple - of mean and standard deviation of KID scores calculated on subsets of extracted features. + """Calculate KID score based on accumulated extracted features from the two distributions. Implementation inspired by `Fid Score`_ """
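Note: a minimal KID sketch mirroring the documented inputs; ``subset_size`` must not exceed the number of accumulated samples, hence the small value here, and the tensors are placeholders:

import torch
from torchmetrics.image.kid import KernelInceptionDistance

kid = KernelInceptionDistance(subset_size=4)
real_imgs = torch.randint(0, 200, (8, 3, 299, 299), dtype=torch.uint8)
fake_imgs = torch.randint(100, 255, (8, 3, 299, 299), dtype=torch.uint8)
kid.update(real_imgs, real=True)
kid.update(fake_imgs, real=False)
kid_mean, kid_std = kid.compute()  # mean and standard deviation of KID over the subsets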
diff --git a/src/torchmetrics/image/lpip.py b/src/torchmetrics/image/lpip.py index 84ff4868c5a..994e52d6371 100644 --- a/src/torchmetrics/image/lpip.py +++ b/src/torchmetrics/image/lpip.py @@ -49,7 +49,7 @@ class LearnedPerceptualImagePatchSimilarity(Metric): pre-defined network. This measure has been shown to match human perception well. A low LPIPS score means that image patches are perceptually similar. - Both input image patches are expected to have shape `[N, 3, H, W]`. + Both input image patches are expected to have shape ``(N, 3, H, W)``. The minimum size of `H, W` depends on the chosen backbone (see `net_type` arg). .. note:: using this metric requires you to have ``lpips`` package installed. Either install as ``pip install torchmetrics[image]`` or ``pip install lpips`` .. note:: this metric is not scriptable when using ``torch<1.8``. Please update your pytorch installation if this is an issue. + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``img1`` (:class:`~torch.Tensor`): tensor with images of shape ``(N, 3, H, W)`` + - ``img2`` (:class:`~torch.Tensor`): tensor with images of shape ``(N, 3, H, W)`` + + As output of `forward` and `compute` the metric returns the following output + + - ``lpips`` (:class:`~torch.Tensor`): returns float scalar tensor with average LPIPS value over samples + Args: net_type: str indicating backbone network type to use. Choose between `'alex'`, `'vgg'` or `'squeeze'` reduction: str indicating how to reduce over the batch dimension. Choose between `'sum'` or `'mean'`. @@ -128,12 +137,7 @@ def __init__( self.add_state("total", torch.tensor(0.0), dist_reduce_fx="sum") def update(self, img1: Tensor, img2: Tensor) -> None: # type: ignore - """Update internal states with lpips score. - - Args: - img1: tensor with images of shape ``[N, 3, H, W]`` - img2: tensor with images of shape ``[N, 3, H, W]`` - """ + """Update internal states with lpips score.""" if not (_valid_img(img1, self.normalize) and _valid_img(img2, self.normalize)): raise ValueError( "Expected both input arguments to be normalized tensors with shape [N, 3, H, W]."
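Note: for the LPIPS inputs documented above, a minimal sketch (requires the ``lpips`` package; with the default ``normalize=False`` both images must be scaled to ``[-1, 1]``):

import torch
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity

lpips = LearnedPerceptualImagePatchSimilarity(net_type="vgg")
img1 = (torch.rand(10, 3, 100, 100) * 2) - 1  # rescale [0, 1) noise to [-1, 1)
img2 = (torch.rand(10, 3, 100, 100) * 2) - 1
print(lpips(img1, img2))  # average LPIPS over the batch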
diff --git a/src/torchmetrics/image/psnr.py b/src/torchmetrics/image/psnr.py index 1359eed1933..3e4b41958c3 100644 --- a/src/torchmetrics/image/psnr.py +++ b/src/torchmetrics/image/psnr.py @@ -29,6 +29,16 @@ class PeakSignalNoiseRatio(Metric): Where :math:`\text{MSE}` denotes the `mean-squared-error`_ function. + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model of shape ``(N,C,H,W)`` + - ``target`` (:class:`~torch.Tensor`): Ground truth values of shape ``(N,C,H,W)`` + + As output of `forward` and `compute` the metric returns the following output + + - ``psnr`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average PSNR value + over samples else returns tensor of shape ``(N,)`` with PSNR values per sample + Args: data_range: the range of the data. If None, it is determined from the data (max - min). @@ -56,9 +66,6 @@ >>> target = torch.tensor([[3.0, 2.0], [1.0, 0.0]]) >>> psnr(preds, target) tensor(2.5527) - - .. note:: - Half precision is only support on GPU for this metric """ is_differentiable: bool = True higher_is_better: bool = True @@ -103,12 +110,7 @@ def __init__( self.dim = tuple(dim) if isinstance(dim, Sequence) else dim def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values - """ + """Update state with predictions and targets.""" sum_squared_error, n_obs = _psnr_update(preds, target, dim=self.dim) if self.dim is None: if self.data_range is None:
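Note: the per-sample branch documented above (``reduction='none'``) only takes effect when ``dim`` is also given; a sketch of both modes with arbitrary values:

import torch
from torchmetrics import PeakSignalNoiseRatio

preds = torch.rand(4, 3, 16, 16)
target = torch.rand(4, 3, 16, 16)
psnr_scalar = PeakSignalNoiseRatio(data_range=1.0)
print(psnr_scalar(preds, target))  # float scalar tensor averaged over the whole batch
psnr_each = PeakSignalNoiseRatio(data_range=1.0, reduction="none", dim=(1, 2, 3))
print(psnr_each(preds, target))    # tensor of shape (N,) with one PSNR value per image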
diff --git a/src/torchmetrics/image/sam.py b/src/torchmetrics/image/sam.py index 7c5ef44e0d4..be01b73c798 100644 --- a/src/torchmetrics/image/sam.py +++ b/src/torchmetrics/image/sam.py @@ -23,9 +23,19 @@ class SpectralAngleMapper(Metric): - """The Spectral Angle Mapper determines the spectral similarity between image spectra and reference spectra by - calculating the angle between the spectra, where small angles between indicate high similarity and high angles - indicate low similarity. + """The metric `Spectral Angle Mapper`_ determines the spectral similarity between image spectra and reference + spectra by calculating the angle between the spectra, where small angles indicate high similarity and + high angles indicate low similarity. + + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model of shape ``(N,C,H,W)`` + - ``target`` (:class:`~torch.Tensor`): Ground truth values of shape ``(N,C,H,W)`` + + As output of `forward` and `compute` the metric returns the following output + + - ``sam`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average SAM value + over samples else returns tensor of shape ``(N,)`` with SAM values per sample Args: reduction: a method to reduce metric score over labels. @@ -47,11 +57,6 @@ class SpectralAngleMapper(Metric): >>> sam = SpectralAngleMapper() >>> sam(preds, target) tensor(0.5943) - - References: - [1] Roberta H. Yuhas, Alexander F. H. Goetz and Joe W. Boardman, "Discrimination among semi-arid - landscape endmembers using the Spectral Angle Mapper (SAM) algorithm" in PL, Summaries of the Third Annual JPL - Airborne Geoscience Workshop, vol. 1, June 1, 1992. """ higher_is_better: bool = False @@ -77,12 +82,7 @@ def __init__( self.reduction = reduction def update(self, preds: Tensor, target: Tensor) -> None: - """Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values - """ + """Update state with predictions and targets.""" preds, target = _sam_update(preds, target) self.preds.append(preds) self.target.append(target) diff --git a/src/torchmetrics/image/ssim.py b/src/torchmetrics/image/ssim.py index fa320a818cf..ecd39f33cad 100644 --- a/src/torchmetrics/image/ssim.py +++ b/src/torchmetrics/image/ssim.py @@ -25,6 +25,16 @@ class StructuralSimilarityIndexMeasure(Metric): """Computes Structural Similarity Index Measure (SSIM_). + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model + - ``target`` (:class:`~torch.Tensor`): Ground truth values + + As output of `forward` and `compute` the metric returns the following output + + - ``ssim`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average SSIM value + over samples else returns tensor of shape ``(N,)`` with SSIM values per sample + Args: preds: estimated image target: ground truth image @@ -49,9 +59,6 @@ Mutually exclusive with ``return_full_image`` kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - Return: - Tensor with SSIM score - Example: >>> from torchmetrics import StructuralSimilarityIndexMeasure >>> import torch @@ -109,12 +116,7 @@ def __init__( self.return_contrast_sensitivity = return_contrast_sensitivity def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values - """ + """Update state with predictions and targets.""" preds, target = _ssim_check_inputs(preds, target) similarity_pack = _ssim_update( preds, @@ -163,6 +165,16 @@ class MultiScaleStructuralSimilarityIndexMeasure(Metric): """Computes `MultiScaleSSIM`_, Multi-scale Structural Similarity Index Measure, which is a generalization of Structural Similarity Index Measure by incorporating image details at different resolution scores. + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model + - ``target`` (:class:`~torch.Tensor`): Ground truth values + + As output of `forward` and `compute` the metric returns the following output + + - ``msssim`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average MSSSIM + value over samples else returns tensor of shape ``(N,)`` with MSSSIM values per sample + Args: gaussian_kernel: If ``True`` (default), a gaussian kernel is used, if false a uniform kernel is used kernel_size: size of the gaussian kernel @@ -202,10 +214,6 @@ >>> ms_ssim = MultiScaleStructuralSimilarityIndexMeasure(data_range=1.0) >>> ms_ssim(preds, target) tensor(0.9627) - - References: - [1] Multi-Scale Structural Similarity For Image Quality Assessment by Zhou Wang, Eero P. Simoncelli and Alan C. - Bovik `MultiScaleSSIM`_ """ higher_is_better: bool = True @@ -270,12 +278,7 @@ def __init__( self.normalize = normalize def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """Update state with predictions and targets. - - Args: - preds: Predictions from model of shape ``[N, C, H, W]`` - target: Ground truth values of shape ``[N, C, H, W]`` - """ + """Update state with predictions and targets.""" preds, target = _ssim_check_inputs(preds, target) similarity = _multiscale_ssim_update( preds,
diff --git a/src/torchmetrics/image/tv.py b/src/torchmetrics/image/tv.py index aef5e8b97df..63dc733186d 100644 --- a/src/torchmetrics/image/tv.py +++ b/src/torchmetrics/image/tv.py @@ -25,6 +25,15 @@ class TotalVariation(Metric): """Computes Total Variation loss (`TV`_). + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``img`` (:class:`~torch.Tensor`): A tensor of shape ``(N, C, H, W)`` consisting of images + + As output of `forward` and `compute` the metric returns the following output + + - ``tv`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average TV value + over samples else returns tensor of shape ``(N,)`` with TV values per sample + Args: reduction: a method to reduce metric score over samples - @@ -65,11 +74,7 @@ def __init__(self, reduction: Literal["mean", "sum", "none", None] = "sum", **kw self.add_state("num_elements", default=tensor(0, dtype=torch.int), dist_reduce_fx="sum") def update(self, img: Tensor) -> None: # type: ignore - """Update current score with batch of input images. - - Args: - img: A `Tensor` of shape `(N, C, H, W)` consisting of images - """ + """Update current score with batch of input images.""" score, num_elements = _total_variation_update(img) if self.reduction is None or self.reduction == "none": self.score.append(score)
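Note: for ``TotalVariation``, whose default ``reduction`` is ``"sum"`` per the truncated ``__init__`` signature visible above, a minimal sketch:

import torch
from torchmetrics import TotalVariation

_ = torch.manual_seed(42)
tv = TotalVariation()           # default reduction="sum"
img = torch.rand(5, 3, 28, 28)  # batch of images, shape (N, C, H, W)
print(tv(img))                  # scalar tensor with the summed total variation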
diff --git a/src/torchmetrics/image/uqi.py b/src/torchmetrics/image/uqi.py index 0832c045847..a6062777f5b 100644 --- a/src/torchmetrics/image/uqi.py +++ b/src/torchmetrics/image/uqi.py @@ -25,6 +25,16 @@ class UniversalImageQualityIndex(Metric): """Computes Universal Image Quality Index (UniversalImageQualityIndex_). + As input to ``forward`` and ``update`` the metric accepts the following input + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model of shape ``(N,C,H,W)`` + - ``target`` (:class:`~torch.Tensor`): Ground truth values of shape ``(N,C,H,W)`` + + As output of `forward` and `compute` the metric returns the following output + + - ``uiqi`` (:class:`~torch.Tensor`): if ``reduction!='none'`` returns float scalar tensor with average UIQI value + over samples else returns tensor of shape ``(N,)`` with UIQI values per sample + Args: kernel_size: size of the gaussian kernel sigma: Standard deviation of the gaussian kernel @@ -81,12 +91,7 @@ def __init__( self.reduction = reduction def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values - """ + """Update state with predictions and targets.""" preds, target = _uqi_update(preds, target) self.preds.append(preds) self.target.append(target) diff --git a/src/torchmetrics/metric.py b/src/torchmetrics/metric.py index 1d42046ec49..9381d1c4046 100644 --- a/src/torchmetrics/metric.py +++ b/src/torchmetrics/metric.py @@ -915,7 +915,6 @@ def update(self, *args: Any, **kwargs: Any) -> None: self.metric_b.update(*args, **self.metric_b._filter_kwargs(**kwargs)) def compute(self) -> Any: - # also some parsing for kwargs? if isinstance(self.metric_a, Metric): val_a = self.metric_a.compute() @@ -934,7 +933,6 @@ @torch.jit.unused def forward(self, *args: Any, **kwargs: Any) -> Any: - val_a = ( self.metric_a(*args, **self.metric_a._filter_kwargs(**kwargs)) if isinstance(self.metric_a, Metric) diff --git a/tests/unittests/detection/test_map.py b/tests/unittests/detection/test_map.py index 267d5431873..014defb5da3 100644 --- a/tests/unittests/detection/test_map.py +++ b/tests/unittests/detection/test_map.py @@ -328,8 +328,8 @@ def _compare_fn_segm(preds, target) -> dict: class TestMAP(MetricTester): """Test the MAP metric for object detection predictions. - Results are compared to original values from the pycocotools implementation. - A subset of the first 10 fake predictions of the official repo is used: + Results are compared to original values from the pycocotools implementation. A subset of the first 10 fake + predictions of the official repo is used: https://github.com/cocodataset/cocoapi/blob/master/results/instances_val2014_fakebbox100_results.json """ diff --git a/tests/unittests/retrieval/test_precision.py b/tests/unittests/retrieval/test_precision.py index 53d889bbab4..eb38c90de00 100644 --- a/tests/unittests/retrieval/test_precision.py +++ b/tests/unittests/retrieval/test_precision.py @@ -39,8 +39,8 @@ def _precision_at_k(target: np.ndarray, preds: np.ndarray, k: int = None, adaptive_k: bool = False): """Didn't find a reliable implementation of Precision in Information Retrieval, so, reimplementing here. 
- A good explanation can be found - `here _`. + A good explanation can be found `here + _`. """ assert target.shape == preds.shape assert len(target.shape) == 1 # works only with single dimension inputs diff --git a/tests/unittests/retrieval/test_r_precision.py b/tests/unittests/retrieval/test_r_precision.py index a2393b53d60..e238a1d10bd 100644 --- a/tests/unittests/retrieval/test_r_precision.py +++ b/tests/unittests/retrieval/test_r_precision.py @@ -35,8 +35,8 @@ def _r_precision(target: np.ndarray, preds: np.ndarray): """Didn't find a reliable implementation of R-Precision in Information Retrieval, so, reimplementing here. - A good explanation can be found - `here _`. + A good explanation can be found `here + _`. """ assert target.shape == preds.shape assert len(target.shape) == 1 # works only with single dimension inputs diff --git a/tests/unittests/text/test_mer.py b/tests/unittests/text/test_mer.py index ef1dc266cda..6c00ba0597f 100644 --- a/tests/unittests/text/test_mer.py +++ b/tests/unittests/text/test_mer.py @@ -30,7 +30,6 @@ class TestMatchErrorRate(TextTester): @pytest.mark.parametrize("ddp", [False, True]) @pytest.mark.parametrize("dist_sync_on_step", [False, True]) def test_mer_class(self, ddp, dist_sync_on_step, preds, targets): - self.run_class_metric_test( ddp=ddp, preds=preds, @@ -41,7 +40,6 @@ def test_mer_class(self, ddp, dist_sync_on_step, preds, targets): ) def test_mer_functional(self, preds, targets): - self.run_functional_metric_test( preds, targets, @@ -50,7 +48,6 @@ def test_mer_functional(self, preds, targets): ) def test_mer_differentiability(self, preds, targets): - self.run_differentiability_test( preds=preds, targets=targets, diff --git a/tests/unittests/text/test_wer.py b/tests/unittests/text/test_wer.py index 7487c0d2c0d..f19c673df11 100644 --- a/tests/unittests/text/test_wer.py +++ b/tests/unittests/text/test_wer.py @@ -30,7 +30,6 @@ class TestWER(TextTester): @pytest.mark.parametrize("ddp", [False, True]) @pytest.mark.parametrize("dist_sync_on_step", [False, True]) def test_wer_class(self, ddp, dist_sync_on_step, preds, targets): - self.run_class_metric_test( ddp=ddp, preds=preds, @@ -41,7 +40,6 @@ def test_wer_class(self, ddp, dist_sync_on_step, preds, targets): ) def test_wer_functional(self, preds, targets): - self.run_functional_metric_test( preds, targets, @@ -50,7 +48,6 @@ def test_wer_functional(self, preds, targets): ) def test_wer_differentiability(self, preds, targets): - self.run_differentiability_test( preds=preds, targets=targets, diff --git a/tests/unittests/text/test_wil.py b/tests/unittests/text/test_wil.py index 6f71705fb12..bf47d7c8256 100644 --- a/tests/unittests/text/test_wil.py +++ b/tests/unittests/text/test_wil.py @@ -26,7 +26,6 @@ class TestWordInfoLost(TextTester): @pytest.mark.parametrize("ddp", [False, True]) @pytest.mark.parametrize("dist_sync_on_step", [False, True]) def test_wil_class(self, ddp, dist_sync_on_step, preds, targets): - self.run_class_metric_test( ddp=ddp, preds=preds, @@ -37,7 +36,6 @@ def test_wil_class(self, ddp, dist_sync_on_step, preds, targets): ) def test_wil_functional(self, preds, targets): - self.run_functional_metric_test( preds, targets, @@ -46,7 +44,6 @@ def test_wil_functional(self, preds, targets): ) def test_wil_differentiability(self, preds, targets): - self.run_differentiability_test( preds=preds, targets=targets, diff --git a/tests/unittests/text/test_wip.py b/tests/unittests/text/test_wip.py index 1320075b6b3..0c81d29274f 100644 --- a/tests/unittests/text/test_wip.py +++ 
b/tests/unittests/text/test_wip.py @@ -26,7 +26,6 @@ class TestWordInfoPreserved(TextTester): @pytest.mark.parametrize("ddp", [False, True]) @pytest.mark.parametrize("dist_sync_on_step", [False, True]) def test_wip_class(self, ddp, dist_sync_on_step, preds, targets): - self.run_class_metric_test( ddp=ddp, preds=preds, @@ -37,7 +36,6 @@ def test_wip_class(self, ddp, dist_sync_on_step, preds, targets): ) def test_wip_functional(self, preds, targets): - self.run_functional_metric_test( preds, targets, @@ -46,7 +44,6 @@ def test_wip_functional(self, preds, targets): ) def test_wip_differentiability(self, preds, targets): - self.run_differentiability_test( preds=preds, targets=targets,
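Note: the two deletions in ``src/torchmetrics/metric.py`` above sit on the compositional-metric path, i.e. the ``compute``/``forward`` of a metric produced by metric arithmetic. A minimal sketch of that behaviour (the metric classes are standard torchmetrics exports; the tensors are toy data):

import torch
from torchmetrics import MeanAbsoluteError, MeanSquaredError

mse, mae = MeanSquaredError(), MeanAbsoluteError()
combined = mse + mae  # metric arithmetic builds a compositional metric
preds = torch.tensor([2.5, 0.0, 2.0])
target = torch.tensor([3.0, -0.5, 2.0])
mse.update(preds, target)
mae.update(preds, target)
print(combined.compute())  # equals mse.compute() + mae.compute()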