New metric: Calinski Harabasz Score (Lightning-AI#2036)

* docs * functional * module * tests * changelog * try another link * mypy * remove broken link * change image * use new inputs * fix * fix flaky tests --------- Co-authored-by: Daniel Stancl <[email protected]> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
matsumotosan · Sep 4, 2023 · b10cc2f · b10cc2f
1 parent c139a96
commit b10cc2f
Show file tree

Hide file tree

Showing 13 changed files with 295 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,13 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Added `MutualInformationScore` metric to cluster package ([#2008](https://github.com/Lightning-AI/torchmetrics/pull/2008)
+- Added `MutualInformationScore` metric to cluster package ([#2008](https://github.com/Lightning-AI/torchmetrics/pull/2008))
 
 
-- Added `RandScore` metric to cluster package ([#2025](https://github.com/Lightning-AI/torchmetrics/pull/2025)
+- Added `RandScore` metric to cluster package ([#2025](https://github.com/Lightning-AI/torchmetrics/pull/2025))
 
 
-- Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029)
+- Added `CalinskiHarabaszScore` metric to cluster package ([#2036](https://github.com/Lightning-AI/torchmetrics/pull/2036))
+
+
+- Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029))
+
 
 
 ### Changed

diff --git a/docs/source/clustering/calinski_harabasz_score.rst b/docs/source/clustering/calinski_harabasz_score.rst
@@ -0,0 +1,21 @@
+.. customcarditem::
+   :header: Calinski Harabasz Score
+   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
+   :tags: Clustering
+
+.. include:: ../links.rst
+
+#######################
+Calinski Harabasz Score
+#######################
+
+Module Interface
+________________
+
+.. autoclass:: torchmetrics.clustering.CalinskiHarabaszScore
+    :exclude-members: update, compute
+
+Functional Interface
+____________________
+
+.. autofunction:: torchmetrics.functional.clustering.calinski_harabasz_score
diff --git a/docs/source/clustering/mutual_info_score.rst b/docs/source/clustering/mutual_info_score.rst
@@ -1,6 +1,6 @@
 .. customcarditem::
    :header: Mutual Information Score
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg
+   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
    :tags: Clustering
 
 .. include:: ../links.rst

diff --git a/docs/source/clustering/normalized_mutual_info_score.rst b/docs/source/clustering/normalized_mutual_info_score.rst
@@ -1,6 +1,6 @@
 .. customcarditem::
    :header: Normalized Mutual Information Score
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg
+   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
    :tags: Clustering
 
 .. include:: ../links.rst

diff --git a/docs/source/clustering/rand_score.rst b/docs/source/clustering/rand_score.rst
@@ -1,6 +1,6 @@
 .. customcarditem::
    :header: Rand Score
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg
+   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
    :tags: Clustering
 
 .. include:: ../links.rst

diff --git a/src/torchmetrics/clustering/__init__.py b/src/torchmetrics/clustering/__init__.py
@@ -11,11 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from torchmetrics.clustering.calinski_harabasz_score import CalinskiHarabaszScore
 from torchmetrics.clustering.mutual_info_score import MutualInfoScore
 from torchmetrics.clustering.normalized_mutual_info_score import NormalizedMutualInfoScore
 from torchmetrics.clustering.rand_score import RandScore
 
 __all__ = [
+    "CalinskiHarabaszScore",
     "MutualInfoScore",
     "NormalizedMutualInfoScore",
     "RandScore",

diff --git a/src/torchmetrics/clustering/calinski_harabasz_score.py b/src/torchmetrics/clustering/calinski_harabasz_score.py
@@ -0,0 +1,126 @@
+# Copyright The Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, List, Optional, Sequence, Union
+
+from torch import Tensor
+
+from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score
+from torchmetrics.metric import Metric
+from torchmetrics.utilities.data import dim_zero_cat
+from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
+from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE
+
+# The ``plot`` docstring examples need matplotlib. When it is not available, list that
+# docstring under ``__doctest_skip__`` (presumably read by the project's doctest runner
+# to skip it -- confirm against the test configuration).
+if not _MATPLOTLIB_AVAILABLE:
+    __doctest_skip__ = ["CalinskiHarabaszScore.plot"]
+
+
+class CalinskiHarabaszScore(Metric):
+    r"""Compute Calinski Harabasz Score (also known as variance ratio criterion) for clustering algorithms.
+
+    .. math::
+        CHS(X, L) = \frac{B(X, L) \cdot (n_\text{samples} - n_\text{labels})}{W(X, L) \cdot (n_\text{labels} - 1)}
+
+    where :math:`B(X, L)` is the between-cluster dispersion, which is the squared distance between the cluster centers
+    and the dataset mean, weighted by the size of the clusters, :math:`n_\text{samples}` is the number of samples,
+    :math:`n_\text{labels}` is the number of labels, and :math:`W(X, L)` is the within-cluster dispersion e.g. the
+    sum of squared distances between each samples and its closest cluster center.
+
+    This clustering metric is an intrinsic measure, because it does not rely on ground truth labels for the evaluation.
+    Instead it examines how well the clusters are separated from each other. The score is higher when clusters are dense
+    and well separated, which relates to a standard concept of a cluster.
+
+    As input to ``forward`` and ``update`` the metric accepts the following input:
+
+    - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. ``d`` is the
+      dimensionality of the embedding space.
+    - ``labels`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with cluster labels
+
+    As output of ``forward`` and ``compute`` the metric returns the following output:
+
+    - ``chs`` (:class:`~torch.Tensor`): A tensor with the Calinski Harabasz Score
+
+    Args:
+        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.clustering import CalinskiHarabaszScore
+        >>> _ = torch.manual_seed(42)
+        >>> data = torch.randn(10, 3)
+        >>> labels = torch.randint(3, (10,))
+        >>> metric = CalinskiHarabaszScore()
+        >>> metric(data, labels)
+        tensor(3.0053)
+
+    """
+    is_differentiable: bool = True
+    higher_is_better: bool = True
+    full_state_update: bool = False
+    plot_lower_bound: float = 0.0
+    data: List[Tensor]
+    labels: List[Tensor]
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+
+        self.add_state("data", default=[], dist_reduce_fx="cat")
+        self.add_state("labels", default=[], dist_reduce_fx="cat")
+
+    def update(self, data: Tensor, labels: Tensor) -> None:
+        """Update metric state with new data and labels."""
+        self.data.append(data)
+        self.labels.append(labels)
+
+    def compute(self) -> Tensor:
+        """Compute the Calinski Harabasz Score over all data and labels."""
+        return calinski_harabasz_score(dim_zero_cat(self.data), dim_zero_cat(self.labels))
+
+    def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE:
+        """Plot a single or multiple values from the metric.
+
+        Args:
+            val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
+                If no value is provided, will automatically call `metric.compute` and plot that result.
+            ax: An matplotlib axis object. If provided will add plot to that axis
+
+        Returns:
+            Figure and Axes object
+
+        Raises:
+            ModuleNotFoundError:
+                If `matplotlib` is not installed
+
+        .. plot::
+            :scale: 75
+
+            >>> # Example plotting a single value
+            >>> import torch
+            >>> from torchmetrics.clustering import RandScore
+            >>> metric = RandScore()
+            >>> metric.update(torch.randint(0, 4, (10,)), torch.randint(0, 4, (10,)))
+            >>> fig_, ax_ = metric.plot(metric.compute())
+
+        .. plot::
+            :scale: 75
+
+            >>> # Example plotting multiple values
+            >>> import torch
+            >>> from torchmetrics.clustering import RandScore
+            >>> metric = RandScore()
+            >>> for _ in range(10):
+            ...     metric.update(torch.randint(0, 4, (10,)), torch.randint(0, 4, (10,)))
+            >>> fig_, ax_ = metric.plot(metric.compute())
+
+        """
+        return self._plot(val, ax)
diff --git a/src/torchmetrics/detection/giou.py b/src/torchmetrics/detection/giou.py
@@ -174,7 +174,7 @@ def plot(
             ... ]
             >>> target = lambda : [
             ...    {
-            ...        "boxes": torch.tensor([[300.00, 100.00, 315.00, 150.00]]) + torch.randint(-10, 10, (1, 4)),
+            ...        "boxes": torch.tensor([[300.00, 100.00, 335.00, 150.00]]) + torch.randint(-10, 10, (1, 4)),
             ...        "labels": torch.tensor([5]),
             ...    }
             ... ]

diff --git a/src/torchmetrics/functional/clustering/__init__.py b/src/torchmetrics/functional/clustering/__init__.py
@@ -11,11 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score
 from torchmetrics.functional.clustering.mutual_info_score import mutual_info_score
 from torchmetrics.functional.clustering.normalized_mutual_info_score import normalized_mutual_info_score
 from torchmetrics.functional.clustering.rand_score import rand_score
 
 __all__ = [
+    "calinski_harabasz_score",
     "mutual_info_score",
     "normalized_mutual_info_score",
     "rand_score",

diff --git a/src/torchmetrics/functional/clustering/calinski_harabasz_score.py b/src/torchmetrics/functional/clustering/calinski_harabasz_score.py
@@ -0,0 +1,73 @@
+# Copyright The Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import Tensor
+
+
+def _calinski_harabasz_score_validate_input(data: Tensor, labels: Tensor) -> None:
+    """Validate that the input data and labels have correct shape and type."""
+    if data.ndim != 2:
+        raise ValueError(f"Expected 2D data, got {data.ndim}D data instead")
+    if not data.is_floating_point():
+        raise ValueError(f"Expected floating point data, got {data.dtype} data instead")
+    if labels.ndim != 1:
+        raise ValueError(f"Expected 1D labels, got {labels.ndim}D labels instead")
+
+
+def calinski_harabasz_score(data: Tensor, labels: Tensor) -> Tensor:
+    """Compute the Calinski Harabasz Score (also known as variance ratio criterion) for clustering algorithms.
+
+    Args:
+        data: float tensor with shape ``(N,d)`` with the embedded data.
+        labels: single integer tensor with shape ``(N,)`` with cluster labels
+
+    Returns:
+        Scalar tensor with the Calinski Harabasz Score
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.functional.clustering import calinski_harabasz_score
+        >>> _ = torch.manual_seed(42)
+        >>> data = torch.randn(10, 3)
+        >>> labels = torch.randint(0, 2, (10,))
+        >>> calinski_harabasz_score(data, labels)
+        tensor(3.4998)
+
+    """
+    _calinski_harabasz_score_validate_input(data, labels)
+
+    # convert to zero indexed labels
+    unique_labels, labels = torch.unique(labels, return_inverse=True)
+    n_labels = len(unique_labels)
+
+    n_samples = data.shape[0]
+
+    if not 1 < n_labels < n_samples:
+        raise ValueError(
+            "Number of detected clusters must be greater than one and less than the number of samples."
+            f"Got {n_labels} clusters and {n_samples} samples."
+        )
+
+    mean = data.mean(dim=0)
+    between_cluster_dispersion = torch.tensor(0.0, device=data.device)
+    within_cluster_dispersion = torch.tensor(0.0, device=data.device)
+    for k in range(n_labels):
+        cluster_k = data[labels == k, :]
+        mean_k = cluster_k.mean(dim=0)
+        between_cluster_dispersion += ((mean_k - mean) ** 2).sum() * cluster_k.shape[0]
+        within_cluster_dispersion += ((cluster_k - mean_k) ** 2).sum()
+
+    if within_cluster_dispersion == 0:
+        return torch.tensor(1.0, device=data.device, dtype=torch.float32)
+    return between_cluster_dispersion * (n_samples - n_labels) / (within_cluster_dispersion * (n_labels - 1.0))
diff --git a/tests/unittests/clustering/test_calinski_harabasz_score.py b/tests/unittests/clustering/test_calinski_harabasz_score.py
@@ -0,0 +1,56 @@
+# Copyright The Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest
+from sklearn.metrics import calinski_harabasz_score as sklearn_calinski_harabasz_score
+from torchmetrics.clustering.calinski_harabasz_score import CalinskiHarabaszScore
+from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score
+
+from unittests.clustering.inputs import _single_target_intrinsic1, _single_target_intrinsic2
+from unittests.helpers import seed_all
+from unittests.helpers.testers import MetricTester
+
+seed_all(42)
+
+
+@pytest.mark.parametrize(
+    "preds, target",
+    [
+        (_single_target_intrinsic1.preds, _single_target_intrinsic1.target),
+        (_single_target_intrinsic2.preds, _single_target_intrinsic2.target),
+    ],
+)
+class TestCalinskiHarabaszScore(MetricTester):
+    """Test class for `CalinskiHarabaszScore` metric."""
+
+    atol = 1e-5
+
+    @pytest.mark.parametrize("ddp", [True, False])
+    def test_calinski_harabasz_score(self, preds, target, ddp):
+        """Test class implementation of metric."""
+        self.run_class_metric_test(
+            ddp=ddp,
+            preds=preds,
+            target=target,
+            metric_class=CalinskiHarabaszScore,
+            reference_metric=sklearn_calinski_harabasz_score,
+        )
+
+    def test_calinski_harabasz_score_functional(self, preds, target):
+        """Test functional implementation of metric."""
+        self.run_functional_metric_test(
+            preds=preds,
+            target=target,
+            metric_functional=calinski_harabasz_score,
+            reference_metric=sklearn_calinski_harabasz_score,
+        )
diff --git a/tests/unittests/image/test_perceptual_path_length.py b/tests/unittests/image/test_perceptual_path_length.py
@@ -164,6 +164,7 @@ def num_classes(self):
         ),
     ],
 )
+@skip_on_running_out_of_memory()
 def test_raises_error_on_wrong_generator(generator, errortype, match):
     """Test that appropriate errors are raised on wrong generator."""
     with pytest.raises(errortype, match=match):
@@ -176,6 +177,7 @@ def test_raises_error_on_wrong_generator(generator, errortype, match):
 
 @pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@skip_on_running_out_of_memory()
 def test_compare():
     """Test against torch_fidelity.
 

diff --git a/tests/unittests/utilities/test_plot.py b/tests/unittests/utilities/test_plot.py
@@ -91,7 +91,7 @@
     MultilabelROC,
     MultilabelSpecificity,
 )
-from torchmetrics.clustering import MutualInfoScore, NormalizedMutualInfoScore, RandScore
+from torchmetrics.clustering import CalinskiHarabaszScore, MutualInfoScore, NormalizedMutualInfoScore, RandScore
 from torchmetrics.detection import PanopticQuality
 from torchmetrics.detection.mean_ap import MeanAveragePrecision
 from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio
@@ -617,6 +617,7 @@
         pytest.param(TranslationEditRate, _text_input_3, _text_input_4, id="translation edit rate"),
         pytest.param(MutualInfoScore, _nominal_input, _nominal_input, id="mutual info score"),
         pytest.param(RandScore, _nominal_input, _nominal_input, id="rand score"),
+        pytest.param(CalinskiHarabaszScore, lambda: torch.randn(100, 3), _nominal_input, id="calinski harabasz score"),
         pytest.param(NormalizedMutualInfoScore, _nominal_input, _nominal_input, id="normalized mutual info score"),
     ],
 )