
New metric: Retrieval AUROC #2251

Merged
21 commits merged from newmetric/retrieval_auroc into master on Dec 22, 2023
Commits (21)
bef0ff3  add doc pages (SkafteNicki, Dec 1, 2023)
f4b0f19  add init files (SkafteNicki, Dec 1, 2023)
44eac59  functional implementation (SkafteNicki, Dec 1, 2023)
813cc7e  Add RetrievalAUROC metric to torchmetrics (SkafteNicki, Dec 1, 2023)
e213990  Refactor retrieval AUROC metric to support top-k (SkafteNicki, Dec 1, 2023)
48a554b  Add unit tests for RetrievalAUROC metric (SkafteNicki, Dec 1, 2023)
571f2a2  Add Changelog (SkafteNicki, Dec 1, 2023)
1b4ab1e  Merge branch 'master' into newmetric/retrieval_auroc (SkafteNicki, Dec 1, 2023)
f817a2a  Merge branch 'master' into newmetric/retrieval_auroc (Borda, Dec 18, 2023)
9c8dbb2  Merge branch 'master' into newmetric/retrieval_auroc (SkafteNicki, Dec 20, 2023)
1ed4128  fix spelling when skipping (SkafteNicki, Dec 20, 2023)
db60cda  Merge branch 'newmetric/retrieval_auroc' of https://github.com/Lightn… (SkafteNicki, Dec 20, 2023)
8baf448  fix tests (SkafteNicki, Dec 20, 2023)
f888b0b  Update max_fpr parameter in test_auroc.py (SkafteNicki, Dec 20, 2023)
f027eee  Merge branch 'master' into newmetric/retrieval_auroc (SkafteNicki, Dec 20, 2023)
1acb492  Merge branch 'master' into newmetric/retrieval_auroc (Borda, Dec 21, 2023)
d464e41  Merge branch 'master' into newmetric/retrieval_auroc (SkafteNicki, Dec 22, 2023)
da38a85  Merge branch 'master' into newmetric/retrieval_auroc (SkafteNicki, Dec 22, 2023)
0546c9a  Merge branch 'master' into newmetric/retrieval_auroc (mergify[bot], Dec 22, 2023)
ef7da19  Literal (Borda, Dec 22, 2023)
d95ed39  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 22, 2023)
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -37,6 +37,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `Spatial Correlation Coefficient` to image subpackage ([#2248](https://github.com/Lightning-AI/torchmetrics/pull/2248))


- Added `RetrievalAUROC` metric ([#2251](https://github.com/Lightning-AI/torchmetrics/pull/2251))


### Changed

- Changed minimum supported Pytorch version from 1.8 to 1.10 ([#2145](https://github.com/Lightning-AI/torchmetrics/pull/2145))
21 changes: 21 additions & 0 deletions docs/source/retrieval/auroc.rst
@@ -0,0 +1,21 @@
.. customcarditem::
:header: Retrieval AUROC
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/text_classification.svg
:tags: Retrieval

.. include:: ../links.rst

###############
Retrieval AUROC
###############

Module Interface
________________

.. autoclass:: torchmetrics.retrieval.RetrievalAUROC
:exclude-members: update, compute

Functional Interface
____________________

.. autofunction:: torchmetrics.functional.retrieval.retrieval_auroc
8 changes: 4 additions & 4 deletions docs/source/retrieval/precision_recall_curve.rst
@@ -1,13 +1,13 @@
.. customcarditem::
:header: Precision Recall Curve
:header: Retrieval Precision Recall Curve
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/text_classification.svg
:tags: Retrieval

.. include:: ../links.rst

######################
Precision Recall Curve
######################
################################
Retrieval Precision Recall Curve
################################

Module Interface
________________
3 changes: 2 additions & 1 deletion src/torchmetrics/functional/retrieval/__init__.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torchmetrics.functional.retrieval.auroc import retrieval_auroc
from torchmetrics.functional.retrieval.average_precision import retrieval_average_precision
from torchmetrics.functional.retrieval.fall_out import retrieval_fall_out
from torchmetrics.functional.retrieval.hit_rate import retrieval_hit_rate
@@ -23,6 +23,7 @@
from torchmetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank

__all__ = [
"retrieval_auroc",
"retrieval_average_precision",
"retrieval_fall_out",
"retrieval_hit_rate",
64 changes: 64 additions & 0 deletions src/torchmetrics/functional/retrieval/auroc.py
@@ -0,0 +1,64 @@
# Copyright The Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional

from torch import Tensor, tensor

from torchmetrics.functional.classification.auroc import binary_auroc
from torchmetrics.utilities.checks import _check_retrieval_functional_inputs


def retrieval_auroc(
preds: Tensor, target: Tensor, top_k: Optional[int] = None, max_fpr: Optional[float] = None
) -> Tensor:
"""Compute area under the receiver operating characteristic curve (AUROC) for information retrieval.

``preds`` and ``target`` should be of the same shape and live on the same device. If no entry in ``target``
is ``True``, ``0`` is returned. ``target`` must be either ``bool`` or ``integer``, and ``preds`` must be
``float``; otherwise an error is raised.

Args:
preds: estimated probabilities of each document to be relevant.
target: ground truth about each document being relevant or not.
top_k: consider only the top k elements (default: ``None``, which considers them all)
max_fpr: If not ``None``, calculates standardized partial AUC over the range ``[0, max_fpr]``.

Return:
a single-value tensor with the AUROC of the predictions ``preds`` w.r.t. the labels ``target``.

Raises:
ValueError:
If ``top_k`` is neither ``None`` nor a positive integer.

Example:
>>> from torch import tensor
>>> from torchmetrics.functional.retrieval import retrieval_auroc
>>> preds = tensor([0.2, 0.3, 0.5])
>>> target = tensor([True, False, True])
>>> retrieval_auroc(preds, target)
tensor(0.5000)

"""
preds, target = _check_retrieval_functional_inputs(preds, target)

top_k = top_k or preds.shape[-1]
if not (isinstance(top_k, int) and top_k > 0):
raise ValueError("`top_k` has to be a positive integer or None")

top_k_idx = preds.topk(min(top_k, preds.shape[-1]), sorted=True, dim=-1)[1]
target = target[top_k_idx]
if (0 not in target) or (1 not in target):
return tensor(0.0, device=preds.device, dtype=preds.dtype)

preds = preds[top_k_idx]
return binary_auroc(preds, target.int(), max_fpr=max_fpr)
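As a supplement to the doctest above (not part of the diff), here is a minimal sketch of how the ``top_k`` argument and the single-class short-circuit in this implementation behave. The expected outputs in the comments are worked out from the pairwise-ranking definition of AUROC and the code in this file:

```python
import torch

from torchmetrics.functional.retrieval import retrieval_auroc

preds = torch.tensor([0.2, 0.3, 0.5])
target = torch.tensor([True, False, True])

# All three documents: the positive scored 0.2 ranks below the negative
# scored 0.3, so only one of the two positive/negative pairs is ordered
# correctly -> AUROC = 0.5.
print(retrieval_auroc(preds, target))  # tensor(0.5000)

# top_k=2 keeps only the two highest-scored documents (0.5 and 0.3);
# within that slice the single positive outranks the single negative.
print(retrieval_auroc(preds, target, top_k=2))  # tensor(1.)

# When the (possibly top-k restricted) targets contain only one class,
# the implementation short-circuits and returns 0.
print(retrieval_auroc(preds, torch.zeros(3, dtype=torch.bool)))  # tensor(0.)
```

Note that restricting to the top-k documents can only change the result by removing positive/negative pairs from consideration, which is why the top-2 slice here scores a perfect 1.0.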
2 changes: 2 additions & 0 deletions src/torchmetrics/retrieval/__init__.py
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from torchmetrics.retrieval.auroc import RetrievalAUROC
from torchmetrics.retrieval.average_precision import RetrievalMAP
from torchmetrics.retrieval.fall_out import RetrievalFallOut
from torchmetrics.retrieval.hit_rate import RetrievalHitRate
@@ -22,6 +23,7 @@
from torchmetrics.retrieval.reciprocal_rank import RetrievalMRR

__all__ = [
"RetrievalAUROC",
"RetrievalFallOut",
"RetrievalHitRate",
"RetrievalMAP",
163 changes: 163 additions & 0 deletions src/torchmetrics/retrieval/auroc.py
@@ -0,0 +1,163 @@
# Copyright The Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Callable, Optional, Sequence, Union

from torch import Tensor
from typing_extensions import Literal

from torchmetrics.functional.retrieval.auroc import retrieval_auroc
from torchmetrics.retrieval.base import RetrievalMetric
from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE

if not _MATPLOTLIB_AVAILABLE:
__doctest_skip__ = ["RetrievalAUROC.plot"]


class RetrievalAUROC(RetrievalMetric):
"""Compute area under the receiver operating characteristic curve (AUROC) for information retrieval.

Works with binary target data. Accepts float predictions from a model output.

As input to ``forward`` and ``update`` the metric accepts the following input:

- ``preds`` (:class:`~torch.Tensor`): A float tensor of shape ``(N, ...)``
- ``target`` (:class:`~torch.Tensor`): A long or bool tensor of shape ``(N, ...)``
- ``indexes`` (:class:`~torch.Tensor`): A long tensor of shape ``(N, ...)`` which indicates to which query a
prediction belongs

As output to ``forward`` and ``compute`` the metric returns the following output:

- ``auroc@k`` (:class:`~torch.Tensor`): A single-value tensor with the AUROC
of the predictions ``preds`` w.r.t. the labels ``target``.

All ``indexes``, ``preds`` and ``target`` must have the same dimension and will be flattened at the beginning,
so that, for example, a tensor of shape ``(N, M)`` is treated as ``(N * M, )``. Predictions are first grouped by
``indexes`` and the final value is computed as the mean of the metric over each query.

Args:
empty_target_action:
Specify what to do with queries that do not have at least one positive ``target``. Choose from:

- ``'neg'``: those queries count as ``0.0`` (default)
- ``'pos'``: those queries count as ``1.0``
- ``'skip'``: skip those queries; if all queries are skipped, ``0.0`` is returned
- ``'error'``: raise a ``ValueError``

ignore_index: Ignore predictions where the target is equal to this number.
top_k: Consider only the top k elements for each query (default: ``None``, which considers them all)
max_fpr: If not ``None``, calculates standardized partial AUC over the range ``[0, max_fpr]``.
aggregation:
Specify how to aggregate over indexes. Can be either a custom callable that takes in a single tensor
and returns a scalar value, or one of the following strings:

- ``'mean'``: average value is returned
- ``'median'``: median value is returned
- ``'max'``: max value is returned
- ``'min'``: min value is returned

kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.

Raises:
ValueError:
If ``empty_target_action`` is not one of ``error``, ``skip``, ``neg`` or ``pos``.
ValueError:
If ``ignore_index`` is neither `None` nor an integer.
ValueError:
If ``top_k`` is neither ``None`` nor an integer greater than 0.

Example:
>>> from torch import tensor
>>> from torchmetrics.retrieval import RetrievalAUROC
>>> indexes = tensor([0, 0, 0, 1, 1, 1, 1])
>>> preds = tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2])
>>> target = tensor([False, False, True, False, True, False, True])
>>> auroc = RetrievalAUROC()
>>> auroc(preds, target, indexes=indexes)
tensor(0.7500)

"""

is_differentiable: bool = False
higher_is_better: bool = True
full_state_update: bool = False
plot_lower_bound: float = 0.0
plot_upper_bound: float = 1.0

def __init__(
self,
empty_target_action: Literal["error", "skip", "neg", "pos"] = "neg",
ignore_index: Optional[int] = None,
top_k: Optional[int] = None,
max_fpr: Optional[float] = None,
aggregation: Union[Literal["mean", "median", "min", "max"], Callable] = "mean",
**kwargs: Any,
) -> None:
super().__init__(
empty_target_action=empty_target_action,
ignore_index=ignore_index,
aggregation=aggregation,
**kwargs,
)
if top_k is not None and not (isinstance(top_k, int) and top_k > 0):
raise ValueError("`top_k` has to be a positive integer or None")
self.top_k = top_k
if max_fpr is not None and not (isinstance(max_fpr, float) and 0 < max_fpr <= 1):
raise ValueError(f"Argument `max_fpr` should be a float in range (0, 1], but got: {max_fpr}")
self.max_fpr = max_fpr

def _metric(self, preds: Tensor, target: Tensor) -> Tensor:
return retrieval_auroc(preds, target, top_k=self.top_k, max_fpr=self.max_fpr)

def plot(
self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None
) -> _PLOT_OUT_TYPE:
"""Plot a single or multiple values from the metric.

Args:
val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
If no value is provided, will automatically call `metric.compute` and plot that result.
ax: A matplotlib axis object. If provided, will add the plot to that axis.

Returns:
Figure and Axes object

Raises:
ModuleNotFoundError:
If `matplotlib` is not installed

.. plot::
:scale: 75

>>> import torch
>>> from torchmetrics.retrieval import RetrievalAUROC
>>> # Example plotting a single value
>>> metric = RetrievalAUROC()
>>> metric.update(torch.rand(10,), torch.randint(2, (10,)), indexes=torch.randint(2,(10,)))
>>> fig_, ax_ = metric.plot()

.. plot::
:scale: 75

>>> import torch
>>> from torchmetrics.retrieval import RetrievalAUROC
>>> # Example plotting multiple values
>>> metric = RetrievalAUROC()
>>> values = []
>>> for _ in range(10):
... values.append(metric(torch.rand(10,), torch.randint(2, (10,)), indexes=torch.randint(2,(10,))))
>>> fig, ax = metric.plot(values)

"""
return self._plot(val, ax)
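To make the grouping semantics described in the class docstring concrete, here is a hedged usage sketch (again, not part of the diff): inputs are flattened, split by ``indexes``, scored per query with the functional ``retrieval_auroc``, and reduced with ``aggregation`` (mean by default). The per-query values in the comments follow from the pairwise-ranking definition of AUROC.

```python
import torch

from torchmetrics.functional.retrieval import retrieval_auroc
from torchmetrics.retrieval import RetrievalAUROC

indexes = torch.tensor([0, 0, 0, 1, 1, 1, 1])
preds = torch.tensor([0.2, 0.3, 0.5, 0.1, 0.3, 0.5, 0.2])
target = torch.tensor([False, False, True, False, True, False, True])

# Per-query AUROC via the functional interface:
# query 0 -> 1.0 (the lone positive outscores both negatives)
# query 1 -> 0.5 (each positive beats one negative, loses to the other)
per_query = [retrieval_auroc(preds[indexes == i], target[indexes == i]) for i in (0, 1)]

# The module groups by `indexes` and averages the per-query values,
# so it should reproduce mean(1.0, 0.5) = 0.75.
metric = RetrievalAUROC()
result = metric(preds, target, indexes=indexes)
print(result)  # tensor(0.7500)
assert torch.isclose(result, torch.stack(per_query).mean())

# A query with no positive target counts as 0.0 under the default
# empty_target_action="neg"; "skip" would drop it from the aggregation.
print(RetrievalAUROC()(torch.tensor([0.1, 0.2]), torch.tensor([False, False]),
                       indexes=torch.tensor([0, 0])))  # tensor(0.)
```

Under this reading, ``top_k`` and ``max_fpr`` affect only the per-query computation, while ``aggregation`` and ``empty_target_action`` affect only the reduction over queries.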