openvinotoolkit · ashwinvaidya17 · Sep 20, 2024 · Dec 19, 2023 · Dec 20, 2023 · Dec 21, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -84,7 +84,8 @@ test = [
     "coverage[toml]",
     "tox",
 ]
-full = ["anomalib[core,openvino,loggers,notebooks]"]
+extra = ["numba>=0.58.1"]
+full = ["anomalib[core,openvino,loggers,notebooks,extra]"]
 dev = ["anomalib[full,docs,test]"]
 
 [project.scripts]

diff --git a/src/anomalib/data/utils/path.py b/src/anomalib/data/utils/path.py
@@ -142,13 +142,20 @@ def contains_non_printable_characters(path: str | Path) -> bool:
     return not printable_pattern.match(str(path))
 
 
-def validate_path(path: str | Path, base_dir: str | Path | None = None, should_exist: bool = True) -> Path:
+def validate_path(
+    path: str | Path,
+    base_dir: str | Path | None = None,
+    should_exist: bool = True,
+    accepted_extensions: tuple[str, ...] | None = None,
-    accepted_extensions: tuple[str, ...] | None = None,
+    extensions: tuple[str, ...] | None = None,
-    accepted_extensions: tuple[str, ...] | None = None,
+    extensions: tuple[str, ...] | None = None,
+) -> Path:
     """Validate the path.
 
     Args:
         path (str | Path): Path to validate.
         base_dir (str | Path): Base directory to restrict file access.
         should_exist (bool): If True, do not raise an exception if the path does not exist.
+        accepted_extensions (tuple[str, ...] | None): Accepted extensions for the path. An exception is raised if the
-        accepted_extensions (tuple[str, ...] | None): Accepted extensions for the path. An exception is raised if the
+        extensions (tuple[str, ...] | None): Accepted extensions for the path. An exception is raised if the
-        accepted_extensions (tuple[str, ...] | None): Accepted extensions for the path. An exception is raised if the
+        extensions (tuple[str, ...] | None): Accepted extensions for the path. An exception is raised if the
+            path does not have one of the accepted extensions. If None, no check is performed. Defaults to None.
 
     Returns:
         Path: Validated path.
@@ -213,6 +220,11 @@ def validate_path(path: str | Path, base_dir: str | Path | None = None, should_e
             msg = f"Read or execute permissions denied for the path: {path}"
             raise PermissionError(msg)
 
+    # Check if the path has one of the accepted extensions (`extensions=None` disables the check)
+    if accepted_extensions is not None and path.suffix not in accepted_extensions:
-    if accepted_extensions is not None and path.suffix not in accepted_extensions:
+    if extensions is not None and path.suffix not in extensions:
-    if accepted_extensions is not None and path.suffix not in accepted_extensions:
+    if extensions is not None and path.suffix not in extensions:
+        msg = f"Path extension is not accepted. Accepted extensions: {extensions}. Path: {path}"
+        raise ValueError(msg)
+
     return path
 
 

diff --git a/src/anomalib/metrics/__init__.py b/src/anomalib/metrics/__init__.py
@@ -11,6 +11,7 @@
 import torchmetrics
 from omegaconf import DictConfig, ListConfig
 
+from . import per_image
 from .anomaly_score_distribution import AnomalyScoreDistribution
 from .aupr import AUPR
 from .aupro import AUPRO
@@ -19,6 +20,7 @@
 from .f1_max import F1Max
 from .f1_score import F1Score
 from .min_max import MinMax
+from .per_image import AUPIMO, PIMO, aupimo_scores, pimo_curves
 from .precision_recall_curve import BinaryPrecisionRecallCurve
 from .pro import PRO
 from .threshold import F1AdaptiveThreshold, ManualThreshold
@@ -35,6 +37,11 @@
     "ManualThreshold",
     "MinMax",
     "PRO",
+    "per_image",
+    "pimo_curves",
+    "aupimo_scores",
+    "PIMO",
+    "AUPIMO",
 ]
 
 logger = logging.getLogger(__name__)

diff --git a/src/anomalib/metrics/per_image/__init__.py b/src/anomalib/metrics/per_image/__init__.py
@@ -0,0 +1,44 @@
+"""Per-Image Metrics."""
+
+# Original Code
+# https://github.com/jpcbertoldo/aupimo
+#
+# Modified
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from .binclf_curve import per_image_binclf_curve, per_image_fpr, per_image_tpr
+from .binclf_curve_numpy import BinclfAlgorithm, BinclfThreshsChoice
+from .pimo import AUPIMO, PIMO, AUPIMOResult, PIMOResult, aupimo_scores, pimo_curves
+from .utils import (
+    compare_models_pairwise_ttest_rel,
+    compare_models_pairwise_wilcoxon,
+    format_pairwise_tests_results,
+    per_image_scores_stats,
+)
+from .utils_numpy import StatsOutliersPolicy, StatsRepeatedPolicy
+
+__all__ = [
+    # constants
+    "BinclfAlgorithm",
+    "BinclfThreshsChoice",
+    "StatsOutliersPolicy",
+    "StatsRepeatedPolicy",
+    # result classes
+    "PIMOResult",
+    "AUPIMOResult",
+    # functional interfaces
+    "per_image_binclf_curve",
+    "per_image_fpr",
+    "per_image_tpr",
+    "pimo_curves",
+    "aupimo_scores",
+    # torchmetrics interfaces
+    "PIMO",
+    "AUPIMO",
+    # utils
+    "compare_models_pairwise_ttest_rel",
+    "compare_models_pairwise_wilcoxon",
+    "format_pairwise_tests_results",
+    "per_image_scores_stats",
+]
diff --git a/src/anomalib/metrics/per_image/_binclf_curve_numba.py b/src/anomalib/metrics/per_image/_binclf_curve_numba.py
@@ -0,0 +1,115 @@
+"""Binary classification matrix curve (NUMBA implementation of low level functions).
+
+Details: `.binclf_curve`.
+"""
+
+# Original Code
+# https://github.com/jpcbertoldo/aupimo
+#
+# Modified
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numba
 # Numba is optional: record whether it can be imported instead of failing at import time.
 try:
     import numba  # noqa: F401
 except ImportError:
     HAS_NUMBA = False
 else:
     HAS_NUMBA = True
 # Only import the numba-backed implementation when numba itself is available.
 if HAS_NUMBA:
     from . import _binclf_curve_numba
 if algorithm == BinclfAlgorithm.NUMBA:
     if HAS_NUMBA:
         return _binclf_curve_numba.binclf_multiple_curves_numba(scores_batch, gts_batch, threshs)
     # The message is ONE string built by implicit concatenation. A comma between the parts would pass
     # the last part as a %-format argument, and logging would fail to format the record.
     logger.warning(
         f"Algorithm '{BinclfAlgorithm.NUMBA.value}' was selected, but Numba is not installed. "
         f"Falling back to '{BinclfAlgorithm.PYTHON.value}' implementation. "
         "Notice that the performance will be slower. Consider installing Numba for faster computation.",
     )
 # Reached for every non-numba request and for the numba request when numba is missing.
 return _binclf_multiple_curves_python(scores_batch, gts_batch, threshs)
 extra = ["numba>=0.58.1"] 
 # Numba is optional: record whether it can be imported instead of failing at import time.
 try:
     import numba  # noqa: F401
 except ImportError:
     HAS_NUMBA = False
 else:
     HAS_NUMBA = True
 
 
 # Only import the numba-backed implementation when numba itself is available.
 if HAS_NUMBA:
     from . import _binclf_curve_numba
 if algorithm == BinclfAlgorithm.NUMBA:
     if HAS_NUMBA:
         return _binclf_curve_numba.binclf_multiple_curves_numba(scores_batch, gts_batch, threshs)
 
     # The message is ONE string built by implicit concatenation. A comma between the parts would pass
     # the last part as a %-format argument, and logging would fail to format the record.
     logger.warning(
         f"Algorithm '{BinclfAlgorithm.NUMBA.value}' was selected, but Numba is not installed. "
         f"Falling back to '{BinclfAlgorithm.PYTHON.value}' implementation. "
         "Notice that the performance will be slower. Consider installing Numba for faster computation.",
     )
 
 # Reached for every non-numba request and for the numba request when numba is missing.
 return _binclf_multiple_curves_python(scores_batch, gts_batch, threshs)
 extra = ["numba>=0.58.1"] 
+import numpy as np
+from numpy import ndarray
+
+
+@numba.jit(nopython=True)
+def binclf_one_curve_numba(scores: ndarray, gts: ndarray, threshs: ndarray) -> ndarray:
+    """One binary classification matrix at each threshold (NUMBA implementation).
+
+    This does the same as `_binclf_one_curve_python` but with numba using just-in-time compilation.
+
+    A score counts as a predicted positive at threshold `th` when `score >= th`
+    (scores strictly below `th` are dropped from the counts).
+
+    Note: VALIDATION IS NOT DONE HERE! Make sure to validate the arguments before calling this function.
+
+    Args:
+        scores (ndarray): Anomaly scores (D,).
+        gts (ndarray): Binary (bool) ground truth of shape (D,).
+        threshs (ndarray): Sequence of thresholds in ascending order (K,).
+
+    Returns:
+        ndarray: Binary classification matrix curve (K, 2, 2) of int64 counts, where entry `[k]` is
+        `[[TN, FP], [FN, TP]]` at `threshs[k]` (axis 1: true class, axis 2: predicted class).
+
+        Details: `anomalib.metrics.per_image.binclf_curve_numpy.binclf_multiple_curves`.
+    """
+    num_th = len(threshs)  # K
+
+    # POSITIVES
+    scores_pos = scores[gts]
+    # sorted scores let the loop below drop from the front while walking `threshs` in ascending order
+    scores_pos = np.sort(scores_pos)
+    # initial state: every positive sample counts as a TP; the loop subtracts those falling below each th
+    num_pos = current_count_tp = len(scores_pos)
+
+    tps = np.empty((num_th,), dtype=np.int64)
+
+    # NEGATIVES
+    # same thing but for the negative samples (they become FPs while their score is >= th)
+    scores_neg = scores[~gts]
+    scores_neg = np.sort(scores_neg)
+    num_neg = current_count_fp = len(scores_neg)
+
+    fps = np.empty((num_th,), dtype=np.int64)
+
+    # it will progressively drop the scores that are below the current th
+    for thidx, th in enumerate(threshs):
+        num_drop = 0
+        num_scores = len(scores_pos)
+        while num_drop < num_scores and scores_pos[num_drop] < th:  # ! scores_pos ! strict '<': score == th is kept
+            num_drop += 1
+        # dropped scores are also below every later (larger) th, so they never need to be revisited
+        scores_pos = scores_pos[num_drop:]
+        current_count_tp -= num_drop
+        tps[thidx] = current_count_tp
+
+        # same with the negatives
+        num_drop = 0
+        num_scores = len(scores_neg)
+        while num_drop < num_scores and scores_neg[num_drop] < th:  # ! scores_neg ! strict '<': score == th is kept
+            num_drop += 1
+        # dropped scores are also below every later (larger) th, so they never need to be revisited
+        scores_neg = scores_neg[num_drop:]
+        current_count_fp -= num_drop
+        fps[thidx] = current_count_fp
+
+    fns = num_pos * np.ones((num_th,), dtype=np.int64) - tps  # positives not predicted positive
+    tns = num_neg * np.ones((num_th,), dtype=np.int64) - fps  # negatives not predicted positive
+
+    # sequence of dimensions is (threshs, true class, predicted class): each [k] is [[tn, fp], [fn, tp]]
+    return np.stack(
+        (
+            np.stack((tns, fps), axis=-1),
+            np.stack((fns, tps), axis=-1),
+        ),
+        axis=-1,
+    ).transpose(0, 2, 1)  # the outer stack puts the true class last; the transpose swaps it back to axis 1
+
+
+@numba.jit(nopython=True, parallel=True)
+def binclf_multiple_curves_numba(scores_batch: ndarray, gts_batch: ndarray, threshs: ndarray) -> ndarray:
+    """Multiple binary classification matrix at each threshold (NUMBA implementation).
+
+    This does the same as `_binclf_multiple_curves_python` but with numba,
+    using parallelization and just-in-time compilation.
+
+    Each image (first axis) is handled by one call to `binclf_one_curve_numba` with the shared `threshs`.
+
+    Note: VALIDATION IS NOT DONE HERE. Make sure to validate the arguments before calling this function.
+
+    Args:
+        scores_batch (ndarray): Anomaly scores (N, D,).
+        gts_batch (ndarray): Binary (bool) ground truth of shape (N, D,).
+        threshs (ndarray): Sequence of thresholds in ascending order (K,).
+
+    Returns:
+        ndarray: Binary classification matrix curves (N, K, 2, 2) of int64 counts.
+
+        Details: `anomalib.metrics.per_image.binclf_curve_numpy.binclf_multiple_curves`.
+    """
+    num_imgs = scores_batch.shape[0]  # N
+    num_th = len(threshs)  # K
+    ret = np.empty((num_imgs, num_th, 2, 2), dtype=np.int64)  # every slot is overwritten in the loop below
+    for imgidx in numba.prange(num_imgs):  # iterations are independent: each one writes only ret[imgidx]
+        scoremap = scores_batch[imgidx]
+        mask = gts_batch[imgidx]
+        ret[imgidx] = binclf_one_curve_numba(scoremap, mask, threshs)
+    return ret