From 981c42df845e46492e960a0a5b9ab91d196f5688 Mon Sep 17 00:00:00 2001
From: brimoor
Date: Fri, 13 Dec 2024 01:17:03 -0500
Subject: [PATCH 1/3] include all labels in views

---
 .../panels/model_evaluation/__init__.py | 97 +++++++++++++++----
 1 file changed, 76 insertions(+), 21 deletions(-)

diff --git a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
index e8a1aff301..807c446cd5 100644
--- a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
+++ b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
@@ -97,12 +97,6 @@ def on_load(self, ctx):
         ctx.panel.set_data("permissions", permissions)
         self.load_pending_evaluations(ctx)
 
-    def is_binary_classification(self, info):
-        return (
-            info.config.type == "classification"
-            and info.config.method == "binary"
-        )
-
     def get_avg_confidence(self, per_class_metrics):
         count = 0
         total = 0
@@ -114,7 +108,10 @@ def get_avg_confidence(self, per_class_metrics):
 
     def get_tp_fp_fn(self, info, results):
         # Binary classification
-        if self.is_binary_classification(info):
+        if (
+            info.config.type == "classification"
+            and info.config.method == "binary"
+        ):
             neg_label, pos_label = results.classes
             tp_count = np.count_nonzero(
                 (results.ytrue == pos_label) & (results.ypred == pos_label)
@@ -418,23 +415,81 @@ def load_view(self, ctx):
         y = view_options.get("y", None)
         field = view_options.get("field", None)
         computed_eval_key = view_options.get("key", eval_key)
+        eval_view = ctx.dataset.load_evaluation_view(eval_key)
+
         view = None
-        if view_type == "class":
-            view = ctx.dataset.filter_labels(pred_field, F("label") == x)
-        elif view_type == "matrix":
-            view = ctx.dataset.filter_labels(
-                gt_field, F("label") == y
-            ).filter_labels(pred_field, F("label") == x)
-        elif view_type == "field":
-            if self.is_binary_classification(info):
-                uppercase_field = field.upper()
-                view = ctx.dataset.match(
-                    {computed_eval_key: {"$eq": uppercase_field}}
+        if info.config.type == "classification":
+            if view_type == "class":
+                view = eval_view.match(
+                    (F(f"{gt_field}.label") == x)
+                    | (F(f"{pred_field}.label") == x)
                 )
-            else:
-                view = ctx.dataset.filter_labels(
-                    pred_field, F(computed_eval_key) == field
+            elif view_type == "matrix":
+                view = eval_view.match(
+                    (F(f"{gt_field}.label") == y)
+                    & (F(f"{pred_field}.label") == x)
+                )
+            elif view_type == "field":
+                if field == "fn":
+                    view = eval_view.match(
+                        F(f"{gt_field}.{computed_eval_key}") == field
+                    )
+                else:
+                    view = eval_view.match(
+                        F(f"{pred_field}.{computed_eval_key}") == field
+                    )
+        elif info.config.type == "detection":
+            _, pred_root = ctx.dataset._get_label_field_path(pred_field)
+            _, gt_root = ctx.dataset._get_label_field_path(gt_field)
+
+            if view_type == "class":
+                view = (
+                    eval_view.filter_labels(
+                        pred_field, F("label") == x, only_matches=False
+                    )
+                    .filter_labels(
+                        gt_field, F("label") == x, only_matches=False
+                    )
+                    .match(
+                        (F(pred_root).length() > 0) | (F(gt_root).length() > 0)
+                    )
                 )
+            elif view_type == "matrix":
+                view = (
+                    eval_view.filter_labels(
+                        gt_field, F("label") == y, only_matches=False
+                    )
+                    .filter_labels(
+                        pred_field, F("label") == x, only_matches=False
+                    )
+                    .match(
+                        (F(pred_root).length() > 0) & (F(gt_root).length() > 0)
+                    )
+                )
+            elif view_type == "field":
+                if field == "tp":
+                    view = eval_view.filter_labels(
+                        gt_field,
+                        F(computed_eval_key) == field,
+                        only_matches=False,
+                    ).filter_labels(
+                        pred_field,
+                        F(computed_eval_key) == field,
+                        only_matches=True,
+                    )
+                elif field == "fn":
+                    view = eval_view.filter_labels(
+                        gt_field,
+                        F(computed_eval_key) == field,
+                        only_matches=True,
+                    )
+                else:
+                    view = eval_view.filter_labels(
+                        pred_field,
+                        F(computed_eval_key) == field,
+                        only_matches=True,
+                    )
+
         if view is not None:
             ctx.ops.set_view(view)

From a9ea1c3f1a31031d567cd4520e8ae27e60285bec Mon Sep 17 00:00:00 2001
From: brimoor
Date: Sat, 14 Dec 2024 22:45:14 -0500
Subject: [PATCH 2/3] filtering comparison field as well

---
 .../panels/model_evaluation/__init__.py | 135 +++++++++++-------
 1 file changed, 87 insertions(+), 48 deletions(-)

diff --git a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
index 807c446cd5..96684ce080 100644
--- a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
+++ b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
@@ -312,6 +312,8 @@ def load_evaluation(self, ctx):
                 "confusion_matrices": self.get_confusion_matrices(results),
                 "per_class_metrics": per_class_metrics,
             }
+            ctx.panel.set_state("missing", results.missing)
+
             if ENABLE_CACHING:
                 # Cache the evaluation data
                 try:
@@ -406,88 +408,125 @@ def load_view(self, ctx):
             return
 
         view_state = ctx.panel.get_state("view") or {}
+        view_options = ctx.params.get("options", {})
+
         eval_key = view_state.get("key")
+        eval_key = view_options.get("key", eval_key)
+        eval_view = ctx.dataset.load_evaluation_view(eval_key)
         info = ctx.dataset.get_evaluation_info(eval_key)
         pred_field = info.config.pred_field
         gt_field = info.config.gt_field
-        view_options = ctx.params.get("options", {})
+
+        eval_key2 = view_state.get("compareKey", None)
+        pred_field2 = None
+        gt_field2 = None
+        if eval_key2 is not None:
+            info2 = ctx.dataset.get_evaluation_info(eval_key2)
+            pred_field2 = info2.config.pred_field
+            if info2.config.gt_field != gt_field:
+                gt_field2 = info2.config.gt_field
+
         x = view_options.get("x", None)
         y = view_options.get("y", None)
         field = view_options.get("field", None)
-        computed_eval_key = view_options.get("key", eval_key)
-        eval_view = ctx.dataset.load_evaluation_view(eval_key)
+        missing = ctx.panel.get_state("missing", "(none)")
 
         view = None
         if info.config.type == "classification":
             if view_type == "class":
-                view = eval_view.match(
-                    (F(f"{gt_field}.label") == x)
-                    | (F(f"{pred_field}.label") == x)
-                )
+                # All GT/predictions of class `x`
+                expr = F(f"{gt_field}.label") == x
+                expr |= F(f"{pred_field}.label") == x
+                if gt_field2 is not None:
+                    expr |= F(f"{gt_field2}.label") == x
+                if pred_field2 is not None:
+                    expr |= F(f"{pred_field2}.label") == x
+                view = eval_view.match(expr)
             elif view_type == "matrix":
-                view = eval_view.match(
-                    (F(f"{gt_field}.label") == y)
-                    & (F(f"{pred_field}.label") == x)
-                )
+                # Specific confusion matrix cell (including FP/FN)
+                expr = F(f"{gt_field}.label") == y
+                expr &= F(f"{pred_field}.label") == x
+                view = eval_view.match(expr)
             elif view_type == "field":
-                if field == "fn":
-                    view = eval_view.match(
-                        F(f"{gt_field}.{computed_eval_key}") == field
-                    )
+                if info.config.method == "binary":
+                    # All TP/FP/FN
+                    expr = F(f"{eval_key}") == field.upper()
+                    view = eval_view.match(expr)
                 else:
-                    view = eval_view.match(
-                        F(f"{pred_field}.{computed_eval_key}") == field
-                    )
+                    # Correct/incorrect
+                    expr = F(f"{eval_key}") == field
+                    view = eval_view.match(expr)
         elif info.config.type == "detection":
-            _, pred_root = ctx.dataset._get_label_field_path(pred_field)
             _, gt_root = ctx.dataset._get_label_field_path(gt_field)
+            _, pred_root = ctx.dataset._get_label_field_path(pred_field)
+            if gt_field2 is not None:
+                _, gt_root2 = ctx.dataset._get_label_field_path(gt_field2)
+            if pred_field2 is not None:
+                _, pred_root2 = ctx.dataset._get_label_field_path(pred_field2)
 
             if view_type == "class":
-                view = (
-                    eval_view.filter_labels(
-                        pred_field, F("label") == x, only_matches=False
-                    )
-                    .filter_labels(
-                        gt_field, F("label") == x, only_matches=False
+                # All GT/predictions of class `x`
+                view = eval_view.filter_labels(
+                    gt_field, F("label") == x, only_matches=False
+                )
+                expr = F(gt_root).length() > 0
+                view = view.filter_labels(
+                    pred_field, F("label") == x, only_matches=False
+                )
+                expr |= F(pred_root).length() > 0
+                if gt_field2 is not None:
+                    view = view.filter_labels(
+                        gt_field2, F("label") == x, only_matches=False
                     )
-                    .match(
-                        (F(pred_root).length() > 0) | (F(gt_root).length() > 0)
+                    expr |= F(gt_root2).length() > 0
+                if pred_field2 is not None:
+                    view = view.filter_labels(
+                        pred_field2, F("label") == x, only_matches=False
                     )
-                )
+                    expr |= F(pred_root2).length() > 0
+                view = view.match(expr)
             elif view_type == "matrix":
-                view = (
-                    eval_view.filter_labels(
+                if y == missing:
+                    # False positives of class `x`
+                    expr = (F("label") == x) & (F(eval_key) == "fp")
+                    view = eval_view.filter_labels(
+                        pred_field, expr, only_matches=True
+                    )
+                elif x == missing:
+                    # False negatives of class `y`
+                    expr = (F("label") == y) & (F(eval_key) == "fn")
+                    view = eval_view.filter_labels(
+                        gt_field, expr, only_matches=True
+                    )
+                else:
+                    # All class `y` GT and class `x` predictions in same sample
+                    view = eval_view.filter_labels(
                         gt_field, F("label") == y, only_matches=False
                     )
-                    .filter_labels(
+                    expr = F(gt_root).length() > 0
+                    view = view.filter_labels(
                         pred_field, F("label") == x, only_matches=False
                     )
-                    .match(
-                        (F(pred_root).length() > 0) & (F(gt_root).length() > 0)
-                    )
-                )
+                    expr &= F(pred_root).length() > 0
+                    view = view.match(expr)
             elif view_type == "field":
                 if field == "tp":
+                    # All true positives
                     view = eval_view.filter_labels(
-                        gt_field,
-                        F(computed_eval_key) == field,
-                        only_matches=False,
-                    ).filter_labels(
-                        pred_field,
-                        F(computed_eval_key) == field,
-                        only_matches=True,
+                        gt_field, F(eval_key) == field, only_matches=False
+                    )
+                    view = view.filter_labels(
+                        pred_field, F(eval_key) == field, only_matches=True
                     )
                 elif field == "fn":
+                    # All false negatives
                     view = eval_view.filter_labels(
-                        gt_field,
-                        F(computed_eval_key) == field,
-                        only_matches=True,
+                        gt_field, F(eval_key) == field, only_matches=True
                     )
                 else:
+                    # All false positives
                     view = eval_view.filter_labels(
-                        pred_field,
-                        F(computed_eval_key) == field,
-                        only_matches=True,
+                        pred_field, F(eval_key) == field, only_matches=True
                     )
 
         if view is not None:

From 5603258575ef6374beb6199c22d30c7474c896d0 Mon Sep 17 00:00:00 2001
From: imanjra
Date: Tue, 17 Dec 2024 22:42:19 -0500
Subject: [PATCH 3/3] model evaluation load_view bug fixes

---
 .../NativeModelEvaluationView/Evaluation.tsx     | 10 +++++++++-
 .../builtins/panels/model_evaluation/__init__.py |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx b/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
index c3ee377dab..cefbc3f6c6 100644
--- a/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
+++ b/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
@@ -1217,6 +1217,14 @@ export default function Evaluation(props: EvaluationProps) {
                 ].join("<br>") + "<extra></extra>",
               },
             ]}
+            onClick={({ points }) => {
+              const firstPoint = points[0];
+              loadView("matrix", {
+                x: firstPoint.x,
+                y: firstPoint.y,
+                key: compareKey,
+              });
+            }}
             layout={{
               yaxis: {
                 autorange: "reversed",
@@ -1598,7 +1606,7 @@ function useActiveFilter(evaluation, compareEvaluation) {
   const evalKey = evaluation?.info?.key;
   const compareKey = compareEvaluation?.info?.key;
   const [stages] = useRecoilState(view);
-  if (stages?.length === 1) {
+  if (stages?.length >= 1) {
     const stage = stages[0];
     const { _cls, kwargs } = stage;
     if (_cls.endsWith("FilterLabels")) {
diff --git a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
index 96684ce080..f9b1b147e9 100644
--- a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
+++ b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
@@ -420,7 +420,7 @@ def load_view(self, ctx):
         eval_key2 = view_state.get("compareKey", None)
         pred_field2 = None
         gt_field2 = None
-        if eval_key2 is not None:
+        if eval_key2:
             info2 = ctx.dataset.get_evaluation_info(eval_key2)
             pred_field2 = info2.config.pred_field
             if info2.config.gt_field != gt_field: