
Commit

fix: don't look at len when calculating prevalence
CompRhys committed Feb 9, 2024
1 parent 591ab2a commit c2ebe94
Showing 2 changed files with 12 additions and 10 deletions.
2 changes: 1 addition & 1 deletion matbench_discovery/metrics.py
@@ -105,7 +105,7 @@ def stable_metrics(
n_total_neg = n_true_neg + n_false_pos
# prevalence: dummy discovery rate of stable crystals by selecting randomly from
# all materials
prevalence = n_total_pos / len(each_true)
prevalence = n_total_pos / (n_total_pos + n_total_neg)
precision = n_true_pos / (n_true_pos + n_false_pos) # model's discovery rate
recall = n_true_pos / n_total_pos
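For context, here is a minimal sketch of how the corrected prevalence feeds into the headline metrics. The confusion counts are made up for illustration, and the assumption (suggested by the fillna test below) is that they only cover rows whose predictions were not dropped as NaN, which would explain why dividing by len(each_true) could over-count the denominator. Treating DAF as precision divided by prevalence is also an assumption, consistent with the "dummy discovery rate" comment above.

# Illustrative counts only; n_total_pos = n_true_pos + n_false_neg as in the
# DAF check at the bottom of this diff.
n_true_pos, n_false_neg, n_false_pos, n_true_neg = 1, 0, 1, 1

n_total_pos = n_true_pos + n_false_neg  # 1
n_total_neg = n_true_neg + n_false_pos  # 2

# New denominator counts only the rows that were actually classified.
prevalence = n_total_pos / (n_total_pos + n_total_neg)  # 1/3
precision = n_true_pos / (n_true_pos + n_false_pos)  # 1/2, the model's discovery rate
daf = precision / prevalence  # 1.5 (assumes DAF = precision / prevalence)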

20 changes: 11 additions & 9 deletions tests/test_metrics.py
@@ -39,7 +39,7 @@ def test_classify_stable(


def test_stable_metrics() -> None:
metrics = stable_metrics(np.arange(-1, 1, 0.1), np.arange(1, -1, -0.1))
metrics = stable_metrics(np.arange(-1, 1, 0.1), np.arange(1, -1, -0.1), fillna=True)
for key, val in dict(
DAF=0,
Precision=0,
@@ -57,20 +57,22 @@ def test_stable_metrics() -> None:

assert math.isnan(metrics["F1"])

metrics = stable_metrics(np.array((-1, 1, 0.1, -0.5)), np.array((-1, -1, -0.1, np.nan)), fillna=False)
fillna_metrics = stable_metrics(np.array((-1, 1, 0.1, -0.5)), np.array((-11, -1, -0.1, np.nan)), fillna=True)
metrics = stable_metrics(np.array((-1, 1, 0.1, -0.5, 0.5)), np.array((-1, 1, -0.1, np.nan, np.nan)), fillna=False)
fillna_metrics = stable_metrics(np.array((-1, 1, 0.1, -0.5, 0.5)), np.array((-1, 1, -0.1, np.nan, np.nan)), fillna=True)

# When we fill NaNs, the DAF should decrease because more False Negatives are created
# The precision remains unchanged as it only depends on the numbers of
# True Positives and False Positives
assert metrics["Precision"] == fillna_metrics["Precision"]
assert metrics["DAF"] > fillna_metrics["DAF"]
assert metrics["DAF"] > fillna_metrics["DAF"] # nan's dropped in prevalence
assert metrics["TNR"] == 0.5
assert metrics["FNR"] == 0
assert fillna_metrics["TNR"] == 2/3
assert fillna_metrics["FNR"] == 1/2


# test stable_metrics gives the same result as sklearn.metrics.classification_report
# for random numpy data
rng = np.random.default_rng(0)
y_true, y_pred = rng.normal(size=(2, 100))
metrics = stable_metrics(y_true, y_pred)
metrics = stable_metrics(y_true, y_pred, fillna=True)

from sklearn.metrics import classification_report

@@ -94,7 +96,7 @@ def test_stable_metrics() -> None:

# test discovery acceleration factor (DAF)
n_true_pos, n_false_neg, n_false_pos, n_true_neg = map(
sum, classify_stable(y_true, y_pred)
sum, classify_stable(y_true, y_pred, fillna=True)
)

dummy_hit_rate = (n_true_pos + n_false_neg) / (
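For completeness, a sketch of what the truncated DAF check presumably asserts. Defining DAF as precision over the dummy hit rate is an assumption consistent with the prevalence comment in metrics.py; the import path and the use of np.isclose are likewise mine, not from the diff.

import numpy as np
from matbench_discovery.metrics import classify_stable, stable_metrics

rng = np.random.default_rng(0)
y_true, y_pred = rng.normal(size=(2, 100))

n_true_pos, n_false_neg, n_false_pos, n_true_neg = map(
    sum, classify_stable(y_true, y_pred, fillna=True)
)
n_total = n_true_pos + n_false_neg + n_false_pos + n_true_neg

dummy_hit_rate = (n_true_pos + n_false_neg) / n_total  # random-pick discovery rate
precision = n_true_pos / (n_true_pos + n_false_pos)  # model's discovery rate
metrics = stable_metrics(y_true, y_pred, fillna=True)
assert np.isclose(metrics["DAF"], precision / dummy_hit_rate)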
