collect_predictions() for survival models (#158)

* initial tests * updates for tidymodels/tune#798 * SA test updates * update tune version * update bayesian opt tests * update racing unit tests * update tune version * temporarily point to PR * improve readability of reference object * racing updates; also covers changes in tidymodels/finetune#90 * updated for changes in #804 * test for #138 and tidymodels/finetune#81 * PR tune#798 has been merged, new one to check with is tune#804 * updated for new tune version * version bumps * update with new tune warning wording * changes for tidymodels/tune#806 * more updated snapshots * more snapshot updates * update censored bagging snapshot * irreproducible results; see #160 --------- Co-authored-by: ‘topepo’ <‘[email protected]’> Co-authored-by: Hannah Frick <[email protected]>
tidymodels · Jan 12, 2024 · e5ad42b · e5ad42b
1 parent 683ec31
commit e5ad42b
Show file tree

Hide file tree

Showing 36 changed files with 1,470 additions and 359 deletions.
diff --git a/.github/workflows/GH-R-CMD-check.yaml b/.github/workflows/GH-R-CMD-check.yaml
@@ -76,7 +76,7 @@ jobs:
           try(pak::pkg_install("tidymodels/stacks"))
           try(pak::pkg_install("tidymodels/themis"))
           try(pak::pkg_install("tidymodels/tidymodels"))
-          try(pak::pkg_install("tidymodels/tune"))
+          try(pak::pkg_install("tidymodels/tune#804"))
           try(pak::pkg_install("tidymodels/workflows"))
           try(pak::pkg_install("tidymodels/yardstick"))
           try(pak::pkg_install("tidymodels/finetune"))

diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ tests/testthat/derby.log
 tests/testthat/logs/log4j.spark.log
 derby.log
 logs/log4j.spark.log
+tests/testthat/_snaps/*.new.md
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,6 +18,7 @@ Suggests:
     butcher,
     C50,
     censored,
+    conflicted,
     coin,
     dials,
     dimRed,

diff --git a/tests/testthat.R b/tests/testthat.R
@@ -1,4 +1,7 @@
 library(testthat)
 library(extratests)
 
+# triage with fewer files:
+# devtools::test(filter = "(survival)|(censor)")
+
 test_check("extratests", reporter = "summary")
diff --git a/tests/testthat/_snaps/censored-case-weights.md b/tests/testthat/_snaps/censored-case-weights.md
@@ -1,11 +1,3 @@
-# bag_tree - rpart censored case weights
-
-    Code
-      wt_fit$fit$call
-    Output
-      bagging.data.frame(formula = Surv(time, event) ~ ., data = data, 
-          weights = weights, cp = ~0, minsplit = ~2)
-
 # proportional_hazards - glmnet censored case weights
 
     Code

diff --git a/tests/testthat/_snaps/parsnip-case-weights.md b/tests/testthat/_snaps/parsnip-case-weights.md
diff --git a/tests/testthat/_snaps/survival-fit-resamples.md b/tests/testthat/_snaps/survival-fit-resamples.md
@@ -1,6 +1,15 @@
 # resampling survival models mixture of metric types
 
-    4 evaluation times were specified during tuning; the first (10) will be used.
+    Code
+      show_best(rs_mixed_res, metric = "brier_survival")
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+    Output
+      # A tibble: 1 x 7
+        .metric        .estimator .eval_time  mean     n std_err .config             
+        <chr>          <chr>           <dbl> <dbl> <int>   <dbl> <chr>               
+      1 brier_survival standard           10 0.166    10  0.0204 Preprocessor1_Model1
 
 ---
 
@@ -26,7 +35,7 @@
       show_best(rs_mixed_res, metric = "brier_survival", eval_time = c(1, 3))
     Condition
       Warning:
-      2 evaluation times were specified during tuning; the first (1) will be used.
+      2 evaluation times are available; the first (1) will be used.
     Output
       # A tibble: 1 x 7
         .metric        .estimator .eval_time   mean     n std_err .config             

diff --git a/tests/testthat/_snaps/survival-tune-bayes.md b/tests/testthat/_snaps/survival-tune-bayes.md
@@ -1,14 +1,57 @@
 # Bayesian tuning survival models with dynamic metric
 
-    No evaluation time was set; a value of 5 was used.
+    Code
+      set.seed(2193)
+      bayes_dynamic_res <- mod_spec %>% tune_bayes(event_time ~ X1 + X2, resamples = sim_rs,
+      iter = 2, metrics = dyn_mtrc, eval_time = time_points, control = bctrl,
+      initial = init_grid_dynamic_res)
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+
+---
+
+    Code
+      expect_snapshot_plot(print(autoplot(bayes_dynamic_res)), "dyn-bayes")
+    Condition
+      Warning in `filter_plot_eval_time()`:
+      No evaluation time was set; a value of 5 was used.
 
 # Bayesian tuning survival models with mixture of metric types
 
-    No evaluation time was set; a value of 5 was used.
+    Code
+      set.seed(2193)
+      bayes_mixed_res <- mod_spec %>% tune_bayes(event_time ~ X1 + X2, resamples = sim_rs,
+      iter = 2, metrics = mix_mtrc, eval_time = time_points, initial = init_grid_mixed_res,
+      control = bctrl)
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
 
 ---
 
-    No evaluation time was set; a value of 5 was used.
+    Code
+      expect_snapshot_plot(print(autoplot(bayes_mixed_res)), "mix-bayes-0-times")
+    Condition
+      Warning in `filter_plot_eval_time()`:
+      No evaluation time was set; a value of 5 was used.
+
+---
+
+    Code
+      show_best(bayes_mixed_res, metric = "brier_survival")
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+    Output
+      # A tibble: 5 x 9
+        tree_depth .metric     .estimator .eval_time  mean     n std_err .config .iter
+             <dbl> <chr>       <chr>           <dbl> <dbl> <int>   <dbl> <chr>   <int>
+      1         10 brier_surv~ standard           10 0.164    10  0.0198 Prepro~     0
+      2         15 brier_surv~ standard           10 0.164    10  0.0198 Iter1       1
+      3          6 brier_surv~ standard           10 0.164    10  0.0198 Iter2       2
+      4          2 brier_surv~ standard           10 0.179    10  0.0209 Prepro~     0
+      5          1 brier_surv~ standard           10 0.193    10  0.0201 Prepro~     0
 
 ---
 
@@ -27,18 +70,27 @@
 ---
 
     Code
-      show_best(bayes_mixed_res, metric = "brier_survival", eval_time = c(1.001))
+      show_best(bayes_mixed_res, metric = "brier_survival", eval_time = c(1.1))
     Condition
-      Error in `choose_eval_time()`:
-      ! No evaluation times matched a value of 1.001.
+      Error in `show_best()`:
+      ! Evaluation time 1.1 is not in the results.
 
 ---
 
     Code
       show_best(bayes_mixed_res, metric = "brier_survival", eval_time = c(1, 3))
     Condition
-      Error in `choose_eval_time()`:
-      ! Please pick a single evaluation time point.
+      Warning:
+      2 evaluation times are available; the first (1) will be used.
+    Output
+      # A tibble: 5 x 9
+        tree_depth .metric    .estimator .eval_time   mean     n std_err .config .iter
+             <dbl> <chr>      <chr>           <dbl>  <dbl> <int>   <dbl> <chr>   <int>
+      1          1 brier_sur~ standard            1 0.0209    10 0.00501 Prepro~     0
+      2         10 brier_sur~ standard            1 0.0210    10 0.00496 Prepro~     0
+      3         15 brier_sur~ standard            1 0.0210    10 0.00496 Iter1       1
+      4          6 brier_sur~ standard            1 0.0210    10 0.00496 Iter2       2
+      5          2 brier_sur~ standard            1 0.0210    10 0.00499 Prepro~     0
 
 ---
 

diff --git a/tests/testthat/_snaps/survival-tune-eval-time-attribute.md b/tests/testthat/_snaps/survival-tune-eval-time-attribute.md
@@ -0,0 +1,41 @@
+# tune*_() saves eval_time
+
+    Code
+      set.seed(2193)
+      bayes_res <- mod_spec %>% tune_bayes(event_time ~ X1 + X2, sim_rs, initial = grid_res,
+      iter = 2, metrics = srv_mtrc, eval_time = time_points)
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+
+---
+
+    Code
+      set.seed(2193)
+      sa_res <- mod_spec %>% tune_sim_anneal(event_time ~ X1 + X2, sim_rs, initial = grid_res,
+      iter = 2, metrics = srv_mtrc, eval_time = time_points, control = control_sim_anneal(
+        verbose_iter = FALSE))
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+
+---
+
+    Code
+      set.seed(2193)
+      anova_res <- mod_spec %>% tune_race_anova(event_time ~ X1 + X2, sim_rs, grid = grid,
+      metrics = srv_mtrc, eval_time = time_points)
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+
+---
+
+    Code
+      set.seed(2193)
+      wl_res <- mod_spec %>% tune_race_win_loss(event_time ~ X1 + X2, sim_rs, grid = grid,
+      metrics = srv_mtrc, eval_time = time_points)
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+
diff --git a/tests/testthat/_snaps/survival-tune-grid.md b/tests/testthat/_snaps/survival-tune-grid.md
@@ -1,14 +1,33 @@
 # grid tuning survival models with dynamic metric
 
-    No evaluation time was set; a value of 5 was used.
+    Code
+      expect_snapshot_plot(print(autoplot(grid_dynamic_res)), "dyn-grid")
+    Condition
+      Warning in `filter_plot_eval_time()`:
+      No evaluation time was set; a value of 5 was used.
 
 # grid tuning survival models mixture of metric types
 
-    No evaluation time was set; a value of 5 was used.
+    Code
+      expect_snapshot_plot(print(autoplot(grid_mixed_res)), "mix-grid-0-times")
+    Condition
+      Warning in `filter_plot_eval_time()`:
+      No evaluation time was set; a value of 5 was used.
 
 ---
 
-    No evaluation time was set; a value of 5 was used.
+    Code
+      show_best(grid_mixed_res, metric = "brier_survival")
+    Condition
+      Warning:
+      4 evaluation times are available; the first (10) will be used.
+    Output
+      # A tibble: 3 x 8
+        penalty .metric        .estimator .eval_time  mean     n std_err .config      
+          <dbl> <chr>          <chr>           <dbl> <dbl> <int>   <dbl> <chr>        
+      1  0.0001 brier_survival standard           10 0.154    10  0.0210 Preprocessor~
+      2  0.01   brier_survival standard           10 0.154    10  0.0210 Preprocessor~
+      3  0.1    brier_survival standard           10 0.159    10  0.0209 Preprocessor~
 
 ---
 
@@ -27,16 +46,23 @@
     Code
       show_best(grid_mixed_res, metric = "brier_survival", eval_time = c(1.001))
     Condition
-      Error in `choose_eval_time()`:
-      ! No evaluation times matched a value of 1.001.
+      Error in `show_best()`:
+      ! Evaluation time 1 is not in the results.
 
 ---
 
     Code
       show_best(grid_mixed_res, metric = "brier_survival", eval_time = c(1, 3))
     Condition
-      Error in `choose_eval_time()`:
-      ! Please pick a single evaluation time point.
+      Warning:
+      2 evaluation times are available; the first (1) will be used.
+    Output
+      # A tibble: 3 x 8
+        penalty .metric        .estimator .eval_time   mean     n std_err .config     
+          <dbl> <chr>          <chr>           <dbl>  <dbl> <int>   <dbl> <chr>       
+      1  0.1    brier_survival standard            1 0.0208    10 0.00503 Preprocesso~
+      2  0.01   brier_survival standard            1 0.0208    10 0.00498 Preprocesso~
+      3  0.0001 brier_survival standard            1 0.0208    10 0.00498 Preprocesso~
 
 ---
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,6 +18,7 @@ Suggests: @@
         butcher,
         C50,
         censored,
+        conflicted,
         coin,
         dials,
         dimRed,
@@ Expand Down @@