diff --git a/2_scripts/2_ensemble_and_model_selection.R b/2_scripts/2_ensemble_and_model_selection.R
index 7a3d185..fecb4b1 100644
--- a/2_scripts/2_ensemble_and_model_selection.R
+++ b/2_scripts/2_ensemble_and_model_selection.R
@@ -356,7 +356,8 @@ g_legend<-function(a.gplot){
   }
 
 mylegend<-g_legend(
-  ggplot(data = data.table(Cat=c("None", "HGB deferral", "Low iron donation", "Absent iron donation")))+
+  ggplot(data = data.table(Cat=factor(c("No adverse outcome", "HGB deferral", "Low iron donation", "Absent iron donation"),
+                                      levels = c("No adverse outcome", "HGB deferral", "Low iron donation", "Absent iron donation"))))+
     geom_bar(aes(x=Cat, fill=Cat))+
     scale_fill_manual(values=c("turquoise2","yellow2","darkorange1","red1"),
                                name="")+
@@ -369,7 +370,7 @@ ggsave("./4_output/figs/ROC_compare.png",
        plot=grid.arrange(arrangeGrob(ROC_1vall_noXB,ROC_1vall_withXB,nrow=1),
                          mylegend,nrow=2, heights=c(7,1)),
        width = 6.5,
-       height = 3.5,
+       height = 4,
        unit = "in")
 
 
@@ -383,23 +384,15 @@ AUCs_by_fold <- dt_outer_preds_all_repeats[ , list(Overall = multiclass.roc(fu_o
                                     by= c("version", "rpt", "fold")]
 
 
-AUCs_by_fold_mean_CI <- AUCs_by_fold[ , list(
-  overall_mean = mean(Overall),
-  overall_lb = mean(Overall) - sd(Overall)/sqrt(.N),
-  overall_ub = mean(Overall) + sd(Overall)/sqrt(.N),
-  Z0_mean = mean(Z0),
-  Z0_lb = mean(Z0) - sd(Z0)/sqrt(.N),
-  Z0_ub = mean(Z0) + sd(Z0)/sqrt(.N),
-  Z1_mean = mean(Z1),
-  Z1_lb = mean(Z1) - sd(Z1)/sqrt(.N),
-  Z1_ub = mean(Z1) + sd(Z1)/sqrt(.N),
-  Z2_mean = mean(Z2),
-  Z2_lb = mean(Z2) - sd(Z2)/sqrt(.N),
-  Z2_ub = mean(Z2) + sd(Z2)/sqrt(.N),
-  Z3_mean = mean(Z3),
-  Z3_lb = mean(Z3) - sd(Z3)/sqrt(.N),
-  Z3_ub = mean(Z3) + sd(Z3)/sqrt(.N)),
-  by = "version"]
+AUCs_by_fold_long <- melt(AUCs_by_fold, id.vars = 1:3, 
+                          variable.name = "outcome", value.name = "AUC")
+
+
+AUCs_by_fold_mean_CI <- AUCs_by_fold_long[ , list(
+  mean = mean(AUC),
+  lb = mean(AUC) - sd(AUC)/sqrt(.N),
+  ub = mean(AUC) + sd(AUC)/sqrt(.N)),
+  by = c("version","outcome")]
 
 fwrite(AUCs_by_fold_mean_CI, "./4_output/AUC_results_meanCI.csv")
 
diff --git a/2_scripts/3_feature_importance.R b/2_scripts/3_feature_importance.R
index a6bd864..52b1c37 100644
--- a/2_scripts/3_feature_importance.R
+++ b/2_scripts/3_feature_importance.R
@@ -107,11 +107,7 @@ ggsave("./4_output/figs/feat_imp_XB_top15.png",
 #Combined fig
 featimp_both <- rbind(
   cbind(mod = "Extra biomarkers", featimp_XB),
-<<<<<<< HEAD
-  cbind(mod = "Sandard biomarkers", featimp_noXB)
-=======
   cbind(mod = "Standard biomarkers", featimp_noXB)
->>>>>>> 13d6b501ac8ccabab8e6904f651037f9baf12be0
 )
 
 
@@ -124,8 +120,6 @@ ggplot(featimp_both[display_name %in% c(top15_noXB, top15_XB)])+
 
 ggsave("./4_output/figs/feat_imp_both_top15.png",
        width = 5, height = 4, units = "in")
-<<<<<<< HEAD
-=======
 
 
 #combined median table
@@ -137,4 +131,4 @@ featimp_median <- rbind(
   cbind(featimp_noXB_median, model = "noXB")
 )
 fwrite(featimp_median, "./4_output/feature_importance_medians.csv")
->>>>>>> 13d6b501ac8ccabab8e6904f651037f9baf12be0
+
diff --git a/2_scripts/5_individual_trajectories.R b/2_scripts/5_individual_trajectories.R
index 2fbb07d..20c01a1 100644
--- a/2_scripts/5_individual_trajectories.R
+++ b/2_scripts/5_individual_trajectories.R
@@ -10,7 +10,7 @@ source("2_scripts/utility_functions.R")
 
 # #Read in data
 # weights <- readRDS("./4_output/calib_weights.RDS")
-# features_list_frXB<-readRDS("./1_data/features_scores_frXB.RDS")
+features_list_frXB<-readRDS("./1_data/features_scores_frXB.RDS")
 # features_list_frexcludeXB<-readRDS("./1_data/features_scores_frexcludeXB.RDS")
 # 
 # #Read in models
@@ -35,7 +35,7 @@ dt.frXB <- dt.fr[!is.na(ARUP_Ferritin)]
 
 
 #Risk trajectory matrix
-n_donations = nrow(features_list_frXB$factor)
+n_donations = nrow(dt.frXB)
 max_IDI = 250; min_IDI = 56
 # 
 # #CREATE RISK MATRIX WITH EXTRA BIOMARKERS
@@ -61,19 +61,19 @@ max_IDI = 250; min_IDI = 56
 risk_matrix_XB<- readRDS("./4_output/3d_risk_matrix_XB.RDS")
 
 # #COMPARE DAY 56 and 250 BASE MODELS
-t_return=56
-feature_list_temp <- gen_features_list(dt.temp[ , time_to_fu := t_return],withXB=TRUE)
-preds_56 <- risk_scores_ensemble(features_list=feature_list_temp,
-                              base_mods = base_mods_withXB,
-                              weights = weights$XB,
-                              incl_base_preds = TRUE)
-
-t_return=250
-feature_list_temp <- gen_features_list(dt.temp[ , time_to_fu := t_return],withXB=TRUE)
-preds_250 <- risk_scores_ensemble(features_list=feature_list_temp,
-                                 base_mods = base_mods_withXB,
-                                 weights = weights$XB,
-                                 incl_base_preds = TRUE)
+# t_return=56
+# feature_list_temp <- gen_features_list(dt.temp[ , time_to_fu := t_return],withXB=TRUE)
+# preds_56 <- risk_scores_ensemble(features_list=feature_list_temp,
+#                               base_mods = base_mods_withXB,
+#                               weights = weights$XB,
+#                               incl_base_preds = TRUE)
+# 
+# t_return=250
+# feature_list_temp <- gen_features_list(dt.temp[ , time_to_fu := t_return],withXB=TRUE)
+# preds_250 <- risk_scores_ensemble(features_list=feature_list_temp,
+#                                  base_mods = base_mods_withXB,
+#                                  weights = weights$XB,
+#                                  incl_base_preds = TRUE)
 
 
 # # #CREATE RISK MATRIX WITHOUT EXTRA BIOMARKERS
@@ -101,71 +101,8 @@ preds_250 <- risk_scores_ensemble(features_list=feature_list_temp,
 # summary(risk_matrix_excludeXB[ , "250", "None"] - risk_matrix_excludeXB[ , "56", "None"])
 # 
 # 
-# 1-summary(risk_matrix_XB[ , "56", "None"])
-# 1-summary(risk_matrix_XB[ , "250", "None"])
-# summary(risk_matrix_XB[ , "250", "None"] - risk_matrix_XB[ , "56", "None"])
-
-
-
 
 
-##
-# PLOT INDIVIDUAL TRAJECTORIES -----------
-##
-
-set.seed(10)
-random_donor_nums <- ceiling(runif(60, min=0, max = nrow(dt.frXB)))
-
-for(plot_num in 1:60){
-  donor_num <- random_donor_nums[plot_num]
-  assign(paste0("plot_traj_", plot_num), 
-         ggplot(melt(cbind(data.table(risk_matrix_XB[donor_num, , ]), t = min_IDI:max_IDI), id.vars = "t", measure.vars = c("None", "HGB_defer", "Low", "Absent"))
-                , aes(x=t, y=value, fill=variable)) + 
-           geom_area()+ 
-           scale_y_continuous(expand=c(0,0))+
-           scale_x_continuous(breaks=c(56, 150, 244), expand=c(0,0))+
-           theme(legend.position="none",
-                 axis.text.y = element_blank(),
-                 axis.ticks.y = element_blank(),
-                 axis.line.y = element_blank(),
-                 axis.title = element_blank(),
-                 axis.text.x = element_text(size=10))+
-           scale_fill_manual(values = c("#00FFFF", "#FBD808", "#FF9005", "#FF0000"))
-         #xlab("Days until donation attempt")+
-         #ylab("Probability of outcome")
-         
-  )
-}
-
-
-
-ggsave("./4_output/figs/trajectories_60_random.png",
-       plot = plot_grid(plot_traj_1, plot_traj_2, plot_traj_3, plot_traj_4, plot_traj_5,
-                        plot_traj_6, plot_traj_7, plot_traj_8, plot_traj_9, plot_traj_10, 
-                        plot_traj_11, plot_traj_12, plot_traj_13, plot_traj_14, plot_traj_15,
-                        plot_traj_16, plot_traj_17, plot_traj_18, plot_traj_19, plot_traj_20,
-                        plot_traj_21, plot_traj_22, plot_traj_23, plot_traj_24, plot_traj_25,
-                        plot_traj_26, plot_traj_27, plot_traj_28, plot_traj_29, plot_traj_30, 
-                        plot_traj_31, plot_traj_32, plot_traj_33, plot_traj_34, plot_traj_35,
-                        plot_traj_36, plot_traj_37, plot_traj_38, plot_traj_39, plot_traj_40,
-                        plot_traj_41, plot_traj_42, plot_traj_43, plot_traj_44, plot_traj_45,
-                        plot_traj_46, plot_traj_47, plot_traj_48, plot_traj_49, plot_traj_40, 
-                        plot_traj_51, plot_traj_52, plot_traj_53, plot_traj_54, plot_traj_55,
-                        plot_traj_56, plot_traj_57, plot_traj_58, plot_traj_59, plot_traj_60,
-                        ncol = 5),
-       width = 6.5,
-       height = 8.9,
-       units = "in"
-)
-
-ggsave("./4_output/figs/trajectories_3.png",
-       plot = plot_grid(plot_traj_4, plot_traj_29, plot_traj_6,
-                        ncol = 3),
-       width = 5,
-       height = 1,
-       units = "in"
-)
-
 ##
 # ANALYZE TRAJECTORIES -----------
 ##
@@ -181,6 +118,7 @@ dt.firstreturn <- cbind(features_list_frXB$factor,
 summary(dt.firstreturn$Any_AE_day_56)
 summary(dt.firstreturn$Any_AE_day_250)
 
+fwrite(dt.firstreturn, "./1_data/dt_fr_withrisk")
 
 ##SCATTER OF day 56 vs. day 250
 plot <- ggplot(data=dt.firstreturn, 
@@ -219,28 +157,31 @@ am_adt <- function(inarray) { #turns 3d matrix into 2d
 
 dt_long_risk_matrix <- am_adt(risk_matrix_XB)
 setnames(dt_long_risk_matrix, "N", "Risk")
+set.seed(998)
 dt_long_risk_matrix_sm <- dt_long_risk_matrix[Idx_donation %in% sample.int(3685, 300)]
-dt_long_risk_matrix_sm[time_to_fu==56 & outcome=="None"&Risk>.9]
-dt_long_risk_matrix_sm[time_to_fu==250 & outcome=="None" & Risk<.15]
+dt_long_risk_matrix_sm[, ex_group := ""]
+
+#Quick recoverer
+Idx_quick_recoverers <- dt_long_risk_matrix_sm[time_to_fu==56 & outcome=="None"&Risk>.9, unique(Idx_donation)] 
+Idx_chronic <- dt_long_risk_matrix_sm[time_to_fu==250 & outcome=="None" & Risk<.15, unique(Idx_donation)] #Chronic high-risk
 idx_low_start <- dt_long_risk_matrix_sm[time_to_fu==56 & outcome=="None"&Risk<.35]$Idx_donation
-dt_long_risk_matrix_sm[Idx_donation %in% idx_low_start & 
+Idx_slow_recoverers <- dt_long_risk_matrix_sm[Idx_donation %in% idx_low_start & 
                          outcome=="None" &
                          time_to_fu==250 &
-                         Risk>.65, ]
+                         Risk>.60, unique(Idx_donation)] #slow recoverer
 
 
 
-Idx_chronic <- c(8, 304, 712, 1763, 3267)
-Idx_quick_recoverers <- c(1372, 1750, 3036, 3684, 1374)
-Idx_slow_recoverers <- c(129, 394, 2278, 2978, 3596)
-dt_long_risk_matrix_sm[, ex_group := ""]
-dt_long_risk_matrix_sm[,  ex_group := ifelse(Idx_donation %in% Idx_chronic, 
+# Idx_chronic <- c(8, 304, 712, 1763, 3267)
+# Idx_quick_recoverers <- c(1372, 1750, 3036, 3684, 1374)
+# Idx_slow_recoverers <- c(129, 394, 2278, 2978, 3596)
+dt_long_risk_matrix_sm[,  ex_group := ifelse(Idx_donation %in% tail(Idx_chronic,5), 
                                              "Chronic high risk",
                                              ex_group)]
-dt_long_risk_matrix_sm[,  ex_group := ifelse(Idx_donation %in% Idx_quick_recoverers, 
+dt_long_risk_matrix_sm[,  ex_group := ifelse(Idx_donation %in% tail(Idx_quick_recoverers,5), 
                                              "Quick recoverer",
                                              ex_group)]
-dt_long_risk_matrix_sm[,  ex_group := ifelse(Idx_donation %in% Idx_slow_recoverers, 
+dt_long_risk_matrix_sm[,  ex_group := ifelse(Idx_donation %in% tail(Idx_slow_recoverers,5), 
                                              "Slow recoverer",
                                              ex_group)]
 
@@ -307,6 +248,150 @@ ggsave("4_output/figs/any_ae_traject.png",
 
 
 
+##
+# PLOT INDIVIDUAL TRAJECTORIES -----------
+##
+
+set.seed(10)
+random_donor_nums <- ceiling(runif(60, min=0, max = nrow(dt.frXB)))
+
+for(plot_num in 1:60){
+  donor_num <- random_donor_nums[plot_num]
+  assign(paste0("plot_traj_", plot_num), 
+         ggplot(melt(cbind(data.table(risk_matrix_XB[donor_num, , ]), t = min_IDI:max_IDI), id.vars = "t", measure.vars = c("None", "HGB_defer", "Low", "Absent"))
+                , aes(x=t, y=value, fill=variable)) + 
+           geom_area()+ 
+           scale_y_continuous(expand=c(0,0))+
+           scale_x_continuous(breaks=c(56, 150, 244), expand=c(0,0))+
+           theme(legend.position="none",
+                 axis.text.y = element_blank(),
+                 axis.ticks.y = element_blank(),
+                 axis.line.y = element_blank(),
+                 axis.title = element_blank(),
+                 axis.text.x = element_text(size=10))+
+           scale_fill_manual(values = c("#00FFFF", "#FBD808", "#FF9005", "#FF0000"))
+         #xlab("Days until donation attempt")+
+         #ylab("Probability of outcome")
+         
+  )
+}
+
+
+
+ggsave("./4_output/figs/trajectories_60_random.png", # 60 sampled donors, 12 rows x 5 cols
+       plot = plot_grid(plot_traj_1, plot_traj_2, plot_traj_3, plot_traj_4, plot_traj_5,
+                        plot_traj_6, plot_traj_7, plot_traj_8, plot_traj_9, plot_traj_10, 
+                        plot_traj_11, plot_traj_12, plot_traj_13, plot_traj_14, plot_traj_15,
+                        plot_traj_16, plot_traj_17, plot_traj_18, plot_traj_19, plot_traj_20,
+                        plot_traj_21, plot_traj_22, plot_traj_23, plot_traj_24, plot_traj_25,
+                        plot_traj_26, plot_traj_27, plot_traj_28, plot_traj_29, plot_traj_30, 
+                        plot_traj_31, plot_traj_32, plot_traj_33, plot_traj_34, plot_traj_35,
+                        plot_traj_36, plot_traj_37, plot_traj_38, plot_traj_39, plot_traj_40,
+                        plot_traj_41, plot_traj_42, plot_traj_43, plot_traj_44, plot_traj_45,
+                        plot_traj_46, plot_traj_47, plot_traj_48, plot_traj_49, plot_traj_50, 
+                        plot_traj_51, plot_traj_52, plot_traj_53, plot_traj_54, plot_traj_55,
+                        plot_traj_56, plot_traj_57, plot_traj_58, plot_traj_59, plot_traj_60,
+                        ncol = 5), # each plot_traj_<k> is created by the loop over 1:60 above
+       width = 6.5,
+       height = 8.9,
+       units = "in"
+)
+
+ggsave("./4_output/figs/trajectories_3.png",
+       plot = plot_grid(plot_traj_4, plot_traj_29, plot_traj_6,
+                        ncol = 3),
+       width = 5,
+       height = 1,
+       units = "in"
+)
+
+
+#Trajectories for 3 archetypes
+archetype_donor_nums <- c(head(Idx_chronic, 10),          # up to 10 donors per archetype;
+                          head(Idx_quick_recoverers, 10), # head() never pads with NA when a
+                          head(Idx_slow_recoverers, 10))  # group has fewer than 10, unlike x[1:10]
+
+
+for(donor_num in archetype_donor_nums){
+  assign(paste0("plot_archetype_", donor_num), 
+         ggplot(melt(cbind(data.table(risk_matrix_XB[donor_num, , ]), t = min_IDI:max_IDI), id.vars = "t", measure.vars = c("None", "HGB_defer", "Low", "Absent"))
+                , aes(x=t, y=value, fill=variable)) + 
+           geom_area()+ 
+           scale_y_continuous(expand=c(0,0))+
+           scale_x_continuous(breaks=c(56, 150, 244), expand=c(0,0))+
+           theme(legend.position="none",
+                 axis.text.y = element_blank(),
+                 axis.ticks.y = element_blank(),
+                 axis.line.y = element_blank(),
+                 axis.title = element_blank(),
+                 axis.text.x = element_text(size=10))+
+           scale_fill_manual(values = c("#00FFFF", "#FBD808", "#FF9005", "#FF0000"),
+                             labels = c("No adverse outcome",
+                                        "Hemoglobin deferral",
+                                        "Low iron donation",
+                                        "Absent iron donation"
+                             ))
+         #xlab("Days until donation attempt")+
+         #ylab("Probability of outcome")
+         
+  )
+}
+
+title1 <- ggdraw() + 
+  draw_label(
+    "  Fast recoverers",
+    fontface = 'bold',
+    x = 0,
+    hjust = 0
+  ) 
+plot_row_fr <- plot_grid(plot_archetype_247, plot_archetype_849,plot_archetype_1234,plot_archetype_1654,
+                         ncol = 4)
+title2 <- ggdraw() + 
+  draw_label(
+    "  Slow recoverers",
+    fontface = 'bold',
+    x = 0,
+    hjust = 0
+  )
+plot_row_sr <- plot_grid(plot_archetype_121, plot_archetype_394,plot_archetype_787,plot_archetype_1781,
+                         ncol = 4)
+title3 <- ggdraw() + 
+  draw_label(
+    "  Chronic high risk",
+    fontface = 'bold',
+    x = 0,
+    hjust = 0
+  )
+plot_row_chr <- plot_grid(plot_archetype_63, plot_archetype_989,plot_archetype_1431,plot_archetype_679,
+                          ncol = 4)
+
+
+#Get legend
+legend <- get_legend(
+  plot_archetype_121+
+  theme(legend.position = "bottom")+
+    guides(fill=guide_legend(title="Donation outcome",
+                             nrow =2))
+)
+  
+ggsave(
+  "./4_output/figs/indiv_plots_archetypes.png",
+  plot_grid(title1,
+            plot_row_fr,
+            title2,
+            plot_row_sr,
+            title3,
+            plot_row_chr,
+            legend,
+            ncol =1,
+            rel_heights = c(rep(c(0.22, 1),3),.5)),
+  width = 5, height = 4.5, units="in"
+)
+
+
+
+
+
 ### AVERAGE TRAJECTORY BY SUBGROUPS  -----
 stratified_outcomes_plt <- function(group_vec, #vector of groups
                                 dt_long_risk_matrix,
diff --git a/4_output/AUC_results_meanCI.csv b/4_output/AUC_results_meanCI.csv
index 3a5be02..4abe2fd 100644
--- a/4_output/AUC_results_meanCI.csv
+++ b/4_output/AUC_results_meanCI.csv
@@ -1,3 +1,11 @@
-version,overall_mean,overall_lb,overall_ub,Z0_mean,Z0_lb,Z0_ub,Z1_mean,Z1_lb,Z1_ub,Z2_mean,Z2_lb,Z2_ub,Z3_mean,Z3_lb,Z3_ub
-withXB,0.827978034682583,0.825417464383301,0.830538604981865,0.911129063218194,0.908605221090865,0.913652905345523,0.81715255904535,0.81287999661037,0.821425121480329,0.794839701886792,0.789976542617216,0.799702861156368,0.858133953520499,0.854270357081833,0.861997549959165
-noXB,0.775607559186025,0.773265539305788,0.777949579066263,0.865623909196267,0.86152970640417,0.869718111988364,0.810832134715594,0.806441672008184,0.815222597423004,0.725511919126659,0.720033751211842,0.730990087041477,0.768934420449892,0.76492324118383,0.772945599715954
+version,outcome,mean,lb,ub
+withXB,Overall,0.827978034682583,0.825417464383301,0.830538604981865
+noXB,Overall,0.775607559186025,0.773265539305788,0.777949579066263
+withXB,Z0,0.911129063218194,0.908605221090865,0.913652905345523
+noXB,Z0,0.865623909196267,0.86152970640417,0.869718111988364
+withXB,Z1,0.81715255904535,0.81287999661037,0.821425121480329
+noXB,Z1,0.810832134715594,0.806441672008184,0.815222597423004
+withXB,Z2,0.794839701886792,0.789976542617216,0.799702861156368
+noXB,Z2,0.725511919126659,0.720033751211842,0.730990087041477
+withXB,Z3,0.858133953520499,0.854270357081833,0.861997549959165
+noXB,Z3,0.768934420449892,0.76492324118383,0.772945599715954
diff --git a/4_output/figs/ROC_compare.png b/4_output/figs/ROC_compare.png
index 916098b..ece5a4d 100644
Binary files a/4_output/figs/ROC_compare.png and b/4_output/figs/ROC_compare.png differ
diff --git a/4_output/figs/any_ae_traject.png b/4_output/figs/any_ae_traject.png
index 748e4c3..d2a55eb 100644
Binary files a/4_output/figs/any_ae_traject.png and b/4_output/figs/any_ae_traject.png differ
diff --git a/4_output/figs/each_ae_traject.png b/4_output/figs/each_ae_traject.png
index 8f7cf4e..ed7d634 100644
Binary files a/4_output/figs/each_ae_traject.png and b/4_output/figs/each_ae_traject.png differ
diff --git a/4_output/figs/indiv_plots_archetypes.png b/4_output/figs/indiv_plots_archetypes.png
new file mode 100644
index 0000000..1f72bc2
Binary files /dev/null and b/4_output/figs/indiv_plots_archetypes.png differ
diff --git a/5_manuscript/iron_trajectories.Rmd b/5_manuscript/iron_trajectories.Rmd
index a0e66c6..625d912 100644
--- a/5_manuscript/iron_trajectories.Rmd
+++ b/5_manuscript/iron_trajectories.Rmd
@@ -39,11 +39,17 @@ W. Alton Russell^1,2^, David Schienker^1,3,4,5^, Brian Custer^2,6^
 
 <br>
 
-**Corresponding author:** W. Alton Russell, Management Science and Engineering, Stanford University, Stanford CA 94305. email: [altonr\@stanford.edu](mailto:altonr@stanford.edu){.email}.
+**Corresponding author:** W. Alton Russell, Harvard Medical School, 101 Merrimac St, Room 1032, Boston, MA, 02114, United States. email: [warussell\@mgh.harvard.edu](mailto:warussell@mgh.harvard.edu){.email}.
 
-**Key words:**
+**Funding:** WAR was funded by a Stanford Interdisciplinary Graduate Fellowship.
 
-**Running title:**
+**Conflicts:** The authors have no conflicts of interest to declare.
+
+**Running title:** Blood donor iron risk trajectories
+
+<br>
+
+Main text: 3266 of 3500 words \| Abstract: 247 of 250 words \| Tables: 1 \| Figures: 6
 
 ##### 
 
@@ -62,58 +68,63 @@ knitr::opts_chunk$set(echo = FALSE)
 
 # Abstract
 
-**Background:**
+**Background:** Despite a fingerstick hemoglobin requirement and 56-day minimum donation interval, repeat blood donation can cause or exacerbate iron deficiency.
 
-**Methods:**
+**Study design and methods:** Using data from the REDS-II Donor Iron Status Evaluation study, we developed multiclass prediction models to estimate the competing risk of hemoglobin deferral and collecting blood from a donor with sufficient hemoglobin but low or absent underlying iron stores. We compared models developed with and without two biomarkers not routinely measured in most blood centers (ferritin and soluble transferrin receptor). We generated and analyzed 'individual risk trajectories': estimates of how each donor's risk developed as a function of the time interval until their next donation attempt.
 
-**Results:**
+**Results:** With standard biomarkers, the top model had a multiclass area under the receiver operator characteristic curve (AUC) of 77.6% (95% CI 77.3% - 77.8%). With extra biomarkers, multiclass AUC increased to 82.8% (95% CI 82.5% - 83.1%). In the extra biomarkers model, ferritin was the single most important variable, followed by the donation interval. We identified three risk archetypes: 'fast recoverers' (\<10% risk of any adverse outcome on post-donation day 56), 'slow recoverers' (\>60% adverse outcome risk on day 56 that declines to \<35% by day 250), and 'chronic high-risk' (\>85% risk of adverse outcome on day 250).
 
-**Conclusions:**
+**Discussion:** A longer donation interval reduced risk of iron-related adverse events for most donors, but risk remained high for some. Tailoring safeguards to individual risk may prevent collecting blood from donors with low or absent iron stores.
+
+<br>
+
+**Key words:** blood donation, iron deficiency, ferritin, hemoglobin
 
 ##### 
 
 # Introduction
 
+> Repeat blood donation can cause or exacerbate iron deficiency, with higher incidence among teen donors and premenopausal women [@Cable2012; @Salvin2014; @Spencer2019; @Baart2013; @Rigas2014; @Patel2019]. In the United States, potential donors are screened using fingerstick hemoglobin or hematocrit tests and deferred if levels are below a minimum cutoff. Currently, minimum hemoglobin levels are 12.5 g/dL for women and 13.0 g/dL for men. Because fingerstick hemoglobin is an unreliable indicator of true iron stores, some donors with low or absent iron stores qualify to donate and are thereby subjected to further iron loss [@Baart2013]. Deferral for low hemoglobin prevents some collections from iron-deficient donors but consumes time and resources from both donor and blood center, decreasing donor satisfaction and the likelihood of future donations [@Custer2007]. More reliable measures of iron status include ferritin, zinc protoporphyrin, soluble transferrin receptor, and hepcidin, but these are more costly to measure, and most are not yet available as point-of-care tests [@Kiss2018]. Past studies have identified several factors that increase risk of iron deficiency among blood donors. The Danish Blood Donor Study found that sex, menopause status, and donation history were the strongest predictors of iron deficiency among donors, and weight, age, vitamin use, and diet were also significant [@Rigas2014]. Similar results have been found for donors in the United States, Australia, and the Netherlands [@Cable2012; @Salvin2014; @Spencer2019; @Baart2013; @Patel2019]. Other studies have analyzed predictors for a low hemoglobin deferral for repeat blood donors, identifying age, time since last donation, and donation history as strong predictors [@Baart2011; @Baart2012]. 
To our knowledge, no prediction model has been developed that considers the competing risks of hemoglobin deferral and of collecting blood from a donor with sufficient hemoglobin but low or absent underlying iron stores.
 
-> Repeat blood donation can cause or exacerbate iron deficiency, with higher incidence among teen donors and premenopausal women [@Cable2012; @Salvin2014; @Spencer2019; @Baart2013; @Rigas2014; @Patel2019]. In the United States, potential donors are screened using fingerstick hemoglobin or hematocrit tests and deferred if levels are below a minimum cutoff. Such low hemoglobin deferrals prevent some collections from iron deficient donors but consume time and resources from both donor and blood center, decreasing donor satisfaction and the likelihood of returning for future donations [@Custer2007]. Because fingerstick hemoglobin is an unreliable indicator of true iron stores, many donors qualify to donate despite having low or absent underlying iron stores [@Baart2013]. More reliable measures of iron status include ferritin, zinc protoporphyrin, soluble transferrin receptor, and hepcidin, but these are more costly to measure and not available as point of care tests [@Kiss2018]. Past studies have identified several factors that increase risk of iron deficiency among blood donors. The Danish Blood Donor Study found that sex, menopause status, and donation history were the strongest predictors of iron deficiency among donors, and weight, age, vitamin use, and diet were also significant [@Rigas2014]. Similar results have been found for donors in the United States, Australia, and the Netherlands [@Cable2012; @Salvin2014; @Spencer2019; @Baart2013; @Patel2019]. Other studies have analyzed predictors for a low hemoglobin deferral for repeat blood donors, identifying age, time since last donation, and donation history as strong predictors [@Baart2011; @Baart2012]. To our knowledge, no prediction model has been developed that considers the competing risks of hemoglobin deferral and of collecting blood from a donor with sufficient hemoglobin but low or absent underlying iron stores.
-
-> In this study, we developed machine learning models to estimate how risk of hemoglobin deferral and completed donations from donors with low or absent iron stores develop as a function of the donation interval -- the length of time from an index donation until the donor returns for a subsequent donation attempt -- in a cohort of donors from the REDS-II Iron Status Evaluation (RISE) study [@Cable2016]. We also compared predictive performance with and without ferritin and soluble transferritin receptor (STfR), two biomarkers that were available for many donations in the RISE study but are not routinely collected by most US blood centers.
+> In this study, we developed machine learning models to estimate how risk of hemoglobin deferral and completed donations from donors with low or absent iron stores develop as a function of the donation interval -- the length of time from an index donation until the donor returns for a subsequent donation attempt -- in a cohort of donors from the REDS-II Iron Status Evaluation (RISE) study [@Cable2016]. We also compared predictive performance with and without ferritin and soluble transferrin receptor (STfR), two biomarkers that were available for many donations in the RISE study but are not routinely collected by most US blood centers.
 
 <br>
 
 # Methods
 
-> Using data from the RISE study, we trained multiclass prediction models to predict the risk of three iron-related adverse outcomes at a subsequent donation attempt: hemoglobin deferral and donating with low or absent iron stores as defined using ferritin. We assessed the models' predictive performance, compared performance with and without the inclusion of two non-routine biomarkers (ferritin and STfR) as features for prediction, and generated and analyzed individual risk profiles for each donor's likelihood of iron-related adverse donation outcomes at their next visit as a function of their donation interval (how long until the donor returns).
+> Using data from the RISE study, we trained multiclass prediction models to predict the risk of three iron-related adverse outcomes at a subsequent donation attempt: hemoglobin deferral, donating with low iron stores, and donating with absent iron stores at the time of collection. We assessed the models' predictive performance, compared performance with and without the inclusion of two non-routine biomarkers (ferritin and STfR) as features for prediction, and generated and analyzed individual risk profiles for each donor's likelihood of iron-related adverse donation outcomes at their next visit as a function of their donation interval (how long until the donor returns). All code is uploaded to a public repository [INSERT DOI LINK].
 
 ## Data preprocessing and formatting
 
-> The RISE dataset contains data from several U.S. blood centers on donation attempts for 2,425 donors over a 2-year period [@Cable2016]. Data elements include past donation history, biometrics for each visit, and questionnaire responses regarding demographics, diet, supplemental iron consumption, female reproductive health, and demographics from a baseline and final visit. We used 46 variables available for donations in the RISE dataset together with the time until the donor returns to predict the outcome of a follow-up donation attempt. We assumed that donor characteristics measured at the baseline visit such as diet, vitamin use, smoking, and female reproductive health would not change significantly over the study period, and we used them to predict outcomes following subsequent donations by the same donor. We re-coded or imputed missing values for some fields; `r run_reference("t-feature-engineering")` contains these details for all features used for prediction. We also included a composite dietary iron consumption score that was generated for each donor in the RISE dataset as part of previous secondary analysis of this dataset [@Spencer2019a].
+> The RISE dataset contains data from several U.S. blood centers on donation attempts for 2,425 donors over a 2-year period [@Cable2016]. Data elements include past donation history, biometrics for each visit, and questionnaire responses regarding demographics, diet, supplemental iron consumption, and female reproductive health. For the 'standard biomarkers' model, we used 46 variables available for donations in the RISE dataset together with the time interval until the donor returns to predict the outcome of a follow-up donation attempt. We assumed that donor characteristics measured at the baseline visit such as diet, vitamin use, smoking, and female reproductive health would not change significantly over the study period, and we used them to predict outcomes following subsequent donations by the same donor. We also developed an 'extra biomarkers' model, for which we included ferritin, STfR, and derived measures (log ferritin, ratio of STfR to log ferritin, and calculated body iron) as features for prediction. We re-coded or imputed missing values for some fields; `r run_reference("t-feature-engineering")` contains these details for all features used for prediction. We also included a composite dietary iron consumption score that was generated for each donor in the RISE dataset as part of a previous secondary analysis of this dataset [@Spencer2019a].
 
-> To generate the model development dataset, we considered donations with at least 150 mL of red blood cell loss as potential index donations, which included whole blood donations, apheresis red blood cell donations, and some donations that were classified as 'quantity not sufficient'. We excluded potential index donations that were double red cell donations, that were missing a measurement of ferritin, and donations for which neither fingerstick hemoglobin nor hematocrit was recorded. If follow-up visits were recorded after potential index donations, we generated labels with the time until the follow-up visit (in days) and its outcome. For all index donations followed by a visit with significant iron loss, defined as a loss of at least 55 mL of red blood cells, we generated a label for the index donation based on the first such follow-up visit. We also generated labels for each index donation based on any follow-up visits that did not result in significant iron loss (i.e., visits resulting in a deferral or apheresis donations of platelets or plasma with \<55 mL of red blood cell loss) if such visits occurred before any follow-up visits with significant iron loss. For each index donation $i$, the outcome of its follow-up visits ($z_i$) was classified as hemoglobin deferral ($z_i=1$) if one were recorded; as a low iron donation ($z_i=2$) if pre-donation ferritin was $\geq12$ mg/dl and $<20$ mg/dl for women or $\geq12$ mg/dl and $<30$ mg/dl for men; as an absent iron donation ($z_i=3$) if pre-donation ferritin was \<12 mg/dl; and as a 'no adverse outcome' donation otherwise ($z_i=0$). Follow-up donations without ferritin measurements ($z_i=-1$) were not included in the model development dataset but were included in the 'first return' dataset. We used this dataset for calibrating the model and generating risk trajectories as described below.
+> To generate the model development dataset, we considered donations with at least 150 mL of red blood cell loss as potential index donations, which included whole blood donations, mixed apheresis donations that included a single red cell unit, and some donations that were classified as 'quantity not sufficient'. We excluded potential index donations that were double red cell donations due to limited data, the altered iron recovery profile that follows the large iron loss from double red collection, and the 112-day mandatory deferral period after such donations. We also excluded donations that were missing a measurement of ferritin and donations for which neither fingerstick hemoglobin nor hematocrit was recorded. If follow-up visits were recorded after potential index donations, we generated labels with the time until the follow-up visit (in days) and its outcome. For all index donations followed by a visit with significant iron loss, defined as a loss of at least 55 mL of red blood cells, we generated a label for the index donation based on the first such follow-up visit. Additionally, we generated labels for any follow-up visits that did not result in significant iron loss (i.e., visits resulting in a deferral or apheresis donations of platelets or plasma with \<55 mL of red blood cell loss) if such visits occurred between the index donation and the first follow-up visit with significant iron loss. For each index donation $i$, the outcome of its follow-up visits ($z_i$) was classified as hemoglobin deferral (labeled as $z_i=1$) if one was recorded; as a low iron donation ($z_i=2$) if pre-donation ferritin was $\geq12$ ng/mL and $<20$ ng/mL for women or $\geq12$ ng/mL and $<30$ ng/mL for men; as an absent iron donation ($z_i=3$) if pre-donation ferritin was \<12 ng/mL; and as a 'no adverse outcome' donation otherwise ($z_i=0$). 
Follow-up donations without ferritin measurements ($z_i=-1$) were not included in the model development dataset but were included in the 'first return' dataset. We used the first return dataset to calibrate the model and generate risk trajectories as described below.
 
 ## Prediction model development
 
 ### Model selection
 
-> We evaluated several candidate model types: gradient boosted machines, random forest, regression trees, and generalized linear models with elastic net regularization with and without second order interaction terms. For each model type we evaluated multiple hyperparameter settings via grid search; the specific hyperparameter combinations assessed are described in `r run_reference("t-mod-tuning")`. We implemented a nested cross validation procedure with resampling to minimize bias in model selection and assessment [@Varma2006]. For this procedure, we generated 15 *model assessment partitions* which consisted of 3 resamples of 5 equal-sized partitions of the entire dataset that were generated with stratified sampling to ensure the distribution of outcomes was balanced across partitions. For each model assessment partition, we defined all data not included in the partition as the corresponding *model tuning set*. Within the 15 tuning sets, we assessed all candidate model configurations (model type and hyperparameter setting) using 5-fold validation, assessing the multiclass area under the reliever operator characteristic curve (multiclass AUC) using the Hand and Till method [@Hand2001]. We compared model configurations based on the average multiclass AUC across 5 cross validation folds averaged over all 15 tuning sets (assessing a total of 75 realizations of each candidate model configuration).
+> We evaluated several candidate model types: gradient boosted machines, random forest, regression trees, and generalized linear models with elastic net regularization with and without second order interaction terms. For each model type, we evaluated multiple hyperparameter settings via grid search; the specific hyperparameter combinations assessed are described in `r run_reference("t-mod-tuning")`. We implemented a nested cross validation procedure with resampling to minimize bias in model selection and assessment [@Varma2006]. For this procedure, we generated 15 *model assessment partitions* which consisted of 3 resamples of 5 equal-sized partitions of the entire dataset that were generated with stratified sampling to ensure the distribution of outcomes was balanced across partitions. For each model assessment partition, we defined all data not included in the partition as the corresponding *model tuning set*. Within the 15 tuning sets, we assessed all candidate model configurations (model type and hyperparameter setting) using 5-fold cross validation, assessing the multiclass area under the receiver operating characteristic curve (multiclass AUC) using the Hand and Till method [@Hand2001]. We compared model configurations based on the average multiclass AUC across 5 cross validation folds averaged over all 15 tuning sets (assessing a total of 75 realizations of each candidate model configuration).
 
-> We also considered ensemble models, which combine the risk scores from multiple base models. We assessed two methods of combining risk scores from base models: a simple average and a weighted average, for which we weighted each model's score proportionally to its accuracy raised to a power of four as suggested by Large et. al. [@Large2019]. We assessed AUC for each candidate ensemble configuration across the same 5 cross validation folds within each of the 15 tuning sets.
+> We also evaluated ensemble models, which combine the risk scores from multiple base models. We assessed two methods of combining risk scores from base models: a simple average and a weighted average, for which we weighted each model's score proportionally to its accuracy raised to a power of four as suggested by Large et al. [@Large2019]. We assessed AUC for each candidate ensemble configuration across the same 5 cross validation folds within each of the 15 tuning sets.
 
-> We selected the top model configuration based on multiclass AUC. To produce an unbiased assessment of the selected model configuration, we then assessed multiclass AUC on each of the 15 model assessment partitions. For each assessment partition, we trained the model configuration on all data not in the partition and used this model to generate risk scores on the assessment partition. We then calculated multiclass AUC for the partition.
-
-> To assess impact of measuring ferritin and STfR on ability to predict iron-related adverse outcomes at follow-up donation attempts, we repeated this model development process twice. In the "extra biomarkers" model, we used ferritin and soluble transferrin receptor and derived measures (log ferritin, ratio of STfR to log ferritin, and calculated body iron) as features for prediciton. Because these biomarkers are not routinely measured for most blood donations in the United States, we also developed a "standard biomarkers" version that excluded these features.
+> We selected the top model configuration based on multiclass AUC. To produce an unbiased assessment of the selected model configuration, we then assessed multiclass AUC on each of the 15 model assessment partitions. For each assessment partition, we trained the model configuration on all data not in the partition and used this model to generate risk scores on the assessment partition, which were used to calculate AUC. We repeated this model development process twice: once with ferritin, STfR, and derived measures as features (extra biomarkers model) and once without (standard biomarkers model). We also assessed one-vs-rest AUC for each outcome, which measures how well the model discriminates each of the four possible outcomes from the other three.
 
 ### Feature importance
 
-> For the selected "standard" and "extra biomarkers" model configurations, we assessed the importance of features for prediction using a random permutation method [@Breiman2001]. In this procedure we randomly shuffled one feature collumn in each model assessment partition and generating risk scores using the model trained on all data not in that partition. We calculated the percent decrease in multiclass AUC when a feature's column was shuffled as compared to the unaltered dataset as a measure of the feature's importance to the model.
+> For the selected "standard" and "extra biomarkers" model configurations, we assessed the importance of features for prediction using a random permutation method [@Breiman2001]. In this procedure we randomly shuffled one feature column in each model assessment partition and generated risk scores using the model trained on all data not in that partition. We calculated the percent decrease in multiclass AUC when a feature's column was shuffled as compared to the unaltered dataset as a measure of the feature's importance to the model.
 
 ### Calibration
 
-> To generate the final model, we retrained the selected model configurations on the entire model development dataset and then calibrated the predicted probabilities to a 'first return' dataset, wherein index donations were labled only once with the outcome of the first subsequent donation attempt. The first return dataset included completed donations with no ferritin measurement; we estimated the distribution of outcomes for follow-up visits from the first return dataset by assuming that the distribution of absent, low, and 'no-adverse outcome' donations in follow-up donations at which ferritin was not measured would be the same as for those with ferritin measured. Mathematically, we totaled each follow-up outcome as $n^{(k)}$, where $k=-1, 0, 1, 2, 3$ correspond to the outcomes described above. We then calculated $\tilde{n}^{(k)}$, an estimation of what the totals would have been if ferritin were measured for all follow-up donations, as $\tilde{n}^{(1)} = n^{(1)}$ (hemoglobin deferral) and $\tilde{n}^{(l)} = n^{(l)}+n^{(-1)}\frac{n^{(l)}}{n^{(0)}+n^{(2)}+n^{(3)}}$ for $l=0,2,3$ (completed donations). We then used our top model to generate the unnormalized probability vector $[\hat{q}_i^{(0)}, \hat{q}_i^{(1)}, \hat{q}_i^{(2)}, \hat{q}_i^{(3)}]$ for each index donation $i$. We computed weights $w^{(k)}$ for the unnormalized probability of each outcome $\hat{q}_i^{(k)}$ by solving the system of equations $\sum_{i=1}^I w^{(k)}\hat{q}_i^{(k)}/\sum_{\tilde{k}=0}^4 w^{(\tilde{k})}\hat{q}_i^{(\tilde{k})} = \tilde{n}^{(k)}$ for $k=0,1,2,3$. The final calibrated model used parameters $a^{(k)}$, $b^{(k)}$, and $w^{(k)}$ together with the uncalibrated scores from the model $z_i^{(k)}$ to produce the estimated likelihood of each outcome at a follow-up donation as $\tilde{q}^{(k)}=w^{(k)}\hat{q}^{(k)}/\sum_{\tilde{k}=1}^4 w^{(\tilde{k})}\hat{q}_j^{(\tilde{k})}$ where $\hat{q}_i^{(k)} = \sigma (a^{(k)} z_i^{(k)} + b^{(k)})$. 
This ensured that the expectation of the distribution of the predicted outcome for the first return dataset would correspond to our estimated totals $\tilde{n}^{(k)}$.
+> To generate the final model, we retrained the selected model configurations on the entire model development dataset and then calibrated the predicted probabilities to a 'first return' dataset, wherein index donations were labeled only once with the outcome of the first subsequent donation attempt, including follow-up donations with no ferritin measurement. We estimated the distribution of outcomes for follow-up visits from the first return dataset by assuming that the distribution of absent, low, and 'no-adverse outcome' donations in follow-up donations for which ferritin was not measured would be the same as for those with ferritin measurements; mathematical details are provided in the supplemental methods.
+
+## Risk trajectory analysis
 
-# Risk trajectory analysis
+> For each index donation, we generated a risk trajectory by predicting the likelihood of each outcome at the donor's next donation attempt for each possible follow-up donation interval between 56 and 250 days using the calibrated 'extra biomarkers' model. We generated graphical representations of individual donors' risk trajectories showing how the estimated risk of each adverse outcome evolves depending on the number of days until the donor returns. To illustrate differences in risk trajectories, we created three recovery archetypes: 'fast recoverers' (\<10% risk of any adverse outcome on post-donation day 56), 'slow recoverers' (\>60% adverse outcome risk on day 56 that declines to \<35% by day 250), and 'chronic high risk' (\>85% risk of adverse outcome on day 250). In a separate subgroup analysis, we compared the mean and 95% confidence interval for the estimated risk of each adverse outcome as a function of the donation interval for groups of donors stratified by selected parameters.
 
-> For each index donation, we generated a risk trajectory by predicting the likelihood of each outcome at the donor's next donation attempt for each possible followup donation interval between 56 and 256 days using the calibrated 'extra biomarkers' model. We generated graphical representations of individual donors' risk trajectories showing how the estimated of each adverse outcome evolve if donors wait longer to return. In subgroup analysis, we compared the mean and 95% confidence interval for the estimated risk of each adverse event for each donation interval from 56 to 250 days for groups of donors stratified by key parameters.
+## Data sharing statement
+
+The RISE dataset was provided by the National Heart, Lung, and Blood Institute (NHLBI) BioLINCC repository ([\<https://biolincc.nhlbi.nih.gov\>](https://biolincc.nhlbi.nih.gov){.uri}). Our Research Materials Distribution Agreement prohibits publication of the raw data, but other researchers can submit a data request to NHLBI at no charge. All analytic code for this analysis has been published at XXXX.
 
 <br>
 
@@ -215,60 +226,112 @@ tbl_labels_per_idx <- table(dt.md[ , .N, by = donation_id]$N)
 
 ```
 
-
-> In the RISE dataset, a total of `r nrow(dt.firstreturn)` donations from `r dt.firstreturn[, uniqueN(RandID)]` donors were followed by at least one follow-up visit. We removed `r dt.firstreturn[is.na(FingerstickHGB_equiv), .N]` index donations because hemoglobin was not recorded, and we removed a further `r dt.firstreturn[time_to_fu < 56, .N]` index donations from the first return dataset because the first follow-up visit with significant iron loss was less than 56 days later. The first return dataset contained `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56, .N]` index donations labeled with the outcome of the first follow-up donation. That outcome was a hemoglobin deferral for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 1, .N]` index donations; a low-iron donation for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 2, .N]`; an absent iron donation for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 3, .N]`; no adverse outcome for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 0, .N]`; and a completed donation with unknown iron status for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == -1, .N]`. The model development dataset included `r dt.md[ , uniqueN(donation_id)]` unique index donations from `r dt.md[ , uniqueN(RandID)]` donors. `r tbl_labels_per_idx[[1]]` index donations were labeled with one follow-up donation, `r tbl_labels_per_idx[[2]]` were labeled twice, and `r dt.md[ , uniqueN(donation_id)] - tbl_labels_per_idx[[1]] - tbl_labels_per_idx[[2]]` were labeled with 3 or more follow-up visit outcomes (maximum of 8).
+> In the RISE dataset, a total of `r nrow(dt.firstreturn)` donations from `r dt.firstreturn[, uniqueN(RandID)]` donors were followed by at least one follow-up visit. We excluded `r dt.firstreturn[is.na(FingerstickHGB_equiv), .N]` index donations because hemoglobin was not recorded, and we excluded a further `r dt.firstreturn[time_to_fu < 56, .N]` index donations from the first return dataset because the first follow-up visit with significant iron loss was less than 56 days later. The first return dataset contained `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56, .N]` index donations labeled with the outcome of the first follow-up donation. That outcome was a hemoglobin deferral for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 1, .N]` index donations; a low-iron donation for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 2, .N]`; an absent iron donation for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 3, .N]`; no adverse outcome for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 0, .N]`; and a completed donation with unknown iron status for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == -1, .N]`. The model development dataset included `r dt.md[ , uniqueN(donation_id)]` unique index donations from `r dt.md[ , uniqueN(RandID)]` donors. `r tbl_labels_per_idx[[1]]` index donations were labeled with one follow-up donation, `r tbl_labels_per_idx[[2]]` were labeled twice, and `r dt.md[ , uniqueN(donation_id)] - tbl_labels_per_idx[[1]] - tbl_labels_per_idx[[2]]` were labeled with 3 or more follow-up visit outcomes (maximum of 8).
 
 ## Prediction model
 
-> We evaluated over 2,000 model configurations (model type and hyperparameter settings) across the five candidate model types. `r run_reference("f-tuning-auc")` shows the average overall AUC within the 15 tuning datasets for each model configuration, and `run_reference("t-mod-tuning")` shows the top hyperparameter setting for each model type. The top "standard biomarkers" model configuration was an ensemble model that averaged risk scores for three gradient boosted machine and three random forest models, with an AUC of XXX. The top "extra biomarkers" model was an ensemble model that averaged risk scores for two gradient boosted machines, a random forest model, and two penalized regression models, one with second order interaction terms (multiclass AUC ). `r run_reference("f-ensemble-auc")` shows the multiclass AUC for the top ensemble configurations and base models. For both models, discriminative performance was highest for predicting no adverse outcome donations and lowest for predicting low iron donations. In a secondary analysis, we found that use of ferritin, soluble transferrin receptor, and body iron increased the overall AUC from 77% to 82% among the subset of donations for which those values were recorded. Inclusion of these biomarkers increased discriminative performance most substantially for identifying absent iron donations and had little effect on ability to discriminate hemoglobin deferrals.
+```{r echo=FALSE, include=FALSE}
+dt.aucs <- fread("../4_output/AUC_results_meanCI.csv")
+# Build display strings from the mean/lb/ub columns: disp_val carries a "95% CI" label and is used in the inline text.
+dt.aucs[, disp_val := paste0(
+  percent(mean, accuracy=0.1), " (95% CI ",
+  percent(lb, accuracy=0.1)," - ",
+  percent(ub, accuracy=0.1),")")]
+# tab_val is the same string without the "95% CI" label; it becomes the cell value of the AUC table built below.
+dt.aucs[, tab_val := paste0(
+  percent(mean, accuracy=0.1), " (",
+  percent(lb, accuracy=0.1)," - ",
+  percent(ub, accuracy=0.1),")")]
+# Calculate the difference in mean AUC per outcome: extra biomarkers ("withXB") minus standard biomarkers ("noXB").
+# NOTE(review): the subtraction is positional; it assumes both version subsets list outcomes in the same row order -- confirm against the CSV.
+dt.aucs_noXB <- dt.aucs[version=="noXB"]
+dt.aucs_XB <- dt.aucs[version=="withXB"]
+dt.aucs_diff <- cbind(dt.aucs_XB[,"outcome"],
+                   "Difference" = percent(dt.aucs_XB$mean - dt.aucs_noXB$mean,
+                           accuracy=0.1)  
+)
+
+# Format for the table: one row per outcome, one column per version (cells are tab_val), then join on the Difference column.
+dt.aucs_tab <- dcast(dt.aucs, 
+                    formula = outcome~version,
+                    value.var = "tab_val")
+
+dt.aucs_tab <-dt.aucs_tab[dt.aucs_diff,on="outcome"]
+# Display labels keyed by outcome code.
+outcome_disp_list <- list(
+  Overall = "Multiclass AUC",
+  Z0 = "No adverse outcome",
+  Z1 = "Hemoglobin deferral",
+  Z2 = "Low iron donation",
+  Z3 = "Absent iron donation"
+)
+# NOTE(review): both assignments below are positional (not matched on the outcome code); they assume rows are ordered Overall, Z0, Z1, Z2, Z3 after the join -- confirm.
+dt.aucs_tab[ , outcome_disp := outcome_disp_list]
+dt.aucs_tab[ , category := c("Multiclass AUC", rep("One-vs-rest AUC",4))]
+setcolorder(dt.aucs_tab, c("category","outcome_disp","noXB","withXB","Difference"))
+
+# FEATURE IMPORTANCE: load per-feature medians; the inline text filters this table on feature and model and reports median_AUC_pctchg.
+dt.feat_imp_medians <- fread("../4_output/feature_importance_medians.csv")
 
-> Variable importance for the top model in the primary analysis is shown in `r run_reference("f-var-imp-XB")` and for the model using extra biomarkers as predicters in `r run_reference("f-var-imp-noXB")`. In the primary analysis, hemoglobin and return time were most important for predicting the outcome of a follow-up donation. When additional biomarkers were used as predictors, ferritin became the most important.
+```
 
-> We calculated normalization weights to calibrate the model scores to the expected distribution of outcomes in the first return dataset. They were 1.4 for the probability of no adverse outcome; 0.47 for the probability of a hemoglobin deferral; 1.1 for the probability of a low iron donation; and 1.2 for the probability of an absent iron donation.
+> We evaluated over 2,000 model configurations (model type and hyperparameter settings) across the five candidate model types. The top "standard biomarkers" model configuration was an ensemble model that averaged risk scores for three gradient boosted machine and three random forest models; the top "extra biomarkers" model was an ensemble model that averaged risk scores for two gradient boosted machines, a random forest model, and two penalized regression models, one with second order interaction terms. For both models, discriminative performance was highest for predicting no adverse outcome donations and lowest for predicting low iron donations (`r run_reference("f-roc-compare")`).
 
-## Individual risk profiles
+> `r run_reference("f-tuning-auc")` shows the average overall AUC within the 15 tuning datasets for each model configuration, and `r run_reference("t-mod-tuning")` shows the top hyperparameter setting for each model type. Inclusion of the extra biomarkers yielded the greatest improvement in distinguishing low and absent iron donations from the other outcomes (one-vs-rest AUC increased by `r dt.aucs_diff[outcome=="Z2",Difference]` for low iron donations and by `r dt.aucs_diff[outcome=="Z3",Difference]` for absent iron donations; `r run_reference("t-auc-compare")`). For both the standard and extra biomarkers models, the top ensemble model had a higher mean AUC with lower standard error than each of the base models that comprised it across the model tuning sets (`r run_reference("f-ensemble-auc")`). Multiclass AUC for the top ensemble models assessed on the model assessment partitions was `r dt.aucs[version=="noXB"&outcome=="Overall",disp_val]` for the standard biomarkers model and `r dt.aucs[version=="withXB"&outcome=="Overall",disp_val]` for the extra biomarkers model.
 
-> Figure shows the individual risk trajectories from two donations: one for a donor whose risk of an adverse outcome was high at day 56 but declined over time and another for a donor who had a low risk of adverse outcomes even at day 56. `r run_reference("f-sixty-trajectories")` shows the same plots for 60 randomly selected index donations from the first return dataset. Notably, estimated risk did not monotonically decrease for all adverse events for all donors. For example, the risk of a low iron donation increased as risk of hemoglobin deferral or an absent iron donation fell for some donors.
+> `r run_reference("f-var-imp-both")` shows the most important features for both models, while `r run_reference("f-var-imp-XB")` and `r run_reference("f-var-imp-noXB")` show the full feature importance plots for each model. For the standard biomarkers model, the donation interval (time to return) was the most important feature for prediction (the median decrease in multiclass AUC when shuffling this feature was `r dt.feat_imp_medians[feature=="time_to_fu" & model=="noXB", percent(median_AUC_pctchg, accuracy = 0.1)]`), followed by venous hemoglobin and the number of red blood cell units donated in the last 24 months (median decreases in multiclass AUC of `r dt.feat_imp_medians[feature=="DER_AdjVenousHgb" & model=="noXB", percent(median_AUC_pctchg, accuracy = 0.1)]` and `r dt.feat_imp_medians[feature=="DER_RBC_Last24months" & model=="noXB", percent(median_AUC_pctchg, accuracy = 0.1)]`, respectively). For the extra biomarkers model, ferritin was by far the most important feature, followed by donation interval and fingerstick hemoglobin/hematocrit (median decreases in multiclass AUC of `r dt.feat_imp_medians[feature=="ARUP_Ferritin" & model=="XB", percent(median_AUC_pctchg, accuracy = 0.1)]`, `r dt.feat_imp_medians[feature=="time_to_fu" & model=="XB", percent(median_AUC_pctchg, accuracy = 0.1)]`, and `r dt.feat_imp_medians[feature=="FingerstickHGB_equiv" & model=="XB", percent(median_AUC_pctchg, accuracy = 0.1)]`, respectively). The normalization weights we calculated for calibrating the model scores to the expected distribution of outcomes in the first return dataset are shown in `r run_reference("t-calib-weights")`.
 
-> Figure shows the probability of any adverse outcome at post-donation day 56 and post-donation day 250. The median risk of any adverse outcome at day 56 was 71% (IQR 43% -- 86%), but this dropped to 23% (IQR 12% -- 41%) at post-donation day 250. While risk of an adverse outcome fell for most donors, some continued to have a high risk even at post-donation day 250. For 787 donors (11%), estimated risk of any adverse outcome was above 60% at post-donation day 250, which may indicate an underlying iron-related condition unrelated to repeat blood donation.
+## Individual risk profiles
+
+```{r echo=FALSE, include=FALSE}
+risk_matrix_XB<- readRDS("../4_output/3d_risk_matrix_XB.RDS")  # 3-D array; presumably [index donation, interval in days, outcome] -- TODO confirm
+risk_56 <- (1-risk_matrix_XB[ , "56", "None"])  # 1 minus the "None" value at "56"; reported in the text as risk of any adverse outcome for a 56-day interval
+risk_250 <- (1-risk_matrix_XB[ , "250", "None"])  # same quantity at the "250" slice
+summary(risk_matrix_XB[ , "250", "None"] - risk_matrix_XB[ , "56", "None"])  # interactive check only: the chunk is include=FALSE, so this is not rendered
 
-> `r run_reference("f-ae-trajectory-examples")` shows different trajectory types.
 
-> `r run_reference("f-traject-by-iron-status")` is by iron status.
+dt.fr_risk <- fread("../1_data/dt_fr_withrisk")  # must provide the Any_AE_day_56 and Any_AE_day_250 columns used below
+dt.fr_risk[,archetype:=fifelse(Any_AE_day_56 < .1, "Fast recoverer",  # branches are tested in order; the first matching condition sets the label
+                                   fifelse(Any_AE_day_250 > .85, "Chronic high risk",  # checked before "Slow recoverer"
+                                           fifelse(Any_AE_day_56 > .60 & Any_AE_day_250 <.35, "Slow recoverer",  # high at day 56 AND low at day 250
+                                                   "Other")))]  # every row matching none of the three archetypes
 
-> `r run_reference("f-traject-by-venous-hgb")` is by tertile of venous hemoglobin.
 
-> `r run_reference("f-traject-by-gender")` shows different gender.
+```
 
-> `r run_reference("f-traject-by-RBC-loss")` shows tertiles of red blood cells lost.
+> Using the calibrated "extra biomarkers" model on the first return dataset, the median risk of any adverse outcome with a 56-day donation interval was `r percent(median(risk_56))` (interquartile range [IQR] `r percent(quantile(risk_56, 0.25))` -- `r percent(quantile(risk_56,0.75))`). For a 250-day interval, the median risk of any adverse outcome fell to `r percent(median(risk_250))` (IQR `r percent(quantile(risk_250, 0.25))` -- `r percent(quantile(risk_250,0.75))`). The median decrease in absolute risk from an interval of 56 to 250 days was `r percent(median(risk_56 - risk_250))` (IQR `r percent(quantile(risk_56 - risk_250, 0.25))` -- `r percent(quantile(risk_56 - risk_250,0.75))`).
 
-> `r run_reference("f-traject-iron_supplementation")` shows whether people take daily iron supplementation.
+> Across the first return dataset, `r dt.fr_risk[archetype=="Fast recoverer",.N]` (`r percent(dt.fr_risk[archetype=="Fast recoverer",.N]/nrow(dt.fr_risk))`) donations were by a fast recoverer (\<10% risk of any adverse outcome on post-donation day 56), `r dt.fr_risk[archetype=="Slow recoverer",.N]` (`r percent(dt.fr_risk[archetype=="Slow recoverer",.N]/nrow(dt.fr_risk))`) donations were by a slow recoverer (\>60% adverse outcome risk on day 56 and \<35% on day 250), and `r dt.fr_risk[archetype=="Chronic high risk",.N]` (`r percent(dt.fr_risk[archetype=="Chronic high risk",.N]/nrow(dt.fr_risk))`) donations were by a chronic high risk donor (\>85% risk of adverse outcome on day 250). `r run_reference("f-indiv_trajects_archetypes")` shows the individual risk trajectory for four donors in each of these archetypes. `r run_reference("f-ae-trajectory-examples")` plots the adverse outcome risk trajectory for 300 randomly selected donors with the trajectories of donors fitting each of the archetypes highlighted. For chronic high risk donors, while risk of hemoglobin deferral and absent iron donation declined for longer donation intervals, risk of a low iron donation increased for longer donation intervals on average. `r run_reference("f-sixty-trajectories")` shows the individual risk trajectories for 60 additional randomly selected donors. Note that while risk of any adverse outcome consistently declines for longer donation intervals, risk of low iron donation increases for some donors.
 
-> `r run_reference("f-traject-composite_iron")` shows tertiles of the composite iron scores.
+> `r run_reference("f-traject-by-iron-status")` shows how the average risk of adverse outcomes develops for donors based on iron status at the index donation. For any donation interval, average risk of any adverse outcome was lowest for iron replete donors and highest for donors with absent iron at the index donation. While adverse outcome risk declined overall for all three cohorts, risk of a low iron donation increased with longer intervals for donors with absent iron stores at the index donation. `r run_reference("f-traject-by-venous-hgb")` shows the development of average risk of adverse outcomes based on cohorts of donors defined by the tertile of their venous hemoglobin; donors with venous hemoglobin in the lowest tertile (9.8-13 g/dL) showed similar trends to the absent iron cohort in `r run_reference("f-traject-by-iron-status")`. However, whereas the risk trajectory for absent iron donation was most different across cohorts when stratifying by baseline iron status, hemoglobin deferral was the adverse outcome for which the risk trajectory varied the most when stratifying based on venous hemoglobin. `r run_reference("f-traject-by-gender")`, `r run_reference("f-traject-by-RBC-loss")`, `r run_reference("f-traject-iron_supplementation")`, and `r run_reference("f-traject-composite_iron")` show the average risk trajectories for donor cohorts stratified by gender, red blood cell units donated over the prior 2 years, self-reported iron supplementation, and composite dietary heme iron intake.
 
 # Discussion
 
-> Risk of iron-related adverse outcomes at follow-up donations can be estimated as a function of the return time using data available at an index donation. Estimated risk decreased precipitously for most donors if they waited longer to return, suggesting that tailoring donors' IDIs to individual donors' risk profiles may be an effective strategy for managing risk of iron-related adverse donation outcomes without unduly restricting return donations from low-risk donors. Risk for some donors remained high even 250 days later, suggesting that these methods could also be used to identify individuals who may be poor candidates for repeat blood donation due to underlying iron deficiency. Including ferritin as a predictor improved risk estimation, particularly with respect to estimating risk of absent iron donations.
+> We found that risk of iron-related adverse outcomes at follow-up donations can be estimated as a function of the interval before a follow-up donation attempt. Estimated risk decreased precipitously for most donors if they waited longer to return, suggesting that minimum donation intervals can prevent donor-associated iron deficiency and hemoglobin deferrals. However, heterogeneity in estimated risk trajectories suggests that uniform or sex-based intervals may be insufficient. Using ferritin and soluble transferrin receptor improved risk estimation, particularly with respect to estimating risk of absent iron donations. For some donors, estimated risk of an adverse outcome remained over 90% even for a 250-day donation interval. These donors may have underlying (and potentially undiagnosed) iron deficiency or a related condition, which may make them poor candidates for repeat blood donation.
+
+> Our analysis has several limitations. The RISE study asked participants to commit to frequent blood donation and targeted recruitment to achieve proportional representation based on sex and donation history [@Cable2011]. Furthermore, we restricted our analysis to donations in the RISE study for which ferritin was measured, which may further bias our findings. Further study is needed to assess the generalizability of our prediction model's performance to a more representative blood donor population. Many of the features we used for prediction are highly correlated (e.g., venous and fingerstick hemoglobin; 12- and 24-month donation history), which can cause feature importance to be 'spread' over correlated features [@Tolosi2011]. Due to this, our feature importance method should only be interpreted as which features the model relied on most (or was most sensitive to) rather than which features are most correlated with adverse outcome risk. To calibrate our model, we assumed the distribution of absent, low, and replete iron status for follow-up donations without a ferritin measurement mirrored the distribution across follow-up donations at which ferritin was measured, but this may not be the case.
 
-> Our analysis has several limitations. The RISE study asked participants to commit to frequent blood donation and targeted recruitment to achieve proportional representation based on gender and donation history [@Cable2011]. Further study is needed to assess the generalizability of our prediction model's performance to a general blood donor population. Additionally, the outcomes we estimated at the population level are specific to the RISE cohort. In particular, the baseline rate of adverse outcomes and the reduction in supply introduced by tailored IDIs may be lower in populations with lower return rates. Another limitation of the RISE data is that ferritin and other biomarkers were not measured for all follow-up visits. We factored this into our analysis by assuming these biomarkers were missing at random, but this may not be the case.
+> We see several ways the approaches reported here can be used to gain further insights into tailored donation intervals for blood donors. Extension of this work to larger blood center operational datasets outside of specific clinical studies will provide information on the effectiveness of machine learning models when the quality and completeness of information may be more limited. Several key features identified in this analysis, such as donor hemoglobin/hematocrit, donation interval, and, increasingly, ferritin measurement, are readily available. Other features such as venous hemoglobin and survey assessments of donor dietary habits and supplementation are not likely to be implemented as standard donor assessments.
 
-> Two other limitations must be overcome before tailored IDIs can be implemented in practice. First, a decision-maker must identify risk thresholds for each adverse outcome in our method. Experts may not agree on the level of risk that is acceptable and how the sufficiency of the blood supply should be weighed against risks to donors. Further work is needed to understand these trade-offs and identify reasonable risk thresholds. Second, this method may face significant barriers to implementation. Sophisticated machine learning techniques for decision-making require technical expertise to develop and maintain and are opaque in the sense that humans cannot readily understand how the system arrived at a decision. In a growing literature on interpretable machine learning, methods have been developed for constructing simpler decision rules that sometimes perform on par with advanced machine learning techniques [@Jung2017; @Ustun2016; @Letham2015; @Lakkaraju2016; @Ustun2019]. Further work is needed to assess barriers to the adoption of the tailored IDI method developed here and to determine whether simpler decision rules might be easier to implement and perform similarly. Despite these limitations, our analysis suggests that individual risk prediction could be a useful tool for ensuring a sufficient blood supply while managing iron-related risks to repeat blood donors.
+> Despite the limitations, our analysis demonstrates that repeat donors have heterogeneous risk of iron-related adverse outcomes as a function of their donation interval, and machine learning models can estimate individual donors' risk trajectories. Such predictive models could be a valuable tool for managing risks to donors while ensuring a sufficient blood supply.
 
 ##### 
 
 # Declarations
 
-**Funding:** A
+**Acknowledgments:** The authors thank the NHLBI BioLINCC repository for making the RISE dataset available at no charge, and we thank Dr. Bryan Spencer for providing the dietary heme iron intake scores he generated for a separate analysis.
 
-**Conflicts:** A
+**Funding:** WAR was funded by a Stanford Interdisciplinary Graduate Fellowship.
 
-**Ethics/Consent:** A
+**Conflicts:** The authors have no conflicts of interest to declare.
 
-**Data and materials:** A
+**Ethics/Consent:** Because our analysis used fully de-identified human subjects data for a secondary analysis, this study was exempted from full IRB review by the Stanford University IRB.
 
-**Code availability:** A
+**Data and materials:** The RISE dataset was accessed through the National Heart, Lung, and Blood Institute (NHLBI) BioLINCC repository (<https://biolincc.nhlbi.nih.gov>). Our Research Materials Distribution Agreement prohibits publication of the raw data, but other researchers can submit a data request to NHLBI at no charge.
 
-**Authors' contributions:**
+**Code availability:** All code is uploaded to a public repository [INSERT DOI LINK]
+
+**Authors' contributions:** All authors contributed to study design. WAR conducted the analysis and composed the manuscript; BC and DS edited the manuscript.
 
 ##### 
 
@@ -290,24 +353,39 @@ fig_num <- run_autonum(seq_id = "fig",
 
 knitr::include_graphics("../4_output/figs/ROC_compare.png")
 
-block_caption("One vs. all ROC curves with and without ferritin, soluble transferrin receptor, and derived measures. Black dot at 75% sensitivity and 75% specificity for visual reference.", 
+block_caption("One-vs-rest ROC curves for the standard and extra biomarker models as assessed on the model assessment partitions. For each outcome, one ROC curve is plotted for each of the three resamples of the data, combining data from the corresponding 5 model assessment partitions. Black dot at 75% sensitivity and 75% specificity for visual reference.", 
               style = "Image Caption", 
               autonum = fig_num)
 ```
 
-#####
-
+##### 
 
-```{r fig.width=5, fig.height=6}
-fig_num <- run_autonum(seq_id = "sfig",
-                       pre_label = "Figure S",
-                       bkm="f-var-imp-XB",
+```{r fig.width=5, fig.height=4}
+fig_num <- run_autonum(seq_id = "fig",
+                       pre_label = "Figure ",
+                       bkm="f-var-imp-both",
                        bkm_all = TRUE,
                        prop = fp_text(bold=TRUE, underlined = TRUE))
 
 knitr::include_graphics("../4_output/figs/feat_imp_both_top15.png")
 
-block_caption("Relative variable imprtance for the top \"standard\" and \"extra\" biomarker models. Variables were included in this figure if among the top 15 most important variables for at least one of the models. Full variable importance plots shown in the supplement.", 
+block_caption("Relative variable importance for the top \"standard\" and \"extra\" biomarker models. Variables were included in this figure if among the top 15 most important variables for at least one of the models. Full variable importance plots shown in the supplement.", 
+              style = "Image Caption", 
+              autonum = fig_num)
+```
+
+##### 
+
+```{r fig.width=5, fig.height=4.5}
+fig_num <- run_autonum(seq_id = "fig",
+                       pre_label = "Figure ",
+                       bkm="f-indiv_trajects_archetypes",
+                       bkm_all = TRUE,
+                       prop = fp_text(bold=TRUE, underlined = TRUE))
+
+knitr::include_graphics("../4_output/figs/indiv_plots_archetypes.png")
+
+block_caption("Individual risk profiles for four selected donors that represent each of the three donation archetypes. The donation interval (time to return donation attempt) is varied on the x axis from 56 to 250 days. Height of colored area indicates the risk of each adverse outcome and likelihood of a 'no adverse outcome' donation.", 
               style = "Image Caption", 
               autonum = fig_num)
 ```
@@ -323,7 +401,7 @@ fig_num <- run_autonum(seq_id = "fig",
 
 knitr::include_graphics("../4_output/figs/each_ae_traject.png")
 
-block_caption("Risk trajectory for any adverse event (top) or a specific adverse event (bottom) for 100 randomly-selected donors. Five 'chronic high risk' donors with more than 80% risk of an adverse event if they return on post-donation day 250 are shown in red; five 'quick recoverer' donors with less than 5% risk of an adverse event if they return on post-donation day 56 shown in green; and 5 'slow recoverer' donors whose risk of an adverse event was above 60% on post-donation day 56 but fell below 35% by postdonation day 250.", 
+block_caption("Risk trajectory for any adverse outcome (top plot) or a specific adverse outcome (bottom three plots) for 300 randomly selected donors. Five randomly selected donors fitting each of the three archetypes are highlighted in red, orange, and green. Other donors' trajectories are shown in grey.", 
               style = "Image Caption", 
               autonum = fig_num)
 ```
@@ -339,7 +417,7 @@ fig_num <- run_autonum(seq_id = "fig",
 
 knitr::include_graphics("../4_output/figs/ae_traject_by_Index_donation_iron_status.png")
 
-block_caption(".", 
+block_caption("Average risk trajectory with 95% confidence intervals for donors in the first return dataset stratified by iron status at index donation, defined by the donor's ferritin level.", 
               style = "Image Caption", 
               autonum = fig_num)
 ```
@@ -355,7 +433,7 @@ fig_num <- run_autonum(seq_id = "fig",
 
 knitr::include_graphics("../4_output/figs/ae_traject_by_Venous_HGB_tertile.png")
 
-block_caption(".",
+block_caption("Average risk trajectory with 95% confidence intervals for donors in the first return dataset stratified by venous hemoglobin (HGB) measured at the index donation in g/dL.",
               style = "Image Caption", 
               autonum = fig_num)
 ```
@@ -364,29 +442,33 @@ block_caption(".",
 
 # Tables
 
-```{r, echo=FALSE}
+```{r}
 tab_num <- run_autonum(seq_id = "tab", 
                        pre_label = "Table ", 
-                       bkm="t-model-aucs",
+                       bkm="t-auc-compare",
                        bkm_all = TRUE,
                        prop = fp_text(bold=TRUE, underlined = TRUE))
 
-t_model_aucs <- as_flextable(as_grouped_data(read_excel("../1_data/tables.xlsx", sheet = "features"),
-                                              groups = "Category"))
-t_model_aucs <- compose(t_model_aucs, i = ~ !is.na(Category), j = "Variable name",
-              value = as_paragraph(as_chunk(Category)))
+t_model_aucs <- flextable(dt.aucs_tab[,1:5])
+t_model_aucs <- set_header_labels(t_model_aucs,
+                  values = list(category = "AUC metric",
+                                outcome_disp = "AUC metric",
+                                noXB = "Standard biomarkers model",
+                                withXB = "Extra biomarkers model"))
+t_model_aucs <- merge_h(t_model_aucs, i=1:2, part="body")
+t_model_aucs <- merge_h(t_model_aucs, part="header")
+t_model_aucs <- merge_v(t_model_aucs, j=1, part="body")
 t_model_aucs <- align(t_model_aucs, align = "left", part = "all")
 t_model_aucs <- fontsize(t_model_aucs, size = 10, part = "all")
 t_model_aucs <- font(t_model_aucs, fontname = "Times", part = "all")
 t_model_aucs <- theme_box(t_model_aucs)
 t_model_aucs <- bg(t_model_aucs, bg = "#EAEAEA", part = "header")
-t_model_aucs <- width(t_model_aucs, 1, 1.6)
-t_model_aucs <- width(t_model_aucs, 2, 2.1)
-t_model_aucs <- width(t_model_aucs, 3, 2.8)
-t_model_aucs <- bg(t_model_aucs, i = c(1, 10, 21, 45), bg = "#DDDDDD", part = "body")
-t_model_aucs <- bold(t_model_aucs, i = c(1, 10, 21, 45), part = "body")
+t_model_aucs <- width(t_model_aucs, 1, 0.6)
+t_model_aucs <- width(t_model_aucs, 2, 1.5)
+t_model_aucs <- width(t_model_aucs, 3:4, 1.8)
+t_model_aucs <- width(t_model_aucs, 5, 0.8)
 t_model_aucs <- set_caption(t_model_aucs, 
-                                     "Multiclass and one verses rest AUC by outcome for the top ``standard`` and ``extra biomarker`` model configuration as assessed in the outer cross validation loop.",
+                                     "Multiclass and one versus rest AUC by outcome for the top \"standard\" and \"extra biomarker\" model configuration as assessed on the model assessment partitions.",
                                      autonum = tab_num)
 
 t_model_aucs
@@ -400,15 +482,16 @@ t_model_aucs
 
 <br>
 
-# A. Calculations for estimating the outcomes of pathogen inactivation
+# Supplemental methods
+
+## Calibration
 
-Blank.
+Our calibration procedure was as follows: we totaled each follow-up outcome in the first return dataset as $n^{(k)}$, where $k=-1, 0, 1, 2, 3$ correspond to a donation with unknown iron status (no ferritin measurement), a no adverse outcome donation, a hemoglobin deferral, a low iron donation, and an absent iron donation, respectively. We then calculated $\tilde{n}^{(k)}$, an estimation of what the totals would have been if ferritin were measured for all follow-up donations assuming the distribution of outcomes was the same as for completed donations with ferritin measures. These were calculated as $\tilde{n}^{(1)} = n^{(1)}$ (hemoglobin deferral) and $\tilde{n}^{(k)} = n^{(k)}+n^{(-1)}\frac{n^{(k)}}{n^{(0)}+n^{(2)}+n^{(3)}}$ for $k=0,2,3$ (completed donations). We then used our top model configuration to generate the unnormalized probability vector $[\hat{q}_i^{(0)}, \hat{q}_i^{(1)}, \hat{q}_i^{(2)}, \hat{q}_i^{(3)}]$ for each index donation $i$ in the first return dataset. We computed weights $w^{(k)}$ for the unnormalized probability of each outcome $\hat{q}_i^{(k)}$ by solving the system of equations $\sum_{i=1}^I \frac{w^{(k)}\hat{q}_i^{(k)}}{\sum_{\tilde{k}=0}^3 w^{(\tilde{k})}\hat{q}_i^{(\tilde{k})}} = \tilde{n}^{(k)}$ for $k=0,1,2,3$, where the outer sum runs over the index donations $i = 1,2,...,I$. The final calibrated model used parameters $w^{(k)}$ together with the uncalibrated scores from the model $z_i^{(k)}$ to produce the estimated likelihood of each outcome at a follow-up donation as $\tilde{q}_i^{(k)}=w^{(k)} z_i^{(k)} / \sum_{\tilde{k}=0}^3 w^{(\tilde{k})}z_i^{(\tilde{k})}$. This ensured that the expectation of the distribution of the predicted outcome for the first return dataset would correspond to our estimated totals $\tilde{n}^{(k)}$.
 
 ##### 
 
 # Supplemental tables
 
-
 ```{r, echo=FALSE}
 stab_num <- run_autonum(seq_id = "stab", 
                        pre_label = "Table S", 
@@ -464,14 +547,44 @@ t_mod_tuning <- width(t_mod_tuning, 4, 1.3)
 t_mod_tuning <- bg(t_mod_tuning, i = c(1, 7, 11, 14, 17), bg = "#DDDDDD", part = "body")
 t_mod_tuning <- bold(t_mod_tuning, i = c(1, 7, 11, 14, 17), part = "body")
 t_mod_tuning <- merge_v(t_mod_tuning, j=5, part="body")
-t_mod_tuning <- set_caption(t_mod_tuning, "Model types and hyperparameters assessed as candidates. All hyperparameter combinations were assessed in 5-fold cross validation on each of 15 model validation sets defined by the nested cross validation scheme.")
+t_mod_tuning <- set_caption(t_mod_tuning, "Model types and hyperparameters assessed as candidates. All hyperparameter combinations were assessed in 5-fold cross validation on each of 15 model validation sets defined by the nested cross validation scheme.",
+                            autonum=stab_num)
 t_mod_tuning <- theme_box(t_mod_tuning)
-t_mod_tuning <- set_caption(t_mod_tuning, 
-                                     "List of features for prediction model with description and notes from feature engineering.",
-                                     autonum = stab_num)
 t_mod_tuning
 ```
 
+##### 
+
+```{r echo=FALSE}
+
+weights <- readRDS("../4_output/calib_weights.RDS")
+
+dt.weights <- data.table(
+  `Model version` = c("Standard biomarkers", "Extra biomarkers"),
+  `No adverse outcome` = c(weights$excludeXB["Q0"],weights$XB["Q0"]),
+  `Hemoglobin deferral` = c(weights$excludeXB["Q1"],weights$XB["Q1"]),
+  `Low iron donation` = c(weights$excludeXB["Q2"],weights$XB["Q2"]),
+  `Absent iron donation` = c(weights$excludeXB["Q3"],weights$XB["Q3"])
+)
+
+stab_num <- run_autonum(seq_id = "stab", 
+                       pre_label = "Table S", 
+                       bkm="t-calib-weights",
+                       bkm_all = TRUE,
+                       prop = fp_text(bold=TRUE, underlined = TRUE))
+
+t_weights <- flextable(dt.weights)
+t_weights <- align(t_weights, align = "left", part = "all")
+t_weights <- fontsize(t_weights, size = 8, part = "all")
+t_weights <- bg(t_weights, bg = "#EAEAEA", part = "header")
+t_weights <- width(t_weights, 1, 1.4)
+t_weights <- width(t_weights, 2:5, 1.2)
+t_weights <- set_caption(t_weights, "Calibration weights calculated for matching the expected distribution of the risk scores to the estimated distribution in the first return dataset. Compared to the raw risk predictions generated by the model trained in the model development dataset, calibration down-weighted risk of hemoglobin deferral (evidenced by a calibration weight less than 1) and up-weighted likelihood of the other three outcomes for both models.",
+                         autonum=stab_num)
+t_weights <- theme_box(t_weights)
+
+t_weights
+```
 
 ##### 
 
@@ -493,7 +606,6 @@ block_caption("Average multiclass AUC for each evaluated model configuration as
 
 ##### 
 
-
 ```{r fig.width=6, fig.height=5.5}
 sfig_num <- run_autonum(seq_id = "sfig",
                        pre_label = "Figure S",
@@ -511,7 +623,6 @@ block_caption("Distribution of multiclass AUC for across the 15 tuning sets for
 
 ##### 
 
-
 ```{r fig.width=5, fig.height=6}
 fig_num <- run_autonum(seq_id = "sfig",
                        pre_label = "Figure S",
@@ -544,7 +655,7 @@ block_caption("Relative variable importance for the top \"extra biomarkers\" mod
 
 ##### 
 
-```{r fig.width=6.5, fig.height = 8}
+```{r fig.width=6.4, fig.height = 7.3}
 sfig_num <- run_autonum(seq_id = "sfig",
                        pre_label = "Figure S",
                        bkm="f-sixty-trajectories",
@@ -553,7 +664,7 @@ sfig_num <- run_autonum(seq_id = "sfig",
 
 knitr::include_graphics("../4_output/figs/trajectories_60_random.png")
 
-block_caption("Individual risk trajectory for sixty randomly-selected index donations.", 
+block_caption("Individual risk trajectory for sixty randomly selected index donations. X-axis indicates the donation interval (days until a return donation attempt) and the height of the colored areas indicates the risk of each possible outcome: no adverse outcome (cyan), hemoglobin deferral (yellow), low iron donation (orange), and absent iron donation (red).", 
               style = "Image Caption", 
               autonum = sfig_num)
 ```
@@ -569,7 +680,7 @@ sfig_num <- run_autonum(seq_id = "sfig",
 
 knitr::include_graphics("../4_output/figs/ae_traject_by_Gender.png")
 
-block_caption(".", 
+block_caption("Average risk trajectory with 95% confidence intervals for donors in the first return dataset stratified by gender. Compared to men, women had higher estimated risk for absent iron donations and hemoglobin deferral but a similar average risk trajectory for low iron donations.", 
               style = "Image Caption", 
               autonum = sfig_num)
 ```
@@ -585,7 +696,7 @@ sfig_num <- run_autonum(seq_id = "sfig",
 
 knitr::include_graphics("../4_output/figs/ae_traject_by_RBC_units_lost_prior_24_months_tertile.png")
 
-block_caption(".", 
+block_caption("Average risk trajectory with 95% confidence intervals for donors in the first return dataset stratified by the number of red blood cell (RBC) units lost through donation in the prior 24 months. Those who donated 2 or fewer units in the prior two years had lower risk of adverse outcomes, particularly absent iron donations.", 
               style = "Image Caption", 
               autonum = sfig_num)
 ```
@@ -601,7 +712,7 @@ sfig_num <- run_autonum(seq_id = "sfig",
 
 knitr::include_graphics("../4_output/figs/ae_traject_by_Iron_supplementation.png")
 
-block_caption(".", 
+block_caption("Average risk trajectory with 95% confidence intervals for donors in the first return dataset stratified by iron supplementation. Donors with 'less than daily' iron supplementation had lower risk of adverse outcomes, particularly hemoglobin deferral, whereas donors taking either no iron supplementation or daily iron supplementation had more similar risk trajectories. These results are not intuitive, but may be due to confounding variables, for which this analysis does not account. For example, donors with diagnosed anemia or a related condition may be more likely to take daily iron supplementation.", 
               style = "Image Caption", 
               autonum = sfig_num)
 ```
@@ -617,7 +728,7 @@ sfig_num <- run_autonum(seq_id = "sfig",
 
 knitr::include_graphics("../4_output/figs/ae_traject_by_Composite_iron_tertile.png")
 
-block_caption(".", 
+block_caption("Average risk trajectory with 95% confidence intervals for donors in the first return dataset stratified by heme dietary iron intake score, which is calculated from self-reported dietary data, at index donation. On average, donors in the lowest tertile of heme iron intake had a higher estimated risk of an absent iron donation but similar risk trajectory for hemoglobin deferral or a low iron donation.", 
               style = "Image Caption", 
               autonum = sfig_num)
 ```
diff --git a/5_manuscript/iron_trajectories.docx b/5_manuscript/iron_trajectories.docx
index c818874..004a4ea 100644
Binary files a/5_manuscript/iron_trajectories.docx and b/5_manuscript/iron_trajectories.docx differ
diff --git a/5_manuscript/tailored_idi.bib b/5_manuscript/tailored_idi.bib
index 5513548..e9ce346 100644
--- a/5_manuscript/tailored_idi.bib
+++ b/5_manuscript/tailored_idi.bib
@@ -3,84 +3,199 @@
 
 BibTeX export options can be customized via Options -> BibTeX in Mendeley Desktop
 
-@article{Baart2012,
-abstract = {BACKGROUND: Each year, approximately 5{\%} of the invited blood donors is eventually deferred from donation because of low hemoglobin (Hb) levels. Estimating the risk of Hb deferral in blood donors can be helpful in the management of the donation program. We developed and validated a prediction model for Hb deferral in whole blood donors, separately for men and women. STUDY DESIGN AND METHODS: Data from a Dutch prospective cohort of 220,946 whole blood donors were used to identify predictors for Hb deferral using multivariable logistic regression analyses. Validity of the prediction models was assessed with a cross-validation. RESULTS: A total of 12,865 donors (5.8{\%}) were deferred because of a low Hb level. The strongest predictors of Hb deferral were Hb level measured at the previous visit, age, seasonality, difference in Hb levels between the previous two visits, time since the previous visit, deferral at the previous visit, and the total number of whole blood donations in the past 2 years for both men and women. The prediction models had an area under the receiver operating characteristic curve of 0.89 for men and 0.84 for women. Cross-validation showed similar results and good calibration. CONCLUSION: Using a limited number of easy-to-measure characteristics enables a good prediction of Hb deferral risk in whole blood donors. The prediction models may guide the decision which donors to invite for a next donation and for which donors the invitation should be postponed. Potentially, this could decrease the number of Hb deferrals in blood donors. {\textcopyright} 2012 American Association of Blood Banks.},
-author = {Baart, A. Mireille and {De Kort}, Wim L.A.M. and Atsma, Femke and Moons, Karel G.M. and Vergouwe, Yvonne},
-doi = {10.1111/j.1537-2995.2012.03655.x},
+@article{Tolosi2011,
+abstract = {Motivation: Classification and feature selection of genomics or transcriptomics data is often hampered by the large number of features as compared with the small number of samples available. Moreover, features represented by probes that either have similar molecular functions (gene expression analysis) or genomic locations (DNA copy number analysis) are highly correlated. Classical model selection methods such as penalized logistic regression or random forest become unstable in the presence of high feature correlations. Sophisticated penalties such as group Lasso or fused Lasso can force the models to assign similar weights to correlated features and thus improve model stability and interpretability. In this article, we show that the measures of feature relevance corresponding to the above-mentioned methods are biased such that the weights of the features belonging to groups of correlated features decrease as the sizes of the groups increase, which leads to incorrect model interpretation and misleading feature ranking. Results: With simulation experiments, we demonstrate that Lasso logistic regression, fused support vector machine, group Lasso and random forest models suffer from correlation bias. Using simulations, we show that two related methods for group selection based on feature clustering can be used for correcting the correlation bias. These techniques also improve the stability and the accuracy of the baseline models. We apply all methods investigated to a breast cancer and a bladder cancer arrayCGH dataset and in order to identify copy number aberrations predictive of tumor phenotype. {\textcopyright} The Author 2011. Published by Oxford University Press. All rights reserved.},
+author = {Toloşi, Laura and Lengauer, Thomas},
+doi = {10.1093/bioinformatics/btr300},
+file = {::},
+issn = {13674803},
+journal = {Bioinformatics},
+month = {jul},
+number = {14},
+pages = {1986--1994},
+pmid = {21576180},
+publisher = {Oxford Academic},
+title = {{Classification with correlated features: Unreliability of feature ranking and solutions}},
+url = {https://academic-oup-com.ezp-prod1.hul.harvard.edu/bioinformatics/article/27/14/1986/194387},
+volume = {27},
+year = {2011}
+}
+@article{Bialkowski2015,
+abstract = {Background and Objectives—Repeated blood donation produces iron deficiency. Changes in dietary iron intake do not prevent donation-induced iron deficiency. Prolonging the interdonation interval or using oral iron supplements can mitigate donation-induced iron deficiency. The most effective operational methods for reducing iron deficiency in donors are unknown.},
+author = {Bialkowski, W and Bryant, B J and Schlumpf, K S and Wright, D J and Birch, R and Kiss, J E and {D'Andrea}, P and Cable, R G and Spencer, B R and Vij, V and Mast, A E},
+doi = {10.1111/vox.12210},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bialkowski et al. - Unknown - The strategies to reduce iron deficiency in blood donors randomized trial design, enrolment and early rete.pdf:pdf},
+journal = {Vox sanguinis},
+number = {2},
+pages = {178--185},
+title = {{The strategies to reduce iron deficiency in blood donors randomized trial: design, enrolment and early retention}},
+url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4300282/pdf/nihms646989.pdf},
+volume = {108},
+year = {2015}
+}
+@misc{Kiss2018,
+abstract = {Summary: Blood donors and the RBCs and other components they willingly provide are essential in the delivery of healthcare in all parts of the world. Nearly 70{\%} of donated blood comes from repeat or committed donors. The amount of iron removed in the 10 min or so it takes to withdraw a unit of blood (500 ml, plus 25 ml for testing) requires over 24 weeks to replace on a “standard” diet, i.e., without added iron in the form of supplements The cumulative effect of repeat blood donations without adequate iron replacement or a longer wait between donations results in iron deficiency (ID) in many donors, low haemoglobin deferral ({\~{}}8{\%} of donation attempts), and frank anaemia in some. Moreover, ID can be associated with side effects that can impact a blood donor's health, such as fatigue, cognitive changes and other neuromuscular symptoms. In an effort to better identify and prevent ID, blood collection agencies are recommending various strategies, including changes in the donation interval, donation frequency, testing of iron status and iron supplementation. In this review, we present the evidence basis for these strategies and suggest our own approaches to improving iron balance in blood donors.},
+author = {Kiss, Joseph E. and Vassallo, Ralph R.},
+booktitle = {British Journal of Haematology},
+doi = {10.1111/bjh.15136},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Kiss 2018 Blood donor iron what to do.pdf:pdf},
+issn = {13652141},
+keywords = {blood donor,ferritin,interdonation interval,iron deficiency,iron supplement},
+month = {jun},
+number = {5},
+pages = {590--603},
+pmid = {29767836},
+publisher = {Blackwell Publishing Ltd},
+title = {{How do we manage iron deficiency after blood donation?}},
+url = {http://doi.wiley.com/10.1111/bjh.15136},
+volume = {181},
+year = {2018}
+}
+@misc{Schotten2016,
+author = {Schotten, Nienke and Jong, Pieternel C.M.Pasker De and Moretti, Diego and Zimmermann, Michael B. and Geurts-Moespot, Anneke J. and Swinkels, Dorine W. and {Van Kraaij}, Marian G.J.},
+booktitle = {Blood},
+doi = {10.1182/blood-2016-04-709451},
+file = {::},
+issn = {15280020},
+keywords = {extension,iron},
+month = {oct},
+number = {17},
+pages = {2185--2188},
+pmid = {27587880},
+publisher = {American Society of Hematology},
+title = {{The donation interval of 56 days requires extension to 180 days for whole blood donors to recover from changes in iron metabolism}},
+url = {http://ashpublications.org/blood/article-pdf/128/17/2185/1396505/2185.pdf},
+volume = {128},
+year = {2016}
+}
+@article{Cable2011,
+abstract = {BACKGROUND Regular blood donors are at risk of iron deficiency, but characteristics that predispose to this condition are poorly defined. STUDY DESIGN AND METHODS A total of 2425 red blood cell donors, either first-time (FT) or reactivated donors (no donations for 2 years) or frequent donors, were recruited for follow-up. At enrollment, ferritin, soluble transferrin receptor (sTfR), and hemoglobin were determined. Donor variables included demographics, smoking, dietary intake, use of iron supplements, and menstrual and/or pregnancy history. Models to predict two measures of iron deficiency were developed: Absent iron stores (AIS) were indicated by a ferritin level of less than 12 ng/mL and iron-deficient erythropoiesis (IDE) by a log(sTfR/ferritin) value of 2.07 or greater. RESULTS A total of 15.0{\%} of donors had AIS and 41.7{\%} IDE. In frequent donors, 16.4 and 48.7{\%} of males had AIS and IDE, respectively, with corresponding proportions of 27.1 and 66.1{\%} for females. Donation intensity was most closely associated with AIS and/or IDE (odds ratios from 5.3 to 52.2 for different donation intensity compared to FT donors). Being female, younger, and/or menstruating also increased the likelihood of having AIS and/or IDE, as did having a lower weight. Marginally significant variables for AIS and/or IDE were being a nonsmoker, previous pregnancy, and not taking iron supplements. Dietary variables were in general unrelated to AIS and/or IDE, as was race and/or ethnicity. CONCLUSION A large proportion of both female and male frequent blood donors have iron depletion. Donation intensity, sex and/or menstrual status, weight, and age are important independent predictors of AIS and/or IDE. Reducing the frequency of blood donation is likely to reduce the prevalence of iron deficiency among blood donors, as might implementing routine iron supplementation.},
+author = {Cable, Ritchard G. and Glynn, Simone A. and Kiss, Joseph E. and Mast, Alan E. and Steele, Whitney R. and Murphy, Edward L. and Wright, David J. and Sacher, Ronald A. and Gottschall, Jerry L. and Vij, Vibha and Simon, Toby L.},
+doi = {10.1111/j.1537-2995.2010.02865.x},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Cable et al. - 2011 - Iron deficiency in blood donors Analysis of enrollment data from the REDS-II Donor Iron Status Evaluation (RISE) s.pdf:pdf},
+isbn = {1537-2995 (Electronic) 0041-1132 (Linking)},
 issn = {00411132},
 journal = {Transfusion},
-month = {dec},
-number = {12},
-pages = {2559--2569},
-pmid = {22519683},
-publisher = {John Wiley {\&} Sons, Ltd},
-title = {{Development and validation of a prediction model for low hemoglobin deferral in a large cohort of whole blood donors}},
-url = {https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.edu/doi/full/10.1111/j.1537-2995.2012.03655.x https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.edu/doi/abs/10.1111/j.1537-2995.2012.03655.x https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.ed},
-volume = {52},
-year = {2012}
+number = {3},
+pages = {511--522},
+pmid = {20804527},
+title = {{Iron deficiency in blood donors: Analysis of enrollment data from the REDS-II Donor Iron Status Evaluation (RISE) study}},
+volume = {51},
+year = {2011}
 }
-@article{Baart2011,
-abstract = {Background and Objectives Each year, a relevant proportion of whole blood donors is deferred from donation because of low haemoglobin (Hb) levels. Such temporary deferrals are demoralizing, and donors may never return for a donation. Reliable predictions of Hb levels may guide the decision whether donors can be invited for the next donation. In this study, a prediction model was developed for the risk of low Hb levels.Materials and Methods Individual data from 5191 whole blood donors were analysed; 143 donors had a low Hb level. Eleven candidate predictors were considered in logistic regression models to predict low Hb levels. The performance of the prediction model was studied with the receiver operating characteristic (ROC) curve. Internal validity was assessed with a bootstrap procedure.Results Strong predictors were sex, seasonality, Hb level measured at the previous visit, difference in Hb levels between the previous two visits, time since the previous visit, deferral at the previous visit, and the total number of whole blood donations in the past 2 years. Internal validation showed an area under the ROC curve of 0.87.Conclusion The developed prediction model provides accurate discrimination between donors with low and appropriate Hb levels. The model predictions may be valuable to determine whether donors can be invited for a next donation, or whether some interventions such as postponement of the invitation are warranted. Potentially, this could decrease the number of donor deferrals for low Hb levels. {\textcopyright} 2010 The Author(s). Vox Sanguinis {\textcopyright} 2010 International Society of Blood Transfusion.},
-author = {Baart, A. M. and {De Kort}, W. L.A.M. and Moons, K. G.M. and Vergouwe, Y.},
-doi = {10.1111/j.1423-0410.2010.01382.x},
-issn = {00429007},
-journal = {Vox Sanguinis},
-keywords = {Blood donors,Development,Donor deferral,Haemoglobin,Internal validation,Prediction model},
+@article{Rigas2014,
+abstract = {Dietary studies show a relationship between the intake of iron enhancers and inhibitors and iron stores in the general population. However, the impact of dietary factors on the iron stores of blood donors, whose iron status is affected by blood donations, is incompletely understood. In the Danish Blood Donor Study, we assessed the effect of blood donation frequency, physiologic factors, lifestyle and supplemental factors, and dietary factors on ferritin levels. We used multiple linear and logistic regression analyses stratified by sex and menopausal status. Among high-frequency donors (more than nine donations in the past 3 years), we found iron deficiency (ferritin below 15 ng/mL) in 9, 39, and 22{\%} of men, premenopausal women, and postmenopausal women, respectively. The strongest predictors of iron deficiency were sex, menopausal status, the number of blood donations in a 3-year period, and the time since last donation. Other significant factors included weight, age, intensity of menstruation, iron tablets, vitamin pills, and consumption of meat and wine. The study confirms iron deficiency as an important problem, especially among menstruating women donating frequently. The risk of iron depletion was largely explained by sex, menopausal status, and donation frequency. Other factors, including dietary and supplemental iron intake, had a much weaker effect on the risk of iron depletion. {\textcopyright} 2013 The Authors. Transfusion published by Wiley Periodicals, Inc. on behalf of AABB.},
+author = {Rigas, Andreas Stribolt and S{\o}rensen, Cecilie Juul and Pedersen, Ole Birger and Petersen, Mikkel Steen and Th{\o}rner, Lise Wegner and Kotz{\'{e}}, Sebastian and S{\o}rensen, Erik and Magnussen, Karin and Rostgaard, Klaus and Erikstrup, Christian and Ullum, Henrik},
+doi = {10.1111/trf.12518},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Rigas2014 Danish Blood Donors.pdf:pdf},
+issn = {15372995},
+journal = {Transfusion},
+number = {3 Pt 2},
+pages = {789--796},
+pmid = {24372094},
+publisher = {Wiley-Blackwell},
+title = {{Predictors of iron levels in 14,737 Danish blood donors: results from the Danish Blood Donor Study}},
+url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4209803/},
+volume = {54},
+year = {2014}
+}
+@article{Varma2006,
+abstract = {Background: Cross-validation (CV) is an effective method for estimating the prediction error of a classifier. Some recent articles have proposed methods for optimizing classifiers by choosing classifier parameter values that minimize the CV error estimate. We have evaluated the validity of using the CV error estimate of the optimized classifier as an estimate of the true error expected on independent data. Results: We used CV to optimize the classification parameters for two kinds of classifiers; Shrunken Centroids and Support Vector Machines (SVM). Random training datasets were created, with no difference in the distribution of the features between the two classes. Using these "null" datasets, we selected classifier parameter values that minimized the CV error estimate. 10-fold CV was used for Shrunken Centroids while Leave-One-Out-CV (LOOCV) was used for the SVM. Independent test data was created to estimate the true error. With "null" and "non null" (with differential expression between the classes) data, we also tested a nested CV procedure, where an inner CV loop is used to perform the tuning of the parameters while an outer CV is used to compute an estimate of the error. The CV error estimate for the classifier with the optimal parameters was found to be a substantially biased estimate of the true error that the classifier would incur on independent data. Even though there is no real difference between the two classes for the "null" datasets, the CV error estimate for the Shrunken Centroid with the optimal parameters was less than 30{\%} on 18.5{\%} of simulated training data-sets. For SVM with optimal parameters the estimated error rate was less than 30{\%} on 38{\%} of "null" data-sets. Performance of the optimized classifiers on the independent test set was no better than chance. 
The nested CV procedure reduces the bias considerably and gives an estimate of the error that is very close to that obtained on the independent testing set for both Shrunken Centroids and SVM classifiers for "null" and "non-null" data distributions. Conclusion: We show that using CV to compute an error estimate for a classifier that has itself been tuned using CV gives a significantly biased estimate of the true error. Proper use of CV for estimating true error of a classifier developed using a well defined algorithm requires that all steps of the algorithm, including classifier parameter tuning, be repeated in each CV loop. A nested CV procedure provides an almost unbiased estimate of the true error. {\textcopyright} 2006 Varma and Simon; licensee BioMed Central Ltd.},
+author = {Varma, Sudhir and Simon, Richard},
+doi = {10.1186/1471-2105-7-91},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Varma, Simon - 2006 - Bias in error estimation when using cross-validation for model selection.pdf:pdf},
+issn = {14712105},
+journal = {BMC Bioinformatics},
+keywords = {Algorithms,Bioinformatics,Computational Biology/Bioinformatics,Computer Appl. in Life Sciences,Microarrays},
 month = {feb},
-number = {2},
-pages = {204--211},
-pmid = {20726956},
-publisher = {John Wiley {\&} Sons, Ltd},
-title = {{Prediction of low haemoglobin levels in whole blood donors}},
-url = {https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.edu/doi/full/10.1111/j.1423-0410.2010.01382.x https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.edu/doi/abs/10.1111/j.1423-0410.2010.01382.x https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.ed},
-volume = {100},
-year = {2011}
+number = {1},
+pages = {91},
+pmid = {16504092},
+publisher = {BioMed Central},
+title = {{Bias in error estimation when using cross-validation for model selection}},
+url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-7-91},
+volume = {7},
+year = {2006}
 }
-@article{Patel2019,
-abstract = {BACKGROUND: Blood donation results in a loss of iron stores, which is particularly concerning for young female blood donors. This study examines the association of blood donation and iron deficiency among adolescent and adult females in the United States. STUDY DESIGN AND METHODS: A cross-sectional analysis was performed using data from the 1999–2010 National Health and Nutrition Examination Survey (NHANES). Females who reported their blood donation history in the preceding year and had serum ferritin (SF) measurements were included. Analyses were weighted and stratified by adolescents (16–19 years; n = 2419) and adults (20–49 years; n = 7228). Adjusted prevalence ratios (aPRs) were estimated by multivariable Poisson regression. Standard errors were estimated by Taylor series linearization. RESULTS: Geometric mean SF levels (ng/mL) were lower in blood donors compared to nondonors among adolescents (21.2 vs. 31.4; p {\textless} 0.001) and among adults (26.2 vs. 43.7; p {\textless} 0.001). The prevalence of absent iron stores (SF {\textless} 12 ng/mL) was higher in blood donors compared to nondonors among adolescents (22.6{\%} vs. 12.2{\%}; aPR = 2.03 [95{\%} confidence interval (CI) = 1.45–2.85]) and among adults (18.3{\%} vs. 9.8{\%}; aPR = 2.06 [95{\%} CI = 1.48–2.88]). Additionally, the prevalence of iron deficiency anemia (SF {\textless} 26 ng/mL and hemoglobin {\textless} 12.0 g/dL) was also higher in blood donors compared to nondonors among adolescents (9.5{\%} vs. 6.1{\%}; aPR = 2.10 [95{\%} CI = 1.13–3.90]) and among adults (7.9{\%} vs. 6.1{\%}; aPR = 1.74 [95{\%} CI = 1.06–2.85]). Similar results were observed in a sensitivity analysis restricted to adolescents aged 16 to 18 years. CONCLUSIONS: Blood donation is associated with iron deficiency among adolescent and adult females in the United States. These national data call for further development and implementation of blood donation practices aimed toward mitigating iron deficiency.},
-author = {Patel, Eshan U. and White, Jodie L. and Bloch, Evan M. and Grabowski, Mary K. and Gehrie, Eric A. and Lokhandwala, Parvez M. and Brunker, Patricia A.R. and Goel, Ruchika and Shaz, Beth H. and Ness, Paul M. and Tobian, Aaron A.R.},
-doi = {10.1111/trf.15179},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Patel 2019 Iron women NHANES.pdf:pdf},
+@article{Custer2007,
+abstract = {BACKGROUND: The consequences of temporary deferral are not well understood. This study sought to investigate blood donor return after deferral expiration. STUDY DESIGN AND METHODS: A retrospective cohort analysis of allogeneic whole-blood donation was conducted. All deferred donors and a random sample of eligible donors were identified from the year 2000, with subsequent blood center visits through December 2005 captured. Stratified results are reported as the percentage returning, rates of return, and time to return. Measures of statistical association and Cox regression modeling are reported. RESULTS: For first-time (FT) donors, 25 percent of temporarily deferred donors returned during the 5-year follow-up period compared to 47 percent of eligible donors (p {\textless} 0.0001); for repeat donors, 81 and 86 percent of deferred and eligible donors returned, respectively (p {\textless} 0.0001). Depending on the deferral category, 14 to 31 percent of FT and 58 to 90 percent of repeat donors returned. Rates (per year) of successful donation during the follow-up period were 0.09 for index-deferred FT donors, 0.28 for eligible FT donors, 1.0 for deferred repeat donors, and 1.45 for eligible repeat donors. Multivariate modeling indicated that in addition to deferral, age, sex, race, and education were associated with return in both FT and repeat donors. CONCLUSION: The effects of deferral were more pronounced than expected, affecting both FT and repeat donors. For FT donors, the type and duration of deferral, while important, were not as relevant as hypothesized because so few returned, suggesting the need to develop appropriate interventions to recapture those donors likely to be eligible. {\textcopyright} 2007 American Association of Blood Banks.},
+author = {Custer, Brian and Chinn, Artina and Hirschler, Nora V. and Busch, Michael P. and Murphy, Edward L.},
+doi = {10.1111/j.1537-2995.2007.01292.x},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Custer 2007 Deferral return behaviour.pdf:pdf},
+issn = {00411132},
+journal = {Transfusion},
+keywords = {Adult,Aged,Artina Chinn,Blood Donors*,Brian Custer,Cohort Studies,Edward L Murphy,Female,Humans,MEDLINE,Male,Middle Aged,Multivariate Analysis,NCBI,NIH,NLM,National Center for Biotechnology Information,National Institutes of Health,National Library of Medicine,Non-U.S. Gov't,PubMed Abstract,Research Support,Retrospective Studies,Time Factors,doi:10.1111/j.1537-2995.2007.01292.x,pmid:17655597},
+month = {aug},
+number = {8},
+pages = {1514--1523},
+pmid = {17655597},
+publisher = {Transfusion},
+title = {{The consequences of temporary deferral on future whole blood donation}},
+url = {https://pubmed.ncbi.nlm.nih.gov/17655597/},
+volume = {47},
+year = {2007}
+}
+@article{Vassallo2018,
+abstract = {BACKGROUND: Iron deficiency is observed in blood donors who meet hemoglobin requirements for donation. Frequent donation results in negative iron balance, and teenage donors may thus be at risk for adverse health consequences. STUDY DESIGN AND METHODS: Blood Systems implemented ferritin testing on all successful 16- to 18-year-old (teen) donations. Low ferritin (LF) was defined as less than 20 ng/mL in females and less than 30 ng/mL in males. Donors with LF were deferred from red blood cell (RBC) donations (12 months for females, and 6 for males) and counseled to take low-dose iron for 60 days. A ferritin value less than 26 ng/mL indicated iron-deficient erythropoiesis and less than 12 ng/mL absent iron stores. RESULTS: Over 16 months, 110,417 teen donations were tested and represented 10.5{\%} of all successful donations. The rate of absent iron stores was 9.0{\%} (1.9{\%} male; 15.9{\%} female) and of iron-deficient erythropoiesis, 31.9{\%} (12.4{\%} male; 50.6{\%} female). The rate of LF deferrals was 26.9{\%} (16.7{\%} male; 36.6{\%} female). The proportion of LF donors decreased with increasing predonation hemoglobin and rose with increasing RBC donations in the prior 24 months. Seasonality in LF deferrals and the RBC contribution from teen donors was observed. CONCLUSIONS: Ferritin testing of teen donors identified individuals with LF who might benefit from risk mitigation. LF is more common in teenage female than male donors and those with RBC donations in the prior 24 months. An appreciable number of new/lapsed donors presented with LF, however. These data may be useful in guiding future risk mitigation efforts.},
+author = {Vassallo, Ralph R. and Bravo, Marjorie D. and Kamel, Hany},
+doi = {10.1111/trf.14921},
+file = {:G$\backslash$:/My Drive/Blood Transfusion/Iron model/Vassallo{\_}et{\_}al-2018-Transfusion.pdf:pdf},
 issn = {15372995},
 journal = {Transfusion},
-month = {may},
-number = {5},
-pages = {1723--1733},
-pmid = {30779173},
-publisher = {Blackwell Publishing Inc.},
-title = {{Association of blood donation with iron deficiency among adolescent and adult females in the United States: a nationally representative study}},
-url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/trf.15179},
-volume = {59},
-year = {2019}
+number = {12},
+pages = {2861--2867},
+title = {{Ferritin testing to characterize and address iron deficiency in young donors}},
+volume = {58},
+year = {2018}
 }
-@misc{Rahmati2020,
-abstract = {Objective: Iron deficiency anemia is the most common cause of anemia during pregnancy. Other causes of anemia include parasitic diseases, micronutrient deficiencies, and genetic hemoglobin apathies. Maternal anemia during pregnancy is the most important public health problem. Since the relationship between maternal anemia by the months of pregnancy and premature birth has been reported differently in various studies; thus, this study aims to determine the relationship between maternal anemia during pregnancy and premature birth. Methods: This systematic review and meta-analysis article was designed based on the recommendations of PRISMA. This study was performed from 1990 to 2018. Articles extracted using related keywords such as maternal, anemia, premature birth, and pregnancy in databases, including Cochrane, Medline, Medlib, Web of Science, PubMed, Scopus, Springer, Science Direct, Embase, Google Scholar, Sid, Irandoc, Iranmedex, and Magiran. Relative risk and its confidence interval were extracted from each of the studies. The random effects model was used to combine study results and heterogeneity among the studies measured using I2 index and the data were analyzed based by using STATA software version 3.2. Results: Overall 18 studies with sample sizes of 932 090 were entered into the meta-analysis. The overall relationship between maternal anemia during pregnancy and premature birth was significant (1.56 [95{\%} CI: 1.25–1.95]). Maternal anemia in the first trimester increases the risk of premature birth (relative risk, 1.65 [95{\%} CI: 1.31–2.08]). But, this relationship was not significant in the second (relative risk, 1.45 [95{\%} CI: 0.79–2.65]) and third trimester (relative risk, 1.43 [95{\%} CI: 0.82–2.51]). Conclusion: Maternal anemia during pregnancy can be considered as a risk factor for premature birth.},
-author = {Rahmati, Shoboo and Azami, Milad and Badfar, Gholamreza and Parizad, Naser and Sayehmiri, Kourosh},
-booktitle = {Journal of Maternal-Fetal and Neonatal Medicine},
-doi = {10.1080/14767058.2018.1555811},
-issn = {14764954},
-keywords = {Anemia,maternal,meta-analysis,pregnancy,premature birth},
-month = {apr},
-number = {15},
-pages = {2679--2689},
-pmid = {30522368},
-title = {{The relationship between maternal anemia during pregnancy with preterm birth: a systematic review and meta-analysis}},
-url = {http://www.ncbi.nlm.nih.gov/pubmed/30522368 https://www.tandfonline.com/doi/full/10.1080/14767058.2018.1555811},
-volume = {33},
-year = {2020}
+@article{Kiss2015,
+abstract = {IMPORTANCE Although blood donation is allowed every 8 weeks in the United States, recovery of hemoglobin to the currently accepted standard (12.5 g/dL) is frequently delayed, and some donors become anemic. OBJECTIVE To determine the effect of oral iron supplementation on hemoglobin recovery time (days to recovery of 80{\%} of hemoglobin removed) and recovery of iron stores in iron-depleted ("low ferritin," $\leq$26 ng/mL) and iron-replete ("higher ferritin," {\textgreater}26 ng/mL) blood donors. DESIGN, SETTING, AND PARTICIPANTS Randomized, nonblinded clinical trial of blood donors stratified by ferritin level, sex, and age conducted in 4 regional blood centers in the United States in 2012. Included were 215 eligible participants aged 18 to 79 years who had not donated whole blood or red blood cells within 4 months.},
+author = {Kiss, Joseph E. and Brambilla, Donald and Glynn, Simone A. and Mast, Alan E. and Spencer, Bryan R. and Stone, Mars and Kleinman, Steven H. and Cable, Ritchard G.},
+doi = {10.1001/jama.2015.119},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Kiss et al. - 2015 - Oral Iron Supplementation After Blood Donation.pdf:pdf},
+issn = {0098-7484},
+journal = {JAMA},
+number = {6},
+pages = {575},
+pmid = {25668261},
+title = {{Oral iron supplementation after blood donation}},
+url = {http://jama.jamanetwork.com/article.aspx?doi=10.1001/jama.2015.119},
+volume = {313},
+year = {2015}
 }
-@inproceedings{Zadrozny2002,
-abstract = {Class membership probability estimates are important for many applications of data mining in which classification outputs are combined with other sources of information for decision-making, such as example-dependent misclassification costs, the outputs of other classifiers, or domain knowledge. Previous calibration methods apply only to two-class problems. Here, we show how to obtain accurate probability estimates for multiclass problems by combining calibrated binary probability estimates. We also propose a new method for obtaining calibrated two-class probability estimates that can be applied to any classifier that produces a ranking of examples. Using naive Bayes and support vector machine classifiers, we give experimental results from a variety of two-class and multiclass domains, including direct marketing, text categorization and digit recognition.},
-address = {New York, New York, USA},
-author = {Zadrozny, Bianca and Elkan, Charles},
-booktitle = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
-doi = {10.1145/775047.775151},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Zadrozny, Elkan - 2002 - Transforming classifier scores into accurate multiclass probability estimates.pdf:pdf},
-pages = {694--699},
-publisher = {Association for Computing Machinery (ACM)},
-title = {{Transforming classifier scores into accurate multiclass probability estimates}},
-url = {http://portal.acm.org/citation.cfm?doid=775047.775151},
-year = {2002}
+@article{Cable2017,
+abstract = {BACKGROUND: Donor behaviors in STRIDE (Strategies to Reduce Iron Deficiency), a trial to reduce iron deficiency, were examined. STUDY DESIGN AND METHODS: Six hundred ninety-two frequent donors were randomized to receive either 19 or 38 mg iron for 60 days or an educational letter based on their predonation ferritin. Compliance with assigned pills, response to written recommendations, change in donation frequency, and future willingness to take iron supplements were examined. RESULTS: Donors who were randomized to receive iron pills had increased red blood cell donations and decreased hemoglobin deferrals compared with controls or with pre-STRIDE donations. Donors who were randomized to receive educational letters had fewer hemoglobin deferrals compared with controls. Of those who received a letter advising of low ferritin levels with recommendations to take iron supplements or delay future donations, 57{\%} reported that they initiated iron supplementation, which was five times as many as those who received letters lacking a specific recommendation. The proportion reporting delayed donation was not statistically different (32{\%} vs. 20{\%}). Of donors who were assigned pills, 58{\%} reported taking them “frequently,” and forgetting was the primary reason for non-compliance. Approximately 80{\%} of participants indicated that they would take iron supplements if provided by the center. CONCLUSIONS: Donors who were assigned iron pills had acceptable compliance, producing increased red blood cell donations and decreased low hemoglobin deferrals compared with controls or with pre-STRIDE rates. The majority of donors assigned to an educational letter took action after receiving a low ferritin result, with more donors choosing to take iron than delay donation. Providing donors with information on iron status with personalized recommendations was an effective alternative to directly providing iron supplements.},
+author = {Cable, Ritchard G. and Birch, Rebecca J. and Spencer, Bryan R. and Wright, David J. and Bialkowski, Walter and Kiss, Joseph E. and Rios, Jorge and Bryant, Barbara J. and Mast, Alan E.},
+doi = {10.1111/trf.14226},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Cable et al. - 2017 - The operational implications of donor behaviors following enrollment in STRIDE (Strategies to Reduce Iron Deficien.pdf:pdf},
+issn = {15372995},
+journal = {Transfusion},
+month = {oct},
+number = {10},
+pages = {2440--2448},
+pmid = {28703859},
+publisher = {Blackwell Publishing Inc.},
+title = {{The operational implications of donor behaviors following enrollment in STRIDE (Strategies to Reduce Iron Deficiency in blood donors)}},
+url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5612857/},
+volume = {57},
+year = {2017}
+}
+@article{Breiman2001,
+abstract = {Random forests are a combination of tree predictors such that each tree depends on the values of a random vector sampled independently and with the same distribution for all trees in the forest. The generalization error for forests converges a.s. to a limit as the number of trees in the forest becomes large. The generalization error of a forest of tree classifiers depends on the strength of the individual trees in the forest and the correlation between them. Using a random selection of features to split each node yields error rates that compare favorably to Adaboost (Y. Freund {\&} R. Schapire, Machine Learning: Proceedings of the Thirteenth International conference, ***, 148–156), but are more robust with respect to noise. Internal estimates monitor error, strength, and correlation and these are used to show the response to increasing the number of features used in the splitting. Internal estimates are also used to measure variable importance. These ideas are also applicable to regression.},
+author = {Breiman, Leo},
+doi = {10.1023/A:1010933404324},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Breiman - 2001 - Random Forests.pdf:pdf},
+issn = {1573-0565},
+journal = {Machine Learning},
+keywords = {Artificial Intelligence,Control,Mechatronics,Natural Language Processing (NLP),Robotics,Simulation and Modeling},
+month = {oct},
+number = {1},
+pages = {5--32},
+publisher = {Springer},
+title = {{Random forests}},
+url = {https://link.springer.com/article/10.1023/A:1010933404324},
+volume = {45},
+year = {2001}
 }
 @article{Ustun2016,
 abstract = {Scoring systems are linear classification models that only require users to add, subtract and multiply a few small numbers in order to make a prediction. These models are in widespread use by the medical community, but are difficult to learn from data because they need to be accurate and sparse, have coprime integer coefficients, and satisfy multiple operational constraints. We present a new method for creating data-driven scoring systems called a Supersparse Linear Integer Model (SLIM). SLIM scoring systems are built by using an integer programming problem that directly encodes measures of accuracy (the 0–1 loss) and sparsity (the (Formula presented.) -seminorm) while restricting coefficients to coprime integers. SLIM can seamlessly incorporate a wide range of operational constraints related to accuracy and sparsity, and can produce acceptable models without parameter tuning because of the direct control provided over these quantities. We provide bounds on the testing and training accuracy of SLIM scoring systems, and present a new data reduction technique that can improve scalability by eliminating a portion of the training data beforehand. Our paper includes results from a collaboration with the Massachusetts General Hospital Sleep Laboratory, where SLIM is being used to create a highly tailored scoring system for sleep apnea screening.},
@@ -100,70 +215,159 @@ @article{Ustun2016
 volume = {102},
 year = {2016}
 }
-@article{Cable2017,
-abstract = {BACKGROUND: Donor behaviors in STRIDE (Strategies to Reduce Iron Deficiency), a trial to reduce iron deficiency, were examined. STUDY DESIGN AND METHODS: Six hundred ninety-two frequent donors were randomized to receive either 19 or 38 mg iron for 60 days or an educational letter based on their predonation ferritin. Compliance with assigned pills, response to written recommendations, change in donation frequency, and future willingness to take iron supplements were examined. RESULTS: Donors who were randomized to receive iron pills had increased red blood cell donations and decreased hemoglobin deferrals compared with controls or with pre-STRIDE donations. Donors who were randomized to receive educational letters had fewer hemoglobin deferrals compared with controls. Of those who received a letter advising of low ferritin levels with recommendations to take iron supplements or delay future donations, 57{\%} reported that they initiated iron supplementation, which was five times as many as those who received letters lacking a specific recommendation. The proportion reporting delayed donation was not statistically different (32{\%} vs. 20{\%}). Of donors who were assigned pills, 58{\%} reported taking them “frequently,” and forgetting was the primary reason for non-compliance. Approximately 80{\%} of participants indicated that they would take iron supplements if provided by the center. CONCLUSIONS: Donors who were assigned iron pills had acceptable compliance, producing increased red blood cell donations and decreased low hemoglobin deferrals compared with controls or with pre-STRIDE rates. The majority of donors assigned to an educational letter took action after receiving a low ferritin result, with more donors choosing to take iron than delay donation. Providing donors with information on iron status with personalized recommendations was an effective alternative to directly providing iron supplements.},
-author = {Cable, Ritchard G. and Birch, Rebecca J. and Spencer, Bryan R. and Wright, David J. and Bialkowski, Walter and Kiss, Joseph E. and Rios, Jorge and Bryant, Barbara J. and Mast, Alan E.},
-doi = {10.1111/trf.14226},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Cable et al. - 2017 - The operational implications of donor behaviors following enrollment in STRIDE (Strategies to Reduce Iron Deficien.pdf:pdf},
+@article{Cable2012,
+abstract = {Background-Blood donors are at risk of iron deficiency. We evaluated the effects of blood donation intensity on iron and hemoglobin in a prospective study.},
+author = {Cable, Ritchard G and Glynn, Simone A and Kiss, Joseph E and Mast, Alan E and Steele, Whitney R and Murphy, Edward L and Wright, David J and Sacher, Ronald A and Gottschall, Jerry L and Tobler, Leslie H},
+doi = {10.1111/j.1537-2995.2011.03401.x},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Cable et al. - 2012 - Iron Deficiency in Blood Donors The REDS-II Donor Iron Status Evaluation (RISE) Study.pdf:pdf},
+journal = {Transfusion},
+number = {4},
+pages = {702--711},
+title = {{Iron deficiency in blood donors: the REDS-II donor iron status evaluation (RISE) study}},
+url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618489/pdf/nihms330328.pdf},
+volume = {52},
+year = {2012}
+}
+@misc{Rahmati2020,
+abstract = {Objective: Iron deficiency anemia is the most common cause of anemia during pregnancy. Other causes of anemia include parasitic diseases, micronutrient deficiencies, and genetic hemoglobinopathies. Maternal anemia during pregnancy is the most important public health problem. Since the relationship between maternal anemia by the months of pregnancy and premature birth has been reported differently in various studies; thus, this study aims to determine the relationship between maternal anemia during pregnancy and premature birth. Methods: This systematic review and meta-analysis article was designed based on the recommendations of PRISMA. This study was performed from 1990 to 2018. Articles extracted using related keywords such as maternal, anemia, premature birth, and pregnancy in databases, including Cochrane, Medline, Medlib, Web of Science, PubMed, Scopus, Springer, Science Direct, Embase, Google Scholar, Sid, Irandoc, Iranmedex, and Magiran. Relative risk and its confidence interval were extracted from each of the studies. The random effects model was used to combine study results and heterogeneity among the studies measured using I2 index and the data were analyzed based by using STATA software version 3.2. Results: Overall 18 studies with sample sizes of 932 090 were entered into the meta-analysis. The overall relationship between maternal anemia during pregnancy and premature birth was significant (1.56 [95{\%} CI: 1.25–1.95]). Maternal anemia in the first trimester increases the risk of premature birth (relative risk, 1.65 [95{\%} CI: 1.31–2.08]). But, this relationship was not significant in the second (relative risk, 1.45 [95{\%} CI: 0.79–2.65]) and third trimester (relative risk, 1.43 [95{\%} CI: 0.82–2.51]). Conclusion: Maternal anemia during pregnancy can be considered as a risk factor for premature birth.},
+author = {Rahmati, Shoboo and Azami, Milad and Badfar, Gholamreza and Parizad, Naser and Sayehmiri, Kourosh},
+booktitle = {Journal of Maternal-Fetal and Neonatal Medicine},
+doi = {10.1080/14767058.2018.1555811},
+issn = {14764954},
+keywords = {Anemia,maternal,meta-analysis,pregnancy,premature birth},
+month = {apr},
+number = {15},
+pages = {2679--2689},
+pmid = {30522368},
+title = {{The relationship between maternal anemia during pregnancy with preterm birth: a systematic review and meta-analysis}},
+url = {http://www.ncbi.nlm.nih.gov/pubmed/30522368 https://www.tandfonline.com/doi/full/10.1080/14767058.2018.1555811},
+volume = {33},
+year = {2020}
+}
+@article{Baart2012,
+abstract = {BACKGROUND: Each year, approximately 5{\%} of the invited blood donors is eventually deferred from donation because of low hemoglobin (Hb) levels. Estimating the risk of Hb deferral in blood donors can be helpful in the management of the donation program. We developed and validated a prediction model for Hb deferral in whole blood donors, separately for men and women. STUDY DESIGN AND METHODS: Data from a Dutch prospective cohort of 220,946 whole blood donors were used to identify predictors for Hb deferral using multivariable logistic regression analyses. Validity of the prediction models was assessed with a cross-validation. RESULTS: A total of 12,865 donors (5.8{\%}) were deferred because of a low Hb level. The strongest predictors of Hb deferral were Hb level measured at the previous visit, age, seasonality, difference in Hb levels between the previous two visits, time since the previous visit, deferral at the previous visit, and the total number of whole blood donations in the past 2 years for both men and women. The prediction models had an area under the receiver operating characteristic curve of 0.89 for men and 0.84 for women. Cross-validation showed similar results and good calibration. CONCLUSION: Using a limited number of easy-to-measure characteristics enables a good prediction of Hb deferral risk in whole blood donors. The prediction models may guide the decision which donors to invite for a next donation and for which donors the invitation should be postponed. Potentially, this could decrease the number of Hb deferrals in blood donors. {\textcopyright} 2012 American Association of Blood Banks.},
+author = {Baart, A. Mireille and {De Kort}, Wim L.A.M. and Atsma, Femke and Moons, Karel G.M. and Vergouwe, Yvonne},
+doi = {10.1111/j.1537-2995.2012.03655.x},
+issn = {00411132},
+journal = {Transfusion},
+month = {dec},
+number = {12},
+pages = {2559--2569},
+pmid = {22519683},
+publisher = {John Wiley {\&} Sons, Ltd},
+title = {{Development and validation of a prediction model for low hemoglobin deferral in a large cohort of whole blood donors}},
+url = {https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.edu/doi/full/10.1111/j.1537-2995.2012.03655.x https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.edu/doi/abs/10.1111/j.1537-2995.2012.03655.x https://onlinelibrary-wiley-com.ezp-prod1.hul.harvard.ed},
+volume = {52},
+year = {2012}
+}
+@article{Simon1981,
+author = {Simon, Toby L and Garry, Philip J and Hooper, Elizabeth M},
+file = {:C$\backslash$:/Users/alton/Downloads/jama{\_}245{\_}20{\_}018.pdf:pdf},
+journal = {JAMA},
+number = {20},
+pages = {2038--2043},
+title = {{Iron stores in blood donors}},
+volume = {245},
+year = {1981}
+}
+@article{Salvin2014,
+abstract = {BACKGROUND: Iron deficiency (ID) is an important consequence of blood donation. The epidemiology of this problem in the blood donor population was therefore studied to enable appropriate targeting of potential solutions to donor ID.$\backslash$n$\backslash$nSTUDY DESIGN AND METHODS: A nationally representative, cluster-based cross-sectional study of Australian blood donors was performed. Donors were eligible for inclusion if they fulfilled criteria for blood donation or were deferred due to low or falling hemoglobin. Ferritin was measured and demographic and donation data were collected.$\backslash$n$\backslash$nRESULTS: A total of 3094 blood donors were recruited, of which samples were collected from 3049 donors; 1873 had exclusively donated whole blood (WB only), 242 had exclusively made apheresis donations, and 530 had not donated ("new" donors) in the previous 24 months. The prevalence of ID in new female donors was 12.0{\%} compared with 1.3{\%} in males. The prevalence of ID in female WB-only donors was 26.4{\%}; it increased with donation frequency and decreased with age. The prevalence in male WB-only donors was 6.3{\%} with no evident change with age or donation frequency. The prevalence of ID in apheresis-only donors (females 6.3{\%}; males 2.2{\%}) did not significantly exceed that of new donors nor did it change with donation frequency. Importantly, the risk of ID could not be satisfactorily predicted in an individual donor by his or her anemia status or with predictive modeling incorporating demographic and donation data.$\backslash$n$\backslash$nCONCLUSION: ID is especially prevalent in female, premenopausal, frequent WB donors. Strategies to combat ID should be implemented, specifically targeting this group.},
+author = {Salvin, Hannah E. and Pasricha, Sant Rayn and Marks, Denese C. and Speedy, Joanna},
+doi = {10.1111/trf.12647},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Salvin et al. - 2014 - Iron deficiency in blood donors A national cross-sectional study.pdf:pdf},
+isbn = {1537-2995 (Electronic)0041-1132 (Linking)},
+issn = {15372995},
+journal = {Transfusion},
+number = {10},
+pages = {2434--2444},
+pmid = {24738792},
+title = {{Iron deficiency in blood donors: A national cross-sectional study}},
+volume = {54},
+year = {2014}
+}
+@article{Spencer2016,
+author = {Spencer, Bryan R. and Johnson, Bryce and Wright, David J. and Kleinman, Steven and Glynn, Simone A. and Cable, Ritchard G.},
+doi = {10.1111/trf.13663},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Spencer et al. - 2016 - Potential impact on blood availability and donor iron status of changes to donor hemoglobin cutoff and interdona.pdf:pdf;:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Unknown - Unknown - trf13663-sup-0001-suppappendix.docx:docx},
+issn = {15372995},
+journal = {Transfusion},
+number = {8},
+pages = {1994--2004},
+title = {{Potential impact on blood availability and donor iron status of changes to donor hemoglobin cutoff and interdonation intervals}},
+volume = {56},
+year = {2016}
+}
+@inproceedings{Spencer2019a,
+address = {Basel},
+author = {Spencer, Bryan R and Fox, M and Wise, L and Cable, Ritchard},
+booktitle = {Abstract of 29th Regional Congress of the ISBT},
+doi = {10.1111/vox.12792},
+issn = {0042-9007},
+month = {jun},
+number = {S1},
+pages = {5--240},
+publisher = {Vox Sanguinis},
+title = {{A composite measure of heme iron consumption predicts incident iron depletion in repeat blood donors}},
+url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/vox.12792},
+volume = {114},
+year = {2019}
+}
+@article{Goldman2017,
+abstract = {BACKGROUND: We assessed risk groups for iron deficiency and the feasibility and efficacy of ferritin testing in a large blood center. STUDY DESIGN AND METHODS: Donors were informed of possible testing in the predonation pamphlet. Plasma ferritin was measured on retention samples (n = 12,595) from representative clinics. Low-ferritin donors ({\textless}25 µg/L) were sent a letter and information sheet and not called for 6 months. Ferritin testing was repeated on 25{\%} of donors; donor return rate and frequency were monitored. RESULTS: Low-ferritin donors represented 2.9{\%} of first-time and reactivated (no donation in past 12 months) male donors, 32.2{\%} of first-time and reactivated female donors, 41.6{\%} of repeat male donors, and 65.1{\%} of repeat female donors. A mean of 11.7 months after index donation, the return rate was 76{\%} for normal and 58{\%} for low-ferritin donors; returning low-ferritin donors had made approximately one fewer donation. Ferritin increased by 16.3 and 12.1 µg/L in male and female low-ferritin donors and decreased by 17 µg/L in male and female normal-ferritin donors. CONCLUSION: The minimum hemoglobin level will be increased to 130 g/L for male donors and the minimum interdonation interval changed to 84 days (four donations yearly) for female donors based on iron deficiency risk groups. Large-scale ferritin testing was feasible. Donors informed of low-ferritin results had a lower return rate, donated less often, and had an increase in ferritin on return, approximately 1 year after initial testing, compared to donors with normal ferritin results.},
+author = {Goldman, Mindy and Uzicanin, Samra and Osmond, Lori and Scalia, Vito and O'Brien, Sheila F.},
+doi = {10.1111/trf.13956},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Goldman 2017 Canada ferritin testing study.pdf:pdf},
+issn = {15372995},
+journal = {Transfusion},
+month = {mar},
+number = {3},
+pages = {564--570},
+pmid = {27943371},
+title = {{A large national study of ferritin testing in Canadian blood donors}},
+url = {http://doi.wiley.com/10.1111/trf.13956},
+volume = {57},
+year = {2017}
+}
+@article{Baart2013,
+abstract = {Background Blood donors that meet the hemoglobin (Hb) criteria for donation may have undetected subclinical iron deficiency. The aim of this study was to assess the prevalence of subclinical iron deficiency in whole blood donors with Hb levels above cutoff levels for donation by measuring zinc protoporphyrin (ZPP) levels. In addition, prevalence rates based on other iron variables were assessed for comparison. Study Design and Methods The study population comprised 5280 Dutch whole blood donors, who passed the Hb criteria for donation. During donor screening, Hb levels were measured in capillary samples (finger prick), and venous blood samples were taken for measurements of ZPP and other iron variables. These variables included ferritin, transferrin saturation, soluble transferrin receptor (sTfR), hepcidin, red blood cell mean corpuscular volume (MCV), and mean cell Hb (MCH). Results With a ZPP cutoff level of at least 100 $\mu$mol/mol heme, subclinical iron deficiency was present in 6.9{\%} of male donors and in 9.8{\%} of female donors. Based on other iron variables, iron deficiency was also observed. Prevalence rates ranged from 4.8{\%} (based on transferrin saturation) to 27.4{\%} (based on hepcidin concentration) in men and from 5.6{\%} (based on sTfR concentration) to 24.7{\%} (based on hepcidin concentration) in women. Conclusion Results from this study showed that subclinical iron deficiency is prevalent among blood donors that meet the Hb criteria for blood donation, based on ZPP levels and on other iron variables. This finding needs attention because these donors are at increased risk of developing iron deficiency affecting Hb formation and other cellular processes. {\textcopyright} 2012 American Association of Blood Banks.},
+author = {Baart, A. Mireille and {Van Noord}, Paulus A.H. and Vergouwe, Yvonne and Moons, Karel G.M. and Swinkels, Dorine W. and Wiegerinck, Erwin T. and {De Kort}, Wim L.A.M. and Atsma, Femke},
+doi = {10.1111/j.1537-2995.2012.03956.x},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Baart 2013 Dutch iron blood donors.pdf:pdf},
+issn = {00411132},
+journal = {Transfusion},
+month = {aug},
+number = {8},
+pages = {1670--1677},
+pmid = {23176175},
+publisher = {John Wiley {\&} Sons, Ltd},
+title = {{High prevalence of subclinical iron deficiency in whole blood donors not deferred for low hemoglobin}},
+url = {http://doi.wiley.com/10.1111/j.1537-2995.2012.03956.x},
+volume = {53},
+year = {2013}
+}
+@article{Magnussen2015,
+abstract = {Background: Blood donors with low haemoglobin-concentration (Hb) and iron deficiency are well known challenges in any blood bank setting. The handling is complex, and even though iron deficiency is a frequent cause of anaemia, there are differential diagnoses. In healthy blood donors ferritin is helpful in discerning between anaemia caused by iron-deficiency and other causes, and while low Hb and low levels of ferritin, are the primary concern, some donors have too high levels, which must also be dealt with. Aims: The primary aims were to standardize and optimize the handling of blood donors with Hb below the limit for donation (7.8 and 8.4 mmol/l for female and male donors respectively). Secondary aims were to deal with all issues related to high or low either Hb or ferritin in blood donors. Methods: The problem was approached, by centralizing measurement of Hb, initiating ferritin measurements and establishing Centre for Donor Haemoglobin and Iron. An algorithm was created, taking mainly Hb and ferritin into account. The possible outcomes were: to send iron-tablets and iron-folder by mail, to give iron tablets with future donations, to refer the donor to general practitioner (GP) or, in most cases to do nothing. Pre-existing staff was trained in donor-communication and to handle the Hb and ferritin results. The donors were mainly contacted by mail, but when Hb was low or when there was insufficient effect of previously sent iron-supplementation the donor was contacted by phone to improve compliance. Hb was measured on Sysmex-XE2100D as part of a Complete Blood Count at every donation. Ferritin was measured one time in all donors, at every 10th donation and repeated when outside 60-300 mug/l at the previous donation. Ferritin measurement was also repeated if the previously measured Hb was low. The reason for not measuring ferritin at every donation was economy. 
Results: From February 1st 2012 to February 1st 2015, 71,450 donors (53.5{\%} women/46.5{\%} men) donated 281,814 units of whole blood (48{\%} women/52{\%} men). The mean Hb increased from 8.59 to 8.64 mmol/l in the female donors (P {\textgreater} 0.001) while the increase in the male donors was smaller 9.55-9.57 mmol/l (P = 0.017). The Red-Blood-Cell Count increased from 4.57 to 4.66 and 5.02-5.12 x 1012/l in female and male donors respectively (P {\textgreater} 0.001). The decrease in {\%} of female donors with low Hb from 4.0 to 3.5 was not significant. The {\%} of male donors with low Hb decreased from 0.92 to 0.55 (P = 0.03). Of the donors that were referred to GP on suspicion or to rule out disease, not all called back to inform about the result. Of those that did inform 12 were diagnosed with leukaemia or cancer and 2 with Vitamin B12-deficiency. Other finds were heterozygous thalassemia, hemochromatosis and polycythaemia vera. Summary/Conclusions: While the aim was to keep the donors within our frame for Hb and ferritin, the main benefit of the program was to have a well-functioning program for when the donors did fall outside anyway. The program with goal directed iron supplementation only to those that would benefit, has led to an increase in Hb and a reduction the number of donors with low Hb.},
+author = {Magnussen, Karin and Ladelund, Steen},
+doi = {10.1111/trf.13152},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Magnussen, Ladelund - 2015 - Handling low hemoglobin and iron deficiency in a blood donor population 2 years' experience.pdf:pdf},
+isbn = {0042-9007},
 issn = {15372995},
 journal = {Transfusion},
-month = {oct},
 number = {10},
-pages = {2440--2448},
-pmid = {28703859},
-publisher = {Blackwell Publishing Inc.},
-title = {{The operational implications of donor behaviors following enrollment in STRIDE (Strategies to Reduce Iron Deficiency in blood donors)}},
-url = {/pmc/articles/PMC5612857/?report=abstract https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5612857/},
-volume = {57},
-year = {2017}
-}
-@article{Jung2017,
-abstract = {From doctors diagnosing patients to judges seeing bail, experts often base their decisions on experience and intuition rather than on statistical models. While understandable, relying on intuition over models has ooen been found to result in inferior outcomes. Here we present a new method-select-regress-and-round-for constructing simple rules that perform well for complex decisions. .ese rules take the form of a weighted checklist, can be applied mentally , and nonetheless rival the performance of modern machine learning algorithms. Our method for creating these rules is itself simple, and can be carried out by practitioners with basic statistics knowledge. We demonstrate this technique with a detailed case study of judicial decisions to release or detain defendants while they await trial. In this application, as in many policy seeings, the eeects of proposed decision rules cannot be directly observed from historical data: if a rule recommends releasing a defendant that the judge in reality detained, we do not observe what would have happened under the proposed action. We address this key counterfactual estimation problem by drawing on tools from causal inference. We end that simple rules signiicantly outperform judges and are on par with decisions derived from random forests trained on all available features. Generalizing to 22 varied decision-making domains, we end this basic result replicates. We conclude with an analytical framework that helps explain why these simple decision rules perform as well as they do.},
-archivePrefix = {arXiv},
-arxivId = {1702.04690v3},
-author = {Jung, Jongbin and Concannon, Connor and Shroff, Ravi and Goel, Sharad and Goldstein, Daniel G},
-doi = {10.1145/nnnnnnn.nnnnnnn},
-eprint = {1702.04690v3},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Jung et al. - 2017 - Simple rules for complex decisions.pdf:pdf},
-journal = {arXiv},
-title = {{Simple rules for complex decisions}},
-year = {2017}
+pages = {2473--2478},
+title = {{Handling low hemoglobin and iron deficiency in a blood donor population: 2 years' experience}},
+volume = {55},
+year = {2015}
 }
-@article{Ustun2019,
-abstract = {Risk scores are simple classification models that let users make quick risk predictions by adding and subtracting a few small numbers. These models are widely used in medicine and criminal justice, but are difficult to learn from data because they need to be calibrated, sparse, use small integer coefficients, and obey application-specific operational constraints. In this paper, we present a new machine learning approach to learn risk scores. We formulate the risk score problem as a mixed integer nonlinear program, and present a cutting plane algorithm for non-convex settings to efficiently recover its optimal solution. We improve our algorithm with specialized techniques to generate feasible solutions, narrow the optimality gap, and reduce data-related computation. Our approach can fit risk scores in a way that scales linearly in the number of samples, provides a certificate of optimality, and obeys real-world constraints without parameter tuning or post-processing. We benchmark the performance benefits of this approach through an extensive set of numerical experiments, comparing to risk scores built using heuristic approaches. We also discuss its practical benefits through a real-world application where we build a customized risk score for ICU seizure prediction in collaboration with the Massachusetts General Hospital.},
-archivePrefix = {arXiv},
-arxivId = {1610.00168},
-author = {Ustun, Berk and Rudin, Cynthia},
-eprint = {1610.00168},
+@techreport{Rajbhandary2018,
+address = {Bethesda},
+author = {Rajbhandary, Srijana and Whitaker, Barbee I and Perez, Gabriela E},
 file = {::},
-journal = {Journal of Machine Learning Research},
-keywords = {calibration,classification,constraints,cut-ting plane methods,discrete optimization,interpretability,mixed integer nonlinear programming,scoring systems},
-month = {sep},
-number = {150},
-pages = {75},
-publisher = {Microtome Publishing},
-title = {{Learning optimized risk scores}},
-url = {http://arxiv.org/abs/1610.00168},
-volume = {20},
-year = {2019}
-}
-@article{Scheinker2019,
-abstract = {Importance: Obesity is a leading cause of high health care expenditures, disability, and premature mortality. Previous studies have documented geographic disparities in obesity prevalence. Objective: To identify county-level factors associated with obesity using traditional epidemiologic and machine learning methods. Design, Setting, and Participants: Cross-sectional study using linear regression models and machine learning models to evaluate the associations between county-level obesity and county-level demographic, socioeconomic, health care, and environmental factors from summarized statistical data extracted from the 2018 Robert Wood Johnson Foundation County Health Rankings and merged with US Census data from each of 3138 US counties. The explanatory power of the linear multivariate regression and the top performing machine learning model were compared using mean R2 measured in 30-fold cross validation. Exposures: County-level demographic factors (population; rural status; census region; and race/ethnicity, sex, and age composition), socioeconomic factors (median income, unemployment rate, and percentage of population with some college education), health care factors (rate of uninsured adults and primary care physicians), and environmental factors (access to healthy foods and access to exercise opportunities). Main Outcomes and Measures: County-level obesity prevalence in 2018, its association with each county-level factor, and the percentage of variation in county-level obesity prevalence explained by linear multivariate and gradient boosting machine regression measured with R2. Results: Among the 3138 counties studied, the mean (range) obesity prevalence was 31.5{\%} (12.8{\%}-47.8{\%}). In multivariate regressions, demographic factors explained 44.9{\%} of variation in obesity prevalence; socioeconomic factors, 33.0{\%}; environmental factors, 15.5{\%}; and health care factors, 9.1{\%}. 
The county-level factors with the strongest association with obesity were census region, median household income, and percentage of population with some college education. R2 values of univariate regressions of obesity prevalence were 0.238 for census region, 0.218 for median household income, and 0.160 for percentage of population with some college education. Multivariate linear regression and gradient boosting machine regression (the best-performing machine learning model) of obesity prevalence using all county-level demographic, socioeconomic, health care, and environmental factors had R2 values of 0.58 and 0.66, respectively (P {\textless} .001). Conclusions and Relevance: Obesity prevalence varies significantly between counties. County-level demographic, socioeconomic, health care, and environmental factors explain the majority of variation in county-level obesity prevalence. Using machine learning models may explain significantly more of the variation in obesity prevalence..},
-author = {Scheinker, David and Valencia, Areli and Rodriguez, Fatima},
-doi = {10.1001/jamanetworkopen.2019.2884},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Scheinker 2019 US obesity supplement.pdf:pdf;:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Scheinker, Valencia, Rodriguez - 2019 - Identification of factors associated with variation in US county-level obesity prevalence rates.pdf:pdf},
-issn = {25743805},
-journal = {JAMA network open},
-keywords = {The JAMA Network},
-month = {apr},
-number = {4},
-pages = {e192884},
-pmid = {31026030},
-publisher = {NLM (Medline)},
-title = {{Identification of factors associated with variation in US county-level obesity prevalence rates using epidemiologic vs machine learning models}},
-url = {https://jamanetwork.com/},
-volume = {2},
-year = {2019}
+institution = {AABB},
+pages = {1--91},
+title = {{The 2014-2015 AABB blood collection and utilization survey report}},
+url = {http://www.aabb.org/research/hemovigilance/bloodsurvey/Docs/2014-2015-AABB-Blood-Survey-Report.pdf?ct=483178b5c665113a4a67486385907873f0a434f88cddea5b6fece817de48206c5db7f5b0d5fe99d98ac08ff2cbe330ae8ed7602ce9b2b0b3ea6861794458c137},
+year = {2018}
 }
 @article{Cable2016,
 abstract = {BACKGROUND Understanding the effect of blood donation and iron supplementation on iron balance will inform strategies to manage donor iron status. STUDY DESIGN AND METHODS A total of 215 donors were randomized to receive ferrous gluconate daily (37.5 mg iron) or no iron for 24 weeks after blood donation. Iron stores were assessed using ferritin and soluble transferrin receptor. Hemoglobin (Hb) iron was calculated from total body Hb. Total body iron (TBI) was estimated by summing iron stores and Hb iron. RESULTS At 24 weeks, TBI in donors taking iron increased by 281.0 mg (95{\%} confidence interval [CI], 223.4-338.6 mg) compared to before donation, while TBI in donors not on iron decreased by 74.1 mg (95{\%} CI, -112.3 to -35.9; p {\textless} 0.0001, iron vs. no iron). TBI increased rapidly after blood donation with iron supplementation, especially in iron-depleted donors. Supplementation increased TBI compared to controls during the first 8 weeks after donation: 367.8 mg (95{\%} CI, 293.5-442.1) versus -24.1 mg (95{\%} CI, -82.5 to 34.3) for donors with a baseline ferritin level of not more than 26 ng/mL and 167.8 mg (95{\%} CI, 116.5-219.2) versus -68.1 mg (95{\%} CI, -136.7 to 0.5) for donors with a baseline ferritin level of more than 26 ng/mL. A total of 88{\%} of the benefit of iron supplementation occurred during the first 8 weeks after blood donation. CONCLUSION Donors on iron supplementation replaced donated iron while donors not on iron did not. Eight weeks of iron supplementation provided nearly all of the measured improvement in TBI. Daily iron supplementation after blood donation allows blood donors to recover the iron loss from blood donation and prevents sustained iron deficiency.},
@@ -179,6 +383,18 @@ @article{Cable2016
 volume = {56},
 year = {2016}
 }
+@article{Jung2017,
+abstract = {From doctors diagnosing patients to judges setting bail, experts often base their decisions on experience and intuition rather than on statistical models. While understandable, relying on intuition over models has often been found to result in inferior outcomes. Here we present a new method---select-regress-and-round---for constructing simple rules that perform well for complex decisions. These rules take the form of a weighted checklist, can be applied mentally, and nonetheless rival the performance of modern machine learning algorithms. Our method for creating these rules is itself simple, and can be carried out by practitioners with basic statistics knowledge. We demonstrate this technique with a detailed case study of judicial decisions to release or detain defendants while they await trial. In this application, as in many policy settings, the effects of proposed decision rules cannot be directly observed from historical data: if a rule recommends releasing a defendant that the judge in reality detained, we do not observe what would have happened under the proposed action. We address this key counterfactual estimation problem by drawing on tools from causal inference. We find that simple rules significantly outperform judges and are on par with decisions derived from random forests trained on all available features. Generalizing to 22 varied decision-making domains, we find this basic result replicates. We conclude with an analytical framework that helps explain why these simple decision rules perform as well as they do.},
+archivePrefix = {arXiv},
+arxivId = {1702.04690v3},
+author = {Jung, Jongbin and Concannon, Connor and Shroff, Ravi and Goel, Sharad and Goldstein, Daniel G},
+doi = {10.1145/nnnnnnn.nnnnnnn},
+eprint = {1702.04690v3},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Jung et al. - 2017 - Simple rules for complex decisions.pdf:pdf},
+journal = {arXiv},
+title = {{Simple rules for complex decisions}},
+year = {2017}
+}
 @article{Hand2001,
 abstract = {The area under the ROC curve, or the equivalent Gini index, is a widely used measure of performance of supervised classification rules. It has the attractive property that it side-steps the need to specify the costs of the different kinds of misclassification. However, the simple form is only applicable to the case of two classes. We extend the definition to the case of more than two classes by averaging pairwise comparisons. This measure reduces to the standard form in the two class case. We compare its properties with the standard measure of proportion correct and an alternative definition of proportion correct based on pairwise comparison of classes for a simple artificial case and illustrate its application on eight data sets. On the data sets we examined, the measures produced similar, but not identical results, reflecting the different aspects of performance that they were measuring. Like the area under the ROC curve, the measure we propose is useful in those many situations where it is impossible to give costs for the different kinds of misclassification.},
 author = {Hand, David J. and Till, Robert J.},
@@ -195,33 +411,18 @@ @article{Hand2001
 volume = {45},
 year = {2001}
 }
-@article{Simon1981,
-author = {Simon, Toby L and Garry, Philip J and Hooper, Elizabeth M},
-file = {:C$\backslash$:/Users/alton/Downloads/jama{\_}245{\_}20{\_}018.pdf:pdf},
-journal = {JAMA},
-number = {20},
-pages = {2038--2043},
-title = {{Iron stores in blood donors}},
-volume = {245},
-year = {1981}
-}
-@article{Custer2007,
-abstract = {BACKGROUND: The consequences of temporary deferral are not well understood. This study sought to investigate blood donor return after deferral expiration. STUDY DESIGN AND METHODS: A retrospective cohort analysis of allogeneic whole-blood donation was conducted. All deferred donors and a random sample of eligible donors were identified from the year 2000, with subsequent blood center visits through December 2005 captured. Stratified results are reported as the percentage returning, rates of return, and time to return. Measures of statistical association and Cox regression modeling are reported. RESULTS: For first-time (FT) donors, 25 percent of temporarily deferred donors returned during the 5-year follow-up period compared to 47 percent of eligible donors (p {\textless} 0.0001); for repeat donors, 81 and 86 percent of deferred and eligible donors returned, respectively (p {\textless} 0.0001). Depending on the deferral category, 14 to 31 percent of FT and 58 to 90 percent of repeat donors returned. Rates (per year) of successful donation during the follow-up period were 0.09 for index-deferred FT donors, 0.28 for eligible FT donors, 1.0 for deferred repeat donors, and 1.45 for eligible repeat donors. Multivariate modeling indicated that in addition to deferral, age, sex, race, and education were associated with return in both FT and repeat donors. CONCLUSION: The effects of deferral were more pronounced than expected, affecting both FT and repeat donors. For FT donors, the type and duration of deferral, while important, were not as relevant as hypothesized because so few returned, suggesting the need to develop appropriate interventions to recapture those donors likely to be eligible. {\textcopyright} 2007 American Association of Blood Banks.},
-author = {Custer, Brian and Chinn, Artina and Hirschler, Nora V. and Busch, Michael P. and Murphy, Edward L.},
-doi = {10.1111/j.1537-2995.2007.01292.x},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Custer 2007 Deferral return behaviour.pdf:pdf},
-issn = {00411132},
-journal = {Transfusion},
-keywords = {Adult,Aged,Artina Chinn,Blood Donors*,Brian Custer,Cohort Studies,Edward L Murphy,Female,Humans,MEDLINE,Male,Middle Aged,Multivariate Analysis,NCBI,NIH,NLM,National Center for Biotechnology Information,National Institutes of Health,National Library of Medicine,Non-U.S. Gov't,PubMed Abstract,Research Support,Retrospective Studies,Time Factors,doi:10.1111/j.1537-2995.2007.01292.x,pmid:17655597},
-month = {aug},
-number = {8},
-pages = {1514--1523},
-pmid = {17655597},
-publisher = {Transfusion},
-title = {{The consequences of temporary deferral on future whole blood donation}},
-url = {https://pubmed.ncbi.nlm.nih.gov/17655597/},
-volume = {47},
-year = {2007}
+@inproceedings{Zadrozny2002,
+abstract = {Class membership probability estimates are important for many applications of data mining in which classification outputs are combined with other sources of information for decision-making, such as example-dependent misclassification costs, the outputs of other classifiers, or domain knowledge. Previous calibration methods apply only to two-class problems. Here, we show how to obtain accurate probability estimates for multiclass problems by combining calibrated binary probability estimates. We also propose a new method for obtaining calibrated two-class probability estimates that can be applied to any classifier that produces a ranking of examples. Using naive Bayes and support vector machine classifiers, we give experimental results from a variety of two-class and multiclass domains, including direct marketing, text categorization and digit recognition.},
+address = {New York, New York, USA},
+author = {Zadrozny, Bianca and Elkan, Charles},
+booktitle = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
+doi = {10.1145/775047.775151},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Zadrozny, Elkan - 2002 - Transforming classifier scores into accurate multiclass probability estimates.pdf:pdf},
+pages = {694--699},
+publisher = {Association for Computing Machinery (ACM)},
+title = {{Transforming classifier scores into accurate multiclass probability estimates}},
+url = {http://portal.acm.org/citation.cfm?doid=775047.775151},
+year = {2002}
 }
 @article{Mast2016,
 abstract = {BACKGROUND The historical approach of offering dietary advice to donors with low hemoglobin (Hb) is ineffective for preventing iron deficiency in frequent donors. Alternative approaches to maintaining donor iron status were explored. STUDY DESIGN AND METHODS Frequent blood donors were randomly assigned into five arms for 2 years of follow-up. Three double-blinded arms provided 60 once-daily pills after each donation (38, 19, or 0 mg of iron). Two single-blinded arms provided iron status (ferritin) or no information letters after each donation. Ferritin, soluble transferrin receptor, and complete blood count were measured at each donation. RESULTS There were 692 subjects enrolled and 393 completed the study. Subjects in pill groups deenrolled more than those in letter groups (39{\%} vs. 7{\%}). Adverse events occurred equally in subjects receiving iron or placebo pills. Of those completing the study, the prevalence of ferritin of less than 12 or less than 26 ng/mL declined by more than 50{\%} and was statistically indistinguishable in the three intervention groups (19 or 38 mg of iron; iron status letter). Longitudinal analyses of all subjects showed improved iron status in iron pill groups and worsening iron status in control groups (placebo; no information letter). The iron pill groups experienced a net increase of approximately 0.6 g/dL Hb compared to control groups. The iron status letter group had little change in Hb. CONCLUSION Providing 19 or 38 mg of daily iron or iron status information were effective and mostly equivalent interventions for mitigating iron deficiency in regular donors when compared at the end of the 2-year longitudinal phase of the study. Donors without intervention had worsened iron deficiency with continued donation.},
@@ -256,184 +457,50 @@ @inproceedings{Lakkaraju2016
 volume = {13-17-Augu},
 year = {2016}
 }
-@article{DiAngelantonio2017,
-abstract = {Background Limits on the frequency of whole blood donation exist primarily to safeguard donor health. However, there is substantial variation across blood services in the maximum frequency of donations allowed. We compared standard practice in the UK with shorter inter-donation intervals used in other countries.},
-author = {{Di Angelantonio}, Emanuele and Thompson, Simon G and Kaptoge, Stephen and Moore, Carmel and Walker, Matthew and Armitage, Jane and Ouwehand, Willem H and Roberts, David J and Danesh, John},
-doi = {10.1016/S0140-6736(17)31928-1},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Di Angelantonio et al. - 2017 - Efficiency and safety of varying the frequency of whole blood donation (INTERVAL) a randomised trial of.pdf:pdf},
-journal = {The Lancet},
-title = {{Efficiency and safety of varying the frequency of whole blood donation (INTERVAL): a randomised trial of 45{\^{a}}€ˆ000 donors}},
-url = {https://ac.els-cdn.com/S0140673617319281/1-s2.0-S0140673617319281-main.pdf?{\_}tid=89ee0f5c-baf1-4e9d-8f9e-e6cefaedcd8d{\&}acdnat=1523834863{\_}d52c9259a58ce1d9290c329e920785e5},
-volume = {390},
-year = {2017}
-}
-@article{Large2019,
-abstract = {Our hypothesis is that building ensembles of small sets of strong classifiers constructed with different learning algorithms is, on average, the best approach to classification for real-world problems. We propose a simple mechanism for building small heterogeneous ensembles based on exponentially weighting the probability estimates of the base classifiers with an estimate of the accuracy formed through cross-validation on the train data. We demonstrate through extensive experimentation that, given the same small set of base classifiers, this method has measurable benefits over commonly used alternative weighting, selection or meta-classifier approaches to heterogeneous ensembles. We also show how an ensemble of five well-known, fast classifiers can produce an ensemble that is not significantly worse than large homogeneous ensembles and tuned individual classifiers on datasets from the UCI archive. We provide evidence that the performance of the cross-validation accuracy weighted probabilistic ensemble (CAWPE) generalises to a completely separate set of datasets, the UCR time series classification archive, and we also demonstrate that our ensemble technique can significantly improve the state-of-the-art classifier for this problem domain. We investigate the performance in more detail, and find that the improvement is most marked in problems with smaller train sets. We perform a sensitivity analysis and an ablation study to demonstrate the robustness of the ensemble and the significant contribution of each design element of the classifier. We conclude that it is, on average, better to ensemble strong classifiers with a weighting scheme rather than perform extensive tuning and that CAWPE is a sensible starting point for combining classifiers.},
-author = {Large, James and Lines, Jason and Bagnall, Anthony},
-doi = {10.1007/s10618-019-00638-y},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Large, Lines, Bagnall - 2019 - A probabilistic classifier ensemble weighting scheme based on cross-validated accuracy estimates.pdf:pdf},
-issn = {1573756X},
-journal = {Data Mining and Knowledge Discovery},
-keywords = {Classification,Ensemble,Heterogeneous,Weighted},
-month = {nov},
-number = {6},
-pages = {1674--1709},
-publisher = {Springer New York LLC},
-title = {{A probabilistic classifier ensemble weighting scheme based on cross-validated accuracy estimates}},
-url = {https://doi.org/10.1007/s10618-019-00638-y},
-volume = {33},
-year = {2019}
-}
-@article{Salvin2014,
-abstract = {BACKGROUND: Iron deficiency (ID) is an important consequence of blood donation. The epidemiology of this problem in the blood donor population was therefore studied to enable appropriate targeting of potential solutions to donor ID.$\backslash$n$\backslash$nSTUDY DESIGN AND METHODS: A nationally representative, cluster-based cross-sectional study of Australian blood donors was performed. Donors were eligible for inclusion if they fulfilled criteria for blood donation or were deferred due to low or falling hemoglobin. Ferritin was measured and demographic and donation data were collected.$\backslash$n$\backslash$nRESULTS: A total of 3094 blood donors were recruited, of which samples were collected from 3049 donors; 1873 had exclusively donated whole blood (WB only), 242 had exclusively made apheresis donations, and 530 had not donated ("new" donors) in the previous 24 months. The prevalence of ID in new female donors was 12.0{\%} compared with 1.3{\%} in males. The prevalence of ID in female WB-only donors was 26.4{\%}; it increased with donation frequency and decreased with age. The prevalence in male WB-only donors was 6.3{\%} with no evident change with age or donation frequency. The prevalence of ID in apheresis-only donors (females 6.3{\%}; males 2.2{\%}) did not significantly exceed that of new donors nor did it change with donation frequency. Importantly, the risk of ID could not be satisfactorily predicted in an individual donor by his or her anemia status or with predictive modeling incorporating demographic and donation data.$\backslash$n$\backslash$nCONCLUSION: ID is especially prevalent in female, premenopausal, frequent WB donors. Strategies to combat ID should be implemented, specifically targeting this group.},
-author = {Salvin, Hannah E. and Pasricha, Sant Rayn and Marks, Denese C. and Speedy, Joanna},
-doi = {10.1111/trf.12647},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Salvin et al. - 2014 - Iron deficiency in blood donors A national cross-sectional study.pdf:pdf},
-isbn = {1537-2995 (Electronic)0041-1132 (Linking)},
-issn = {15372995},
-journal = {Transfusion},
-number = {10},
-pages = {2434--2444},
-pmid = {24738792},
-title = {{Iron deficiency in blood donors: A national cross-sectional study}},
-volume = {54},
-year = {2014}
-}
-@article{Rigas2014,
-abstract = {Dietary studies show a relationship between the intake of iron enhancers and inhibitors and iron stores in the general population. However, the impact of dietary factors on the iron stores of blood donors, whose iron status is affected by blood donations, is incompletely understood. In the Danish Blood Donor Study, we assessed the effect of blood donation frequency, physiologic factors, lifestyle and supplemental factors, and dietary factors on ferritin levels. We used multiple linear and logistic regression analyses stratified by sex and menopausal status. Among high-frequency donors (more than nine donations in the past 3 years), we found iron deficiency (ferritin below 15 ng/mL) in 9, 39, and 22{\%} of men, premenopausal women, and postmenopausal women, respectively. The strongest predictors of iron deficiency were sex, menopausal status, the number of blood donations in a 3-year period, and the time since last donation. Other significant factors included weight, age, intensity of menstruation, iron tablets, vitamin pills, and consumption of meat and wine. The study confirms iron deficiency as an important problem, especially among menstruating women donating frequently. The risk of iron depletion was largely explained by sex, menopausal status, and donation frequency. Other factors, including dietary and supplemental iron intake, had a much weaker effect on the risk of iron depletion. {\textcopyright} 2013 The Authors. Transfusion published by Wiley Periodicals, Inc. on behalf of AABB.},
-author = {Rigas, Andreas Stribolt and S{\o}rensen, Cecilie Juul and Pedersen, Ole Birger and Petersen, Mikkel Steen and Th{\o}rner, Lise Wegner and Kotz{\'{e}}, Sebastian and S{\o}rensen, Erik and Magnussen, Karin and Rostgaard, Klaus and Erikstrup, Christian and Ullum, Henrik},
-doi = {10.1111/trf.12518},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Rigas2014 Danish Blood Donors.pdf:pdf},
-issn = {15372995},
-journal = {Transfusion},
-number = {3 Pt 2},
-pages = {789--796},
-pmid = {24372094},
-publisher = {Wiley-Blackwell},
-title = {{Predictors of iron levels in 14,737 Danish blood donors: results from the Danish Blood Donor Study.}},
-url = {/pmc/articles/PMC4209803/?report=abstract https://www-ncbi-nlm-nih-gov.stanford.idm.oclc.org/pmc/articles/PMC4209803/},
-volume = {54},
-year = {2014}
-}
-@article{Goldman2017,
-abstract = {BACKGROUND: We assessed risk groups for iron deficiency and the feasibility and efficacy of ferritin testing in a large blood center. STUDY DESIGN AND METHODS: Donors were informed of possible testing in the predonation pamphlet. Plasma ferritin was measured on retention samples (n = 12,595) from representative clinics. Low-ferritin donors ({\textless}25 µg/L) were sent a letter and information sheet and not called for 6 months. Ferritin testing was repeated on 25{\%} of donors; donor return rate and frequency were monitored. RESULTS: Low-ferritin donors represented 2.9{\%} of first-time and reactivated (no donation in past 12 months) male donors, 32.2{\%} of first-time and reactivated female donors, 41.6{\%} of repeat male donors, and 65.1{\%} of repeat female donors. A mean of 11.7 months after index donation, the return rate was 76{\%} for normal and 58{\%} for low-ferritin donors; returning low-ferritin donors had made approximately one fewer donation. Ferritin increased by 16.3 and 12.1 µg/L in male and female low-ferritin donors and decreased by 17 µg/L in male and female normal-ferritin donors. CONCLUSION: The minimum hemoglobin level will be increased to 130 g/L for male donors and the minimum interdonation interval changed to 84 days (four donations yearly) for female donors based on iron deficiency risk groups. Large-scale ferritin testing was feasible. Donors informed of low-ferritin results had a lower return rate, donated less often, and had an increase in ferritin on return, approximately 1 year after initial testing, compared to donors with normal ferritin results.},
-author = {Goldman, Mindy and Uzicanin, Samra and Osmond, Lori and Scalia, Vito and O'Brien, Sheila F.},
-doi = {10.1111/trf.13956},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Goldman 2017 Canada ferritin testing study.pdf:pdf},
-issn = {15372995},
-journal = {Transfusion},
-month = {mar},
-number = {3},
-pages = {564--570},
-pmid = {27943371},
-title = {{A large national study of ferritin testing in Canadian blood donors}},
-url = {http://doi.wiley.com/10.1111/trf.13956},
-volume = {57},
-year = {2017}
-}
-@article{Magnussen2015,
-abstract = {Background: Blood donors with low haemoglobin-concentration (Hb) and iron deficiency are well known challenges in any blood bank setting. The handling is complex, and even though iron deficiency is a frequent cause of anaemia, there are differential diagnoses. In healthy blood donors ferritin is helpful in discerning between anaemia caused by iron-deficiency and other causes, and while low Hb and low levels of ferritin, are the primary concern, some donors have too high levels, which must also be dealt with. Aims: The primary aims were to standardize and optimize the handling of blood donors with Hb below the limit for donation (7.8 and 8.4 mmol/l for female and male donors respectively). Secondary aims were to deal with all issues related to high or low either Hb or ferritin in blood donors. Methods: The problem was approached, by centralizing measurement of Hb, initiating ferritin measurements and establishing Centre for Donor Haemoglobin and Iron. An algorithm was created, taking mainly Hb and ferritin into account. The possible outcomes were: to send iron-tablets and iron-folder by mail, to give iron tablets with future donations, to refer the donor to general practitioner (GP) or, in most cases to do nothing. Pre-existing staff was trained in donor-communication and to handle the Hb and ferritin results. The donors where mainly contacted by mail, but when Hb was low or when there was insufficient effect of previously sent iron-supplementation the donor was contacted by phone to improve compliance. Hb was measured on Sysmex-XE2100D as part of a Complete Blood Count at every donation. Ferritin was measured one time in all donors, at every 10th donation and repeated when outside 60-300 mug/l at the previous donation. Ferritin measurement was also repeated if the previously measured Hb was low. The reason for not measuring ferritin at every donation was economy. 
Results: From February 1st 2012 to February 1st 2015, 71,450 donors (53.5{\%} women/46.5{\%} men) donated 281,814 units of whole blood (48{\%} women/52{\%} men). The mean Hb increased from 8.59 to 8.64 mmol/l in the female donors (P {\textgreater} 0.001) while the increase in the male donors was smaller 9.55-9.57 mmol/l (P = 0.017). The Red-Blood-Cell Count increased from 4.57 to 4.66 and 5.02-5.12 x 1012/l in female and male donors respectively (P {\textgreater} 0.001). The decrease in {\%} of female donors with low Hb from 4.0 to 3.5 was not significant. The {\%} of male donors with low Hb decreased from 0.92 to 0.55 (P = 0.03). Of the donors that were referred to GP on suspicion or to rule out disease, not all called back to inform about the result. Of those that did inform 12 were diagnosed with leukaemia or cancer and 2 with Vitamin B12-deficiency. Other finds were heterozygous thalassemia, hemochromatosis and polycythaemia vera. Summary/Conclusions: While the aim was to keep the donors within our frame for Hb and ferritin, the main benefit of the program was to have a well-functioning program for when the donors did fall outside anyway. The program with goal directed iron supplementation only to those that would benefit, has led to an increase in Hb and a reduction the number of donors with low Hb.},
-author = {Magnussen, Karin and Ladelund, Steen},
-doi = {10.1111/trf.13152},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Magnussen, Ladelund - 2015 - Handling low hemoglobin and iron deficiency in a blood donor population 2 years' experience.pdf:pdf},
-isbn = {0042-9007},
-issn = {15372995},
-journal = {Transfusion},
-number = {10},
-pages = {2473--2478},
-title = {{Handling low hemoglobin and iron deficiency in a blood donor population: 2 years' experience}},
-volume = {55},
-year = {2015}
-}
-@misc{Schotten2016,
-author = {Schotten, Nienke and Jong, Pieternel C.M.Pasker De and Moretti, Diego and Zimmermann, Michael B. and Geurts-Moespot, Anneke J. and Swinkels, Dorine W. and {Van Kraaij}, Marian G.J.},
-booktitle = {Blood},
-doi = {10.1182/blood-2016-04-709451},
-file = {::},
-issn = {15280020},
-keywords = {extension,iron},
-month = {oct},
-number = {17},
-pages = {2185--2188},
-pmid = {27587880},
-publisher = {American Society of Hematology},
-title = {{The donation interval of 56 days requires extension to 180 days for whole blood donors to recover from changes in iron metabolism}},
-url = {http://ashpublications.org/blood/article-pdf/128/17/2185/1396505/2185.pdf},
-volume = {128},
-year = {2016}
-}
-@inproceedings{Spencer2019a,
-address = {Basel},
-author = {Spencer, Bryan R and Fox, M and Wise, L and Cable, Richard},
-booktitle = {Abstract of 29th Regional Congress of the ISBT},
-doi = {10.1111/vox.12792},
-issn = {0042-9007},
-month = {jun},
-number = {S1},
-pages = {5--240},
-publisher = {Vox Sanguinis},
-title = {{A composite measure of heme iron consumption predicts incident iron depletion in repeat blood donors}},
-url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/vox.12792},
-volume = {114},
-year = {2019}
-}
-@article{Spencer2016,
-author = {Spencer, Bryan R. and Johnson, Bryce and Wright, David J. and Kleinman, Steven and Glynn, Simone A. and Cable, Ritchard G.},
-doi = {10.1111/trf.13663},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Spencer et al. - 2016 - Potential impact on blood availability and donor iron status of changes to donor hemoglobin cutoff and interdona.pdf:pdf;:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Unknown - Unknown - trf13663-sup-0001-suppappendix.docx:docx},
-issn = {15372995},
-journal = {Transfusion},
-number = {8},
-pages = {1994--2004},
-title = {{Potential impact on blood availability and donor iron status of changes to donor hemoglobin cutoff and interdonation intervals}},
-volume = {56},
-year = {2016}
+@article{Baart2011,
+abstract = {Background and Objectives Each year, a relevant proportion of whole blood donors is deferred from donation because of low haemoglobin (Hb) levels. Such temporary deferrals are demoralizing, and donors may never return for a donation. Reliable predictions of Hb levels may guide the decision whether donors can be invited for the next donation. In this study, a prediction model was developed for the risk of low Hb levels. Materials and Methods Individual data from 5191 whole blood donors were analysed; 143 donors had a low Hb level. Eleven candidate predictors were considered in logistic regression models to predict low Hb levels. The performance of the prediction model was studied with the receiver operating characteristic (ROC) curve. Internal validity was assessed with a bootstrap procedure. Results Strong predictors were sex, seasonality, Hb level measured at the previous visit, difference in Hb levels between the previous two visits, time since the previous visit, deferral at the previous visit, and the total number of whole blood donations in the past 2 years. Internal validation showed an area under the ROC curve of 0.87. Conclusion The developed prediction model provides accurate discrimination between donors with low and appropriate Hb levels. The model predictions may be valuable to determine whether donors can be invited for a next donation, or whether some interventions such as postponement of the invitation are warranted. Potentially, this could decrease the number of donor deferrals for low Hb levels. {\textcopyright} 2010 The Author(s). Vox Sanguinis {\textcopyright} 2010 International Society of Blood Transfusion.},
+author = {Baart, A. M. and {De Kort}, W. L.A.M. and Moons, K. G.M. and Vergouwe, Y.},
+doi = {10.1111/j.1423-0410.2010.01382.x},
+issn = {00429007},
+journal = {Vox Sanguinis},
+keywords = {Blood donors,Development,Donor deferral,Haemoglobin,Internal validation,Prediction model},
+month = {feb},
+number = {2},
+pages = {204--211},
+pmid = {20726956},
+publisher = {John Wiley {\&} Sons, Ltd},
+title = {{Prediction of low haemoglobin levels in whole blood donors}},
+url = {https://doi.org/10.1111/j.1423-0410.2010.01382.x},
+volume = {100},
+year = {2011}
 }
-@article{Vassallo2018,
-abstract = {BACKGROUND: Iron deficiency is observed in blood donors who meet hemoglobin requirements for donation. Frequent donation results in negative iron balance, and teenage donors may thus be at risk for adverse health consequences. STUDY DESIGN AND METHODS: Blood Systems implemented ferritin testing on all successful 16- to 18-year-old (teen) donations. Low ferritin (LF) was defined as less than 20 ng/mL in females and less than 30 ng/mL in males. Donors with LF were deferred from red blood cell (RBC) donations (12 months for females, and 6 for males) and counseled to take low-dose iron for 60 days. A ferritin value less than 26 ng/mL indicated iron-deficient erythropoiesis and less than 12 ng/mL absent iron stores. RESULTS: Over 16 months, 110,417 teen donations were tested and represented 10.5{\%} of all successful donations. The rate of absent iron stores was 9.0{\%} (1.9{\%} male; 15.9{\%} female) and of iron-deficient erythropoiesis, 31.9{\%} (12.4{\%} male; 50.6{\%} female). The rate of LF deferrals was 26.9{\%} (16.7{\%} male; 36.6{\%} female). The proportion of LF donors decreased with increasing predonation hemoglobin and rose with increasing RBC donations in the prior 24 months. Seasonality in LF deferrals and the RBC contribution from teen donors was observed. CONCLUSIONS: Ferritin testing of teen donors identified individuals with LF who might benefit from risk mitigation. LF is more common in teenage female than male donors and those with RBC donations in the prior 24 months. An appreciable number of new/lapsed donors presented with LF, however. These data may be useful in guiding future risk mitigation efforts.},
-author = {Vassallo, Ralph R. and Bravo, Marjorie D. and Kamel, Hany},
-doi = {10.1111/trf.14921},
-file = {:G$\backslash$:/My Drive/Blood Transfusion/Iron model/Vassallo{\_}et{\_}al-2018-Transfusion.pdf:pdf},
+@article{Spencer2019,
+abstract = {BACKGROUND: High school students 16 to 18 years-old contribute 10{\%} of the US blood supply. Mitigating iron depletion in these donors is important because they continue to undergo physical and neurocognitive development. STUDY DESIGN AND METHODS: Study objectives were to determine the prevalence of iron depletion in 16- to 18-year-old donors and whether their risk for iron depletion was greater than adult donors. Successful, age-eligible donors were enrolled from high school blood drives at two large US blood centers. Plasma ferritin testing was performed with ferritin less than 12 ng/mL as our primary measure of iron depletion and ferritin less than 26 ng/mL a secondary measure. Multivariable repeated-measures logistic regression models evaluated the role of age and other demographic/donation factors. RESULTS: Ferritin was measured from 4265 enrollment donations September to November 2015 and 1954 follow-up donations through May 2016. At enrollment, prevalence of ferritin less than 12 ng/mL in teenagers was 1{\%} in males and 18{\%} in females making their first blood donation, and 8{\%} in males and 33{\%} in females with prior donations. Adjusted odds for ferritin less than 12 ng/mL were 2.1 to 2.8 times greater in 16- to 18-year-olds than in 19- to 49-year-olds, and for ferritin less than 26 ng/mL were 3.3- to 4.7-fold higher in 16- to 18-year-olds. Progression to hemoglobin deferral was twice as likely in 16- to 18-year-old versus 19- to 49-year-old females. CONCLUSION: Age 16 to 18 years-old is an independent risk factor for iron deficiency in blood donors at any donation frequency. Blood centers should implement alternate eligibility criteria or additional safety measures to protect teenage donors from iron depletion.},
+author = {Spencer, Bryan R. and Bialkowski, Walter and Creel, Darryl V. and Cable, Ritchard G. and Kiss, Joseph E. and Stone, Mars and McClure, Christopher and Kleinman, Steven and Glynn, Simone A. and Mast, Alan E.},
+doi = {10.1111/trf.15133},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Spencer 2019 High school aged blood donors.pdf:pdf},
 issn = {15372995},
 journal = {Transfusion},
-number = {12},
-pages = {2861--2867},
-title = {{Ferritin testing to characterize and address iron deficiency in young donors}},
-volume = {58},
-year = {2018}
-}
-@article{Kiss2015,
-abstract = {IMPORTANCE Although blood donation is allowed every 8 weeks in the United States, recovery of hemoglobin to the currently accepted standard (12.5 g/dL) is frequently delayed, and some donors become anemic. OBJECTIVE To determine the effect of oral iron supplementation on hemoglobin recovery time (days to recovery of 80{\%} of hemoglobin removed) and recovery of iron stores in iron-depleted (" low ferritin, " Յ26 ng/mL) and iron-replete (" higher ferritin, " {\textgreater}26 ng/mL) blood donors. DESIGN, SETTING, AND PARTICIPANTS Randomized, nonblinded clinical trial of blood donors stratified by ferritin level, sex, and age conducted in 4 regional blood centers in the United States in 2012. Included were 215 eligible participants aged 18 to 79 years who had not donated whole blood or red blood cells within 4 months.},
-author = {Kiss, Joseph E. and Brambilla, Donald and Glynn, Simone A. and Mast, Alan E. and Spencer, Bryan R. and Stone, Mars and Kleinman, Steven H. and Cable, Ritchard G.},
-doi = {10.1001/jama.2015.119},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Kiss et al. - 2015 - Oral Iron Supplementation After Blood Donation.pdf:pdf},
-issn = {0098-7484},
-journal = {Jama},
-number = {6},
-pages = {575},
-pmid = {25668261},
-title = {{Oral iron supplementation after blood donation}},
-url = {http://jama.jamanetwork.com/article.aspx?doi=10.1001/jama.2015.119},
-volume = {313},
-year = {2015}
+month = {jan},
+number = {5},
+pages = {1706--1716},
+pmid = {30633813},
+publisher = {John Wiley {\&} Sons, Ltd (10.1111)},
+title = {{Elevated risk for iron depletion in high-school age blood donors}},
+url = {http://doi.wiley.com/10.1111/trf.15133},
+volume = {59},
+year = {2019}
 }
-@article{Varma2006,
-abstract = {Background: Cross-validation (CV) is an effective method for estimating the prediction error of a classifier. Some recent articles have proposed methods for optimizing classifiers by choosing classifier parameter values that minimize the CV error estimate. We have evaluated the validity of using the CV error estimate of the optimized classifier as an estimate of the true error expected on independent data. Results: We used CV to optimize the classification parameters for two kinds of classifiers; Shrunken Centroids and Support Vector Machines (SVM). Random training datasets were created, with no difference in the distribution of the features between the two classes. Using these "null" datasets, we selected classifier parameter values that minimized the CV error estimate. 10-fold CV was used for Shrunken Centroids while Leave-One-Out-CV (LOOCV) was used for the SVM. Independent test data was created to estimate the true error. With "null" anb "non null" (with differential expression between the classes) data, we also tested a nested CV procedure, where an inner CV loop is used to perform the tuning of the parameters while an outer CV is used to compute an estimate of the error. The CV error estimate for the classifier with the optimal parameters was found to be a substantially biased estimate of the true error that the classifier would incur on independent data. Even though there is no real difference between the two classes for the "null" datasets, the CV error estimate for the Shrunken Centroid with the optimal parameters was less than 30{\%} on 18.5{\%} of simulated training data-sets. For SVM with optimal parameters the estimated error rate was less than 30{\%} on 38{\%} of "null" data-sets. Performance of the optimized classifiers on the independent test set was no better than chance. 
The nested CV procedure reduces the bias considerably and gives an estimate of the error that is very close to that obtained on the independent testing set for both Shrunken Centroids and SVM classifiers for "null" and "non-null" data distributions. Conclusion: We show that using CV to compute an error estimate for a classifier that has itself been tuned using CV gives a significantly biased estimate of the true error. Proper use of CV for estimating true error of a classifier developed using a well defined algorithm requires that all steps of the algorithm, including classifier parameter tuning, be repeated in each CV loop. A nested CV procedure provides an almost unbiased estimate of the true error. {\textcopyright} 2006 Varma and Simon; licensee BioMed Central Ltd.},
-author = {Varma, Sudhir and Simon, Richard},
-doi = {10.1186/1471-2105-7-91},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Varma, Simon - 2006 - Bias in error estimation when using cross-validation for model selection.pdf:pdf},
-issn = {14712105},
-journal = {BMC Bioinformatics},
-keywords = {Algorithms,Bioinformatics,Computational Biology/Bioinformatics,Computer Appl. in Life Sciences,Microarrays},
-month = {feb},
-number = {1},
-pages = {91},
-pmid = {16504092},
-publisher = {BioMed Central},
-title = {{Bias in error estimation when using cross-validation for model selection}},
-url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-7-91},
-volume = {7},
-year = {2006}
+@article{DiAngelantonio2017,
+abstract = {Background Limits on the frequency of whole blood donation exist primarily to safeguard donor health. However, there is substantial variation across blood services in the maximum frequency of donations allowed. We compared standard practice in the UK with shorter inter-donation intervals used in other countries.},
+author = {{Di Angelantonio}, Emanuele and Thompson, Simon G and Kaptoge, Stephen and Moore, Carmel and Walker, Matthew and Armitage, Jane and Ouwehand, Willem H and Roberts, David J and Danesh, John},
+doi = {10.1016/S0140-6736(17)31928-1},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Di Angelantonio et al. - 2017 - Efficiency and safety of varying the frequency of whole blood donation (INTERVAL) a randomised trial of.pdf:pdf},
+journal = {The Lancet},
+title = {{Efficiency and safety of varying the frequency of whole blood donation (INTERVAL): a randomised trial of 45{\,}000 donors}},
+url = {https://ac.els-cdn.com/S0140673617319281/1-s2.0-S0140673617319281-main.pdf?{\_}tid=89ee0f5c-baf1-4e9d-8f9e-e6cefaedcd8d{\&}acdnat=1523834863{\_}d52c9259a58ce1d9290c329e920785e5},
+volume = {390},
+year = {2017}
 }
 @inproceedings{Guo2017,
 abstract = {Confidence calibration-the problem of predicting probability estimates representative of the true correctness likelihood-is important for classification models in many applications. We discover that modern neural networks, unlike those from a decade ago, are poorly calibrated. Through extensive experiments, we observe that depth, width, weight decay, and Batch Normalization are important factors influencing calibration. We evaluate the performance of various post-processing calibration methods on state-of-the-art architectures with image and document classification datascts. Our analysis and experiments not only offer insights into neural net-work learning, but also provide a simple and straightforward recipe for practical settings: on most datasets, temperature scaling-a single-parameter variant of Piatt Scaling-is surprisingly effective at calibrating predictions.},
@@ -452,95 +519,40 @@ @inproceedings{Guo2017
 volume = {3},
 year = {2017}
 }
-@article{Cable2011,
-abstract = {BACKGROUND Regular blood donors are at risk of iron deficiency, but characteristics that predispose to this condition are poorly defined. STUDY DESIGN AND METHODS A total of 2425 red blood cell donors, either first-time (FT) or reactivated donors (no donations for 2 years) or frequent donors, were recruited for follow-up. At enrollment, ferritin, soluble transferrin receptor (sTfR), and hemoglobin were determined. Donor variables included demographics, smoking, dietary intake, use of iron supplements, and menstrual and/or pregnancy history. Models to predict two measures of iron deficiency were developed: Absent iron stores (AIS) were indicated by a ferritin level of less than 12 ng/mL and iron-deficient erythropoiesis (IDE) by a log(sTfR/ferritin) value of 2.07 or greater. RESULTS A total of 15.0{\%} of donors had AIS and 41.7{\%} IDE. In frequent donors, 16.4 and 48.7{\%} of males had AIS and IDE, respectively, with corresponding proportions of 27.1 and 66.1{\%} for females. Donation intensity was most closely associated with AIS and/or IDE (odds ratios from 5.3 to 52.2 for different donation intensity compared to FT donors). Being female, younger, and/or menstruating also increased the likelihood of having AIS and/or IDE, as did having a lower weight. Marginally significant variables for AIS and/or IDE were being a nonsmoker, previous pregnancy, and not taking iron supplements. Dietary variables were in general unrelated to AIS and/or IDE, as was race and/or ethnicity. CONCLUSION A large proportion of both female and male frequent blood donors have iron depletion. Donation intensity, sex and/or menstrual status, weight, and age are important independent predictors of AIS and/or IDE. Reducing the frequency of blood donation is likely to reduce the prevalence of iron deficiency among blood donors, as might implementing routine iron supplementation.},
-author = {Cable, Ritchard G. and Glynn, Simone A. and Kiss, Joseph E. and Mast, Alan E. and Steele, Whitney R. and Murphy, Edward L. and Wright, David J. and Sacher, Ronald A. and Gottschall, Jerry L. and Vij, Vibha and Simon, Toby L.},
-doi = {10.1111/j.1537-2995.2010.02865.x},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Cable et al. - 2011 - Iron deficiency in blood donors Analysis of enrollment data from the REDS-II Donor Iron Status Evaluation (RISE) s.pdf:pdf},
-isbn = {1537-2995 (Electronic) 0041-1132 (Linking)},
-issn = {00411132},
-journal = {Transfusion},
-number = {3},
-pages = {511--522},
-pmid = {20804527},
-title = {{Iron deficiency in blood donors: Analysis of enrollment data from the REDS-II Donor Iron Status Evaluation (RISE) study}},
-volume = {51},
-year = {2011}
-}
-@misc{Kiss2018,
-abstract = {Summary: Blood donors and the RBCs and other components they willingly provide are essential in the delivery of healthcare in all parts of the world. Nearly 70{\%} of donated blood comes from repeat or committed donors. The amount of iron removed in the 10 min or so it takes to withdraw a unit of blood (500 ml, plus 25 ml for testing) requires over 24 weeks to replace on a “standard” diet, i.e., without added iron in the form of supplements The cumulative effect of repeat blood donations without adequate iron replacement or a longer wait between donations results in iron deficiency (ID) in many donors, low haemoglobin deferral ({\~{}}8{\%} of donation attempts), and frank anaemia in some. Moreover, ID can be associated with side effects that can impact a blood donor's health, such as fatigue, cognitive changes and other neuromuscular symptoms. In an effort to better identify and prevent ID, blood collection agencies are recommending various strategies, including changes in the donation interval, donation frequency, testing of iron status and iron supplementation. In this review, we present the evidence basis for these strategies and suggest our own approaches to improving iron balance in blood donors.},
-author = {Kiss, Joseph E. and Vassallo, Ralph R.},
-booktitle = {British Journal of Haematology},
-doi = {10.1111/bjh.15136},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Kiss 2018 Blood donor iron what to do.pdf:pdf},
-issn = {13652141},
-keywords = {blood donor,ferritin,interdonation interval,iron deficiency,iron supplement},
-month = {jun},
-number = {5},
-pages = {590--603},
-pmid = {29767836},
-publisher = {Blackwell Publishing Ltd},
-title = {{How do we manage iron deficiency after blood donation?}},
-url = {http://doi.wiley.com/10.1111/bjh.15136},
-volume = {181},
-year = {2018}
-}
-@article{Breiman2001,
-abstract = {Random forests are a combination of tree predictors such that each tree depends on the values of a random vector sampled independently and with the same distribution for all trees in the forest. The generalization error for forests converges a.s. to a limit as the number of trees in the forest becomes large. The generalization error of a forest of tree classifiers depends on the strength of the individual trees in the forest and the correlation between them. Using a random selection of features to split each node yields error rates that compare favorably to Adaboost (Y. Freund {\&} R. Schapire, Machine Learning: Proceedings of the Thirteenth International conference, ***, 148–156), but are more robust with respect to noise. Internal estimates monitor error, strength, and correlation and these are used to show the response to increasing the number of features used in the splitting. Internal estimates are also used to measure variable importance. These ideas are also applicable to regression.},
-author = {Breiman, Leo},
-doi = {10.1023/A:1010933404324},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Breiman - 2001 - Random Forests.pdf:pdf},
-issn = {1573-0565},
-journal = {Machine Learning 2001 45:1},
-keywords = {Artificial Intelligence,Control,Mechatronics,Natural Language Processing (NLP),Robotics,Simulation and Modeling},
-month = {oct},
-number = {1},
-pages = {5--32},
-publisher = {Springer},
-title = {{Random forests}},
-url = {https://link-springer-com.ezp-prod1.hul.harvard.edu/article/10.1023/A:1010933404324},
-volume = {45},
-year = {2001}
-}
-@article{Bialkowski2015,
-abstract = {Background and Objectives—Repeated blood donation produces iron deficiency. Changes in dietary iron intake do not prevent donation-induced iron deficiency. Prolonging the interdonation interval or using oral iron supplements can mitigate donation-induced iron deficiency. The most effective operational methods for reducing iron deficiency in donors are unknown.},
-author = {Bialkowski, W and Bryant, B J and Schlumpf, K S and Wright, D J and Birch, R and Kiss, J E and {D 'andrea}, P and Cable, R G and Spencer, B R and Vij, V and Mast, A E},
-doi = {10.1111/vox.12210},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bialkowski et al. - Unknown - The strategies to reduce iron deficiency in blood donors randomized trial design, enrolment and early rete.pdf:pdf},
-journal = {Vox sanguinis},
-number = {2},
-pages = {178--185},
-title = {{The strategies to reduce iron deficiency in blood donors randomized trial: design, enrolment and early retention}},
-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4300282/pdf/nihms646989.pdf},
-volume = {108},
-year = {2015}
+@article{Scheinker2019,
+abstract = {Importance: Obesity is a leading cause of high health care expenditures, disability, and premature mortality. Previous studies have documented geographic disparities in obesity prevalence. Objective: To identify county-level factors associated with obesity using traditional epidemiologic and machine learning methods. Design, Setting, and Participants: Cross-sectional study using linear regression models and machine learning models to evaluate the associations between county-level obesity and county-level demographic, socioeconomic, health care, and environmental factors from summarized statistical data extracted from the 2018 Robert Wood Johnson Foundation County Health Rankings and merged with US Census data from each of 3138 US counties. The explanatory power of the linear multivariate regression and the top performing machine learning model were compared using mean R2 measured in 30-fold cross validation. Exposures: County-level demographic factors (population; rural status; census region; and race/ethnicity, sex, and age composition), socioeconomic factors (median income, unemployment rate, and percentage of population with some college education), health care factors (rate of uninsured adults and primary care physicians), and environmental factors (access to healthy foods and access to exercise opportunities). Main Outcomes and Measures: County-level obesity prevalence in 2018, its association with each county-level factor, and the percentage of variation in county-level obesity prevalence explained by linear multivariate and gradient boosting machine regression measured with R2. Results: Among the 3138 counties studied, the mean (range) obesity prevalence was 31.5{\%} (12.8{\%}-47.8{\%}). In multivariate regressions, demographic factors explained 44.9{\%} of variation in obesity prevalence; socioeconomic factors, 33.0{\%}; environmental factors, 15.5{\%}; and health care factors, 9.1{\%}. 
The county-level factors with the strongest association with obesity were census region, median household income, and percentage of population with some college education. R2 values of univariate regressions of obesity prevalence were 0.238 for census region, 0.218 for median household income, and 0.160 for percentage of population with some college education. Multivariate linear regression and gradient boosting machine regression (the best-performing machine learning model) of obesity prevalence using all county-level demographic, socioeconomic, health care, and environmental factors had R2 values of 0.58 and 0.66, respectively (P {\textless} .001). Conclusions and Relevance: Obesity prevalence varies significantly between counties. County-level demographic, socioeconomic, health care, and environmental factors explain the majority of variation in county-level obesity prevalence. Using machine learning models may explain significantly more of the variation in obesity prevalence.},
+author = {Scheinker, David and Valencia, Areli and Rodriguez, Fatima},
+doi = {10.1001/jamanetworkopen.2019.2884},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Scheinker 2019 US obesity supplement.pdf:pdf;:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Scheinker, Valencia, Rodriguez - 2019 - Identification of factors associated with variation in US county-level obesity prevalence rates.pdf:pdf},
+issn = {25743805},
+journal = {JAMA network open},
+keywords = {The JAMA Network},
+month = {apr},
+number = {4},
+pages = {e192884},
+pmid = {31026030},
+publisher = {NLM (Medline)},
+title = {{Identification of factors associated with variation in US county-level obesity prevalence rates using epidemiologic vs machine learning models}},
+url = {https://jamanetwork.com/},
+volume = {2},
+year = {2019}
 }
-@article{Baart2013,
-abstract = {Background Blood donors that meet the hemoglobin (Hb) criteria for donation may have undetected subclinical iron deficiency. The aim of this study was to assess the prevalence of subclinical iron deficiency in whole blood donors with Hb levels above cutoff levels for donation by measuring zinc protoporphyrin (ZPP) levels. In addition, prevalence rates based on other iron variables were assessed for comparison. Study Design and Methods The study population comprised 5280 Dutch whole blood donors, who passed the Hb criteria for donation. During donor screening, Hb levels were measured in capillary samples (finger prick), and venous blood samples were taken for measurements of ZPP and other iron variables. These variables included ferritin, transferrin saturation, soluble transferrin receptor (sTfR), hepcidin, red blood cell mean corpuscular volume (MCV), and mean cell Hb (MCH). Results With a ZPP cutoff level of at least 100 $\mu$mol/mol heme, subclinical iron deficiency was present in 6.9{\%} of male donors and in 9.8{\%} of female donors. Based on other iron variables, iron deficiency was also observed. Prevalence rates ranged from 4.8{\%} (based on transferrin saturation) to 27.4{\%} (based on hepcidin concentration) in men and from 5.6{\%} (based on sTfR concentration) to 24.7{\%} (based on hepcidin concentration) in women. Conclusion Results from this study showed that subclinical iron deficiency is prevalent among blood donors that meet the Hb criteria for blood donation, based on ZPP levels and on other iron variables. This finding needs attention because these donors are at increased risk of developing iron deficiency affecting Hb formation and other cellular processes. {\textcopyright} 2012 American Association of Blood Banks.},
-author = {Baart, A. Mireille and {Van Noord}, Paulus A.H. and Vergouwe, Yvonne and Moons, Karel G.M. and Swinkels, Dorine W. and Wiegerinck, Erwin T. and {De Kort}, Wim L.A.M. and Atsma, Femke},
-doi = {10.1111/j.1537-2995.2012.03956.x},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Baart 2013 Dutch iron blood donors.pdf:pdf},
-issn = {00411132},
+@article{Patel2019,
+abstract = {BACKGROUND: Blood donation results in a loss of iron stores, which is particularly concerning for young female blood donors. This study examines the association of blood donation and iron deficiency among adolescent and adult females in the United States. STUDY DESIGN AND METHODS: A cross-sectional analysis was performed using data from the 1999–2010 National Health and Nutrition Examination Survey (NHANES). Females who reported their blood donation history in the preceding year and had serum ferritin (SF) measurements were included. Analyses were weighted and stratified by adolescents (16–19 years; n = 2419) and adults (20–49 years; n = 7228). Adjusted prevalence ratios (aPRs) were estimated by multivariable Poisson regression. Standard errors were estimated by Taylor series linearization. RESULTS: Geometric mean SF levels (ng/mL) were lower in blood donors compared to nondonors among adolescents (21.2 vs. 31.4; p {\textless} 0.001) and among adults (26.2 vs. 43.7; p {\textless} 0.001). The prevalence of absent iron stores (SF {\textless} 12 ng/mL) was higher in blood donors compared to nondonors among adolescents (22.6{\%} vs. 12.2{\%}; aPR = 2.03 [95{\%} confidence interval (CI) = 1.45–2.85]) and among adults (18.3{\%} vs. 9.8{\%}; aPR = 2.06 [95{\%} CI = 1.48–2.88]). Additionally, the prevalence of iron deficiency anemia (SF {\textless} 26 ng/mL and hemoglobin {\textless} 12.0 g/dL) was also higher in blood donors compared to nondonors among adolescents (9.5{\%} vs. 6.1{\%}; aPR = 2.10 [95{\%} CI = 1.13–3.90]) and among adults (7.9{\%} vs. 6.1{\%}; aPR = 1.74 [95{\%} CI = 1.06–2.85]). Similar results were observed in a sensitivity analysis restricted to adolescents aged 16 to 18 years. CONCLUSIONS: Blood donation is associated with iron deficiency among adolescent and adult females in the United States. These national data call for further development and implementation of blood donation practices aimed toward mitigating iron deficiency.},
+author = {Patel, Eshan U. and White, Jodie L. and Bloch, Evan M. and Grabowski, Mary K. and Gehrie, Eric A. and Lokhandwala, Parvez M. and Brunker, Patricia A.R. and Goel, Ruchika and Shaz, Beth H. and Ness, Paul M. and Tobian, Aaron A.R.},
+doi = {10.1111/trf.15179},
+file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Patel 2019 Iron women NHANES.pdf:pdf},
+issn = {15372995},
 journal = {Transfusion},
-month = {aug},
-number = {8},
-pages = {1670--1677},
-pmid = {23176175},
-publisher = {John Wiley {\&} Sons, Ltd},
-title = {{High prevalence of subclinical iron deficiency in whole blood donors not deferred for low hemoglobin}},
-url = {http://doi.wiley.com/10.1111/j.1537-2995.2012.03956.x},
-volume = {53},
-year = {2013}
-}
-@techreport{Rajbhandary2018,
-address = {Bethesday},
-author = {Rajbhandary, Srijana and Whitaker, Barbee I and Perez, Gabriela E},
-file = {::},
-institution = {AABB},
-pages = {1--91},
-title = {{The 2014-2015 AABB blood collection and utilization survey report}},
-url = {http://www.aabb.org/research/hemovigilance/bloodsurvey/Docs/2014-2015-AABB-Blood-Survey-Report.pdf?ct=483178b5c665113a4a67486385907873f0a434f88cddea5b6fece817de48206c5db7f5b0d5fe99d98ac08ff2cbe330ae8ed7602ce9b2b0b3ea6861794458c137},
-year = {2018}
+month = {may},
+number = {5},
+pages = {1723--1733},
+pmid = {30779173},
+publisher = {Blackwell Publishing Inc.},
+title = {{Association of blood donation with iron deficiency among adolescent and adult females in the United States: a nationally representative study}},
+url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/trf.15179},
+volume = {59},
+year = {2019}
 }
 @article{Letham2015,
 abstract = {We aim to produce predictive models that are not only accurate, but are also interpretable to human experts. Our models are decision lists, which consist of a series of if...then... statements (e.g., if high blood pressure, then stroke) that discretize a high-dimensional, multivariate feature space into a series of simple, readily interpretable decision statements. We introduce a generative model called Bayesian Rule Lists that yields a posterior distribution over possible decision lists. It employs a novel prior structure to encourage sparsity. Our experiments show that Bayesian Rule Lists has predictive accuracy on par with the current top algorithms for prediction in machine learning. Our method is motivated by recent developments in personalized medicine, and can be used to produce highly accurate and interpretable medical scoring systems. We demonstrate this by producing an alternative to the CHADS{\$}{\_}2{\$} score, actively used in clinical practice for estimating the risk of stroke in patients that have atrial fibrillation. Our model is as interpretable as CHADS{\$}{\_}2{\$}, but more accurate.},
@@ -561,33 +573,38 @@ @article{Letham2015
 volume = {9},
 year = {2015}
 }
-@article{Spencer2019,
-abstract = {BACKGROUND: High school students 16 to 18 years-old contribute 10{\%} of the US blood supply. Mitigating iron depletion in these donors is important because they continue to undergo physical and neurocognitive development. STUDY DESIGN AND METHODS: Study objectives were to determine the prevalence of iron depletion in 16- to 18-year-old donors and whether their risk for iron depletion was greater than adult donors. Successful, age-eligible donors were enrolled from high school blood drives at two large US blood centers. Plasma ferritin testing was performed with ferritin less than 12 ng/mL as our primary measure of iron depletion and ferritin less than 26 ng/mL a secondary measure. Multivariable repeated-measures logistic regression models evaluated the role of age and other demographic/donation factors. RESULTS: Ferritin was measured from 4265 enrollment donations September to November 2015 and 1954 follow-up donations through May 2016. At enrollment, prevalence of ferritin less than 12 ng/mL in teenagers was 1{\%} in males and 18{\%} in females making their first blood donation, and 8{\%} in males and 33{\%} in females with prior donations. Adjusted odds for ferritin less than 12 ng/mL were 2.1 to 2.8 times greater in 16- to 18-year-olds than in 19- to 49-year-olds, and for ferritin less than 26 ng/mL were 3.3- to 4.7-fold higher in 16- to 18-year-olds. Progression to hemoglobin deferral was twice as likely in 16- to 18-year-old versus 19- to 49-year-old females. CONCLUSION: Age 16 to 18 years-old is an independent risk factor for iron deficiency in blood donors at any donation frequency. Blood centers should implement alternate eligibility criteria or additional safety measures to protect teenage donors from iron depletion.},
-author = {Spencer, Bryan R. and Bialkowski, Walter and Creel, Darryl V. and Cable, Ritchard G. and Kiss, Joseph E. and Stone, Mars and McClure, Christopher and Kleinman, Steven and Glynn, Simone A. and Mast, Alan E.},
-doi = {10.1111/trf.15133},
-file = {:G$\backslash$:/My Drive/References/PDFs not auto added/Spencer 2019 High school aged blood donors.pdf:pdf},
-issn = {15372995},
-journal = {Transfusion},
-month = {jan},
-number = {5},
-pages = {1706--1716},
-pmid = {30633813},
-publisher = {John Wiley {\&} Sons, Ltd (10.1111)},
-title = {{Elevated risk for iron depletion in high-school age blood donors}},
-url = {http://doi.wiley.com/10.1111/trf.15133},
-volume = {59},
+@article{Large2019,
+abstract = {Our hypothesis is that building ensembles of small sets of strong classifiers constructed with different learning algorithms is, on average, the best approach to classification for real-world problems. We propose a simple mechanism for building small heterogeneous ensembles based on exponentially weighting the probability estimates of the base classifiers with an estimate of the accuracy formed through cross-validation on the train data. We demonstrate through extensive experimentation that, given the same small set of base classifiers, this method has measurable benefits over commonly used alternative weighting, selection or meta-classifier approaches to heterogeneous ensembles. We also show how an ensemble of five well-known, fast classifiers can produce an ensemble that is not significantly worse than large homogeneous ensembles and tuned individual classifiers on datasets from the UCI archive. We provide evidence that the performance of the cross-validation accuracy weighted probabilistic ensemble (CAWPE) generalises to a completely separate set of datasets, the UCR time series classification archive, and we also demonstrate that our ensemble technique can significantly improve the state-of-the-art classifier for this problem domain. We investigate the performance in more detail, and find that the improvement is most marked in problems with smaller train sets. We perform a sensitivity analysis and an ablation study to demonstrate the robustness of the ensemble and the significant contribution of each design element of the classifier. We conclude that it is, on average, better to ensemble strong classifiers with a weighting scheme rather than perform extensive tuning and that CAWPE is a sensible starting point for combining classifiers.},
+author = {Large, James and Lines, Jason and Bagnall, Anthony},
+doi = {10.1007/s10618-019-00638-y},
+file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Large, Lines, Bagnall - 2019 - A probabilistic classifier ensemble weighting scheme based on cross-validated accuracy estimates.pdf:pdf},
+issn = {1573756X},
+journal = {Data Mining and Knowledge Discovery},
+keywords = {Classification,Ensemble,Heterogeneous,Weighted},
+month = {nov},
+number = {6},
+pages = {1674--1709},
+publisher = {Springer New York LLC},
+title = {{A probabilistic classifier ensemble weighting scheme based on cross-validated accuracy estimates}},
+url = {https://doi.org/10.1007/s10618-019-00638-y},
+volume = {33},
 year = {2019}
 }
-@article{Cable2012,
-abstract = {Background-Blood donors are at risk of iron deficiency. We evaluated the effects of blood donation intensity on iron and hemoglobin in a prospective study.},
-author = {Cable, Ritchard G and Glynn, Simone A and Kiss, Joseph E and Mast, Alan E and Steele, Whitney R and Murphy, Edward L and Wright, David J and Sacher, Ronald A and Gottschall, Jerry L and Tobler, Leslie H},
-doi = {10.1111/j.1537-2995.2011.03401.x},
-file = {:C$\backslash$:/Users/alton/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Cable et al. - 2012 - Iron Deficiency in Blood Donors The REDS-II Donor Iron Status Evaluation (RISE) Study.pdf:pdf},
-journal = {Transfusion},
-number = {4},
-pages = {702--711},
-title = {{Iron deficiency in blood donors: the REDS-II donor iron status evaluation (RISE) study}},
-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618489/pdf/nihms330328.pdf},
-volume = {52},
-year = {2012}
+@article{Ustun2019,
+abstract = {Risk scores are simple classification models that let users make quick risk predictions by adding and subtracting a few small numbers. These models are widely used in medicine and criminal justice, but are difficult to learn from data because they need to be calibrated, sparse, use small integer coefficients, and obey application-specific operational constraints. In this paper, we present a new machine learning approach to learn risk scores. We formulate the risk score problem as a mixed integer nonlinear program, and present a cutting plane algorithm for non-convex settings to efficiently recover its optimal solution. We improve our algorithm with specialized techniques to generate feasible solutions, narrow the optimality gap, and reduce data-related computation. Our approach can fit risk scores in a way that scales linearly in the number of samples, provides a certificate of optimality, and obeys real-world constraints without parameter tuning or post-processing. We benchmark the performance benefits of this approach through an extensive set of numerical experiments, comparing to risk scores built using heuristic approaches. We also discuss its practical benefits through a real-world application where we build a customized risk score for ICU seizure prediction in collaboration with the Massachusetts General Hospital.},
+archivePrefix = {arXiv},
+arxivId = {1610.00168},
+author = {Ustun, Berk and Rudin, Cynthia},
+eprint = {1610.00168},
+file = {::},
+journal = {Journal of Machine Learning Research},
+keywords = {calibration,classification,constraints,cutting plane methods,discrete optimization,interpretability,mixed integer nonlinear programming,scoring systems},
+month = {sep},
+number = {150},
+pages = {75},
+publisher = {Microtome Publishing},
+title = {{Learning optimized risk scores}},
+url = {http://arxiv.org/abs/1610.00168},
+volume = {20},
+year = {2019}
 }