Skip to content

Commit

Permalink
Resolved conflicts
Browse files Browse the repository at this point in the history
Merge branch 'main' of https://github.com/altonrus/iron_trajectories into main

# Conflicts:
#	2_scripts/3_feature_importance.R
#	4_output/figs/feat_imp_both_top15.png
#	5_manuscript/iron_trajectories.Rmd
#	5_manuscript/iron_trajectories.docx
  • Loading branch information
altonrus committed Aug 11, 2021
2 parents d5416fb + 13d6b50 commit 703708e
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 23 deletions.
20 changes: 20 additions & 0 deletions 2_scripts/3_feature_importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ setDT(featimp_noXB)[baselines_noXB,
featimp_noXB<-featimp_noXB[featname_lookup, on="feature",nomatch=0]



ggplot(featimp_noXB)+
geom_boxplot(aes(x=reorder(display_name, AUC_multi_pctchg, FUN = median), y=AUC_multi_pctchg))+
coord_flip()+geom_hline(yintercept=0, color="red")+
Expand Down Expand Up @@ -75,6 +76,7 @@ setDT(featimp_XB)[baselines_XB,

featimp_XB<-featimp_XB[featname_lookup, on="feature",nomatch=0]


# ggplot(featimp_XB, aes(y=reorder(feature, AUC_multi_pctchg, FUN = median),
# x=AUC_multi_pctchg))+
# geom_density_ridges()+
Expand Down Expand Up @@ -105,7 +107,11 @@ ggsave("./4_output/figs/feat_imp_XB_top15.png",
# Combined fig
# Stack the two per-model feature-importance tables into one table, tagging
# each row with the model it came from in a leading `mod` column.
# (Merge conflict resolved in favour of the correctly spelled
# "Standard biomarkers" label.)
featimp_both <- rbind(
  cbind(mod = "Extra biomarkers", featimp_XB),
  cbind(mod = "Standard biomarkers", featimp_noXB)
)


Expand All @@ -118,3 +124,17 @@ ggplot(featimp_both[display_name %in% c(top15_noXB, top15_XB)])+

ggsave("./4_output/figs/feat_imp_both_top15.png",
width = 5, height = 4, units = "in")
# Combined median table
# For each model, take the median of AUC_multi_pctchg per feature, then stack
# the two summaries with a `model` column ("XB" / "noXB") and write the result
# to CSV. (Merge conflict resolved by keeping the incoming branch's code; the
# HEAD side was empty.)
featimp_XB_median <- featimp_XB[,
  list(median_AUC_pctchg = median(AUC_multi_pctchg)),
  by = feature
]
featimp_noXB_median <- featimp_noXB[,
  list(median_AUC_pctchg = median(AUC_multi_pctchg)),
  by = feature
]

featimp_median <- rbind(
  cbind(featimp_XB_median, model = "XB"),
  cbind(featimp_noXB_median, model = "noXB")
)
fwrite(featimp_median, "./4_output/feature_importance_medians.csv")
88 changes: 88 additions & 0 deletions 4_output/feature_importance_medians.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
feature,median_AUC_pctchg,model
DER_RBC_Last12months,-0.000185915229897558,XB
DER_RBC_Last24months,-0.000302776643216761,XB
DER_RBCLoss_mL,-0.00212785721675668,XB
DER_DaysRBCLoss,0.00212069802271586,XB
DER_DaysDRLoss,-3.67203754750615e-05,XB
RQ1_Ever_Donated,-0.000103507632037124,XB
cumLifetimeDonations,0.000861402885265816,XB
FingerstickHGB_equiv,0.00443686824600925,XB
DD_ABO_RH,-0.0004435880739737,XB
DER_AdjVenousHgb,0.000685765421891465,XB
DER_Weight,0.000457447611378972,XB
DER_Height,0.000927173050739652,XB
BMI,0.000438907662160802,XB
DER_EBV,0.00273531177075801,XB
DER_RedCellVolume,-0.00349174285481035,XB
DER_PercentRBCLoss,0.00143656213513137,XB
DD_Country,-6.96759589184439e-06,XB
DER_Age,-0.000282021917054784,XB
Gender_F,-8.19860500688867e-05,XB
DD_Raceth,-0.000389176410880195,XB
RQ7_Ever_Smoked,0.000770392174190851,XB
RQ8_Smoked_Past_90Days,2.43037878035976e-05,XB
RQ11_Liver,-0.000106484242089831,XB
RQ11_Beef,-0.000347832627835958,XB
RQ11_LPCT,0.000167438765771861,XB
RQ11_Clams,-0.000122357267752535,XB
RQ11_OMSS,-0.000189966110539133,XB
RQ11_OtrFish,-0.000193569753988068,XB
RQ11_Eggs,4.52073417720874e-05,XB
RQ11_Dairy,-0.000353742510298574,XB
compositeIronScore,-0.000130087935291848,XB
supp_iron_pct_of_daily,0.000278110556570099,XB
multivitamins_per_week,-0.000269880570981962,XB
RQ17_NumberOfPeriods,0.000382195422268364,XB
RQ18_Menstrual_Flow,0.00192924708460815,XB
menstrual_flow_times_freq,4.96204390035059e-06,XB
RQ19_Ever_Pregnant,-0.000243612104399751,XB
RQ20_NumberOfPregnancies,7.78094527312066e-05,XB
RQ21_NumberOfLiveBirths,0.000358996620618697,XB
gender_menstrating_cohorts,-6.19674006808694e-05,XB
ARUP_Ferritin,0.0359557200882218,XB
ARUP_STR,-0.00237275153328058,XB
DER_ARUP_log_Ferr,-0.00665611951042507,XB
DER_ARUP_log_STfR_Ferr,-0.00388899104047682,XB
DER_BodyIron,-0.00484544340220101,XB
time_to_fu,0.0151557736622218,XB
DER_RBC_Last12months,0.000281160311159002,noXB
DER_RBC_Last24months,0.0203829682389969,noXB
DER_RBCLoss_mL,0.000597300267872413,noXB
DER_DaysRBCLoss,0.00321579605803409,noXB
DER_DaysDRLoss,1.68645269467925e-05,noXB
RQ1_Ever_Donated,-0.000135684345694789,noXB
cumLifetimeDonations,0.00335753457766085,noXB
FingerstickHGB_equiv,0.0178143797784942,noXB
DD_ABO_RH,0.00354916989368776,noXB
DER_AdjVenousHgb,0.0306191946935143,noXB
DER_Weight,0.000623781669961677,noXB
DER_Height,0.002729929180144,noXB
BMI,0.0057021968570862,noXB
DER_EBV,0.0037697616722523,noXB
DER_RedCellVolume,0.006848688963954,noXB
DER_PercentRBCLoss,0.00271782650044966,noXB
DD_Country,-6.09111291662264e-05,noXB
DER_Age,0.00438168919773054,noXB
Gender_F,0.00109971991135721,noXB
DD_Raceth,0.0015427131515774,noXB
RQ7_Ever_Smoked,0.00479843773458306,noXB
RQ8_Smoked_Past_90Days,5.93002631386062e-05,noXB
RQ11_Liver,0.000464912412662309,noXB
RQ11_Beef,0.00499029657656764,noXB
RQ11_LPCT,0.000737657495048408,noXB
RQ11_Clams,0.00173592559051128,noXB
RQ11_OMSS,0.000931700257256853,noXB
RQ11_OtrFish,0.00216027220952832,noXB
RQ11_Eggs,0.00139307740664848,noXB
RQ11_Dairy,0.00142592543428316,noXB
compositeIronScore,0.00613843772184326,noXB
supp_iron_pct_of_daily,0.00457642211093289,noXB
multivitamins_per_week,0.000696990820080279,noXB
RQ17_NumberOfPeriods,0.000521779387632847,noXB
RQ18_Menstrual_Flow,0.00162307283089049,noXB
menstrual_flow_times_freq,0.00183507102540359,noXB
RQ19_Ever_Pregnant,6.16045933960414e-05,noXB
RQ20_NumberOfPregnancies,7.75110554445592e-05,noXB
RQ21_NumberOfLiveBirths,0.0013546095909115,noXB
gender_menstrating_cohorts,0.000226920289119022,noXB
time_to_fu,0.0485898780661315,noXB
69 changes: 46 additions & 23 deletions 5_manuscript/iron_trajectories.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ knitr::opts_chunk$set(echo = FALSE)

# Introduction


> Repeat blood donation can cause or exacerbate iron deficiency, with higher incidence among teen donors and premenopausal women [@Cable2012; @Salvin2014; @Spencer2019; @Baart2013; @Rigas2014; @Patel2019]. In the United States, potential donors are screened using fingerstick hemoglobin or hematocrit tests and deferred if levels are below a minimum cutoff. Such low hemoglobin deferrals prevent some collections from iron deficient donors but consume time and resources from both donor and blood center, decreasing donor satisfaction and the likelihood of returning for future donations [@Custer2007]. Because fingerstick hemoglobin is an unreliable indicator of true iron stores, many donors qualify to donate despite having low or absent underlying iron stores [@Baart2013]. More reliable measures of iron status include ferritin, zinc protoporphyrin, soluble transferrin receptor, and hepcidin, but these are more costly to measure and not available as point of care tests [@Kiss2018]. Past studies have identified several factors that increase risk of iron deficiency among blood donors. The Danish Blood Donor Study found that sex, menopause status, and donation history were the strongest predictors of iron deficiency among donors, and weight, age, vitamin use, and diet were also significant [@Rigas2014]. Similar results have been found for donors in the United States, Australia, and the Netherlands [@Cable2012; @Salvin2014; @Spencer2019; @Baart2013; @Patel2019]. Other studies have analyzed predictors for a low hemoglobin deferral for repeat blood donors, identifying age, time since last donation, and donation history as strong predictors [@Baart2011; @Baart2012]. To our knowledge, no prediction model has been developed that considers the competing risks of hemoglobin deferral and of collecting blood from a donor with sufficient hemoglobin but low or absent underlying iron stores.

> In this study, we developed machine learning models to estimate how risk of hemoglobin deferral and completed donations from donors with low or absent iron stores develop as a function of the donation interval -- the length of time from an index donation until the donor returns for a subsequent donation attempt -- in a cohort of donors from the REDS-II Iron Status Evaluation (RISE) study [@Cable2016]. We also compared predictive performance with and without ferritin and soluble transferrin receptor (STfR), two biomarkers that were available for many donations in the RISE study but are not routinely collected by most US blood centers.
Expand Down Expand Up @@ -214,6 +215,7 @@ tbl_labels_per_idx <- table(dt.md[ , .N, by = donation_id]$N)
```


> In the RISE dataset, a total of `r nrow(dt.firstreturn)` donations from `r dt.firstreturn[, uniqueN(RandID)]` donors were followed by at least one follow-up visit. We removed `r dt.firstreturn[is.na(FingerstickHGB_equiv), .N]` index donations because hemoglobin was not recorded, and we removed a further `r dt.firstreturn[time_to_fu < 56, .N]` index donations from the first return dataset because the first follow-up visit with significant iron loss was less than 56 days later. The first return dataset contained `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56, .N]` index donations labeled with the outcome of the first follow-up donation. That outcome was a hemoglobin deferral for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 1, .N]` index donations; a low-iron donation for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 2, .N]`; an absent iron donation for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 3, .N]`; no adverse outcome for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == 0, .N]`; and a completed donation with unknown iron status for `r dt.firstreturn[!is.na(FingerstickHGB_equiv) & time_to_fu >= 56 & fu_outcome == -1, .N]`. The model development dataset included `r dt.md[ , uniqueN(donation_id)]` unique index donations from `r dt.md[ , uniqueN(RandID)]` donors. `r tbl_labels_per_idx[[1]]` index donations were labeled with one follow-up donation, `r tbl_labels_per_idx[[2]]` were labeled twice, and `r dt.md[ , uniqueN(donation_id)] - tbl_labels_per_idx[[1]] - tbl_labels_per_idx[[2]]` were labeled with 3 or more follow-up visit outcomes (maximum of 8).

## Prediction model
Expand Down Expand Up @@ -293,34 +295,19 @@ block_caption("One vs. all ROC curves with and without ferritin, soluble transfe
autonum = fig_num)
```

#####

```{r fig.width=5, fig.height=6}
fig_num <- run_autonum(seq_id = "fig",
pre_label = "Figure ",
bkm="f-var-imp-noXB",
bkm_all = TRUE,
prop = fp_text(bold=TRUE, underlined = TRUE))
knitr::include_graphics("../4_output/figs/feat_imp_noXB.png")
#####

block_caption("Relative variable importance for the top \"standard biomarkers\" model.",
style = "Image Caption",
autonum = fig_num)
```

#####

```{r fig.width=5, fig.height=6}
fig_num <- run_autonum(seq_id = "fig",
pre_label = "Figure ",
fig_num <- run_autonum(seq_id = "sfig",
pre_label = "Figure S",
bkm="f-var-imp-XB",
bkm_all = TRUE,
prop = fp_text(bold=TRUE, underlined = TRUE))
knitr::include_graphics("../4_output/figs/feat_imp_XB.png")
knitr::include_graphics("../4_output/figs/feat_imp_both_top15.png")
block_caption("Relative variable importance for the top \"extra biomarkers\" model.",
# Caption for the combined top-15 variable importance figure
# (typo "imprtance" corrected).
block_caption("Relative variable importance for the top \"standard\" and \"extra\" biomarker models. Variables were included in this figure if among the top 15 most important variables for at least one of the models. Full variable importance plots shown in the supplement.",
              style = "Image Caption",
              autonum = fig_num)
```
Expand Down Expand Up @@ -368,7 +355,7 @@ fig_num <- run_autonum(seq_id = "fig",
knitr::include_graphics("../4_output/figs/ae_traject_by_Venous_HGB_tertile.png")
block_caption(".",
block_caption(".",
style = "Image Caption",
autonum = fig_num)
```
Expand Down Expand Up @@ -421,13 +408,15 @@ Blank.

# Supplemental tables


```{r, echo=FALSE}
stab_num <- run_autonum(seq_id = "stab",
pre_label = "Table S",
bkm="t-feature-engineering",
bkm_all = TRUE,
prop = fp_text(bold=TRUE, underlined = TRUE))
t_feature_engineering <- as_flextable(as_grouped_data(read_excel("../1_data/tables.xlsx", sheet = "features"),
groups = "Category"))
t_feature_engineering <- compose(t_feature_engineering, i = ~ !is.na(Category), j = "Variable name",
Expand Down Expand Up @@ -483,6 +472,7 @@ t_mod_tuning <- set_caption(t_mod_tuning,
t_mod_tuning
```


#####

# Supplemental figures
Expand All @@ -503,20 +493,53 @@ block_caption("Average multiclass AUC for each evaluated model configuration as

#####

# Supplemental figures

```{r fig.width=6, fig.height=5.5}
sfig_num <- run_autonum(seq_id = "sfig",
pre_label = "Figure S",
bkm="f-ensemble-auc",
bkm_all = TRUE,
prop = fp_text(bold=TRUE, underlined = TRUE))
knitr::include_graphics("../4_output/figs/AUC_tuning_no_ensemble.png")
knitr::include_graphics("../4_output/figs/AUC_top_models.png")
# Caption for the ensemble/base model AUC figure
# (grammar fix: "for across" -> "across").
block_caption("Distribution of multiclass AUC across the 15 tuning sets for the top ensemble model configurations and the base model configurations that comprised them. For both the ``standard`` and ``extra biomarkers`` versions, the top ensemble was an average of the base models.",
              style = "Image Caption",
              autonum = sfig_num)
```

#####


```{r fig.width=5, fig.height=6}
fig_num <- run_autonum(seq_id = "sfig",
pre_label = "Figure S",
bkm="f-var-imp-noXB",
bkm_all = TRUE,
prop = fp_text(bold=TRUE, underlined = TRUE))
knitr::include_graphics("../4_output/figs/feat_imp_noXB.png")
block_caption("Relative variable importance for the top \"standard biomarkers\" model.",
style = "Image Caption",
autonum = fig_num)
```

#####

```{r fig.width=5, fig.height=6}
# Supplemental figure: includes feat_imp_XB.png and attaches an auto-numbered
# "Figure S" caption from the "sfig" sequence.
# NOTE(review): the bookmark "f-var-imp-XB" also appears on the chunk that
# includes feat_imp_both_top15.png -- confirm bookmark ids are meant to be
# unique before relying on cross-references to this figure.
fig_num <- run_autonum(seq_id = "sfig",
pre_label = "Figure S",
bkm="f-var-imp-XB",
bkm_all = TRUE,
prop = fp_text(bold=TRUE, underlined = TRUE))
knitr::include_graphics("../4_output/figs/feat_imp_XB.png")
block_caption("Relative variable importance for the top \"extra biomarkers\" model.",
style = "Image Caption",
autonum = fig_num)
```

#####
Expand Down
Binary file modified 5_manuscript/iron_trajectories.docx
Binary file not shown.

0 comments on commit 703708e

Please sign in to comment.