-
Notifications
You must be signed in to change notification settings - Fork 0
/
Reanalysis_Notebook.Rmd
129 lines (115 loc) · 4.99 KB
/
Reanalysis_Notebook.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
---
title: "Reanalysis of a random medical paper"
output:
html_document:
df_print: paged
---
```{r,echo=FALSE,message=FALSE}
library(tidyverse)
library(readxl)
library(knitr)
library(kableExtra)
library(stringr)
#download the publication data from Figshare
url1<-'https://ndownloader.figshare.com/files/9422038'
p1f <- tempfile()
download.file(url1, p1f, mode="wb")
#read in data. Note that we need to define the NA
my_data<-read_excel(path = p1f, sheet = 1, na="NA")
```
Recreating some results from publication:
https://doi.org/10.1038/s41598-018-27482-2
The data set contains mortality of patients after burning damage to lung tissue depending on
whether and where they had inhalation burns
first create a new variable that is a factor (0=normal, 1=subjective, 2=upper, and 3=lower)
create a data frame for a barplot
group by the new inhalation severity variable and the PFdivide
("We also divided the patients into four groups depending on the PF ratio (>300, 200-300, 100-200, and <100)")
you will need the mean and sd of mortality (look out for spelling mistake) for each level of inhalation injury (INHdiv)
from this calculate the upper (ymax) and lower (ymin) bounds of the error bars (S.E.M.:mean +- sd/sqrt(n))
create a barplot from this with mean mortality on y axis and Pfdivide on x axis
create a facet for each severity level
add error bars
make nice axis labels
```{r}
my_data<-
my_data %>%
mutate(INHdiv_fac=factor(INHdiv,labels=c("normal","subjective","upper","lower")))
plot_data<-
my_data %>%
group_by(INHdiv_fac,Pfdivide) %>%
summarise(mean_mort=mean(mortaltiy),
sd_mort=sd(mortaltiy),
sem_mort=sd_mort/sqrt(n()),
ymax=mean_mort+sem_mort,
ymin=mean_mort-sem_mort)
p.1<-ggplot(aes(y=mean_mort,x=Pfdivide),data=plot_data)+geom_bar(stat="identity")+
geom_errorbar(aes(ymax=ymax,ymin=ymin),width=.1)+
facet_wrap(~INHdiv_fac)+
theme_classic()+
ylab("Mean mortality")+
xlab(expression("PaO"[2]*"/FiO"[2]*" (PF) ratio"))
p.1
```
We want to do a standard logistic regression on mortality and whether this depends on the PF Ratio:
PaO2/FiO2 ratio is the ratio of arterial oxygen partial pressure to fractional inspired oxygen.
It is a widely used clinical indicator of hypoxaemia, though its diagnostic utility is disputed.
At sea level normal is > 500mmHg.
```{r}
names(my_data)[8]<-c("PFratio")
hist_data<-
my_data %>%
#first add new variable that codes breaks
mutate(breaks = findInterval(PFratio, seq(20,935,10))) %>%
#then group by dead/alive and the breaks
group_by(mortaltiy, breaks) %>%
#count
summarise(n = n()) %>%
#if patients are dead, we want them to show on top with histogram on top so you need to
#calculate in this case the percentage as 1-percentage
mutate(pct = ifelse(mortaltiy==0, n/sum(n), 1 - n/sum(n)),breaks=seq(20,935,10)[breaks])
######
#
#####
ggplot() + #this just sets an empty frame to build upon
#first add a histopgram with geom_segment use the help of geom_segment
geom_segment(data=hist_data, size=2, show.legend=FALSE,
aes(x=breaks, xend=breaks, y=mortaltiy, yend=pct, colour=factor(mortaltiy)))+
#then predict a logistic regression via stat_smooth and the glm method (we will cover the details in the next session)
stat_smooth(data=my_data,aes(y=mortaltiy,x=PFratio),method="glm", method.args = list(family = "binomial"))+
#some cosmetics
scale_y_continuous(limits=c(-0.02,1.02)) +
scale_x_continuous(limits=c(10,950)) +
theme_bw(base_size=12)+
ylab("Patient Alive=0/Dead=1")+xlab(expression("PaO"[2]*"/FiO"[2]*" (PF) ratio"))
```
And now we plot this for each severity level separately.
```{r}
hist_data<-
my_data %>%
#first add new variable that codes breaks
mutate(breaks = findInterval(PFratio, seq(20,935,10))) %>%
#then group by dead/alive and the breaks
group_by(INHdiv_fac,mortaltiy, breaks) %>%
#count
summarise(n = n()) %>%
#if patients are dead, we want them to show on top with histogram on top so you need to
#calculate in this case the percentage as 1-percentage
mutate(pct = ifelse(mortaltiy==0, n/sum(n), 1 - n/sum(n)),breaks=seq(20,935,10)[breaks])
######
#
#####
ggplot() + #this just sets an empty frame to build upon
facet_wrap(~INHdiv_fac)+
#first add a histopgram with geom_segment use the help of geom_segment
geom_segment(data=hist_data, size=2, show.legend=FALSE,
aes(x=breaks, xend=breaks, y=mortaltiy, yend=pct, colour=factor(mortaltiy)))+
#then predict a logistic regression via stat_smooth and the glm method
#(we will cover the details in the next session)
stat_smooth(data=my_data,aes(y=mortaltiy,x=PFratio),method="glm", method.args = list(family = "binomial"))+
#some cosmetics
scale_y_continuous(limits=c(-0.02,1.02)) +
scale_x_continuous(limits=c(10,950)) +
theme_bw(base_size=12)+
ylab("Patient Alive=0/Dead=1")+xlab(expression("PaO"[2]*"/FiO"[2]*" (PF) ratio"))
```