-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPoisson generating process (3.1).R
101 lines (69 loc) · 3.6 KB
/
Poisson generating process (3.1).R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
source("ML functions.R")
# You may need to install the package "vectrs" to get bayesplot to work
library(rstan)
library(bayesplot)
library(posterior)
library(ggplot2)
library(reshape2)
library(MCMCprecision)
rstan_options(auto_write = T)
ncores <- 4 #Change to less than 4 if there are less than 4 cores available on machine
options(mc.cores = ncores)
##Main run
#Specify the underlying probability matrix and vector of row means
P_real <- matrix(c(0.9, 0.05, 0.05, 0.1, 0.8, 0.1, 0.1, 0.1, 0.8), nrow = 3, ncol = 3,
byrow = T)
mu_real <- c(10, 100, 50)
set.seed(5)
#Generate C (original confusion matrix estimate) from s (true category counts
#in training set)
s <- c(50, 200, 100)
C <- Cgen(s, P_real)
C
#Generate prior matrix parameters
alpha_P <- matrix(1, nrow = 3, ncol = 3)
#Simulate N = 15 observations according to above parameters
N <- 15
distribution = rep("Poisson", length(mu_real))
Wobs <- observedW.gen.multi(n = N, mu = mu_real, pmat = P_real, distribution = distribution)
Wobs
#Calculate column means
Wmeans <- colMeans(Wobs)
Wmeans
#Running stan model
w_dat <- list(N = s, L = N, M = 3, P = P_real, w = Wobs, C = C, alpha_P = alpha_P)
st_model <- stan_model(file = "poisson_model.stan")
mcmc.fit <- sampling(st_model, data = w_dat, iter=4000, seed = 5)
##Model checking
posterior <- as.matrix(mcmc.fit)
q1 = round(quantile(posterior[,1], probs=c(0.05, 0.95)), 1) #90% credible interval for category 1 mean
q2 = round(quantile(posterior[,2], probs=c(0.05, 0.95)), 1) #90% credible interval for category 2 mean
q3 = round(quantile(posterior[,3], probs=c(0.05, 0.95)), 1) #90% credible interval for category 3 mean
q1; q2; q3
##Bayesplot diagnostics
#Preparing data for plotting
arr.fit <- as.array(mcmc.fit)
mat.fit <- as.matrix(mcmc.fit)
dfmu.fit <- data.frame(mat.fit[,1:3])
mu_names <- c("mu[1]", "mu[2]", "mu[3]")
names(dfmu.fit) <- mu_names
dfmu.plt <- melt(dfmu.fit) # ignore R message about "No id variables ..."
dfmu.plt$true <- unlist(lapply(1:3, function(x) {rep(mu_real[x], nrow(dfmu.plt)/3)}))
dfmu.plt$wmeans <- unlist(lapply(1:3, function(x) {rep(Wmeans[x], nrow(dfmu.plt)/3)}))
#Effective sample size and Rhat
npars <- dim(arr.fit)[3]
max(sapply(1:npars, function(i) {rstan::Rhat(arr.fit[,,i])})) #3277 (R seed = 5, stan seed = 5)
min(sapply(1:npars, function(i) {ess_basic(arr.fit[,,i])})) #1.00186 (R seed = 5, stan seed = 5)
#Trace plots of (post burn-in) MCMC draws for the category mean parameters
par_trace <- mcmc_trace(arr.fit, regex_pars = "mu\\[[1,2,3]\\]", facet_args = list(labeller = ggplot2::label_parsed, ncol = 1, strip.position = "left"))
par_trace
#Histogram of posterior draws of each of the category mean parameters with true values plotted on
compare_hist <- mcmc_recover_hist(arr.fit[,,c(1, 2, 3)], mu_real, facet_args = list(labeller = ggplot2::label_parsed)) + xlab("Parameter value") + ylab("Number of samples")
compare_hist
# Violin plots for each of the category mean parameters with true values plotted on
# (white dot) and also the mean of the w data (red dot)
# Included in paper as Figure 2
par_viol_withpoints <- ggplot(dfmu.plt) + geom_violin(aes(variable, value), fill = "#6497b1", scale = "width") + geom_point(aes(variable, true), color = "white", size = 2) + geom_point(aes(variable, wmeans), color = "#6497b1", fill = "red", shape = 23, size = 2.5) + scale_x_discrete(labels = expression(mu[1], mu[2], mu[3])) + xlab("Parameter") + ylab("Value")
#jpeg("viol_mu_Pois_withpoints4.jpg", height=15, width=15, res=600, pointsize=1, units="cm")
par_viol_withpoints
#dev.off()