-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added everything we have so far with proper testing.
- Loading branch information
1 parent
c2b4837
commit 02a0331
Showing
17 changed files
with
2,649 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,17 @@ Type: Package | |
Title: Bayesian Marginal Structural Models | ||
Version: 0.1.0 | ||
Author: Kuan Liu, Xiao Yan | ||
Maintainer: Kuan Liu, Xiao Yan | ||
Imports: | ||
Maintainer: Xiao Yan <[email protected]> | ||
Imports: | ||
MCMCpack, | ||
doParallel, | ||
foreach, | ||
coda (>= 0.19-4) | ||
License: MIT License | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.3.1 | ||
Suggests: | ||
testthat (>= 3.0.0) | ||
Config/testthat/edition: 3 | ||
Description: This package provides tools for estimating causal effects using Bayesian Marginal Structural Models. It includes functions for Bayesian non-parametric bootstrap to calculate causal effects and for plotting the average potential outcome (APO). The package aims to facilitate the analysis of observational data with time-varying treatments, providing a user-friendly software to Bayesian causal inference methods. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,8 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(bayesmsm) | ||
export(plot_APO) | ||
export(plot_est_box) | ||
import(MCMCpack) | ||
import(doParallel) | ||
import(foreach) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
#' Bayesian Marginal Structural Model Bootstrap Estimation | ||
#' | ||
#' This function performs Bayesian non-parametric bootstrap to estimate causal | ||
#' effects in Bayesian marginal structural models.It supports both continuous | ||
#' (gaussian) and binary (binomial) outcome variables. | ||
#' | ||
#' @param ymodel A formula representing the outcome model with interactions. | ||
#' @param nvisit Number of visits or time points. | ||
#' @param reference Reference intervention across all visits. Default is a vector of all 0's with length nvisit (i.e. never treated). | ||
#' @param comparator Comparison intervention across all visits. Default is a vector of all 1's with length nvisit (i.e. always treated). | ||
#' @param family Outcome distribution family; "gaussian" (default) for continuous outcomes or "binomial" for binary outcomes. | ||
#' @param data The dataset. | ||
#' @param wmean Vector of treatment assignment weights. Default is rep(1, 1000). | ||
#' @param nboot Number of bootstrap iterations. | ||
#' @param optim_method Optimization method to be used. Default is 'BFGS'. | ||
#' @param parallel Whether parallel computation should be used. Default is TRUE. | ||
#' @param ncore Number of cores to use for parallel computation. Default is 4. | ||
#' | ||
#' @return It returns an object of class `bayesmsm` that contains the information about the data, model, etc. | ||
#' | ||
#' An object of class `bayesmsm` is a list containing at least the following components: | ||
#' * `mean`, the mean of the bootstrap estimates | ||
#' * `sd`, the standard deviation of the bootstrap estimates | ||
#' * `quantile`, the 95% quantiles of the bootstrap estimates | ||
#' * `bootdata`, a data frame of bootstrapped estimates | ||
#' * `reference`, the reference intervention level | ||
#' * `comparator`, the camparison intervention level | ||
#' | ||
#' @import foreach | ||
#' @import doParallel | ||
#' @import MCMCpack | ||
#' | ||
#' @export | ||
#' | ||
#' @examples Using the testdata in the data folder: | ||
#' | ||
#' # Continuous outcome | ||
#' testdata <- readr::read_csv("inst/extdata/continuous_outcome_data.csv") | ||
#' model <- bayesmsm(ymodel = y ~ a_1+a_2, | ||
#' nvisit = 2, | ||
#' reference = c(rep(0,2)), | ||
#' comparator = c(rep(1,2)), | ||
#' family = "gaussian", | ||
#' data = testdata, | ||
#' wmean = rep(1, 1000), | ||
#' nboot = 1000, | ||
#' optim_method = "BFGS", | ||
#' parallel = FALSE, | ||
#' ncore = 6) | ||
#' | ||
#' | ||
#' | ||
bayesmsm <- function(ymodel, | ||
nvisit, | ||
reference = c(rep(0,nvisit)), # An example of never treated | ||
comparator = c(rep(1,nvisit)), | ||
family = "gaussian", # "gaussian" or "binomial" | ||
data, | ||
wmean = rep(1, 1000), | ||
nboot = 1000, | ||
optim_method = 'BFGS', | ||
parallel = TRUE, | ||
ncore = 4){ | ||
|
||
# return error message if the input weight vector has different length comparing to the outcome Y; | ||
if (length(wmean) != nrow(data)) { | ||
stop("The length of the weight vector does not match the length of Y.") | ||
} | ||
|
||
# load utility functions | ||
extract_variables <- function(formula) { | ||
# Get the terms of the formula | ||
formula_terms <- terms(formula) | ||
|
||
# Extract the response variable name (if there is one) | ||
response_variable <- attr(formula_terms, "response") | ||
response_name <- if (response_variable > 0) { | ||
all_vars <- all.vars(formula) | ||
all_vars[response_variable] | ||
} else {NA} | ||
|
||
# Extract predictor variable names | ||
predictor_names <- attr(formula_terms, "term.labels") | ||
|
||
# Return a list of response and predictor variables | ||
list(response = response_name, predictors = predictor_names) | ||
} | ||
|
||
variables <- extract_variables(ymodel) # Extract variable names from the formula | ||
Y_name <- variables$response | ||
|
||
Y <- data[[Y_name]] | ||
A_base <- data.frame(matrix(data = NA, | ||
nrow = nrow(data), | ||
ncol = length(variables$predictors))) | ||
for (i in 1:length(variables$predictors)){ | ||
initial_vector <- variables$predictors[i] | ||
split_vector <- strsplit(initial_vector, ":") | ||
new_vector <- unlist(split_vector) | ||
if (length(new_vector)==1){ | ||
A_base[,i] <- data[, new_vector] | ||
} else if (length(new_vector)>1){ | ||
A_base[,i] <- apply(data[, new_vector],1,prod) | ||
} | ||
} | ||
|
||
A <- cbind(1, A_base) | ||
colnames(A)[2:ncol(A)]<- variables$predictors | ||
|
||
wloglik_normal<-function(param, | ||
Y, | ||
A, | ||
weight){ | ||
#number of observations; | ||
n <- length(Y) | ||
theta <- param[1:dim(A)[2]] #causal parameters on the mean | ||
#number of parameter is determined by number of treatment variables, plus intercept; | ||
sigma <- param[(dim(A)[2]+1)] # the remaining the parameter represent the standard deviation; | ||
mmat <- as.matrix(A) #design matrix of the causal outcome model, e.g., A = cbind(1, a_1, a_2); | ||
logl<- -0.5*log(sigma**2) - 0.5*((Y - mmat%*%theta)**2)/(sigma**2) | ||
wlogl<-sum(weight*logl) | ||
return(wlogl) | ||
} | ||
|
||
wloglik_binomial <- function(param, | ||
Y, | ||
A, | ||
weight){ | ||
# number of observations; | ||
n <- length(Y) | ||
beta <- param[1:dim(A)[2]] # causal parameters on the log-odds scale (no sigma for binomial?) | ||
mmat <- as.matrix(A) | ||
eta<-mmat %*% beta # linear predictor | ||
p <- 1 / (1 + exp(-eta)) | ||
logl <- Y * log(p + 0.0001) + (1 - Y) * log(1 - p + 0.0001) | ||
wlogl<-sum(weight*logl) | ||
return(wlogl) | ||
} | ||
|
||
if (family == "gaussian"){ | ||
wfn = wloglik_normal | ||
inits1 <- c(rep(0.1, length(A)), 4) # Default initial values, 4 is for the SD; | ||
} else if (family == "binomial"){ | ||
wfn = wloglik_binomial | ||
inits1 <- c(rep(0.1, length(A))) | ||
} else if (!family %in% c("gaussian","binomial")){ | ||
stop("Current version only handles continuous (gaussian) and binary (binomial) outcomes.") | ||
} | ||
|
||
|
||
#parallel computing only for this bootstrap step; | ||
if (parallel == TRUE){ | ||
numCores <- ncore | ||
registerDoParallel(cores = numCores) | ||
|
||
results <- foreach(i=1:nboot, .combine = 'rbind') %dopar% { | ||
|
||
results.it <- matrix(NA, 1, 3) #result matrix, three columns for bootest, effect_ref, and effect_comp; | ||
|
||
alpha <- as.numeric(rdirichlet(1, rep(1.0, length(Y)))) | ||
|
||
maxim <- optim(inits1, | ||
fn = wfn, | ||
Y = Y, | ||
A = A, | ||
weight = alpha * wmean, | ||
control = list(fnscale = -1), | ||
method = optim_method, | ||
hessian = FALSE) | ||
|
||
names(maxim$par) <- c("(Intercept)", variables$predictors) | ||
|
||
# Calculate the effects | ||
results.it[1,1] <- calculate_effect(reference, variables, param_estimates=maxim$par) | ||
results.it[1,2] <- calculate_effect(comparator, variables, param_estimates=maxim$par) | ||
# Calculate the ATE | ||
results.it[1,3] <- results.it[1,1] - results.it[1,2] | ||
|
||
# combining parallel results; | ||
cbind(i,results.it) #end of parallel; | ||
} | ||
|
||
#saving output for the parallel setting; | ||
return(list( | ||
mean = mean(results[,4]), | ||
sd = sqrt(var(results[,4])), | ||
quantile = quantile(results[,4], probs = c(0.025, 0.975)), | ||
bootdata <- data.frame(results[,-1]), | ||
reference = reference, | ||
comparator = comparator | ||
)) | ||
|
||
} | ||
|
||
else if (parallel == FALSE) { | ||
|
||
bootest <- numeric(nboot) | ||
effect_reference <- numeric(nboot) | ||
effect_comparator <- numeric(nboot) | ||
|
||
for (j in 1:nboot) { | ||
alpha <- as.numeric(rdirichlet(1, rep(1.0, length(Y)))) | ||
|
||
maxim <- optim(inits1, | ||
fn = wfn, | ||
Y = Y, | ||
A = A, | ||
weight = alpha * wmean, | ||
control = list(fnscale = -1), | ||
method = optim_method, | ||
hessian = FALSE) | ||
|
||
names(maxim$par) <- c("(Intercept)", variables$predictors) | ||
|
||
# Calculate the effects | ||
effect_reference[j] <- calculate_effect(reference, variables, param_estimates=maxim$par) | ||
effect_comparator[j] <- calculate_effect(comparator, variables, param_estimates=maxim$par) | ||
|
||
# Calculate the ATE | ||
bootest[j] <- effect_comparator[j] - effect_reference[j] | ||
|
||
} | ||
|
||
#saving output for the non-parallel setting; | ||
return(list( | ||
mean = mean(bootest), | ||
sd = sqrt(var(bootest)), | ||
quantile = quantile(bootest, probs = c(0.025, 0.975)), | ||
bootdata = data.frame(effect_reference, effect_comparator, ATE=bootest), | ||
reference = reference, | ||
comparator = comparator | ||
)) | ||
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#' A function to calculate the effect of an intervention given the parameter estimates and intervention levels | ||
#' | ||
#' @param intervention_levels A numeric vector indicating the levels of intervention for each predictor variable. | ||
#' @param variables A list of the names of the response variable and predictor variables extracted from the model. | ||
#' @param param_estimates A vector of parameter estimates from the model. | ||
#' | ||
#' @return A numeric value representing the calculated effect of the specified intervention. | ||
#' | ||
calculate_effect <- function(intervention_levels, variables, param_estimates) { | ||
# Start with the intercept term | ||
effect<-effect_intercept<-param_estimates[1] | ||
|
||
# Go through each predictor and add its contribution | ||
for (i in 1:length(variables$predictors)) { | ||
term <- variables$predictors[i] | ||
term_variables <- unlist(strsplit(term, ":")) | ||
term_index <- which(names(param_estimates) == term) | ||
|
||
# Calculate the product of intervention levels for the interaction term | ||
term_contribution <- param_estimates[term_index] | ||
for (term_variable in term_variables) { | ||
var_index <- which(variables$predictors == term_variable) | ||
term_contribution <- term_contribution * intervention_levels[var_index] | ||
} | ||
|
||
# Add the term contribution to the effect | ||
effect <- effect + term_contribution | ||
} | ||
|
||
return(effect) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#' Plot Average Potential Outcomes (APO) | ||
#' | ||
#' This function plots the density of APO for a specified effect type from bootstrap simulation results. | ||
#' | ||
#' @param input A data frame or model object containing 'bootdata', which include 'effect_comparator' and 'effect_reference' columns. | ||
#' @param effect_type A character string specifying which effect to plot: 'effect_comparator' or 'effect_reference'. | ||
#' @param ... Additional arguments passed to the plotting function. | ||
#' | ||
#' @return A density plot showing the distribution of the specified average potential outcome (reference or comparison). | ||
#' @export | ||
#' | ||
#' @examples | ||
#' plot_APO(model$bootdata, effect_type = "effect_comparator") | ||
#' plot_APO(model, effect_type = "effect_reference") | ||
#' | ||
plot_APO <- function(input, effect_type, ...) { | ||
# Validate input | ||
if ("bootdata" %in% names(input)) { | ||
bootdata <- input$bootdata | ||
} else if (is.data.frame(input)) { | ||
bootdata <- input | ||
} else { | ||
stop("Input must be a data frame or a model object containing a 'bootdata' data frame.") | ||
} | ||
|
||
if (!is.data.frame(bootdata) || !("effect_comparator" %in% names(bootdata)) || !("effect_reference" %in% names(bootdata))) { | ||
stop("bootdata must be a data frame containing 'effect_comparator' and 'effect_reference' columns.") | ||
} | ||
|
||
if (!is.character(effect_type) || length(effect_type) != 1) { | ||
stop("effect_type must be a single character string specifying the effect to plot.") | ||
} | ||
|
||
if (!effect_type %in% c("effect_comparator", "effect_reference")) { | ||
stop("effect_type must be either 'effect_comparator' or 'effect_reference'.") | ||
} | ||
|
||
# Extract the relevant column | ||
effect <- bootdata[, effect_type, drop = FALSE] | ||
|
||
# Calculate density | ||
density_effect <- density(effect[[1]]) | ||
|
||
# Define titles and colors based on effect_type | ||
titles <- c(effect_comparator = "Comparator Level", effect_reference = "Reference Level") | ||
colors <- c(effect_comparator = "blue", effect_reference = "red") | ||
|
||
# Calculate mean and median | ||
mean_effect <- mean(effect[[1]]) | ||
median_effect <- median(effect[[1]]) | ||
|
||
# Plotting | ||
plot(density_effect, main = paste("Average Potential Outcome (APO) of", titles[effect_type]), xlab = "Effect", ylab = "Density", col = colors[effect_type], lwd = 2, ...) | ||
|
||
# Add a vertical line for the mean and median | ||
abline(v = mean_effect, col = "darkgrey", lty = 3) | ||
abline(v = median_effect, col = "darkgrey", lty = 4) | ||
|
||
# Legend with mean and median | ||
legend_text <- c(titles[effect_type], | ||
paste("Mean:", round(mean_effect, 3)), | ||
paste("Median:", round(median_effect, 3))) | ||
|
||
legend("topright", legend = legend_text, col = c(colors[effect_type], "darkgrey", "darkgrey"), | ||
lwd = 2, lty = c(1, 3, 4)) | ||
} |
Oops, something went wrong.