Skip to content

Commit

Permalink
added score test option instead of wald test
Browse files Browse the repository at this point in the history
  • Loading branch information
jr-leary7 committed Oct 27, 2024
1 parent a48170b commit db34914
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 11 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ importFrom(stats,fitted.values)
importFrom(stats,hclust)
importFrom(stats,kmeans)
importFrom(stats,logLik)
importFrom(stats,model.matrix)
importFrom(stats,offset)
importFrom(stats,p.adjust)
importFrom(stats,p.adjust.methods)
Expand Down
70 changes: 70 additions & 0 deletions R/scoreTestGEE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#' Use a Lagrange multiplier (score) test to compare nested GEE models.
#'
#' @name scoreTestGEE
#' @author Jack R. Leary
#' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
#' @importFrom stats model.matrix predict pchisq
#' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL.
#' @param null.df The dataframe used to fit the null model. Defaults to NULL.
#' @param sandwich.var Boolean specifying whether the sandwich variance-covariance matrix should be used. Defaults to FALSE.
#' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test.
#' @details
#' \itemize{
#' \item Calculating the test statistic involves taking the inverse of the variance-covariance matrix of the coefficients. Ideally this would be done using the "true" inverse with something like \code{\link{solve}}, \code{\link{qr.solve}}, or \code{\link{chol2inv}}, but in practice this can cause issues when the variance-covariance matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse as implemented in \code{\link[MASS]{ginv}} instead, which leads to more stable results.
#' \item The \emph{p}-value is calculated using an asymptotic \eqn{\Chi^2} distribution, with the degrees of freedom equal to the number of non-intercept coefficients in the alternative model.
#' }
#' @seealso \code{\link[geeM]{geem}}
#' @seealso \code{\link{waldTestGEE}}
#' @seealso \code{\link{modelLRT}}

scoreTestGEE <- function(mod.1 = NULL,
mod.0 = NULL,
alt.df = NULL,
null.df = NULL,
sandwich.var = FALSE) {
# check inputs
if (is.null(mod.1) || is.null(mod.0) || is.null(alt.df) || is.null(null.df)) { stop("Please provide all inputs to scoreTestGEE().") }
if (inherits(mod.1, "try-error") || inherits(mod.0, "try-error")) {
res <- list(Score_Stat = NA_real_,
DF = NA_real_,
P_Val = NA_real_,
Notes = "No test performed due to model failure.")
return(res)
}
mod.1 <- mod.1$final_mod
if (!(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to scoreTestGee().") }
if (length(coef(mod.0)) != 1) { stop("Null GEE model must be intercept-only.") }
if (length(coef(mod.1)) <= length(coef(mod.0))) {
# can't calculate Score statistic if both models are intercept-only
res <- list(Score_Stat = 0,
DF = 0,
P_Val = 1,
Notes = NA_character_)
} else {
X_null <- stats::model.matrix(mod.0, data = null.df)
X_alt <- stats::model.matrix(mod.1, data = alt.df)
residuals_null <- null.df$Y - exp(stats::predict(mod.0))
if (sandwich.var) {
V_null <- as.matrix(mod.0$var)
} else {
V_null <- as.matrix(mod.0$naiv.var)
}
X_alt_t <- t(X_alt)
U <- X_alt_t %*% residuals_null
V_U <- (X_alt_t * as.numeric(V_null)) %*% X_alt
V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
if (inherits(V_U_inv, "try-error")) {
V_U_inv <- eigenMapPseudoInverse(V_U)
}
test_stat <- as.numeric(t(U) %*% V_U_inv %*% U)
df1 <- ncol(X_alt) - ncol(X_null)
p_value <- 1 - stats::pchisq(test_stat, df = df1)
res <- list(Score_Stat = test_stat,
DF = df1,
P_Val = p_value,
Notes = NA_character_)
}
return(res)
}
37 changes: 27 additions & 10 deletions R/testDynamic.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ testDynamic <- function(expr.mat = NULL,
is.gee = FALSE,
cor.structure = "ar1",
gee.bias.correction.method = NULL,
gee.test = "wald",
is.glmm = FALSE,
glmm.adaptive = TRUE,
id.vec = NULL,
Expand Down Expand Up @@ -113,7 +114,9 @@ testDynamic <- function(expr.mat = NULL,
if ((is.gee || is.glmm) && is.unsorted(id.vec)) { stop("Your data must be ordered by subject, please do so before running testDynamic() with the GEE / GLMM backends.") }
cor.structure <- tolower(cor.structure)
if (is.gee && !(cor.structure %in% c("ar1", "independence", "exchangeable"))) { stop("GEE models require a specified correlation structure.") }

# check GEE testing method
gee.test <- tolower(gee.test)
if (is.gee & !gee.test %in% c("wald", "score")) { stop("GEE testing method must be one of score or wald.") }
# set up time tracking
start_time <- Sys.time()

Expand Down Expand Up @@ -143,7 +146,7 @@ testDynamic <- function(expr.mat = NULL,
is_read_only = TRUE)

# build list of objects to prevent from being sent to parallel workers
necessary_vars <- c("expr.mat", "genes", "pt", "n.potential.basis.fns", "approx.knot", "is.glmm", "gee.bias.correction.method",
necessary_vars <- c("expr.mat", "genes", "pt", "n.potential.basis.fns", "approx.knot", "is.glmm", "gee.bias.correction.method", "gee.test",
"verbose", "n_lineages", "id.vec", "cor.structure", "is.gee", "gee.scale.fix", "glmm.adaptive", "size.factor.offset")
if (any(ls(envir = .GlobalEnv) %in% necessary_vars)) {
no_export <- c(ls(envir = .GlobalEnv)[-which(ls(envir = .GlobalEnv) %in% necessary_vars)],
Expand Down Expand Up @@ -325,7 +328,9 @@ testDynamic <- function(expr.mat = NULL,
lineage_list[[j]] <- list(Gene = genes[i],
Lineage = LETTERS[j],
Test_Stat = NA_real_,
Test_Stat_Type = ifelse(is.gee, "Wald", "LRT"),
Test_Stat_Type = ifelse(is.gee,
ifelse(gee.test == "wald", "Wald", "Score"),
"LRT"),
Test_Stat_Note = NA_character_,
P_Val = NA_real_,
LogLik_MARGE = marge_sumy$ll_marge,
Expand All @@ -344,18 +349,28 @@ testDynamic <- function(expr.mat = NULL,

# compute test stat using asymptotic Chi-squared approximation
if (is.gee) {
test_res <- waldTestGEE(mod.1 = marge_mod,
mod.0 = null_mod,
correction.method = gee.bias.correction.method,
id.vec = id.vec[lineage_cells],
verbose = verbose)
if (gee.test == "wald") {
test_res <- waldTestGEE(mod.1 = marge_mod,
mod.0 = null_mod,
correction.method = gee.bias.correction.method,
id.vec = id.vec[lineage_cells],
verbose = verbose)
} else if (gee.test == "score") {
test_res <- scoreTestGEE(mod.1 = marge_mod,
mod.0 = null_mod,
alt.df = as.data.frame(marge_mod$basis_mtx),
null.df = null_mod_df,
sandwich.var = ifelse(is.null(gee.bias.correction.method), FALSE, TRUE))
}
} else {
test_res <- modelLRT(mod.1 = marge_mod,
mod.0 = null_mod,
is.glmm = is.glmm)
}
# add test stats to result list
lineage_list[[j]]$Test_Stat <- ifelse(is.gee, test_res$Wald_Stat, test_res$LRT_Stat)
lineage_list[[j]]$Test_Stat <- ifelse(is.gee,
ifelse(gee.test == "wald", test_res$Wald_Stat, test_res$Score_Stat),
test_res$LRT_Stat)
lineage_list[[j]]$Test_Stat_Note <- test_res$Notes
lineage_list[[j]]$P_Val <- test_res$P_Val
}
Expand All @@ -380,7 +395,9 @@ testDynamic <- function(expr.mat = NULL,
list(Gene = y,
Lineage = LETTERS[z],
Test_Stat = NA_real_,
Test_Stat_Type = ifelse(is.gee, "Wald", "LRT"),
Test_Stat_Type = ifelse(is.gee,
ifelse(gee.test == "wald", "Wald", "Score"),
"LRT"),
Test_Stat_Note = NA_character_,
P_Val = NA_real_,
LogLik_MARGE = NA_real_,
Expand Down
3 changes: 2 additions & 1 deletion R/waldTestGEE.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ waldTestGEE <- function(mod.1 = NULL,
id.vec = NULL,
verbose = FALSE) {
# check inputs
if (is.null(mod.1) || is.null(mod.0)) { stop("Please provide both models to waldTestGEE().") }
if (inherits(mod.1, "try-error") || inherits(mod.0, "try-error")) {
res <- list(Wald_Stat = NA_real_,
DF = NA_real_,
Expand All @@ -43,7 +44,7 @@ waldTestGEE <- function(mod.1 = NULL,
}

mod.1 <- mod.1$final_mod
if (is.null(mod.1) || is.null(mod.0) || !(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to waldTestGee().") }
if (!(inherits(mod.1, "geem") && inherits(mod.0, "geem"))) { stop("You must provide two geeM models to waldTestGee().") }
if (length(coef(mod.0)) != 1) { stop("Null GEE model must be intercept-only.") }
if (length(coef(mod.1)) <= length(coef(mod.0))) {
# can't calculate Wald statistic if both models are intercept-only
Expand Down
47 changes: 47 additions & 0 deletions man/scoreTestGEE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/testDynamic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit db34914

Please sign in to comment.