From e02f6c05468ef1524ada9aab2a1a9b28580dd2ab Mon Sep 17 00:00:00 2001
From: Jack Leary <j.leary@ufl.edu>
Date: Sun, 10 Nov 2024 14:26:21 -0500
Subject: [PATCH] fixed score test function to provide more accurate values

---
 R/scoreTestGEE.R    | 64 +++++++++++++++++++++++++++++----------------
 man/scoreTestGEE.Rd |  4 ++-
 2 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/R/scoreTestGEE.R b/R/scoreTestGEE.R
index 1a4f102..a9085a7 100644
--- a/R/scoreTestGEE.R
+++ b/R/scoreTestGEE.R
@@ -4,6 +4,7 @@
 #' @author Jack R. Leary
 #' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
 #' @importFrom stats model.matrix predict pchisq
+#' @importFrom Matrix bdiag
 #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
 #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
 #' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL. 
@@ -13,10 +14,11 @@
 #' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test.
 #' @details
 #' \itemize{
-#' \item Calculating the test statistic involves taking the inverse of the variance-covariance matrix of the coefficients. Ideally this would be done using the "true" inverse with something like \code{\link{solve}}, \code{\link{qr.solve}}, or \code{\link{chol2inv}}, but in practice this can cause issues when the variance-covariance matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse as implemented in \code{\link[MASS]{ginv}} instead, which leads to more stable results.
+#' \item Calculating the test statistic involves taking the inverse of the variance of the score vector. Ideally this would be done using the true inverse, but in practice this can cause issues when the matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse if the original matrix inversion fails.
 #' \item The \emph{p}-value is calculated using an asymptotic \eqn{\Chi^2} distribution, with the degrees of freedom equal to the number of non-intercept coefficients in the alternative model.
 #' }
 #' @seealso \code{\link[geeM]{geem}}
+#' @seealso \code{\link[glmtoolbox]{anova2}}
 #' @seealso \code{\link{waldTestGEE}}
 #' @seealso \code{\link{modelLRT}}
 
@@ -46,19 +48,19 @@ scoreTestGEE <- function(mod.1 = NULL,
                 Notes = NA_character_)
   } else {
     rho <- summary(mod.0)$alpha
-    sigma2 <- summary(mod.0)$phi
+    phi <- summary(mod.0)$phi
+    theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family)))
     X_null <- stats::model.matrix(mod.0, data = null.df)
     X_alt <- stats::model.matrix(mod.1, data = alt.df)
-    residuals_null <- null.df$Y - exp(stats::predict(mod.0))
+    r_null <- null.df$Y - stats::predict(mod.0)
     p_alt <- ncol(X_alt)
-    U <- rep(0, p_alt)
-    V_U <- matrix(0, nrow = p_alt, ncol = p_alt)
     groups <- unique(id.vec)
+    i <- 1
+    W <- K <- vector("list", length = length(groups))
     for (i in seq(groups)) {
-      group_indices <- which(id.vec == groups[i])
-      X_alt_i <- X_alt[group_indices, , drop = FALSE]
-      residuals_i <- residuals_null[group_indices]
-      n_i <- length(group_indices)
+      group_idx <- which(id.vec == groups[i])
+      n_i <- length(group_idx)
+      # create working correlation matrix R_i
       if (cor.structure == "independence") {
         R_i <- diag(1, nrow = n_i, ncol = n_i)
       } else if (cor.structure == "exchangeable") {
@@ -67,26 +69,42 @@ scoreTestGEE <- function(mod.1 = NULL,
       } else if (cor.structure == "ar1") {
         R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i)
       }
-      V_i <- sigma2 * R_i
-      R_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
-      if (inherits(R_i_inv, "try-error")) {
-        R_i_inv <- eigenMapPseudoInverse(V_i)
+      # create working covariance matrix V_i
+      mu_i <- stats::predict(mod.0)[group_idx]
+      V_mu_i <- mu_i * (1 + mu_i / theta)
+      A_i <- diag(V_mu_i)
+      A_i_sqrt <- sqrt(A_i)  # same as taking the 1/2 power of A_i since A_i is diagonal
+      V_i <- A_i_sqrt %*% R_i %*% A_i_sqrt
+      # create matrices W_i and K_i
+      K_i <- diag(mu_i)
+      V_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
+      if (inherits(V_i_inv, "try-error")) {
+        V_i_inv <- eigenMapPseudoInverse(V_i)
       }
-      X_alt_i_t <- t(X_alt_i)
-      U_i <- X_alt_i_t %*% (R_i_inv %*% residuals_i)
-      V_U_i <- X_alt_i_t %*% (R_i_inv %*% X_alt_i)
-      U <- U + U_i
-      V_U <- V_U + V_U_i
+      W_i <- K_i %*% V_i_inv %*% K_i
+      W[[i]] <- W_i
+      K[[i]] <- K_i
     }
+    W <- as.matrix(Matrix::bdiag(W))
+    K <- as.matrix(Matrix::bdiag(K))
+    K_inv <- try({ eigenMapMatrixInvert(K) }, silent = TRUE)
+    if (inherits(K_inv, "try-error")) {
+      K_inv <- eigenMapPseudoInverse(K)
+    }
+    # generate score vector 
+    U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null
+    # generate variance of score vector and invert it   
+    V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt
     V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
     if (inherits(V_U_inv, "try-error")) {
       V_U_inv <- eigenMapPseudoInverse(V_U)
     }
-    test_stat <- as.numeric(t(U) %*% V_U_inv %*% U)
-    df1 <- ncol(X_alt) - ncol(X_null)
-    p_value <- 1 - stats::pchisq(test_stat, df = df1)
-    res <- list(Score_Stat = test_stat,
-                DF = df1,
+    # estimate test statistic and accompanying p-value 
+    S <- t(U) %*% V_U_inv %*% U
+    S_df <- ncol(X_alt) - ncol(X_null)
+    p_value <- 1 - stats::pchisq(S, df = S_df)
+    res <- list(Score_Stat = S,
+                DF = S_df,
                 P_Val = p_value,
                 Notes = NA_character_)
   }
diff --git a/man/scoreTestGEE.Rd b/man/scoreTestGEE.Rd
index c90200a..c1b60ab 100644
--- a/man/scoreTestGEE.Rd
+++ b/man/scoreTestGEE.Rd
@@ -34,13 +34,15 @@ Performs a basic Lagrange multiplier test to determine whether an alternate mode
 }
 \details{
 \itemize{
-\item Calculating the test statistic involves taking the inverse of the variance-covariance matrix of the coefficients. Ideally this would be done using the "true" inverse with something like \code{\link{solve}}, \code{\link{qr.solve}}, or \code{\link{chol2inv}}, but in practice this can cause issues when the variance-covariance matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse as implemented in \code{\link[MASS]{ginv}} instead, which leads to more stable results.
+\item Calculating the test statistic involves taking the inverse of the variance of the score vector. Ideally this would be done using the true inverse, but in practice this can cause issues when the matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse if the original matrix inversion fails.
 \item The \emph{p}-value is calculated using an asymptotic \eqn{\Chi^2} distribution, with the degrees of freedom equal to the number of non-intercept coefficients in the alternative model.
 }
 }
 \seealso{
 \code{\link[geeM]{geem}}
 
+\code{\link[glmtoolbox]{anova2}}
+
 \code{\link{waldTestGEE}}
 
 \code{\link{modelLRT}}