Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 112 jackknife #113

Merged
merged 12 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PLNmodels
Title: Poisson Lognormal Models
Version: 1.0.4-0200
Version: 1.0.4-0300
Authors@R: c(
person("Julien", "Chiquet", role = c("aut", "cre"), email = "[email protected]",
comment = c(ORCID = "0000-0002-3629-3429")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export(PLNmixture_param)
export(PLNnetwork)
export(PLNnetwork_param)
export(coefficient_path)
export(compute_PLN_starting_point)
export(compute_offset)
export(extract_probs)
export(getBestModel)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* Update documentation of PLN*_param() functions to include torch optimization parameters
* Add (somehow) explicit error message when torch convergence fails
* Change initialization in `variance_jackknife()` and `variance_bootstrap()` to prevent estimation recycling, results from those functions are now comparable to doing jackknife / bootstrap "by hand".

# PLNmodels 1.0.4 (2023-08-24)

Expand Down
42 changes: 28 additions & 14 deletions R/PLNfit-class.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## CLASS PLNfit #######################################
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' An R6 Class to represent a PLNfit in a standard, general framework
#'
#' @description The function [PLN()] fit a model which is an instance of a object with class [`PLNfit`].
Expand Down Expand Up @@ -211,7 +215,10 @@ PLNfit <- R6Class(
O = O[-i, , drop = FALSE],
w = w[-i])
args <- list(data = data,
params = list(B = private$B, M = matrix(0, self$n-1, self$p), S = private$S[-i, ]),
# params = list(B = private$B,
# M = matrix(0, self$n-1, self$p),
# S = private$S[-i, , drop = FALSE]),
params = do.call(compute_PLN_starting_point, data),
config = config)
optim_out <- do.call(private$optimizer$main, args)
optim_out[c("B", "Omega")]
Expand Down Expand Up @@ -240,7 +247,8 @@ PLNfit <- R6Class(
O = O[resample, , drop = FALSE],
w = w[resample])
args <- list(data = data,
params = list(B = private$B, M = matrix(0,self$n,self$p), S = private$S[resample, ]),
# params = list(B = private$B, M = matrix(0,self$n,self$p), S = private$S[resample, ]),
params = do.call(compute_PLN_starting_point, data),
config = config)
optim_out <- do.call(private$optimizer$main, args)
optim_out[c("B", "Omega", "monitoring")]
Expand Down Expand Up @@ -301,10 +309,14 @@ PLNfit <- R6Class(
private$S <- control$inception$var_par$S
} else {
if (control$trace > 1) cat("\n Use LM after log transformation to define the inceptive model")
fits <- lm.fit(weights * covariates, weights * log((1 + responses)/exp(offsets)))
private$B <- matrix(fits$coefficients, d, p)
private$M <- matrix(fits$residuals, n, p)
private$S <- matrix(.1, n, p)
# fits <- lm.fit(weights * covariates, weights * log((1 + responses)/exp(offsets)))
# private$B <- matrix(fits$coefficients, d, p)
# private$M <- matrix(fits$residuals, n, p)
# private$S <- matrix(.1, n, p)
start_point <- compute_PLN_starting_point(Y = responses, X = covariates, O = offsets, w = weights)
private$B <- start_point$B
private$M <- start_point$M
private$S <- start_point$S
}
private$optimizer$main <- ifelse(control$backend == "nlopt", nlopt_optimize, private$torch_optimize)
private$optimizer$vestep <- nlopt_optimize_vestep
Expand Down Expand Up @@ -595,7 +607,7 @@ PLNfit <- R6Class(
)

## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## CLASS PLNfit_diagonal
## CLASS PLNfit_diagonal ##############################
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' An R6 Class to represent a PLNfit in a standard, general framework, with diagonal residual covariance
Expand Down Expand Up @@ -677,7 +689,7 @@ PLNfit_diagonal <- R6Class(
)

## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## CLASS PLNfit_spherical
## CLASS PLNfit_spherical ############################
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' An R6 Class to represent a PLNfit in a standard, general framework, with spherical residual covariance
Expand Down Expand Up @@ -757,7 +769,7 @@ PLNfit_spherical <- R6Class(
)

## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## CLASS PLNfit_fixedcov
## CLASS PLNfit_fixedcov #############################
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' An R6 Class to represent a PLNfit in a standard, general framework, with fixed (inverse) residual covariance
Expand Down Expand Up @@ -850,11 +862,13 @@ PLNfit_fixedcov <- R6Class(

variance_jackknife = function(Y, X, O, w, config = config_default_nlopt) {
jacks <- future.apply::future_lapply(seq_len(self$n), function(i) {
args <- list(Y = Y[-i, , drop = FALSE],
data <- list(Y = Y[-i, , drop = FALSE],
X = X[-i, , drop = FALSE],
O = O[-i, , drop = FALSE],
w = w[-i],
params = list(B = private$B, Omega = private$Omega, M = private$M[-i, ], S = private$S[-i, ]),
w = w[-i])
args <- list(data = data,
# params = list(B = private$B, Omega = private$Omega, M = private$M[-i, ], S = private$S[-i, ]),
params = do.call(compute_PLN_starting_point, data),
config = config)
optim_out <- do.call(private$optimizer$main, args)
optim_out[c("B", "Omega")]
Expand Down Expand Up @@ -907,8 +921,8 @@ PLNfit_fixedcov <- R6Class(
)

## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## CLASS PLNfit_genetprior
# ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## CLASS PLNfit_genetprior ############################
# ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# #' An R6 Class to represent a PLNfit in a standard, general framework, with residual covariance modelling
# #' motivatived by population genetics
Expand Down
36 changes: 36 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,39 @@ create_parameters <- function(
Sigma = sigma * toeplitz(x = rho^seq(0, p-1)),
depths = depths)
}

#' Helper function for PLN initialization.
#'
#' @description
#' Barebone function to compute starting points for B, M and S when fitting a PLN. Mostly intended for internal use.
#'
#' @param Y Response count matrix
#' @param X Covariate matrix
#' @param O Offset matrix (in log-scale)
#' @param w Weight vector (defaults to 1)
#' @param s Scale parameter for S (defaults to 0.1)
#' @return a named list of starting values for model parameter B and variational parameters M and S used in the iterative optimization algorithm of [PLN()]
#'
#' @details The default strategy to estimate B and M is to fit a linear model with covariates `X` to the response count matrix (after adding a pseudocount of 1, scaling by the offset and taking the log). The regression matrix is used to initialize `B` and the residuals to initialize `M`. `S` is initialized as a constant conformable matrix with value `s`.
#'
#' @rdname compute_PLN_starting_point
#' @examples
#' \dontrun{
#' data(barents)
#' Y <- barents$Abundance
#' X <- model.matrix(Abundance ~ Latitude + Longitude + Depth + Temperature, data = barents)
#' O <- log(barents$Offset)
#' w <-- rep(1, nrow(Y))
#' compute_PLN_starting_point(Y, X, O, w)
#' }
#'
#' @importFrom stats lm.fit
#' @export
compute_PLN_starting_point <- function(Y, X, O, w, s = 0.1) {
# Y = responses, X = covariates, O = offsets (in log scale), w = weights
n <- nrow(Y); p <- ncol(Y); d <- ncol(X)
fits <- lm.fit(w * X, w * log((1 + Y)/exp(O)))
list(B = matrix(fits$coefficients, d, p),
M = matrix(fits$residuals, n, p),
S = matrix(s, n, p))
}
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ reference:
- '`PLNfamily`'
- '`plot.PLNfamily`'
- '`rPLN`'
- '`rPLN`'
- '`compute_PLN_starting_point`'
- title: Data sets
desc: ~
contents:
Expand Down
59 changes: 59 additions & 0 deletions inst/check/jackknife_variance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
library(PLNmodels); library(mvtnorm)


data("barents")
n <- nrow(barents$Abundance); p <- ncol(barents$Abundance)
fit <- PLN(Abundance ~ Latitude + Longitude + Depth + Temperature, data = barents)
X <- model.matrix(Abundance ~ Latitude + Longitude + Depth + Temperature, data = barents)

# Simulate data
n <- 1e2
X <- X[sample(1:nrow(X), n, replace=TRUE), ]
B0 <- fit$model_par$B; Sigma0 <- fit$model_par$Sigma
Y <- matrix(rpois(n*p, exp(X %*% B0 + mvtnorm::rmvnorm(n, sigma= Sigma0))), n, p)
d <- ncol(X)

# Jacknife sd estimate
tictoc::tic()
pln <- PLN(Y ~ -1 + X, control=PLN_param(config_post = list(jackknife=TRUE)))
tictoc::toc()

betaSdPackage <- as.vector(standard_error(pln, type='jackknife'))
jackknife_estimates <- attr(pln$model_par$B, "estimates_jaccknife") |> purrr::map("B")
betaJKPackage <- matrix(NA, n, p*d)
for(i in 1:n){
betaJKPackage[i, ] <- as.vector(jackknife_estimates[[i]])
}


# Jackknife sd estimate by hand
tictoc::tic()
betaJK <- matrix(NA, n, p*d)
for(i in 1:n){
plnTmp <- PLN(Y[-i, ] ~ -1 + X[-i, ], control=PLN_param(trace=0))
betaJK[i, ] <- as.vector(plnTmp$model_par$B)
}
tictoc::toc()
betaSdByHand <- sqrt((n-1)*apply(betaJK, 2, function(x) { mean(x^2) - mean(x)^2 }))

## Check that "automatic" and "manual" jackknife estimates are identical
plot(as.vector(betaJK), as.vector(betaJKPackage),
ylab = "Computed from package", xlab = "Computed by hand",
main = "Jackknife estimates (all coefficients, all samples)")
abline(0, 1, col = "red")
max(abs(betaJK - betaJKPackage))

# Empirical sd estimate
B <- n
betaEmp <- matrix(NA, B, p*d)
for(b in 1:B){
if(b %% 10==0){cat(b, '')}
Ytmp <- matrix(rpois(n*p, exp(X %*% B0 + rmvnorm(n, sigma= Sigma0))), n, p)
plnTmp <- PLN(Ytmp ~ -1 + X, control=PLN_param(trace=0))
betaEmp[b, ] <- as.vector(plnTmp$model_par$B)
}
cat("\n")
betaSdEmpirical <- apply(betaEmp, 2, sd)

# Comparison
plot(as.data.frame(cbind(betaSdPackage, betaSdByHand, betaSdEmpirical)))
39 changes: 39 additions & 0 deletions man/compute_PLN_starting_point.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-standard-error.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ test_that("Check that variance estimation are coherent in PLNfit", {
trace = 2
)

myPLN$postTreatment(Y, X, exp(log_O), config = config_post)
myPLN$postTreatment(Y, X, log_O, config = config_post)

tr_variational <- sum(standard_error(myPLN, "variational")^2)
tr_bootstrap <- sum(standard_error(myPLN, "bootstrap")^2)
Expand Down
Loading