diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 1474613d..ade15537 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -19,17 +19,18 @@ jobs: matrix: config: - {os: macOS-latest, r: 'release'} + - {os: macOS-latest, r: 'oldrel-1'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: windows-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/DESCRIPTION b/DESCRIPTION index bfa54fe0..b203c763 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ BugReports: https://github.com/pln-team/PLNmodels/issues License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 3.4) LazyData: true biocViews: @@ -90,6 +90,7 @@ Collate: 'ZIPLNfit-class.R' 'ZIPLN.R' 'ZIPLNfit-S3methods.R' + 'ZIPLNnetwork.R' 'barents.R' 'import_utils.R' 'mollusk.R' diff --git a/NAMESPACE b/NAMESPACE index ae446525..debf5692 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,12 +7,17 @@ S3method(coef,ZIPLNfit) S3method(fitted,PLNfit) S3method(fitted,PLNmixturefit) S3method(fitted,ZIPLNfit) +S3method(getBestModel,Networkfamily) S3method(getBestModel,PLNPCAfamily) S3method(getBestModel,PLNmixturefamily) S3method(getBestModel,PLNnetworkfamily) +S3method(getBestModel,ZIPLNnetworkfamily) +S3method(getModel,Networkfamily) S3method(getModel,PLNPCAfamily) S3method(getModel,PLNmixturefamily) S3method(getModel,PLNnetworkfamily) +S3method(getModel,ZIPLNnetworkfamily) +S3method(plot,Networkfamily) S3method(plot,PLNLDAfit) S3method(plot,PLNPCAfamily) S3method(plot,PLNPCAfit) @@ -21,6 +26,8 @@ S3method(plot,PLNmixturefamily) S3method(plot,PLNmixturefit) S3method(plot,PLNnetworkfamily) S3method(plot,PLNnetworkfit) +S3method(plot,ZIPLNfit_sparse) +S3method(plot,ZIPLNnetworkfamily) S3method(predict,PLNLDAfit) S3method(predict,PLNfit) S3method(predict,PLNmixturefit) @@ -48,6 +55,8 @@ export(PLNnetwork) export(PLNnetwork_param) export(ZIPLN) export(ZIPLN_param) +export(ZIPLNnetwork) +export(ZIPLNnetwork_param) export(coefficient_path) export(compute_PLN_starting_point) export(compute_offset) @@ -77,6 +86,7 @@ importFrom(corrplot,corrplot) importFrom(future.apply,future_lapply) importFrom(future.apply,future_sapply) importFrom(glassoFast,glassoFast) +importFrom(grDevices,rgb) importFrom(grid,nullGrob) importFrom(grid,textGrob) importFrom(gridExtra,arrangeGrob) diff --git a/NEWS.md b/NEWS.md index 01adb95d..b0ea56d7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ -# Current (2024-01-23) +# PLNmodels 1.2.0 (2024-02-24) -* Addition of ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the (for now) standard PLN model (merge PR #116) +* new feature: ZIPLN (PLN with zero inflation) + * ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the standard PLN model (merge PR #116) + * ZIPLNnetwork() and ZIPLNfit_sparse-class to allow for zero-inflation in the PLNnetwork model (merge PR #118) + * Code factorization between PLNnetwork and ZIPLNnetwork (and associated classes) +* fix inconsistency between fitted and predict (merge PR #115) # PLNmodels 1.1.0 (2024-01-08) diff --git a/R/PLNnetwork.R b/R/PLNnetwork.R index a6f70c0e..372357be 100644 --- a/R/PLNnetwork.R +++ b/R/PLNnetwork.R @@ -1,18 +1,19 @@ -#' Poisson lognormal model towards sparse network inference +#' Sparse Poisson lognormal model for network inference #' -#' Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets). +#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +#' Use the (g)lm syntax to specify the model (including covariates and offsets). #' #' @param formula an object of class "formula": a symbolic description of the model to be fitted. #' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called. #' @param subset an optional vector specifying a subset of observations to be used in the fitting process. #' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See \code{PLNnetwork_param()} for additional tuning of the penalty. +#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See `PLNnetwork_param()` for additional tuning of the penalty. #' @param control a list-like structure for controlling the optimization, with default generated by [PLNnetwork_param()]. See the corresponding documentation for details; #' #' @return an R6 object with class [`PLNnetworkfamily`], which contains #' a collection of models with class [`PLNnetworkfit`] #' -#' @rdname PLNnetwork #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -28,16 +29,16 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control replace 'list(my_arg = xx)' by PLN_param(my_arg = xx) and see the documentation of PLNnetwork_param().") ## extract the data matrices and weights - args <- extract_model(match.call(expand.dots = FALSE), parent.frame()) + data_ <- extract_model(match.call(expand.dots = FALSE), parent.frame()) ## Instantiate the collection of models if (control$trace > 0) cat("\n Initialization...") - myPLN <- PLNnetworkfamily$new(penalties, args$Y, args$X, args$O, args$w, args$formula, control) + myPLN <- PLNnetworkfamily$new(penalties, data_, control) ## Optimization if (control$trace > 0) cat("\n Adjusting", length(myPLN$penalties), "PLN with sparse inverse covariance estimation\n") if (control$trace) cat("\tJoint optimization alternating gradient descent and graphical-lasso\n") - myPLN$optimize(control$config_optim) + myPLN$optimize(data_, control$config_optim) ## Post-treatments if (control$trace > 0) cat("\n Post-treatments") @@ -59,7 +60,7 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control #' @param n_penalties an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non `NULL` #' @param min_ratio the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by `min_ratio`. Default is 0.1. #' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE} -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. #' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on #' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), #' which sometimes speeds up the inference. @@ -67,7 +68,7 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control #' @return list of parameters configuring the fit. #' @inherit PLN_param details #' @details See [PLN_param()] for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-6 #' * "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 #' #' @seealso [PLN_param()] diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index ca2648cf..8a8fd7bc 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -6,23 +6,21 @@ ## Auxiliary functions to check the given class of an objet -isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")} +isNetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} -#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) -#' -#' @name plot.PLNnetworkfamily +#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]) #' #' @inheritParams plot.PLNfamily #' @inherit plot.PLNfamily return details #' -#' @param x an R6 object with class [`PLNnetworkfamily`] +#' @param x an R6 object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @param type a character, either "criteria", "stability" or "diagnostic" for the type of plot. -#' @param criteria vector of characters. The criteria to plot in c("loglik", "BIC", "ICL", "R_squared", "EBIC", "pen_loglik"). -#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only relevant when `type = "criteria"`. +#' @param criteria Vector of criteria to plot, to be selected among "loglik" (log-likelihood), +#' "BIC", "ICL", "R_squared", "EBIC" and "pen_loglik" (penalized log-likelihood). +#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only used when `type = "criteria"`. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @param stability scalar: the targeted level of stability in stability plot. Default is .9. #' -#' #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -30,18 +28,17 @@ isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")} #' \dontrun{ #' plot(fits) #' } -#' @return Produces either a diagnostic plot (with \code{type = 'diagnostic'}), a stability plot -#' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered -#' (with \code{type = 'criteria'}, the default). +#' @return Produces either a diagnostic plot (with `type = 'diagnostic'`), a stability plot +#' (with `type = 'stability'`) or the evolution of the criteria of the different models considered +#' (with `type = 'criteria'`, the default). #' @export -plot.PLNnetworkfamily <- - function(x, +plot.Networkfamily <- function(x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE, stability = 0.9, ...) { - stopifnot(isPLNnetworkfamily(x)) + stopifnot(isNetworkfamily(x)) type <- match.arg(type) if (type == "criteria") p <- x$plot(criteria, reverse) @@ -53,27 +50,50 @@ plot.PLNnetworkfamily <- p } -#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] +#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits #' @export -getModel.PLNnetworkfamily <- function(Robject, var, index = NULL) { - stopifnot(isPLNnetworkfamily(Robject)) +plot.PLNnetworkfamily <- plot.Networkfamily + +#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits +#' @export +plot.ZIPLNnetworkfamily <- plot.Networkfamily + +#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] +#' @export +getModel.Networkfamily <- function(Robject, var, index = NULL) { + stopifnot(isNetworkfamily(Robject)) Robject$getModel(var, index) } -#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] +#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] +#' @export +getModel.PLNnetworkfamily <- getModel.Networkfamily + +#' @describeIn getModel Model extraction for [`ZIPLNnetworkfamily`] #' @export -getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { - stopifnot(isPLNnetworkfamily(Robject)) +getModel.ZIPLNnetworkfamily <- getModel.Networkfamily + +#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] +#' @export +getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { + stopifnot(isNetworkfamily(Robject)) stability <- list(...)[["stability"]] if (is.null(stability)) stability <- 0.9 Robject$getBestModel(match.arg(crit), stability) } +#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] +#' @export +getBestModel.PLNnetworkfamily <- getBestModel.Networkfamily + +#' @describeIn getBestModel Model extraction for [`ZIPLNnetworkfamily`] +#' @export +getBestModel.ZIPLNnetworkfamily <- getBestModel.Networkfamily #' Extract the regularization path of a PLNnetwork fit #' #' @name coefficient_path -#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()] +#' @param Robject an object with class [`Networkfamily`], i.e. an output from [PLNnetwork()] #' @param precision a logical, should the coefficients of the precision matrix Omega or the covariance matrix Sigma be sent back. Default is `TRUE`. #' @param corr a logical, should the correlation (partial in case `precision = TRUE`) be sent back. Default is `TRUE`. #' @@ -85,7 +105,7 @@ getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StAR #' head(coefficient_path(fits)) #' @export coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) Robject$coefficient_path(precision, corr) } @@ -95,12 +115,12 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { #' #' @description This function computes the StARS stability criteria over a path of penalties. If a path has already been computed, the functions stops with a message unless `force = TRUE` has been specified. #' -#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()] -#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. -#' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. +#' @param Robject an object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`], i.e. an output from [PLNnetwork()] or [ZIPLNnetwork()] +#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. +#' @param control a list controlling the main optimization process in each call to [PLNnetwork()] or [ZIPLNnetwork()]. See [PLN_param()] or [ZIPLN_param()] for details. #' @param force force computation of the stability path, even if a previous one has been detected. #' -#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`PLNnetworkfamily`] +#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`Networkfamily`] #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -112,7 +132,8 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { #' } #' @export stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork_param(), force = FALSE) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) + if (inherits(Robject, "ZIPLNnetworkfamily")) control <- ZIPLNnetwork_param() if (force || anyNA(Robject$stability)) { Robject$stability_selection(subsamples, control) } else { @@ -120,8 +141,6 @@ stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork } } - - #' Extract edge selection frequency in bootstrap subsamples #' #' @description Extracts edge selection frequency in networks reconstructed from bootstrap subsamples @@ -162,7 +181,7 @@ extract_probs <- function(Robject, penalty = NULL, index = NULL, crit = c("StARS", "BIC", "EBIC"), format = c("matrix", "vector"), tol = 1e-5) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) ## Check if stability selection has been performed stab_path <- Robject$stability_path if (is.null(stab_path)) { diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 00f2cec8..20f607a7 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -1,33 +1,28 @@ -#' An R6 Class to represent a collection of PLNnetworkfit +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS Networkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to virtually represent a collection of network fits #' -#' @description The function [PLNnetwork()] produces an instance of this class. +#' @description The functions [PLNnetwork()] and [ZIPLNnetwork()] both produce an instance of this class, which can be thought of as a vector of [`PLNnetworkfit`]s [`ZIPLNfit_sparse`]s (indexed by penalty parameter) #' -#' This class comes with a set of methods, some of them being useful for the user: +#' This class comes with a set of methods mostly used to compare +#' network fits (in terms of goodness of fit) or extract one from +#' the family (based on penalty parameter and/or goodness of it). #' See the documentation for [getBestModel()], -#' [getModel()] and [plot()][plot.PLNnetworkfamily()] +#' [getModel()] and [plot()][plot.Networkfamily()] for the user-facing ones. #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. -#' @param responses the matrix of responses common to every models -#' @param covariates the matrix of covariates common to every models -#' @param offsets the matrix of offsets common to every models -#' @param weights the vector of observation weights -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. -#' @param index Integer index of the model to be returned. Only the first value is taken into account #' #' @include PLNfamily-class.R #' @importFrom R6 R6Class #' @importFrom glassoFast glassoFast -#' @examples -#' data(trichoptera) -#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) -#' class(fits) -#' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] -PLNnetworkfamily <- R6Class( - classname = "PLNnetworkfamily", +#' @seealso The functions [PLNnetwork()], [ZIPLNnetwork()] and the classes [`PLNnetworkfit`], [`ZIPLNfit_sparse`] +Networkfamily <- R6Class( + classname = "Networkfamily", inherit = PLNfamily, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ------ @@ -36,33 +31,14 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize all models in the collection - #' @return Update current [`PLNnetworkfit`] with smart starting values - initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + #' @return Update all network fits in the family with smart starting values + initialize = function(penalties, data, control) { ## Initialize fields shared by the super class - super$initialize(responses, covariates, offsets, weights, control) - - ## A basic model for inception, useless one is defined by the user -### TODO check if it is useful - if (is.null(control$inception)) { - - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - myPLN <- switch( - control$inception_cov, - "spherical" = PLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), - "diagonal" = PLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), - PLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full - ) - ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov - ## for the inner-outer loop of PLNnetwork. - myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) - control$inception <- myPLN - } + super$initialize(data$Y, data$X, data$O, data$w, control) if (is.null(control$penalty_weights)) - control$penalty_weights <- matrix(1, ncol(responses), ncol(responses)) + control$penalty_weights <- matrix(1, private$p, private$p) ## Get the number of penalty if (is.null(penalties)) { if (is.list(control$penalty_weights)) @@ -76,41 +52,37 @@ PLNnetworkfamily <- R6Class( else list_penalty_weights <- control$penalty_weights + ## Check consistency of weights and optionally silent diagonal penalties + list_penalty_weights <- + map(list_penalty_weights, function(penalty_weights) { + stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(penalty_weights) <- 0 + penalty_weights + }) + ## Get an appropriate grid of penalties if (is.null(penalties)) { - if (control$trace > 1) cat("\n Recovering an appropriate grid of penalties.") - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - # changed it to other one + if (control$trace > 1) cat("\nComputing an appropriate grid of penalties.") max_pen <- list_penalty_weights %>% - map(~ as.matrix(myPLN$model_par$Sigma) / .x) %>% - # map(~ control$inception$model_par$Sigma / .x) %>% + map(~ as.matrix(control$inception$model_par$Sigma) / .x) %>% map_dbl(~ max(abs(.x[upper.tri(.x, diag = control$penalize_diagonal)]))) %>% max() penalties <- 10^seq(log10(max_pen), log10(max_pen*control$min_ratio), len = control$n_penalties) } else { - if (control$trace > 1) cat("\nPenalties already set by the user") + if (control$trace > 1) cat("\nUsing penalties penalties provided by the user.") stopifnot(all(penalties > 0)) } ## Sort the penalty in decreasing order o <- order(penalties, decreasing = TRUE) private$params <- penalties[o] - list_penalty_weights <- list_penalty_weights[o] - - ## instantiate as many models as penalties - control$trace <- 0 - self$models <- map2(private$params, list_penalty_weights, function(penalty, penalty_weights) { - PLNnetworkfit$new(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) - }) - + private$penalties_weights <- list_penalty_weights[o] }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Optimization ---------------------- #' @description Call to the C++ optimizer on all models of the collection #' @param config a list for controlling the optimization. - optimize = function(config) { + optimize = function(data, config) { ## Go along the penalty grid (i.e the models) for (m in seq_along(self$models)) { @@ -121,7 +93,7 @@ PLNnetworkfamily <- R6Class( if (config$trace > 1) { cat("\tsparsifying penalty =", self$models[[m]]$penalty, "- iteration:") } - self$models[[m]]$optimize(self$responses, self$covariates, self$offsets, self$weights, config) + self$models[[m]]$optimize(data, config) ## Save time by starting the optimization of model m + 1 with optimal parameters of model m if (m < length(self$penalties)) self$models[[m + 1]]$update( @@ -139,71 +111,9 @@ PLNnetworkfamily <- R6Class( }, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Stability ------------------------- - #' @description Compute the stability path by stability selection - #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. - #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. - stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { - - ## select default subsamples according - if (is.null(subsamples)) { - subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) - subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) - } - - ## got for stability selection - cat("\nStability Selection for PLNnetwork: ") - cat("\nsubsampling: ") - - stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { - cat("+") - inception_ <- self$getModel(self$penalties[1]) - inception_$update( - M = inception_$var_par$M[subsample, ], - S = inception_$var_par$S[subsample, ] - ) - - ## force some control parameters - control$inception = inception_ - control$penalty_weights = map(self$models, "penalty_weights") - control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) - control$trace <- 0 - control$config_optim$trace <- 0 - - myPLN <- PLNnetworkfamily$new(penalties = self$penalties, - responses = self$responses [subsample, , drop = FALSE], - covariates = self$covariates[subsample, , drop = FALSE], - offsets = self$offsets [subsample, , drop = FALSE], - formula = private$formula, - weights = self$weights [subsample], control = control) - - myPLN$optimize(control$config_optim) - nets <- do.call(cbind, lapply(myPLN$models, function(model) { - as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] - })) - nets - }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) - - prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) - ## formatting/tyding - node_set <- colnames(self$getModel(index = 1)$model_par$B) - colnames(prob) <- self$penalties - private$stab_path <- prob %>% - as.data.frame() %>% - mutate(Edge = 1:n()) %>% - gather(key = "Penalty", value = "Prob", -Edge) %>% - mutate(Penalty = as.numeric(Penalty), - Node1 = node_set[edge_to_node(Edge)$node1], - Node2 = node_set[edge_to_node(Edge)$node2], - Edge = paste0(Node1, "|", Node2)) - - invisible(subsamples) - }, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Extractors ------------------------ - #' @description Extract the regularization path of a [`PLNnetworkfamily`] + #' @description Extract the regularization path of a [`Networkfamily`] #' @param precision Logical. Should the regularization path be extracted from the precision matrix Omega (`TRUE`, default) or from the variance matrix Sigma (`FALSE`) #' @param corr Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to `TRUE` coefficient_path = function(precision = TRUE, corr = TRUE) { @@ -231,8 +141,10 @@ PLNnetworkfamily <- R6Class( }, #' @description Extract the best network in the family according to some criteria - #' @param crit character. Criterion used to perform the selection. Is "StARS" is chosen but `$stability` field is empty, will compute stability path. + #' @param crit character. Criterion used to perform the selection. If "StARS" is chosen but `$stability` field is empty, will compute stability path. #' @param stability Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is `0.9`. + #' @details + #' For BIC and EBIC criteria, higher is better. getBestModel = function(crit = c("BIC", "EBIC", "StARS"), stability = 0.9){ crit <- match.arg(crit) if (crit == "StARS") { @@ -255,11 +167,11 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Graphical methods ----------------- - #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) + #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (a [`Networkfamily`]) #' @param criteria vector of characters. The criteria to plot in `c("loglik", "pen_loglik", "BIC", "EBIC")`. Defaults to all of them. #' @param reverse A logical indicating whether to plot the value of the criteria in the "natural" direction #' (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the - #' natural direction, on the same scale as the log-likelihood.. + #' natural direction, on the same scale as the log-likelihood. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @return a [`ggplot`] graph plot = function(criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE) { @@ -269,61 +181,61 @@ PLNnetworkfamily <- R6Class( p }, - #' @description Plot stability path - #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. - #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. - #' @return a [`ggplot`] graph - plot_stars = function(stability = 0.9, log.x = TRUE) { - if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") - dplot <- self$criteria %>% select(param, density, stability) %>% - rename(Penalty = param) %>% - gather(key = "Metric", value = "Value", stability:density) - penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% - pull(Penalty) %>% min() - - p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + - geom_point() + geom_line() + theme_bw() + - ## Add information correspinding to best lambda - geom_vline(xintercept = penalty_stars, linetype = 2) + - geom_hline(yintercept = stability, linetype = 2) + - annotate(x = penalty_stars, y = 0, - label = paste("lambda == ", round(penalty_stars, 5)), - parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + - annotate(x = penalty_stars, y = stability, - label = paste("stability == ", stability), - parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") - if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") - p - }, - - #' @description Plot objective value of the optimization problem along the penalty path - #' @return a [`ggplot`] graph - plot_objective = function() { - objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) - changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$outer_iterations))) - dplot <- data.frame(iteration = 1:length(objective), objective = objective) - p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + - geom_vline(xintercept = changes, linetype="dashed", alpha = 0.25) + - ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + - annotate("text", x = changes, y = min(dplot$objective), angle = 90, - label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() - p - }, - - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Print methods --------------------- - #' @description User friendly print method - show = function() { - super$show() - cat(" Task: Network Inference \n") - cat("========================================================\n") - cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") - cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") - cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") - if (!anyNA(self$criteria$stability)) - cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") - } + #' @description Plot stability path + #' @param stability scalar: the targeted level of stability using stability selection. Default is `0.9`. + #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. + #' @return a [`ggplot`] graph + plot_stars = function(stability = 0.9, log.x = TRUE) { + if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") + dplot <- self$criteria %>% select(param, density, stability) %>% + rename(Penalty = param) %>% + gather(key = "Metric", value = "Value", stability:density) + penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% + pull(Penalty) %>% min() + + p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + + geom_point() + geom_line() + theme_bw() + + ## Add information corresponding to best lambda + geom_vline(xintercept = penalty_stars, linetype = 2) + + geom_hline(yintercept = stability, linetype = 2) + + annotate(x = penalty_stars, y = 0, + label = paste("lambda == ", round(penalty_stars, 5)), + parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + + annotate(x = penalty_stars, y = stability, + label = paste("stability == ", stability), + parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") + if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") + p + }, + + #' @description Plot objective value of the optimization problem along the penalty path + #' @return a [`ggplot`] graph + plot_objective = function() { + objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) + changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$iterations))) + dplot <- data.frame(iteration = 1:length(objective), objective = objective) + p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + + geom_vline(xintercept = changes, linetype = "dashed", alpha = 0.25) + + ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + + annotate("text", x = changes, y = min(dplot$objective), angle = 90, + label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() + p + }, + + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Print methods --------------------- + #' @description User friendly print method + show = function() { + super$show() + cat(" Task: Network Inference \n") + cat("========================================================\n") + cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") + cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") + cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") + if (!anyNA(self$criteria$stability)) + cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") + } ), ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -331,6 +243,7 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% private = list( + penalties_weights = NULL, # a field to store the weights for each penalty, stab_path = NULL # a field to store the stability path, ), @@ -347,16 +260,16 @@ PLNnetworkfamily <- R6Class( if (!is.null(private$stab_path)) { stability <- self$stability_path %>% dplyr::select(Penalty, Prob) %>% - group_by(Penalty) %>% - summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% - arrange(desc(Penalty)) %>% - pull(Stability) + dplyr::group_by(Penalty) %>% + dplyr::summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% + dplyr::arrange(desc(Penalty)) %>% + dplyr::pull(Stability) } else { stability <- rep(NA, length(self$penalties)) } stability }, - #' @field criteria a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits + #' @field criteria a data frame with the values of some criteria (variational log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits #' BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty criteria = function() {mutate(super$criteria, stability = self$stability)} ) @@ -366,3 +279,279 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ) +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS PLNnetworkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to represent a collection of [`PLNnetworkfit`]s +#' +#' @description The function [PLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods mostly used to compare +#' network fits (in terms of goodness of fit) or extract one from +#' the family (based on penalty parameter and/or goodness of it). +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.Networkfamily()] for the user-facing ones. +#' +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param data a named list used internally to carry the data matrices +#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param control a list for controlling the optimization. +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] +PLNnetworkfamily <- R6Class( + classname = "PLNnetworkfamily", + inherit = Networkfamily, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, data, control) { + + ## A basic model (constrained model) for inception, ignored if inception is provided by the user + if (is.null(control$inception)) { + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN <- switch( + control$inception_cov, + "spherical" = PLNfit_spherical$new(data$Y, data$X, data$O, data$w, data$formula, control), + "diagonal" = PLNfit_diagonal$new(data$Y, data$X, data$O, data$w, data$formula, control), + PLNfit$new(data$Y, data$X, data$O, data$w, data$formula, control) # defaults to full + ) + myPLN$optimize(data$Y, data$X, data$O, data$w, control$config_optim) + control$inception <- myPLN + } + + ## Initialize fields shared by the super class + super$initialize(penalties, data, control) + + ## instantiate one model per penalty + control$trace <- 0 + self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { + control$penalty <- penalty + control$penalty_weights <- penalty_weights + PLNnetworkfit$new(data, control) + }) + + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to [`PLNnetwork()`]. See [PLNnetwork()] and [PLN_param()] for details. + stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { + + ## select default subsamples according to Liu et al. (2010) recommendations. + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for PLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + + data <- list( + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates[subsample, , drop = FALSE], + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample]) + + myPLN <- PLNnetworkfamily$new(self$penalties, data, control) + myPLN$optimize(data, control$config_optim) + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + } + ) + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) + +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS PLNnetworkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to represent a collection of ZIPLNnetwork +#' +#' @description The function [ZIPLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods, some of them being useful for the user: +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.ZIPLNnetworkfamily()] +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param data a named list used internally to carry the data matrices +#' @param control a list for controlling the optimization. +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [ZIPLNnetwork()], the class [`ZIPLNfit_sparse`] +ZIPLNnetworkfamily <- R6Class( + classname = "ZIPLNnetworkfamily", + inherit = Networkfamily, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + #' @field covariates0 the matrix of covariates included in the ZI component + covariates0 = NULL, # covariates used in the ZI component + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, data, control) { + + ## A basic model for inception, useless one is defined by the user + if (is.null(control$inception)) { + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN <- switch( + control$inception_cov, + "spherical" = ZIPLNfit_spherical$new(data, control), + "diagonal" = ZIPLNfit_diagonal$new(data, control), + ZIPLNfit$new(data, control) # defaults to full + ) + myPLN$optimize(data, control$config_optim) + control$inception <- myPLN + } + + ## Initialize fields shared by the super class + super$initialize(penalties, data, control) + self$covariates0 <- data$X0 + + ## instantiate one model per penalty + control$trace <- 0 + self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { + control$penalty <- penalty + control$penalty_weights <- penalty_weights + ZIPLNfit_sparse$new(data, control) + }) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to [`PLNnetwork()`]. See [ZIPLNnetwork()] and [ZIPLN_param()] for details. + stability_selection = function(subsamples = NULL, control = ZIPLNnetwork_param()) { + + ## select default subsamples according to Liu et al. (2010) recommendations. + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for ZIPLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + R = inception_$var_par$R[subsample, ], + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + control$ziparam <- inception_$zi_model + X0 <- self$covariates0 + if (nrow(X0) > 0) X0 <- X0[subsample, , drop = FALSE] + data <- list( + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates [subsample, , drop = FALSE], + X0 = X0, + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample]) + + myPLN <- ZIPLNnetworkfamily$new(self$penalties, data, control) + myPLN$optimize(data, control$config_optim) + + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + } + ) + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 1dff42da..312c16df 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -5,20 +5,16 @@ #' See the documentation for [`plot()`][plot.PLNnetworkfit()] and methods inherited from [`PLNfit`]. #' ## Parameters common to all PLN-xx-fit methods (shared with PLNfit but inheritance does not work) -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but not latent variable. -#' @param B matrix of regression matrix +#' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but no latent variable. +#' @param B matrix of regression coefficients #' @param Sigma variance-covariance matrix of the latent variables #' @param Omega precision matrix of the latent variables. Inverse of Sigma. #' ## Parameters specific to PLNnetwork-fit methods #' @param penalty a positive real number controlling the level of sparsity of the underlying network. -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pair of node. Must be symmetric with positive values. #' #' @include PLNnetworkfit-class.R #' @examples @@ -41,40 +37,27 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize a [`PLNnetworkfit`] object - initialize = function(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) { - stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) - super$initialize(responses, covariates, offsets, weights, formula, control) - private$lambda <- penalty - private$rho <- penalty_weights - if (!control$penalize_diagonal) diag(private$rho) <- 0 - }, - #' @description Update fields of a [`PLNnetworkfit`] object - #' @param B matrix of regression matrix - #' @param Sigma variance-covariance matrix of the latent variables - #' @param Omega precision matrix of the latent variables. Inverse of Sigma. - #' @param M matrix of mean vectors for the variational approximation - #' @param S matrix of variance vectors for the variational approximation - #' @param Z matrix of latent vectors (includes covariates and offset effects) - #' @param A matrix of fitted values - #' @param Ji vector of variational lower bounds of the log-likelihoods (one value per sample) - #' @param R2 approximate R^2 goodness-of-fit criterion - #' @param monitoring a list with optimization monitoring quantities - update = function(penalty=NA, B=NA, Sigma=NA, Omega=NA, M=NA, S=NA, Z=NA, A=NA, Ji=NA, R2=NA, monitoring=NA) { - super$update(B = B, Sigma = Sigma, Omega = Omega, M, S = S, Z = Z, A = A, Ji = Ji, R2 = R2, monitoring = monitoring) - if (!anyNA(penalty)) private$lambda <- penalty + initialize = function(data, control) { + super$initialize(data$Y, data$X, data$O, data$w, data$formula, control) + ## Default for penalty weights (if not already set) + if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) + stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(control$penalty_weights) <- 0 + private$lambda <- control$penalty + private$rho <- control$penalty_weights }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Optimization ---------------------- #' @description Call to the C++ optimizer and update of the relevant fields #' @param config a list for controlling the optimization - optimize = function(responses, covariates, offsets, weights, config) { + optimize = function(data, config) { cond <- FALSE; iter <- 0 objective <- numeric(config$maxit_out) convergence <- numeric(config$maxit_out) ## start from the standard PLN at initialization objective.old <- -self$loglik - args <- list(data = list(Y = responses, X = covariates, O = offsets, w = weights), + args <- list(data = data, params = list(B = private$B, M = private$M, S = private$S), config = config) while (!cond) { @@ -90,7 +73,7 @@ PLNnetworkfit <- R6Class( do.call(self$update, optim_out) ## Check convergence - objective[iter] <- -self$loglik + self$penalty * sum(abs(private$Omega)) + objective[iter] <- -self$loglik # + self$penalty * sum(abs(private$Omega)) convergence[iter] <- abs(objective[iter] - objective.old)/abs(objective[iter]) if ((convergence[iter] < config$ftol_out) | (iter >= config$maxit_out)) cond <- TRUE @@ -102,9 +85,9 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## OUTPUT private$Sigma <- Matrix::symmpart(glasso_out$w) - private$monitoring$objective <- objective[1:iter] - private$monitoring$convergence <- convergence[1:iter] - private$monitoring$outer_iterations <- iter + private$monitoring$objective <- objective[1:iter] + private$monitoring$convergence <- convergence[1:iter] + private$monitoring$iterations <- iter }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -145,51 +128,14 @@ PLNnetworkfit <- R6Class( node.labels = NULL, layout = layout_in_circle, plot = TRUE) { - - type <- match.arg(type) - output <- match.arg(output) - - net <- self$latent_network(type) - - if (output == "igraph") { - - G <- graph_from_adjacency_matrix(net, mode = "undirected", weighted = TRUE, diag = FALSE) - - if (!is.null(node.labels)) { - igraph::V(G)$label <- node.labels - } else { - igraph::V(G)$label <- colnames(net) - } - ## Nice nodes - V.deg <- degree(G)/sum(degree(G)) - igraph::V(G)$label.cex <- V.deg / max(V.deg) + .5 - igraph::V(G)$size <- V.deg * 100 - igraph::V(G)$label.color <- rgb(0, 0, .2, .8) - igraph::V(G)$frame.color <- NA - ## Nice edges - igraph::E(G)$color <- ifelse(igraph::E(G)$weight > 0, edge.color[1], edge.color[2]) - if (type == "support") - igraph::E(G)$width <- abs(igraph::E(G)$weight) - else - igraph::E(G)$width <- 15*abs(igraph::E(G)$weight) - - if (remove.isolated) { - G <- delete.vertices(G, which(degree(G) == 0)) - } - if (plot) plot(G, layout = layout) - } - if (output == "corrplot") { - if (plot) { - if (ncol(net) > 100) - colnames(net) <- rownames(net) <- rep(" ", ncol(net)) - G <- net - diag(net) <- 0 - corrplot(as.matrix(net), method = "color", is.corr = FALSE, tl.pos = "td", cl.pos = "n", tl.cex = 0.5, type = "upper") - } else { - G <- net - } - } - invisible(G) + .plot_network(self$latent_network(match.arg(type)), + type = match.arg(type), + output = match.arg(output), + edge.color = edge.color, + remove.isolated = remove.isolated, + node.labels = node.labels, + layout = layout, + plot = plot) }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/RcppExports.R b/R/RcppExports.R index c2f80753..d1053c03 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -69,8 +69,12 @@ optim_zipln_zipar_covar <- function(R, init_B0, X0, configuration) { .Call('_PLNmodels_optim_zipln_zipar_covar', PACKAGE = 'PLNmodels', R, init_B0, X0, configuration) } -optim_zipln_R <- function(Y, X, O, M, S, Pi) { - .Call('_PLNmodels_optim_zipln_R', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi) +optim_zipln_R_var <- function(Y, X, O, M, S, Pi, B) { + .Call('_PLNmodels_optim_zipln_R_var', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi, B) +} + +optim_zipln_R_exact <- function(Y, X, O, M, S, Pi, B) { + .Call('_PLNmodels_optim_zipln_R_exact', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi, B) } optim_zipln_M <- function(init_M, Y, X, O, R, S, B, Omega, configuration) { diff --git a/R/ZIPLN.R b/R/ZIPLN.R index f5fe288e..7b84ddf2 100644 --- a/R/ZIPLN.R +++ b/R/ZIPLN.R @@ -43,23 +43,23 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control = ZIPLN_param()) { ## extract the data matrices and weights - args <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) - control$ziparam <- ifelse((args$zicovar), "covar", match.arg(zi)) + data_ <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((data_$zicovar), "covar", match.arg(zi)) ## initialization if (control$trace > 0) cat("\n Initialization...") myPLN <- switch(control$covariance, - "diagonal" = ZIPLNfit_diagonal$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "spherical" = ZIPLNfit_spherical$new(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "fixed" = ZIPLNfit_fixed$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "sparse" = ZIPLNfit_sparse$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - ZIPLNfit$new(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control)) # default: full covariance + "diagonal" = ZIPLNfit_diagonal$new(data_, control), + "spherical" = ZIPLNfit_spherical$new(data_, control), + "fixed" = ZIPLNfit_fixed$new(data_, control), + "sparse" = ZIPLNfit_sparse$new(data_, control), + ZIPLNfit$new(data_, control)) # default: full covariance ## optimization if (control$trace > 0) cat("\n Adjusting a ZI-PLN model with", control$covariance,"covariance model and", control$ziparam, "specific parameter(s) in Zero inflation component.") - myPLN$optimize(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, control$config_optim) + myPLN$optimize(data_, control$config_optim) if (control$trace > 0) cat("\n DONE!\n") myPLN @@ -73,14 +73,16 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control #' Helper to define list of parameters to control the PLN fit. All arguments have defaults. #' #' @inheritParams PLN_param +#' @inheritParams PLNnetwork_param #' @param penalty a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity). #' @return list of parameters used during the fit and post-processing steps #' #' @inherit PLN_param details -#' @details See [PLN_param()] for a full description of the generic optimization parameters. ZIPLN_param() also has two additional parameters controlling the optimization due -#' the inner-outer loop structure of the optimizer: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-8 +#' @details See [PLN_param()] and [PLNnetwork_param()] for a full description of the generic optimization parameters. Like [PLNnetwork_param()], ZIPLN_param() has two parameters controlling the optimization due the inner-outer loop structure of the optimizer: +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-6 #' * "maxit_out" outer solver stops when the number of iteration exceeds `maxit_out`. Default is 100 +#' and one additional parameter controlling the form of the variational approximation of the zero inflation: +#' * "approx_ZI" either uses an exact or approximated conditional distribution for the zero inflation. Default is FALSE #' #' @export ZIPLN_param <- function( @@ -89,6 +91,8 @@ ZIPLN_param <- function( covariance = c("full", "diagonal", "spherical", "fixed", "sparse"), Omega = NULL, penalty = 0, + penalize_diagonal = TRUE , + penalty_weights = NULL , config_post = list(), config_optim = list(), inception = NULL # pretrained ZIPLNfit used as initialization @@ -113,6 +117,7 @@ ZIPLN_param <- function( config_opt$trace <- trace config_opt$ftol_out <- 1e-6 config_opt$maxit_out <- 100 + config_opt$approx_ZI <- FALSE config_opt[names(config_optim)] <- config_optim structure(list( @@ -121,6 +126,8 @@ ZIPLN_param <- function( covariance = covariance, Omega = Omega , penalty = penalty , + penalize_diagonal = penalize_diagonal, + penalty_weights = penalty_weights , config_post = config_pst, config_optim = config_opt, inception = inception), class = "PLNmodels_param") diff --git a/R/ZIPLNfit-S3methods.R b/R/ZIPLNfit-S3methods.R index 6fd24af8..d6aabb84 100644 --- a/R/ZIPLNfit-S3methods.R +++ b/R/ZIPLNfit-S3methods.R @@ -95,3 +95,40 @@ sigma.ZIPLNfit <- function(object, ...) { object$model_par$Sigma } +## ========================================================================================= +## +## PUBLIC S3 METHODS FOR ZIPLNfit_sparse +## +## ========================================================================================= + +## Auxiliary functions to check the given class of an objet +isZIPLNfit_sparse <- function(Robject) {inherits(Robject, "ZIPLNfit_sparse")} + +#' Extract and plot the network (partial correlation, support or inverse covariance) from a [`ZIPLNfit_sparse`] object +#' +#' @name plot.ZIPLNfit_sparse +#' @inheritParams plot.PLNnetworkfit +#' @param x an R6 object with class [`ZIPLNfit_sparse`] +#' +#' @inherit plot.PLNnetworkfit return +#' +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) +#' \dontrun{ +#' plot(fit) +#' } +#' @export +plot.ZIPLNfit_sparse <- + function(x, + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE, ...) { + stopifnot(isZIPLNfit_sparse(x)) + invisible(x$plot_network(type, output, edge.color, remove.isolated, node.labels, layout, plot)) + } diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 3ab9e286..567e7a95 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -1,3 +1,7 @@ +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS ZIPLNfit ----- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + #' An R6 Class to represent a ZIPLNfit #' #' @description The function [ZIPLN()] fits a model which is an instance of an object with class [`ZIPLNfit`]. @@ -7,11 +11,7 @@ #' #' Fields are accessed via active binding and cannot be changed by the user. #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @inherit ZIPLN details @@ -69,59 +69,66 @@ ZIPLNfit <- R6Class( #' @description Initialize a [`ZIPLNfit`] model #' @importFrom stats glm.fit residuals poisson fitted coef #' @importFrom pscl zeroinfl - initialize = function(responses, covariates, offsets, weights, formula, control) { + initialize = function(data, control) { ## problem dimensions - n <- nrow(responses); p <- ncol(responses); d <- ncol(covariates$PLN); d0 <- ncol(covariates$ZI) + n <- nrow(data$Y); p <- ncol(data$Y); d <- ncol(data$X); d0 <- ncol(data$X0) ## save the formula call as specified by the user - private$formula <- formula - private$X <- covariates$PLN - private$X0 <- covariates$ZI + private$formula <- data$formula + private$X <- data$X + private$X0 <- data$X0 ## initialize the covariance model private$covariance <- control$covariance - private$ziparam <- control$ziparam - - R <- matrix(0, n, p) - M <- matrix(0, n, p) - B <- matrix(0, d , p) - B0 <- matrix(0, d0, p) - ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN - for (j in 1:p) { - y = responses[, j] - if (min(y) == 0) { - suppressWarnings( - zip_out <- switch(control$ziparam, - "row" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + factor(1:n), offset = offsets[, j]), - "covar" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + covariates$ZI , offset = offsets[, j]), - pscl::zeroinfl(y ~ 0 + covariates$PLN | 1, offset = offsets[, j])) # offset only for the count model - ) - B0[,j] <- coef(zip_out, "zero") - B[,j] <- coef(zip_out, "count") - R[, j] <- predict(zip_out, type = "zero") - M[,j] <- residuals(zip_out) + covariates$PLN %*% coef(zip_out, "count") - } else { - p_out <- glm(y ~ 0 + covariates$PLN, family = 'poisson', offset = offsets[, j]) - B0[,j] <- rep(-10, d) - B[,j] <- coef(p_out) - R[, j] <- 0 - M[,j] <- residuals(p_out) + covariates$PLN %*% coef(p_out) + private$ziparam <- control$ziparam + + if (isZIPLNfit(control$inception)) { + private$R <- control$inception$var_par$R + private$M <- control$inception$var_par$M + private$S <- control$inception$var_par$S + private$B <- control$inception$model_par$B + private$B0 <- control$inception$model_par$B0 + } else { + R <- matrix(0, n, p) + M <- matrix(0, n, p) + B <- matrix(0, d , p) + B0 <- matrix(0, d0, p) + ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN + for (j in 1:p) { + y = data$Y[, j] + if (min(y) == 0) { + suppressWarnings( + zip_out <- switch(control$ziparam, + "row" = pscl::zeroinfl(y ~ 0 + data$X | 0 + factor(1:n), offset = data$O[, j]), + "covar" = pscl::zeroinfl(y ~ 0 + data$X | 0 + data$X0 , offset = data$O[, j]), + pscl::zeroinfl(y ~ 0 + data$X | 1, offset = data$O[, j])) # offset only for the count model + ) + B0[,j] <- coef(zip_out, "zero") + B[,j] <- coef(zip_out, "count") + R[, j] <- predict(zip_out, type = "zero") + M[,j] <- residuals(zip_out) + data$X %*% coef(zip_out, "count") + } else { + p_out <- glm(y ~ 0 + data$X, family = 'poisson', offset = data$O[, j]) + B0[,j] <- rep(-10, d) + B[,j] <- coef(p_out) + R[, j] <- 0 + M[,j] <- residuals(p_out) + data$X %*% coef(p_out) + } } - } - ## Initialization of the ZI component - private$R <- R + ## Initialization of the ZI component + private$R <- R + private$B0 <- B0 + ## Initialization of the PLN component + private$B <- B + private$M <- M + private$S <- matrix(.1, n, p) + } private$Pi <- switch(control$ziparam, - "single" = matrix( mean(R), n, p) , - "row" = matrix(rowMeans(R), n, p) , - "col" = matrix(colMeans(R), n, p, byrow = TRUE), - "covar" = R) - private$B0 <- B0 - private$zeros <- 1 * (responses == 0) - - ## Initialization of the PLN component - private$B <- B - private$M <- M - private$S <- matrix(.1, n, p) + "single" = matrix( mean(private$R), n, p) , + "row" = matrix(rowMeans(private$R), n, p) , + "col" = matrix(colMeans(private$R), n, p, byrow = TRUE), + "covar" = private$R) + private$zeros <- 1 * (data$Y == 0) ## Link to functions performing the optimization private$optimizer$B <- function(M, X) optim_zipln_B_dense(M, X) @@ -132,15 +139,15 @@ ZIPLNfit <- R6Class( "col" = function(R, ...) list(Pi = matrix(colMeans(R), nrow(R), p, byrow = TRUE), B0 = matrix(NA, d0, p)), "covar" = optim_zipln_zipar_covar ) + private$optimizer$R <- ifelse(control$config_optim$approx_ZI, optim_zipln_R_var, optim_zipln_R_exact) private$optimizer$Omega <- optim_zipln_Omega_full }, #' @description Call to the Cpp optimizer and update of the relevant fields #' @param control a list for controlling the optimization. See details. - optimize = function(responses, covariates, offsets, weights, control) { + optimize = function(data, control) { - data <- list(Y = responses, X = covariates$PLN, X0 = covariates$ZI, O = offsets) parameters <- list(Omega = NA, B0 = private$B0, B = private$B, Pi = private$Pi, M = private$M, S = private$S, R = private$R) @@ -179,9 +186,8 @@ ZIPLNfit <- R6Class( ### VE Step # ZI part - new_R <- optim_zipln_R( - Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = new_Pi - ) + new_R <- private$optimizer$R(Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = new_Pi, B = new_B) + # PLN part new_M <- optim_zipln_M( init_M = parameters$M, @@ -238,8 +244,8 @@ ZIPLNfit <- R6Class( M = parameters$M, S = parameters$S, R = parameters$R, - Z = offsets + parameters$M, - A = exp(offsets + parameters$M + .5 * parameters$S^2), + Z = data$O + parameters$M, + A = exp(data$O + parameters$M + .5 * parameters$S^2), Ji = vloglik, monitoring = list( iterations = nb_iter, @@ -249,12 +255,14 @@ ZIPLNfit <- R6Class( ) ### TODO: Should be in post-treatment - if (is.null(colnames(responses))) colnames(responses) <- paste0("Y", 1:self$p) - colnames(private$B0) <- colnames(private$B) <- colnames(responses) - rownames(private$B0) <- rownames(private$B) <- colnames(covariates) - rownames(private$Omega) <- colnames(private$Omega) <- colnames(private$Pi) <- colnames(responses) + colnames_Y <- colnames(data$Y) + if (is.null(colnames_Y)) colnames_Y <- paste0("Y", 1:self$p) + colnames(private$B0) <- colnames(private$B) <- colnames_Y + rownames(private$B) <- colnames(data$X) + rownames(private$B0) <- colnames(data$X0) + rownames(private$Omega) <- colnames(private$Omega) <- colnames(private$Pi) <- colnames_Y dimnames(private$Sigma) <- dimnames(private$Omega) - rownames(private$M) <- rownames(private$S) <- rownames(private$R) <- rownames(private$Pi) <- rownames(responses) + rownames(private$M) <- rownames(private$S) <- rownames(private$R) <- rownames(private$Pi) <- rownames(data$Y) }, @@ -268,13 +276,12 @@ ZIPLNfit <- R6Class( #' * the matrix `R` of variational ZI probabilities #' * the vector `Ji` of (variational) log-likelihood of each new observation #' * a list `monitoring` with information about convergence status - optimize_vestep = function(covariates, offsets, responses, weights, + optimize_vestep = function(data, B = self$model_par$B, B0 = self$model_par$B0, Omega = self$model_par$Omega, control = ZIPLN_param(backend = "nlopt")$config_optim) { - n <- nrow(responses) - data <- list(Y = responses, X = covariates$PLN, X0 = covariates$ZI, O = offsets) + n <- nrow(data$Y) parameters <- list(M = matrix(0, n, self$p), S = matrix(0.1, n, self$p), R = matrix(0, n, self$p)) @@ -300,8 +307,8 @@ ZIPLNfit <- R6Class( )$Pi # VE Step - new_R <- optim_zipln_R( - Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = Pi + new_R <- private$optimizer$R( + Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = Pi, B = B ) new_M <- optim_zipln_M( init_M = parameters$M, @@ -399,10 +406,9 @@ ZIPLNfit <- R6Class( ## Optimize M and S if responses are provided, if (level == 1) { VE <- self$optimize_vestep( - covariates = list(PLN = X, ZI = X0), - offsets = O, - responses = as.matrix(responses), - weights = rep(1, n_new), + data = list( + Y = as.matrix(responses), X = X, X0 = X0, O = O, w = rep(1, n_new) + ), B = private$B, B0 = private$B0, Omega = private$Omega @@ -495,16 +501,21 @@ ZIPLNfit <- R6Class( d = function() {nrow(private$B)}, #' @field d0 number of covariates in the ZI part d0 = function() {nrow(private$B0)}, - #' @field nb_param number of parameters in the current PLN model + #' @field nb_param_zi number of parameters in the ZI part of the model + nb_param_zi = function() { + as.integer(switch(private$ziparam, + "single" = 1L, + "row" = self$n, + "col" = self$p, + "covar" = self$p * self$d)) + }, + #' @field nb_param_pln number of parameters in the PLN part of the model + nb_param_pln = function() { + as.integer(self$p * self$d + self$p * (self$p + 1L) / 2L) + }, + #' @field nb_param number of parameters in the ZIPLN model nb_param = function() { - as.integer( - self$p * self$d + self$p * (self$p + 1L)/2L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - ) + self$nb_param_zi + self$nb_param_pln }, #' @field model_par a list with the matrices of parameters found in the model (B, Sigma, plus some others depending on the variant) model_par = function() {list(B = private$B, B0 = private$B0, Pi = private$Pi, Omega = private$Omega, Sigma = private$Sigma)}, @@ -552,11 +563,7 @@ ZIPLNfit <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with diagonal residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -575,21 +582,15 @@ ZIPLNfit_diagonal <- R6Class( inherit = ZIPLNfit, public = list( #' @description Initialize a [`ZIPLNfit_diagonal`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$optimizer$Omega <- optim_zipln_Omega_diagonal } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + self$p + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + self$p) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"diagonal"} @@ -605,11 +606,7 @@ ZIPLNfit_diagonal <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with spherical residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -628,21 +625,15 @@ ZIPLNfit_spherical <- R6Class( inherit = ZIPLNfit, public = list( #' @description Initialize a [`ZIPLNfit_spherical`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$optimizer$Omega <- optim_zipln_Omega_spherical } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + 1L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + 1L) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"spherical"} @@ -658,11 +649,7 @@ ZIPLNfit_spherical <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with fixed (inverse) residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -685,22 +672,16 @@ ZIPLNfit_fixed <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( #' @description Initialize a [`ZIPLNfit_fixed`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$Omega <- control$Omega private$optimizer$Omega <- function(M, X, B, S) {private$Omega} } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + 0L) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"fixed"} @@ -716,11 +697,7 @@ ZIPLNfit_fixed <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with sparse inverse residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -730,40 +707,113 @@ ZIPLNfit_fixed <- R6Class( #' # See other examples in function ZIPLN #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 0.2)) +#' myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 1)) #' class(myPLN) #' print(myPLN) +#' plot(myPLN) #' } ZIPLNfit_sparse <- R6Class( classname = "ZIPLNfit_sparse", inherit = ZIPLNfit, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PRIVATE MEMBERS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + private = list( + lambda = NA, # the sparsity tuning parameter + rho = NA # the p x p penalty weight + ), + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( #' @description Initialize a [`ZIPLNfit_fixed`] model #' @importFrom glassoFast glassoFast - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) + ## Default for penalty weights (if not already set) + if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) + stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(control$penalty_weights) <- 0 + private$lambda <- control$penalty + private$rho <- control$penalty_weights private$optimizer$Omega <- function(M, X, B, S) { - glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), rho = control$penalty )$wi + glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), + rho = private$lambda * private$rho )$wi + } + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Extractors ------------------------ + #' @description Extract interaction network in the latent space + #' @param type edge value in the network. Can be "support" (binary edges), "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species) + #' @importFrom Matrix Matrix + #' @return a square matrix of size `ZIPLNfit_sparse$n` + latent_network = function(type = c("partial_cor", "support", "precision")) { + net <- switch( + match.arg(type), + "support" = 1 * (private$Omega != 0 & !diag(TRUE, ncol(private$Omega))), + "precision" = private$Omega, + "partial_cor" = { + tmp <- -private$Omega / tcrossprod(sqrt(diag(private$Omega))); diag(tmp) <- 1 + tmp } + ) + ## Enforce sparse Matrix encoding to avoid downstream problems with igraph::graph_from_adjacency_matrix + ## as it fails when given dsyMatrix objects + Matrix(net, sparse = TRUE) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Graphical methods------------------ + #' @description plot the latent network. + #' @param type edge value in the network. Either "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species). + #' @param output Output type. Either `igraph` (for the network) or `corrplot` (for the adjacency matrix) + #' @param edge.color Length 2 color vector. Color for positive/negative edges. Default is `c("#F8766D", "#00BFC4")`. Only relevant for igraph output. + #' @param node.labels vector of character. The labels of the nodes. The default will use the column names ot the response matrix. + #' @param remove.isolated if `TRUE`, isolated node are remove before plotting. Only relevant for igraph output. + #' @param layout an optional igraph layout. Only relevant for igraph output. + #' @param plot logical. Should the final network be displayed or only sent back to the user. Default is `TRUE`. + plot_network = function(type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE) { + .plot_network(self$latent_network(match.arg(type)), + type = match.arg(type), + output = match.arg(output), + edge.color = edge.color, + remove.isolated = remove.isolated, + node.labels = node.labels, + layout = layout, + plot = plot) } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + (sum(private$Omega != 0) - self$p)/2L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field penalty the global level of sparsity in the current model + penalty = function() {private$lambda}, + #' @field penalty_weights a matrix of weights controlling the amount of penalty element-wise. + penalty_weights = function() {private$rho}, + #' @field n_edges number of edges if the network (non null coefficient of the sparse precision matrix) + n_edges = function() {sum(private$Omega[upper.tri(private$Omega, diag = FALSE)] != 0)}, + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + self$n_edges + self$p) }, #' @field vcov_model character: the model used for the residual covariance - vcov_model = function() {"sparse"} + vcov_model = function() {"sparse"}, + #' @field pen_loglik variational lower bound of the l1-penalized loglikelihood + pen_loglik = function() {self$loglik - private$lambda * sum(abs(private$Omega))}, + #' @field EBIC variational lower bound of the EBIC + EBIC = function() {self$BIC - .5 * ifelse(self$n_edges > 0, self$n_edges * log(.5 * self$p*(self$p - 1)/self$n_edges), 0)}, + #' @field density proportion of non-null edges in the network + density = function() {mean(self$latent_network("support"))}, + #' @field criteria a vector with loglik, penalized loglik, BIC, EBIC, ICL, R_squared, number of parameters, number of edges and graph density + criteria = function() {data.frame(super$criteria, n_edges = self$n_edges, EBIC = self$EBIC, pen_loglik = self$pen_loglik, density = self$density)} ) ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## END OF THE CLASS ZIPLNfit_sparse diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R new file mode 100644 index 00000000..3f06a417 --- /dev/null +++ b/R/ZIPLNnetwork.R @@ -0,0 +1,108 @@ +#' Zero Inflated Sparse Poisson lognormal model for network inference +#' +#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +#' Use the (g)lm syntax to specify the model (including covariates and offsets). +#' +#' @inheritParams PLNnetwork +#' @param control a list-like structure for controlling the optimization, with default generated by [ZIPLNnetwork_param()]. See the associated documentation +#' for details. +#' @param zi a character describing the model used for zero inflation, either of +#' - "single" (default, one parameter shared by all counts) +#' - "col" (one parameter per variable / feature) +#' - "row" (one parameter per sample / individual). +#' If covariates are specified in the formula RHS (see details) this parameter is ignored. +#' +#' @details +#' Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +#' (`~ PLN effect | ZI effect`) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +#' Note that different covariates can be used for each part. +#' +#' @return an R6 object with class [`ZIPLNnetworkfamily`] +#' +#' @include PLNnetworkfamily-class.R +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' myZIPLNs <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, zi = "single") +#' @seealso The classes [`ZIPLNfit`] and [`ZIPLNnetworkfamily`] +#' @export +ZIPLNnetwork <- function(formula, data, subset, weights, zi = c("single", "row", "col"), penalties = NULL, control = ZIPLNnetwork_param()) { + + ## extract the data matrices and weights + data_ <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((data_$zicovar), "covar", match.arg(zi)) + + ## initialization + if (control$trace > 0) cat("\n Initialization...") + myPLN <- ZIPLNnetworkfamily$new(penalties, data_, control) + + ## optimization + if (control$trace > 0) cat("\n Adjusting", + length(myPLN$penalties), "ZI-PLN with sparse inverse covariance estimation and", + control$ziparam, "specific parameter(s) in Zero inflation component.\n") + myPLN$optimize(data_, control$config_optim) + + if (control$trace > 0) cat("\n DONE!\n") + myPLN +} + +#' Control of ZIPLNnetwork fit +#' +#' Helper to define list of parameters to control the ZIPLNnetwork fit. All arguments have defaults. +#' +#' @inheritParams PLNnetwork_param +#' +#' @inherit PLN_param details return +#' @details See [PLNnetwork_param()] for a full description of the optimization parameters. Note that some defaults values are different than those used in [PLNnetwork_param()]: +#' * "ftol_out" (outer loop convergence tolerance the objective function) is set by default to 1e-6 +#' * "maxit_out" (max number of iterations for the outer loop) is set by default to 100 +#' +#' @seealso [PLNnetwork_param()] and [PLN_param()] +#' @export +ZIPLNnetwork_param <- function( + backend = c("nlopt"), + inception_cov = c("full", "spherical", "diagonal"), + trace = 1 , + n_penalties = 30 , + min_ratio = 0.1 , + penalize_diagonal = TRUE , + penalty_weights = NULL , + config_post = list(), + config_optim = list(), + inception = NULL +) { + + if (!is.null(inception)) stopifnot(isZIPLNfit(inception)) + + ## post-treatment config + config_pst <- config_post_default_PLNnetwork + config_pst[names(config_post)] <- config_post + config_pst$trace <- trace + + ## optimization config + stopifnot(backend %in% c("nlopt")) + stopifnot(config_optim$algorithm %in% available_algorithms_nlopt) + config_opt <- config_default_nlopt + config_opt$trace <- trace + config_opt$ftol_out <- 1e-6 + config_opt$maxit_out <- 100 + config_opt$approx_ZI <- FALSE + config_opt[names(config_optim)] <- config_optim + inception_cov <- match.arg(inception_cov) + + structure(list( + backend = backend , + trace = trace , + inception_cov = inception_cov , + n_penalties = n_penalties , + min_ratio = min_ratio , + penalize_diagonal = penalize_diagonal, + penalty_weights = penalty_weights , + jackknife = FALSE , + bootstrap = 0 , + variance = FALSE , + config_post = config_pst , + config_optim = config_opt , + inception = inception ), class = "ZIPLNmodels_param") +} diff --git a/R/plot_utils.R b/R/plot_utils.R index 647dea8c..51df0dbf 100644 --- a/R/plot_utils.R +++ b/R/plot_utils.R @@ -185,3 +185,54 @@ plot_matrix = function(Mat, rowFG = "sample", colFG = "variable", clustering = N } g } + +#' @importFrom grDevices rgb +.plot_network = function(net , + type , + output , + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE) { + + if (output == "igraph") { + + G <- graph_from_adjacency_matrix(net, mode = "undirected", weighted = TRUE, diag = FALSE) + + if (!is.null(node.labels)) { + igraph::V(G)$label <- node.labels + } else { + igraph::V(G)$label <- colnames(net) + } + ## Nice nodes + V.deg <- degree(G)/sum(degree(G)) + igraph::V(G)$label.cex <- V.deg / max(V.deg) + .5 + igraph::V(G)$size <- V.deg * 100 + igraph::V(G)$label.color <- rgb(0, 0, .2, .8) + igraph::V(G)$frame.color <- NA + ## Nice edges + igraph::E(G)$color <- ifelse(igraph::E(G)$weight > 0, edge.color[1], edge.color[2]) + if (type == "support") + igraph::E(G)$width <- abs(igraph::E(G)$weight) + else + igraph::E(G)$width <- 15*abs(igraph::E(G)$weight) + + if (remove.isolated) { + G <- delete.vertices(G, which(degree(G) == 0)) + } + if (plot) plot(G, layout = layout) + } + if (output == "corrplot") { + if (plot) { + if (ncol(net) > 100) + colnames(net) <- rownames(net) <- rep(" ", ncol(net)) + G <- net + diag(net) <- 0 + corrplot(as.matrix(net), method = "color", is.corr = FALSE, tl.pos = "td", cl.pos = "n", tl.cex = 0.5, type = "upper") + } else { + G <- net + } + } + invisible(G) +} diff --git a/README.Rmd b/README.Rmd index 2516078d..3ce56d45 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/PLNmodels)](https://cran.r-project.org/package=PLNmodels) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![](https://img.shields.io/github/last-commit/pln-team/PLNmodels.svg)](https://github.com/pln-team/PLNmodels/commits/master) +[![R-CMD-check](https://github.com/PLN-team/PLNmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/PLN-team/PLNmodels/actions/workflows/R-CMD-check.yaml) > The Poisson lognormal model and variants can be used for a variety of multivariate problems when count data are at play (including PCA, LDA and network inference for count data). This package implements efficient algorithms to fit such models accompanied with a set of functions for visualization and diagnostic. See [this deck of slides](https://pln-team.github.io/slideshow/slides) for a comprehensive introduction. diff --git a/_pkgdown.yml b/_pkgdown.yml index 3a19da12..a1fc70e4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,6 +44,7 @@ reference: - '`PLNLDA`' - '`PLNPCA`' - '`PLNnetwork`' + - '`ZIPLNnetwork`' - '`PLNmixture`' - title: 'Poisson lognormal fit' desc: > @@ -69,6 +70,7 @@ reference: - '`sigma.ZIPLNfit`' - '`predict.ZIPLNfit`' - '`fitted.ZIPLNfit`' + - '`plot.ZIPLNfit_sparse`' - title: 'Linear discriminant analysis via a Poisson lognormal fit' desc: > Description of the PLNLDAfit object and methods for its manipulation. @@ -104,13 +106,17 @@ reference: - '`fitted.PLNmixturefit`' - '`getBestModel.PLNmixturefamily`' - '`getModel.PLNmixturefamily`' -- title: 'Sparse Poisson lognormal fit and network' +- title: 'Sparse Poisson lognormal fit and network, w/o Zero Inflated component' desc: > - Description of the PLNnetworkfit and PLNnetworkfamily objects and methods for their manipulation. + Description of the (ZI)PLNnetworkfit and (ZI)PLNnetworkfamily objects and methods for their manipulation. contents: - starts_with('PLNnetworkfit') - '`PLNnetwork_param`' + - '`ZIPLNnetwork_param`' - '`plot.PLNnetworkfit`' + - '`plot.ZIPLNfit_sparse`' + - '`Networkfamily`' + - '`ZIPLNnetworkfamily`' - '`PLNnetworkfamily`' - '`plot.PLNnetworkfamily`' - '`getBestModel.PLNnetworkfamily`' diff --git a/inst/case_studies/scRNA.R b/inst/case_studies/scRNA.R index ddb89b2d..659be1fb 100644 --- a/inst/case_studies/scRNA.R +++ b/inst/case_studies/scRNA.R @@ -5,7 +5,7 @@ data(scRNA) # data subsample: only 500 random cell and the 200 most varying transcript scRNA <- scRNA[sample.int(nrow(scRNA), 500), ] scRNA$counts <- scRNA$counts[, 1:200] -myZIPLN <- ZIPLN(counts ~ 1 + offset(log(total_counts)), data = scRNA) +myZIPLN <- ZIPLN(counts ~ 1 + offset(log(total_counts)), zi = "col", data = scRNA) myPLN <- PLN(counts ~ 1 + offset(log(total_counts)), data = scRNA) data.frame( diff --git a/inst/simus_ZIPLN/essai_ZIPLN.R b/inst/simus_ZIPLN/essai_ZIPLN.R index 1cb9bdcf..bf494ff2 100644 --- a/inst/simus_ZIPLN/essai_ZIPLN.R +++ b/inst/simus_ZIPLN/essai_ZIPLN.R @@ -133,8 +133,8 @@ p <- ggplot(res) + aes(x = factor(n), y = pred_Y, fill = factor(method)) + geom_ scale_y_log10() + ylim(c(0,2)) p -p <- ggplot(res) + aes(x = factor(n), y = rmse_B, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(2.75,3)) +p <- ggplot(res) + aes(x = factor(n), y = rmse_B, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(2,5)) p -p <- ggplot(res) + aes(x = factor(n), y = rmse_Omega, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(0,0.5)) +p <- ggplot(res) + aes(x = factor(n), y = rmse_Omega, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(0.1,.3)) p diff --git a/man/Networkfamily.Rd b/man/Networkfamily.Rd new file mode 100644 index 00000000..31b53c27 --- /dev/null +++ b/man/Networkfamily.Rd @@ -0,0 +1,235 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNnetworkfamily-class.R +\name{Networkfamily} +\alias{Networkfamily} +\title{An R6 Class to virtually represent a collection of network fits} +\description{ +The functions \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} both produce an instance of this class, which can be thought of as a vector of \code{\link{PLNnetworkfit}}s \code{\link{ZIPLNfit_sparse}}s (indexed by penalty parameter) + +This class comes with a set of methods mostly used to compare +network fits (in terms of goodness of fit) or extract one from +the family (based on penalty parameter and/or goodness of it). +See the documentation for \code{\link[=getBestModel]{getBestModel()}}, +\code{\link[=getModel]{getModel()}} and \link[=plot.Networkfamily]{plot()} for the user-facing ones. +} +\seealso{ +The functions \code{\link[=PLNnetwork]{PLNnetwork()}}, \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and the classes \code{\link{PLNnetworkfit}}, \code{\link{ZIPLNfit_sparse}} +} +\section{Super class}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{Networkfamily} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{the sparsity level of the network in the successively fitted models} + +\item{\code{stability_path}}{the stability path of each edge as returned by the stars procedure} + +\item{\code{stability}}{mean edge stability along the penalty path} + +\item{\code{criteria}}{a data frame with the values of some criteria (variational log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits +BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-Networkfamily-new}{\code{Networkfamily$new()}} +\item \href{#method-Networkfamily-optimize}{\code{Networkfamily$optimize()}} +\item \href{#method-Networkfamily-coefficient_path}{\code{Networkfamily$coefficient_path()}} +\item \href{#method-Networkfamily-getBestModel}{\code{Networkfamily$getBestModel()}} +\item \href{#method-Networkfamily-plot}{\code{Networkfamily$plot()}} +\item \href{#method-Networkfamily-plot_stars}{\code{Networkfamily$plot_stars()}} +\item \href{#method-Networkfamily-plot_objective}{\code{Networkfamily$plot_objective()}} +\item \href{#method-Networkfamily-show}{\code{Networkfamily$show()}} +\item \href{#method-Networkfamily-clone}{\code{Networkfamily$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-new}{}}} +\subsection{Method \code{new()}}{ +Initialize all models in the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$new(penalties, data, control)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} + +\item{\code{data}}{a named list used internally to carry the data matrices} + +\item{\code{control}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Update all network fits in the family with smart starting values +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-optimize}{}}} +\subsection{Method \code{optimize()}}{ +Call to the C++ optimizer on all models of the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$optimize(data, config)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{data}}{a named list used internally to carry the data matrices} + +\item{\code{config}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-coefficient_path}{}}} +\subsection{Method \code{coefficient_path()}}{ +Extract the regularization path of a \code{\link{Networkfamily}} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{precision}}{Logical. Should the regularization path be extracted from the precision matrix Omega (\code{TRUE}, default) or from the variance matrix Sigma (\code{FALSE})} + +\item{\code{corr}}{Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to \code{TRUE}} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-getBestModel}{}}} +\subsection{Method \code{getBestModel()}}{ +Extract the best network in the family according to some criteria +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$getBestModel(crit = c("BIC", "EBIC", "StARS"), stability = 0.9)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{crit}}{character. Criterion used to perform the selection. If "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} + +\item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} +} +\if{html}{\out{
}} +} +\subsection{Details}{ +For BIC and EBIC criteria, higher is better. +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-plot}{}}} +\subsection{Method \code{plot()}}{ +Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (a \code{\link{Networkfamily}}) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$plot( + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{criteria}}{vector of characters. The criteria to plot in \code{c("loglik", "pen_loglik", "BIC", "EBIC")}. Defaults to all of them.} + +\item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction +(loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the +natural direction, on the same scale as the log-likelihood.} + +\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-plot_stars}{}}} +\subsection{Method \code{plot_stars()}}{ +Plot stability path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{stability}}{scalar: the targeted level of stability using stability selection. Default is \code{0.9}.} + +\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-plot_objective}{}}} +\subsection{Method \code{plot_objective()}}{ +Plot objective value of the optimization problem along the penalty path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$plot_objective()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-show}{}}} +\subsection{Method \code{show()}}{ +User friendly print method +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$show()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Networkfamily$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/PLNnetwork.Rd b/man/PLNnetwork.Rd index e711f491..51589dfe 100644 --- a/man/PLNnetwork.Rd +++ b/man/PLNnetwork.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/PLNnetwork.R \name{PLNnetwork} \alias{PLNnetwork} -\title{Poisson lognormal model towards sparse network inference} +\title{Sparse Poisson lognormal model for network inference} \usage{ PLNnetwork( formula, @@ -31,7 +31,9 @@ an R6 object with class \code{\link{PLNnetworkfamily}}, which contains a collection of models with class \code{\link{PLNnetworkfit}} } \description{ -Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets). +Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +Use the (g)lm syntax to specify the model (including covariates and offsets). } \examples{ data(trichoptera) diff --git a/man/PLNnetwork_param.Rd b/man/PLNnetwork_param.Rd index f74ea009..c40c1350 100644 --- a/man/PLNnetwork_param.Rd +++ b/man/PLNnetwork_param.Rd @@ -30,7 +30,7 @@ PLNnetwork_param( \item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} -\item{penalty_weights}{either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} \item{config_post}{a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc).} @@ -49,7 +49,7 @@ Helper to define list of parameters to control the PLN fit. All arguments have d \details{ See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: \itemize{ -\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-6 \item "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 } } diff --git a/man/PLNnetworkfamily.Rd b/man/PLNnetworkfamily.Rd index 63218bf1..6cb3e9ab 100644 --- a/man/PLNnetworkfamily.Rd +++ b/man/PLNnetworkfamily.Rd @@ -2,13 +2,15 @@ % Please edit documentation in R/PLNnetworkfamily-class.R \name{PLNnetworkfamily} \alias{PLNnetworkfamily} -\title{An R6 Class to represent a collection of PLNnetworkfit} +\title{An R6 Class to represent a collection of \code{\link{PLNnetworkfit}}s} \description{ The function \code{\link[=PLNnetwork]{PLNnetwork()}} produces an instance of this class. -This class comes with a set of methods, some of them being useful for the user: +This class comes with a set of methods mostly used to compare +network fits (in terms of goodness of fit) or extract one from +the family (based on penalty parameter and/or goodness of it). See the documentation for \code{\link[=getBestModel]{getBestModel()}}, -\code{\link[=getModel]{getModel()}} and \link[=plot.PLNnetworkfamily]{plot()} +\code{\link[=getModel]{getModel()}} and \link[=plot.Networkfamily]{plot()} for the user-facing ones. } \examples{ data(trichoptera) @@ -19,44 +21,30 @@ class(fits) \seealso{ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} } -\section{Super class}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNnetworkfamily} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ -\item{\code{penalties}}{the sparsity level of the network in the successively fitted models} - -\item{\code{stability_path}}{the stability path of each edge as returned by the stars procedure} - -\item{\code{stability}}{mean edge stability along the penalty path} - -\item{\code{criteria}}{a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits -BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} -} -\if{html}{\out{
}} +\section{Super classes}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:Networkfamily]{PLNmodels::Networkfamily}} -> \code{PLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ \itemize{ \item \href{#method-PLNnetworkfamily-new}{\code{PLNnetworkfamily$new()}} -\item \href{#method-PLNnetworkfamily-optimize}{\code{PLNnetworkfamily$optimize()}} \item \href{#method-PLNnetworkfamily-stability_selection}{\code{PLNnetworkfamily$stability_selection()}} -\item \href{#method-PLNnetworkfamily-coefficient_path}{\code{PLNnetworkfamily$coefficient_path()}} -\item \href{#method-PLNnetworkfamily-getBestModel}{\code{PLNnetworkfamily$getBestModel()}} -\item \href{#method-PLNnetworkfamily-plot}{\code{PLNnetworkfamily$plot()}} -\item \href{#method-PLNnetworkfamily-plot_stars}{\code{PLNnetworkfamily$plot_stars()}} -\item \href{#method-PLNnetworkfamily-plot_objective}{\code{PLNnetworkfamily$plot_objective()}} -\item \href{#method-PLNnetworkfamily-show}{\code{PLNnetworkfamily$show()}} \item \href{#method-PLNnetworkfamily-clone}{\code{PLNnetworkfamily$clone()}} } } \if{html}{\out{ -
Inherited methods +
Inherited methods
}} @@ -66,15 +54,7 @@ BIC, ICL and EBIC are defined so that they are on the same scale as the model lo \subsection{Method \code{new()}}{ Initialize all models in the collection \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$new( - penalties, - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{PLNnetworkfamily$new(penalties, data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -82,15 +62,7 @@ Initialize all models in the collection \describe{ \item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} -\item{\code{responses}}{the matrix of responses common to every models} - -\item{\code{covariates}}{the matrix of covariates common to every models} - -\item{\code{offsets}}{the matrix of offsets common to every models} - -\item{\code{weights}}{the vector of observation weights} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization.} } @@ -101,23 +73,6 @@ Update current \code{\link{PLNnetworkfit}} with smart starting values } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-optimize}{}}} -\subsection{Method \code{optimize()}}{ -Call to the C++ optimizer on all models of the collection -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$optimize(config)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{config}}{a list for controlling the optimization.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-PLNnetworkfamily-stability_selection}{}}} \subsection{Method \code{stability_selection()}}{ @@ -134,126 +89,10 @@ Compute the stability path by stability selection \describe{ \item{\code{subsamples}}{a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} -\item{\code{control}}{a list controlling the main optimization process in each call to PLNnetwork. See \code{\link[=PLNnetwork]{PLNnetwork()}} for details.} -} -\if{html}{\out{}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-coefficient_path}{}}} -\subsection{Method \code{coefficient_path()}}{ -Extract the regularization path of a \code{\link{PLNnetworkfamily}} -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{precision}}{Logical. Should the regularization path be extracted from the precision matrix Omega (\code{TRUE}, default) or from the variance matrix Sigma (\code{FALSE})} - -\item{\code{corr}}{Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to \code{TRUE}} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-getBestModel}{}}} -\subsection{Method \code{getBestModel()}}{ -Extract the best network in the family according to some criteria -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$getBestModel( - crit = c("BIC", "EBIC", "StARS"), - stability = 0.9 -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{crit}}{character. Criterion used to perform the selection. Is "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} - -\item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} +\item{\code{control}}{a list controlling the main optimization process in each call to \code{\link[=PLNnetwork]{PLNnetwork()}}. See \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=PLN_param]{PLN_param()}} for details.} } \if{html}{\out{
}} } -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-plot}{}}} -\subsection{Method \code{plot()}}{ -Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$plot( - criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), - reverse = FALSE, - log.x = TRUE -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{criteria}}{vector of characters. The criteria to plot in \code{c("loglik", "pen_loglik", "BIC", "EBIC")}. Defaults to all of them.} - -\item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction -(loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the -natural direction, on the same scale as the log-likelihood..} - -\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-plot_stars}{}}} -\subsection{Method \code{plot_stars()}}{ -Plot stability path -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{stability}}{scalar: the targeted level of stability in stability plot. Default is \code{0.9}.} - -\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-plot_objective}{}}} -\subsection{Method \code{plot_objective()}}{ -Plot objective value of the optimization problem along the penalty path -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$plot_objective()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-show}{}}} -\subsection{Method \code{show()}}{ -User friendly print method -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$show()}\if{html}{\out{
}} -} - } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/PLNnetworkfit.Rd b/man/PLNnetworkfit.Rd index bc7bb2ff..7e531d24 100644 --- a/man/PLNnetworkfit.Rd +++ b/man/PLNnetworkfit.Rd @@ -51,7 +51,6 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \subsection{Public methods}{ \itemize{ \item \href{#method-PLNnetworkfit-new}{\code{PLNnetworkfit$new()}} -\item \href{#method-PLNnetworkfit-update}{\code{PLNnetworkfit$update()}} \item \href{#method-PLNnetworkfit-optimize}{\code{PLNnetworkfit$optimize()}} \item \href{#method-PLNnetworkfit-latent_network}{\code{PLNnetworkfit$latent_network()}} \item \href{#method-PLNnetworkfit-plot_network}{\code{PLNnetworkfit$plot_network()}} @@ -60,12 +59,13 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn } } \if{html}{\out{ -
Inherited methods +
Inherited methods
@@ -76,34 +76,13 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \subsection{Method \code{new()}}{ Initialize a \code{\link{PLNnetworkfit}} object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfit$new( - penalty, - penalty_weights, - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{PLNnetworkfit$new(data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{penalty}}{a positive real number controlling the level of sparsity of the underlying network.} - -\item{\code{penalty_weights}}{either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} - -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization.} } @@ -111,73 +90,18 @@ Initialize a \code{\link{PLNnetworkfit}} object } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfit-update}{}}} -\subsection{Method \code{update()}}{ -Update fields of a \code{\link{PLNnetworkfit}} object -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfit$update( - penalty = NA, - B = NA, - Sigma = NA, - Omega = NA, - M = NA, - S = NA, - Z = NA, - A = NA, - Ji = NA, - R2 = NA, - monitoring = NA -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{penalty}}{a positive real number controlling the level of sparsity of the underlying network.} - -\item{\code{B}}{matrix of regression matrix} - -\item{\code{Sigma}}{variance-covariance matrix of the latent variables} - -\item{\code{Omega}}{precision matrix of the latent variables. Inverse of Sigma.} - -\item{\code{M}}{matrix of mean vectors for the variational approximation} - -\item{\code{S}}{matrix of variance vectors for the variational approximation} - -\item{\code{Z}}{matrix of latent vectors (includes covariates and offset effects)} - -\item{\code{A}}{matrix of fitted values} - -\item{\code{Ji}}{vector of variational lower bounds of the log-likelihoods (one value per sample)} - -\item{\code{R2}}{approximate R^2 goodness-of-fit criterion} - -\item{\code{monitoring}}{a list with optimization monitoring quantities} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-PLNnetworkfit-optimize}{}}} \subsection{Method \code{optimize()}}{ Call to the C++ optimizer and update of the relevant fields \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfit$optimize(responses, covariates, offsets, weights, config)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{PLNnetworkfit$optimize(data, config)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{config}}{a list for controlling the optimization} } diff --git a/man/ZIPLN_param.Rd b/man/ZIPLN_param.Rd index 798b7fad..5d9a624d 100644 --- a/man/ZIPLN_param.Rd +++ b/man/ZIPLN_param.Rd @@ -10,6 +10,8 @@ ZIPLN_param( covariance = c("full", "diagonal", "spherical", "fixed", "sparse"), Omega = NULL, penalty = 0, + penalize_diagonal = TRUE, + penalty_weights = NULL, config_post = list(), config_optim = list(), inception = NULL @@ -26,6 +28,10 @@ ZIPLN_param( \item{penalty}{a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity).} +\item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} + +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} + \item{config_post}{a list for controlling the post-treatments (optional bootstrap, jackknife, R2, etc.). See details} \item{config_optim}{a list for controlling the optimizer (either "nlopt" or "torch" backend). See details} @@ -41,10 +47,11 @@ list of parameters used during the fit and post-processing steps Helper to define list of parameters to control the PLN fit. All arguments have defaults. } \details{ -See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. ZIPLN_param() also has two additional parameters controlling the optimization due -the inner-outer loop structure of the optimizer: +See \code{\link[=PLN_param]{PLN_param()}} and \code{\link[=PLNnetwork_param]{PLNnetwork_param()}} for a full description of the generic optimization parameters. Like \code{\link[=PLNnetwork_param]{PLNnetwork_param()}}, ZIPLN_param() has two parameters controlling the optimization due the inner-outer loop structure of the optimizer: \itemize{ -\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than \code{ftol_out} multiplied by the absolute value of the parameter. Default is 1e-8 +\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than \code{ftol_out} multiplied by the absolute value of the parameter. Default is 1e-6 \item "maxit_out" outer solver stops when the number of iteration exceeds \code{maxit_out}. Default is 100 +and one additional parameter controlling the form of the variational approximation of the zero inflation: +\item "approx_ZI" either uses an exact or approximated conditional distribution for the zero inflation. Default is FALSE } } diff --git a/man/ZIPLNfit.Rd b/man/ZIPLNfit.Rd index 1f3c43b3..5d4a5950 100644 --- a/man/ZIPLNfit.Rd +++ b/man/ZIPLNfit.Rd @@ -40,7 +40,11 @@ print(myPLN) \item{\code{d0}}{number of covariates in the ZI part} -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_zi}}{number of parameters in the ZI part of the model} + +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the model} + +\item{\code{nb_param}}{number of parameters in the ZIPLN model} \item{\code{model_par}}{a list with the matrices of parameters found in the model (B, Sigma, plus some others depending on the variant)} @@ -150,21 +154,13 @@ Update the current \code{\link{ZIPLNfit}} object \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit}} model \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNfit$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ZIPLNfit$new(data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } @@ -177,19 +173,13 @@ Initialize a \code{\link{ZIPLNfit}} model \subsection{Method \code{optimize()}}{ Call to the Cpp optimizer and update of the relevant fields \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNfit$optimize(responses, covariates, offsets, weights, control)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ZIPLNfit$optimize(data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } @@ -203,10 +193,7 @@ Call to the Cpp optimizer and update of the relevant fields Result of one call to the VE step of the optimization procedure: optimal variational parameters (M, S, R) and corresponding log likelihood values for fixed model parameters (Sigma, B, B0). Intended to position new data in the latent space. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{ZIPLNfit$optimize_vestep( - covariates, - offsets, - responses, - weights, + data, B = self$model_par$B, B0 = self$model_par$B0, Omega = self$model_par$Omega, @@ -217,13 +204,7 @@ Result of one call to the VE step of the optimization procedure: optimal variati \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{B}}{Optional fixed value of the regression parameters in the PLN component} diff --git a/man/ZIPLNfit_diagonal.Rd b/man/ZIPLNfit_diagonal.Rd index acce0d88..8b75204f 100644 --- a/man/ZIPLNfit_diagonal.Rd +++ b/man/ZIPLNfit_diagonal.Rd @@ -24,7 +24,7 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
}} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} } @@ -55,28 +55,13 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_diagonal}} model \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNfit_diagonal$new( - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ZIPLNfit_diagonal$new(data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNfit_fixed.Rd b/man/ZIPLNfit_fixed.Rd index d57a5bd7..6f6da2fd 100644 --- a/man/ZIPLNfit_fixed.Rd +++ b/man/ZIPLNfit_fixed.Rd @@ -25,7 +25,7 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
}} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} } @@ -56,21 +56,13 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_fixed}} model \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNfit_fixed$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ZIPLNfit_fixed$new(data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNfit_sparse.Rd b/man/ZIPLNfit_sparse.Rd index 6894af91..f5e5387e 100644 --- a/man/ZIPLNfit_sparse.Rd +++ b/man/ZIPLNfit_sparse.Rd @@ -13,9 +13,10 @@ An R6 Class to represent a ZIPLNfit in a standard, general framework, with spars # See other examples in function ZIPLN data(trichoptera) trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 0.2)) +myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 1)) class(myPLN) print(myPLN) +plot(myPLN) } } \section{Super class}{ @@ -24,9 +25,23 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
}} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{penalty}}{the global level of sparsity in the current model} + +\item{\code{penalty_weights}}{a matrix of weights controlling the amount of penalty element-wise.} + +\item{\code{n_edges}}{number of edges if the network (non null coefficient of the sparse precision matrix)} + +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} + +\item{\code{pen_loglik}}{variational lower bound of the l1-penalized loglikelihood} + +\item{\code{EBIC}}{variational lower bound of the EBIC} + +\item{\code{density}}{proportion of non-null edges in the network} + +\item{\code{criteria}}{a vector with loglik, penalized loglik, BIC, EBIC, ICL, R_squared, number of parameters, number of edges and graph density} } \if{html}{\out{
}} } @@ -34,6 +49,8 @@ print(myPLN) \subsection{Public methods}{ \itemize{ \item \href{#method-ZIPLNfit_sparse-new}{\code{ZIPLNfit_sparse$new()}} +\item \href{#method-ZIPLNfit_sparse-latent_network}{\code{ZIPLNfit_sparse$latent_network()}} +\item \href{#method-ZIPLNfit_sparse-plot_network}{\code{ZIPLNfit_sparse$plot_network()}} \item \href{#method-ZIPLNfit_sparse-clone}{\code{ZIPLNfit_sparse$clone()}} } } @@ -55,23 +72,72 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_fixed}} model \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNfit_sparse$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ZIPLNfit_sparse$new(data, control)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{data}}{a named list used internally to carry the data matrices} + +\item{\code{control}}{a list for controlling the optimization. See details.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNfit_sparse-latent_network}{}}} +\subsection{Method \code{latent_network()}}{ +Extract interaction network in the latent space +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNfit_sparse$latent_network(type = c("partial_cor", "support", "precision"))}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} +\item{\code{type}}{edge value in the network. Can be "support" (binary edges), "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species)} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a square matrix of size \code{ZIPLNfit_sparse$n} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNfit_sparse-plot_network}{}}} +\subsection{Method \code{plot_network()}}{ +plot the latent network. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNfit_sparse$plot_network( + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{type}}{edge value in the network. Either "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species).} -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} +\item{\code{output}}{Output type. Either \code{igraph} (for the network) or \code{corrplot} (for the adjacency matrix)} -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} +\item{\code{edge.color}}{Length 2 color vector. Color for positive/negative edges. Default is \code{c("#F8766D", "#00BFC4")}. Only relevant for igraph output.} -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{remove.isolated}}{if \code{TRUE}, isolated node are remove before plotting. Only relevant for igraph output.} -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{node.labels}}{vector of character. The labels of the nodes. The default will use the column names ot the response matrix.} -\item{\code{control}}{a list for controlling the optimization. See details.} +\item{\code{layout}}{an optional igraph layout. Only relevant for igraph output.} + +\item{\code{plot}}{logical. Should the final network be displayed or only sent back to the user. Default is \code{TRUE}.} } \if{html}{\out{
}} } diff --git a/man/ZIPLNfit_spherical.Rd b/man/ZIPLNfit_spherical.Rd index df904d7b..5779c2c8 100644 --- a/man/ZIPLNfit_spherical.Rd +++ b/man/ZIPLNfit_spherical.Rd @@ -24,7 +24,7 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
}} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} } @@ -55,28 +55,13 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_spherical}} model \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNfit_spherical$new( - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ZIPLNfit_spherical$new(data, control)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNnetwork.Rd b/man/ZIPLNnetwork.Rd new file mode 100644 index 00000000..aef11b01 --- /dev/null +++ b/man/ZIPLNnetwork.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNnetwork.R +\name{ZIPLNnetwork} +\alias{ZIPLNnetwork} +\title{Zero Inflated Sparse Poisson lognormal model for network inference} +\usage{ +ZIPLNnetwork( + formula, + data, + subset, + weights, + zi = c("single", "row", "col"), + penalties = NULL, + control = ZIPLNnetwork_param() +) +} +\arguments{ +\item{formula}{an object of class "formula": a symbolic description of the model to be fitted.} + +\item{data}{an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called.} + +\item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.} + +\item{weights}{an optional vector of observation weights to be used in the fitting process.} + +\item{zi}{a character describing the model used for zero inflation, either of +\itemize{ +\item "single" (default, one parameter shared by all counts) +\item "col" (one parameter per variable / feature) +\item "row" (one parameter per sample / individual). +If covariates are specified in the formula RHS (see details) this parameter is ignored. +}} + +\item{penalties}{an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See \code{PLNnetwork_param()} for additional tuning of the penalty.} + +\item{control}{a list-like structure for controlling the optimization, with default generated by \code{\link[=ZIPLNnetwork_param]{ZIPLNnetwork_param()}}. See the associated documentation +for details.} +} +\value{ +an R6 object with class \code{\link{ZIPLNnetworkfamily}} +} +\description{ +Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +Use the (g)lm syntax to specify the model (including covariates and offsets). +} +\details{ +Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +(\verb{~ PLN effect | ZI effect}) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +Note that different covariates can be used for each part. +} +\examples{ +data(trichoptera) +trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +myZIPLNs <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, zi = "single") +} +\seealso{ +The classes \code{\link{ZIPLNfit}} and \code{\link{ZIPLNnetworkfamily}} +} diff --git a/man/ZIPLNnetwork_param.Rd b/man/ZIPLNnetwork_param.Rd new file mode 100644 index 00000000..fe3d4daa --- /dev/null +++ b/man/ZIPLNnetwork_param.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNnetwork.R +\name{ZIPLNnetwork_param} +\alias{ZIPLNnetwork_param} +\title{Control of ZIPLNnetwork fit} +\usage{ +ZIPLNnetwork_param( + backend = c("nlopt"), + inception_cov = c("full", "spherical", "diagonal"), + trace = 1, + n_penalties = 30, + min_ratio = 0.1, + penalize_diagonal = TRUE, + penalty_weights = NULL, + config_post = list(), + config_optim = list(), + inception = NULL +) +} +\arguments{ +\item{backend}{optimization back used, either "nlopt" or "torch". Default is "nlopt"} + +\item{inception_cov}{Covariance structure used for the inception model used to initialize the PLNfamily. Defaults to "full" and can be constrained to "diagonal" and "spherical".} + +\item{trace}{a integer for verbosity.} + +\item{n_penalties}{an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non \code{NULL}} + +\item{min_ratio}{the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by \code{min_ratio}. Default is 0.1.} + +\item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} + +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} + +\item{config_post}{a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc).} + +\item{config_optim}{a list for controlling the optimizer (either "nlopt" or "torch" backend). See details} + +\item{inception}{Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on +log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), +which sometimes speeds up the inference.} +} +\value{ +list of parameters configuring the fit. +} +\description{ +Helper to define list of parameters to control the ZIPLNnetwork fit. All arguments have defaults. +} +\details{ +See \code{\link[=PLNnetwork_param]{PLNnetwork_param()}} for a full description of the optimization parameters. Note that some defaults values are different than those used in \code{\link[=PLNnetwork_param]{PLNnetwork_param()}}: +\itemize{ +\item "ftol_out" (outer loop convergence tolerance the objective function) is set by default to 1e-6 +\item "maxit_out" (max number of iterations for the outer loop) is set by default to 100 +} +} +\seealso{ +\code{\link[=PLNnetwork_param]{PLNnetwork_param()}} and \code{\link[=PLN_param]{PLN_param()}} +} diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd new file mode 100644 index 00000000..0bf90623 --- /dev/null +++ b/man/ZIPLNnetworkfamily.Rd @@ -0,0 +1,119 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNnetworkfamily-class.R +\name{ZIPLNnetworkfamily} +\alias{ZIPLNnetworkfamily} +\title{An R6 Class to represent a collection of ZIPLNnetwork} +\description{ +The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} produces an instance of this class. + +This class comes with a set of methods, some of them being useful for the user: +See the documentation for \code{\link[=getBestModel]{getBestModel()}}, +\code{\link[=getModel]{getModel()}} and \link[=plot.ZIPLNnetworkfamily]{plot()} +} +\examples{ +data(trichoptera) +trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +class(fits) +} +\seealso{ +The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ZIPLNfit_sparse}} +} +\section{Super classes}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:Networkfamily]{PLNmodels::Networkfamily}} -> \code{ZIPLNnetworkfamily} +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{covariates0}}{the matrix of covariates included in the ZI component} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ZIPLNnetworkfamily-new}{\code{ZIPLNnetworkfamily$new()}} +\item \href{#method-ZIPLNnetworkfamily-stability_selection}{\code{ZIPLNnetworkfamily$stability_selection()}} +\item \href{#method-ZIPLNnetworkfamily-clone}{\code{ZIPLNnetworkfamily$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-new}{}}} +\subsection{Method \code{new()}}{ +Initialize all models in the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$new(penalties, data, control)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} + +\item{\code{data}}{a named list used internally to carry the data matrices} + +\item{\code{control}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Update current \code{\link{PLNnetworkfit}} with smart starting values +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-stability_selection}{}}} +\subsection{Method \code{stability_selection()}}{ +Compute the stability path by stability selection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$stability_selection( + subsamples = NULL, + control = ZIPLNnetwork_param() +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{subsamples}}{a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} + +\item{\code{control}}{a list controlling the main optimization process in each call to \code{\link[=PLNnetwork]{PLNnetwork()}}. See \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and \code{\link[=ZIPLN_param]{ZIPLN_param()}} for details.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/coefficient_path.Rd b/man/coefficient_path.Rd index 3a389ccb..45552555 100644 --- a/man/coefficient_path.Rd +++ b/man/coefficient_path.Rd @@ -7,7 +7,7 @@ coefficient_path(Robject, precision = TRUE, corr = TRUE) } \arguments{ -\item{Robject}{an object with class \code{\link{PLNnetworkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}}} +\item{Robject}{an object with class \code{\link{Networkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}}} \item{precision}{a logical, should the coefficients of the precision matrix Omega or the covariance matrix Sigma be sent back. Default is \code{TRUE}.} diff --git a/man/getBestModel.Rd b/man/getBestModel.Rd index aff5ac35..bce004c4 100644 --- a/man/getBestModel.Rd +++ b/man/getBestModel.Rd @@ -6,7 +6,9 @@ \alias{getBestModel.PLNPCAfamily} \alias{getBestModel} \alias{getBestModel.PLNmixturefamily} +\alias{getBestModel.Networkfamily} \alias{getBestModel.PLNnetworkfamily} +\alias{getBestModel.ZIPLNnetworkfamily} \title{Best model extraction from a collection of models} \usage{ \method{getBestModel}{PLNPCAfamily}(Robject, crit = c("ICL", "BIC"), ...) @@ -15,7 +17,11 @@ getBestModel(Robject, crit, ...) \method{getBestModel}{PLNmixturefamily}(Robject, crit = c("ICL", "BIC"), ...) +\method{getBestModel}{Networkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) + \method{getBestModel}{PLNnetworkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) + +\method{getBestModel}{ZIPLNnetworkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) } \arguments{ \item{Robject}{an object with class PLNPCAfamilly ot PLNnetworkfamily} @@ -39,8 +45,12 @@ Best model extraction from a collection of models \item \code{getBestModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} +\item \code{getBestModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}} + \item \code{getBestModel(PLNnetworkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getBestModel(ZIPLNnetworkfamily)}: Model extraction for \code{\link{ZIPLNnetworkfamily}} + }} \examples{ \dontrun{ diff --git a/man/getModel.Rd b/man/getModel.Rd index 846dbf8b..a812d321 100644 --- a/man/getModel.Rd +++ b/man/getModel.Rd @@ -6,7 +6,9 @@ \alias{getModel.PLNPCAfamily} \alias{getModel} \alias{getModel.PLNmixturefamily} +\alias{getModel.Networkfamily} \alias{getModel.PLNnetworkfamily} +\alias{getModel.ZIPLNnetworkfamily} \title{Model extraction from a collection of models} \usage{ \method{getModel}{PLNPCAfamily}(Robject, var, index = NULL) @@ -15,7 +17,11 @@ getModel(Robject, var, index) \method{getModel}{PLNmixturefamily}(Robject, var, index = NULL) +\method{getModel}{Networkfamily}(Robject, var, index = NULL) + \method{getModel}{PLNnetworkfamily}(Robject, var, index = NULL) + +\method{getModel}{ZIPLNnetworkfamily}(Robject, var, index = NULL) } \arguments{ \item{Robject}{an R6 object with class \code{\link{PLNPCAfamily}} or \code{\link{PLNnetworkfamily}}} @@ -36,8 +42,12 @@ Model extraction from a collection of models \item \code{getModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} +\item \code{getModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}} + \item \code{getModel(PLNnetworkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getModel(ZIPLNnetworkfamily)}: Model extraction for \code{\link{ZIPLNnetworkfamily}} + }} \examples{ \dontrun{ diff --git a/man/plot.PLNnetworkfamily.Rd b/man/plot.Networkfamily.Rd similarity index 56% rename from man/plot.PLNnetworkfamily.Rd rename to man/plot.Networkfamily.Rd index 64cb0f34..ab24d8b7 100644 --- a/man/plot.PLNnetworkfamily.Rd +++ b/man/plot.Networkfamily.Rd @@ -1,9 +1,21 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PLNnetworkfamily-S3methods.R -\name{plot.PLNnetworkfamily} +\name{plot.Networkfamily} +\alias{plot.Networkfamily} \alias{plot.PLNnetworkfamily} -\title{Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}})} +\alias{plot.ZIPLNnetworkfamily} +\title{Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}})} \usage{ +\method{plot}{Networkfamily}( + x, + type = c("criteria", "stability", "diagnostic"), + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE, + stability = 0.9, + ... +) + \method{plot}{PLNnetworkfamily}( x, type = c("criteria", "stability", "diagnostic"), @@ -13,14 +25,25 @@ stability = 0.9, ... ) + +\method{plot}{ZIPLNnetworkfamily}( + x, + type = c("criteria", "stability", "diagnostic"), + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE, + stability = 0.9, + ... +) } \arguments{ -\item{x}{an R6 object with class \code{\link{PLNnetworkfamily}}} +\item{x}{an R6 object with class \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}}} \item{type}{a character, either "criteria", "stability" or "diagnostic" for the type of plot.} -\item{criteria}{vector of characters. The criteria to plot in c("loglik", "BIC", "ICL", "R_squared", "EBIC", "pen_loglik"). -Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only relevant when \code{type = "criteria"}.} +\item{criteria}{Vector of criteria to plot, to be selected among "loglik" (log-likelihood), +"BIC", "ICL", "R_squared", "EBIC" and "pen_loglik" (penalized log-likelihood). +Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only used when \code{type = "criteria"}.} \item{reverse}{A logical indicating whether to plot the value of the criteria in the "natural" direction (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the @@ -38,12 +61,19 @@ Produces either a diagnostic plot (with \code{type = 'diagnostic'}), a stability (with \code{type = 'criteria'}, the default). } \description{ -Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) +Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}}) } \details{ The BIC and ICL criteria have the form 'loglik - 1/2 * penalty' so that they are on the same scale as the model log-likelihood. You can change this direction and use the alternate form '-2*loglik + penalty', as some authors do, by setting \code{reverse = TRUE}. } +\section{Functions}{ +\itemize{ +\item \code{plot(PLNnetworkfamily)}: Display various outputs associated with a collection of network fits + +\item \code{plot(ZIPLNnetworkfamily)}: Display various outputs associated with a collection of network fits + +}} \examples{ data(trichoptera) trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) diff --git a/man/plot.ZIPLNfit_sparse.Rd b/man/plot.ZIPLNfit_sparse.Rd new file mode 100644 index 00000000..d358717c --- /dev/null +++ b/man/plot.ZIPLNfit_sparse.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNfit-S3methods.R +\name{plot.ZIPLNfit_sparse} +\alias{plot.ZIPLNfit_sparse} +\title{Extract and plot the network (partial correlation, support or inverse covariance) from a \code{\link{ZIPLNfit_sparse}} object} +\usage{ +\method{plot}{ZIPLNfit_sparse}( + x, + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE, + ... +) +} +\arguments{ +\item{x}{an R6 object with class \code{\link{ZIPLNfit_sparse}}} + +\item{type}{character. Value of the weight of the edges in the network, either "partial_cor" (partial correlation) or "support" (binary). Default is \code{"partial_cor"}.} + +\item{output}{the type of output used: either 'igraph' or 'corrplot'. Default is \code{'igraph'}.} + +\item{edge.color}{Length 2 color vector. Color for positive/negative edges. Default is \code{c("#F8766D", "#00BFC4")}. Only relevant for igraph output.} + +\item{remove.isolated}{if \code{TRUE}, isolated node are remove before plotting. Only relevant for igraph output.} + +\item{node.labels}{vector of character. The labels of the nodes. The default will use the column names ot the response matrix.} + +\item{layout}{an optional igraph layout. Only relevant for igraph output.} + +\item{plot}{logical. Should the final network be displayed or only sent back to the user. Default is \code{TRUE}.} + +\item{...}{Not used (S3 compatibility).} +} +\value{ +Send back an invisible object (igraph or Matrix, depending on the output chosen) and optionally displays a graph (via igraph or corrplot for large ones) +} +\description{ +Extract and plot the network (partial correlation, support or inverse covariance) from a \code{\link{ZIPLNfit_sparse}} object +} +\examples{ +data(trichoptera) +trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +fit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) +\dontrun{ +plot(fit) +} +} diff --git a/man/stability_selection.Rd b/man/stability_selection.Rd index 2fb5384a..ae378f80 100644 --- a/man/stability_selection.Rd +++ b/man/stability_selection.Rd @@ -12,16 +12,16 @@ stability_selection( ) } \arguments{ -\item{Robject}{an object with class \code{\link{PLNnetworkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}}} +\item{Robject}{an object with class \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}} or \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}} \item{subsamples}{a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} -\item{control}{a list controlling the main optimization process in each call to PLNnetwork. See \code{\link[=PLNnetwork]{PLNnetwork()}} for details.} +\item{control}{a list controlling the main optimization process in each call to \code{\link[=PLNnetwork]{PLNnetwork()}} or \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}. See \code{\link[=PLN_param]{PLN_param()}} or \code{\link[=ZIPLN_param]{ZIPLN_param()}} for details.} \item{force}{force computation of the stability path, even if a previous one has been detected.} } \value{ -the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields \code{stability_path} of the initial Robject with class \code{\link{PLNnetworkfamily}} +the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields \code{stability_path} of the initial Robject with class \code{\link{Networkfamily}} } \description{ This function computes the StARS stability criteria over a path of penalties. If a path has already been computed, the functions stops with a message unless \code{force = TRUE} has been specified. diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 62bb3121..01f8105d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -250,9 +250,9 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// optim_zipln_R -arma::mat optim_zipln_R(const arma::mat& Y, const arma::mat& X, const arma::mat& O, const arma::mat& M, const arma::mat& S, const arma::mat& Pi); -RcppExport SEXP _PLNmodels_optim_zipln_R(SEXP YSEXP, SEXP XSEXP, SEXP OSEXP, SEXP MSEXP, SEXP SSEXP, SEXP PiSEXP) { +// optim_zipln_R_var +arma::mat optim_zipln_R_var(const arma::mat& Y, const arma::mat& X, const arma::mat& O, const arma::mat& M, const arma::mat& S, const arma::mat& Pi, const arma::mat& B); +RcppExport SEXP _PLNmodels_optim_zipln_R_var(SEXP YSEXP, SEXP XSEXP, SEXP OSEXP, SEXP MSEXP, SEXP SSEXP, SEXP PiSEXP, SEXP BSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -262,7 +262,25 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const arma::mat& >::type M(MSEXP); Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); Rcpp::traits::input_parameter< const arma::mat& >::type Pi(PiSEXP); - rcpp_result_gen = Rcpp::wrap(optim_zipln_R(Y, X, O, M, S, Pi)); + Rcpp::traits::input_parameter< const arma::mat& >::type B(BSEXP); + rcpp_result_gen = Rcpp::wrap(optim_zipln_R_var(Y, X, O, M, S, Pi, B)); + return rcpp_result_gen; +END_RCPP +} +// optim_zipln_R_exact +arma::mat optim_zipln_R_exact(const arma::mat& Y, const arma::mat& X, const arma::mat& O, const arma::mat& M, const arma::mat& S, const arma::mat& Pi, const arma::mat& B); +RcppExport SEXP _PLNmodels_optim_zipln_R_exact(SEXP YSEXP, SEXP XSEXP, SEXP OSEXP, SEXP MSEXP, SEXP SSEXP, SEXP PiSEXP, SEXP BSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type Y(YSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type O(OSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type M(MSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Pi(PiSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type B(BSEXP); + rcpp_result_gen = Rcpp::wrap(optim_zipln_R_exact(Y, X, O, M, S, Pi, B)); return rcpp_result_gen; END_RCPP } @@ -331,7 +349,8 @@ static const R_CallMethodDef CallEntries[] = { {"_PLNmodels_optim_zipln_Omega_diagonal", (DL_FUNC) &_PLNmodels_optim_zipln_Omega_diagonal, 4}, {"_PLNmodels_optim_zipln_B_dense", (DL_FUNC) &_PLNmodels_optim_zipln_B_dense, 2}, {"_PLNmodels_optim_zipln_zipar_covar", (DL_FUNC) &_PLNmodels_optim_zipln_zipar_covar, 4}, - {"_PLNmodels_optim_zipln_R", (DL_FUNC) &_PLNmodels_optim_zipln_R, 6}, + {"_PLNmodels_optim_zipln_R_var", (DL_FUNC) &_PLNmodels_optim_zipln_R_var, 7}, + {"_PLNmodels_optim_zipln_R_exact", (DL_FUNC) &_PLNmodels_optim_zipln_R_exact, 7}, {"_PLNmodels_optim_zipln_M", (DL_FUNC) &_PLNmodels_optim_zipln_M, 9}, {"_PLNmodels_optim_zipln_S", (DL_FUNC) &_PLNmodels_optim_zipln_S, 7}, {"_PLNmodels_cpp_test_packing", (DL_FUNC) &_PLNmodels_cpp_test_packing, 0}, diff --git a/src/lambertW.cpp b/src/lambertW.cpp new file mode 100644 index 00000000..2d97dd3c --- /dev/null +++ b/src/lambertW.cpp @@ -0,0 +1,138 @@ +/* lambertW.cpp + +Copyright (C) 2015, Avraham Adler +All rights reserved. + +SPDX-License-Identifier: BSD-2-Clause + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +References: + +Corless, R. M.; Gonnet, G. H.; Hare, D. E.; Jeffrey, D. J. & Knuth, D. E. + "On the Lambert W function", Advances in Computational Mathematics, + Springer, 1996, 5, 329-359 + +Fritsch, F. N.; Shafer, R. E. & Crowley, W. P. + "Solution of the transcendental equation (we^w = x)", + Communications of the ACM, Association for Computing Machinery (ACM), + 1973, 16, 123-124 +*/ + +#include "lambertW.h" + + /* Fritsch Iteration + * W_{n+1} = W_n * (1 + e_n) + * z_n = ln(x / W_n) - W_n + * q_n = 2 * (1 + W_n) * (1 + W_n + 2 / 3 * z_n) + * e_n = z_n / (1 + W_n) * (q_n - z_n) / (q_n - 2 * z_n) + */ + +double FritschIter(double x, double w){ + int MaxEval = 5; + bool CONVERGED = false; + double k = 2.0 / 3.0; + int i = 0; + do { + double z = std::log(x / w) - w; + double w1 = w + 1.0; + double q = 2.0 * w1 * (w1 + k * z); + double qmz = q - z; + double e = z / w1 * qmz / (qmz - z); + CONVERGED = std::abs(e) <= EPS; + w *= (1.0 + e); + ++i; + } while (!CONVERGED && i < MaxEval); + return(w); +} + +double lambertW0_CS(double x) { + if (x == R_PosInf) { + return(R_PosInf); + } else if (x < -M_1_E) { + return(R_NaN); + } else if (std::abs(x + M_1_E) <= EPS) { + return(-1.0); + } else if (std::abs(x) <= 1e-16) { + /* This close to 0 the W_0 branch is best estimated by its Taylor/Pade + expansion whose first term is the value x and remaining terms are below + machine double precision. See + https://math.stackexchange.com/questions/1700919 + */ + return(x); + } else { + double w; + if (std::abs(x) <= 6.4e-3) { + /* When this close to 0 the Fritsch iteration may underflow. Instead, + * function will use degree-6 minimax polynomial approximation of Halley + * iteration-based values. Should be more accurate by three orders of + * magnitude than Fritsch's equation (5) in this range. + */ + + // Minimax Approximation calculated using R package minimaxApprox 0.1.0 + return((((((-1.0805085529250425e1 * x + 5.2100070265741278) * x - + 2.6666665063383532) * x + 1.4999999657268301) * x - + 1.0000000000016802) * x + 1.0000000000001752) * x + + 2.6020852139652106e-18); + + } else if (x <= M_E) { + /* Use expansion in Corliss 4.22 to create (2, 2) Pade approximant. + * Equation with a few extra terms is: + * -1 + p - 1/3p^2 + 11/72p^3 - 43/540p^4 + 689453/8398080p^4 - O(p^5) + * This is just used to estimate a good starting point for the Fritsch + * iteration process itself. + */ + double p = std::sqrt(2.0 * (M_E * x + 1.0)); + double Numer = (0.2787037037037037 * p + 0.311111111111111) * p - 1.0; + double Denom = (0.0768518518518518 * p + 0.688888888888889) * p + 1.0; + w = Numer / Denom; + } else { + /* Use first five terms of Corliss et al. 4.19 */ + w = std::log(x); + double L_2 = std::log(w); + double L_3 = L_2 / w; + double L_3_sq = L_3 * L_3; + w += -L_2 + L_3 + 0.5 * L_3_sq - L_3 / w + L_3 / (w * w) - 1.5 * L_3_sq / + w + L_3_sq * L_3 / 3.0; + } + return(FritschIter(x, w)); + } +} + +double lambertWm1_CS(double x){ + if (x == 0.0) { + return(R_NegInf); + } else if (x < -M_1_E || x > 0.0) { + return(R_NaN); + } else if (std::abs(x + M_1_E) <= EPS) { + return(-1.0); + } else { + double w; + /* Use first five terms of Corliss et al. 4.19 */ + w = std::log(-x); + double L_2 = std::log(-w); + double L_3 = L_2 / w; + double L_3_sq = L_3 * L_3; + w += -L_2 + L_3 + 0.5 * L_3_sq - L_3 / w + L_3 / (w * w) - 1.5 * L_3_sq / + w + L_3_sq * L_3 / 3.0; + return(FritschIter(x, w)); + } +} + diff --git a/src/lambertW.h b/src/lambertW.h new file mode 100644 index 00000000..6b56a484 --- /dev/null +++ b/src/lambertW.h @@ -0,0 +1,15 @@ +#include + +#define _USE_MATH_DEFINES +#include + +using namespace Rcpp; + +const double EPS = 2.2204460492503131e-16; +const double M_1_E = 1.0 / M_E; + +double FritschIter(double x, double w) ; +double lambertW0_CS(double x) ; +double lambertWm1_CS(double x) ; + + diff --git a/src/optim_zi-pln.cpp b/src/optim_zi-pln.cpp index 5665ed47..559979cb 100644 --- a/src/optim_zi-pln.cpp +++ b/src/optim_zi-pln.cpp @@ -6,6 +6,7 @@ #include "nlopt_wrapper.h" #include "packing.h" #include "utils.h" +#include "lambertW.h" // [[Rcpp::export]] arma::vec zipln_vloglik( @@ -118,13 +119,14 @@ Rcpp::List optim_zipln_zipar_covar( } // [[Rcpp::export]] -arma::mat optim_zipln_R( +arma::mat optim_zipln_R_var( const arma::mat & Y, // responses (n,p) const arma::mat & X, // covariates (n,d) const arma::mat & O, // offsets (n,p) const arma::mat & M, // (n,p) const arma::mat & S, // (n,p) - const arma::mat & Pi // (d,p) + const arma::mat & Pi, // (d,p) + const arma::mat & B // covariates (n,d) ) { arma::mat A = exp(O + M + 0.5 * S % S); arma::mat R = pow(1. + exp(- (A + logit(Pi))), -1); @@ -144,6 +146,39 @@ arma::mat optim_zipln_R( return R; } +double phi (double mu, double sigma2) { + double W = lambertW0_CS(sigma2 * exp(mu)) ; + return(exp(-(pow(W, 2) + 2 * W) / (2 * sigma2)) / sqrt(1 + W)) ; +} + +// [[Rcpp::export]] +arma::mat optim_zipln_R_exact ( + const arma::mat & Y, // covariates (n,d) + const arma::mat & X, // covariates (n,d) + const arma::mat & O, // offsets (n,p) + const arma::mat & M, // (n,p) + const arma::mat & S, // (n,p) + const arma::mat & Pi, // (n,p) + const arma::mat & B // covariates (n,d) +) { + + arma::mat XB = X * B; + arma::mat M_mu = M - XB; + arma::uword n = M.n_rows; + arma::uword p = M.n_cols; + arma::vec diag_Sigma = arma::diagvec((1./n) * (M_mu.t() * M_mu + diagmat(sum(S % S, 0)))) ; + arma::mat R = arma::zeros(n,p); + for(arma::uword i = 0; i < n; i += 1) { + for(arma::uword j = 0; j < p; j += 1) { + if(Y(i, j) < 0.5) { + double Phi = phi(O(i,j) + XB(i,j), diag_Sigma(j)) ; + R(i,j) = Pi(i,j) / (Phi * (1 - Pi(i,j)) + Pi(i,j)) ; + } + } + } + return R; +} + // [[Rcpp::export]] Rcpp::List optim_zipln_M( const arma::mat & init_M, // (n,p) diff --git a/tests/testthat/test-plnnetworkfamily.R b/tests/testthat/test-plnnetworkfamily.R index fe736451..97ffd071 100644 --- a/tests/testthat/test-plnnetworkfamily.R +++ b/tests/testthat/test-plnnetworkfamily.R @@ -10,19 +10,23 @@ test_that("PLNnetwork: main function, fields access and methods", { expect_equal(getBestModel(models), getBestModel(models, "BIC")) - X <- model.matrix(Abundance ~ 1, data = trichoptera) Y <- as.matrix(trichoptera$Abundance) - O <- matrix(0, nrow(Y),ncol(Y)) - w <- rep(1, nrow(Y)) + data = list( + Y = Y, + X = model.matrix(Abundance ~ 1, data = trichoptera), + O = matrix(0, nrow(Y),ncol(Y)), + w = rep(1, nrow(Y)), + formula = Abundance ~ 1 + ) ## extract the data matrices and weights ctrl <- PLNmodels:::PLNnetwork_param(trace = 0) ## instantiate - myPLN <- PLNmodels:::PLNnetworkfamily$new(NULL, Y, X, O, w, Abundance ~ 1, ctrl) + myPLN <- PLNmodels:::PLNnetworkfamily$new(NULL, data, ctrl) ## optimize - myPLN$optimize(ctrl$config_optim) + myPLN$optimize(data, ctrl$config_optim) ## post-treatment config_post <- PLNmodels:::config_post_default_PLNnetwork @@ -32,7 +36,7 @@ test_that("PLNnetwork: main function, fields access and methods", { expect_equal(myPLN$criteria$BIC, models$criteria$BIC) ## S3 methods - expect_true(PLNmodels:::isPLNnetworkfamily(myPLN)) + expect_true(PLNmodels:::isNetworkfamily(myPLN)) expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") @@ -90,7 +94,7 @@ test_that("PLNnetwork: matrix of penalties work", { myPLN <- PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W)) ## S3 methods - expect_true(PLNmodels:::isPLNnetworkfamily(myPLN)) + expect_true(PLNmodels:::isNetworkfamily(myPLN)) expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") @@ -141,7 +145,7 @@ test_that("PLNnetwork: list of matrices of penalties work", { myPLN <- PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = list_W)) ## S3 methods - expect_true(PLNmodels:::isPLNnetworkfamily(myPLN)) + expect_true(PLNmodels:::isNetworkfamily(myPLN)) expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") diff --git a/tests/testthat/test-plnnetworkfit.R b/tests/testthat/test-plnnetworkfit.R index 4b311fa3..a835dc96 100644 --- a/tests/testthat/test-plnnetworkfit.R +++ b/tests/testthat/test-plnnetworkfit.R @@ -42,7 +42,7 @@ test_that("PLNnetwork fit: check classes, getters and field access", { expect_equal(vcov(myPLNfit, "covariance"), myPLNfit$model_par$Sigma) expect_equal(vcov(myPLNfit, "covariance"), sigma(myPLNfit)) expect_warning(standard_error(myPLNfit)) - expect_true(igraph::is.igraph(myPLNfit$plot_network(output = "igraph", plot = FALSE))) + expect_true(igraph::is_igraph(myPLNfit$plot_network(output = "igraph", plot = FALSE))) expect_true(inherits(myPLNfit$plot_network(output = "corrplot", plot = FALSE), "Matrix")) }) diff --git a/tests/testthat/test-zipln.R b/tests/testthat/test-zipln.R index d6361f9e..7eac24b0 100644 --- a/tests/testthat/test-zipln.R +++ b/tests/testthat/test-zipln.R @@ -60,6 +60,17 @@ test_that("PLN is working with unnamed data matrix", { expect_error(ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(config_optim = list(algorithm = "nawak")))) }) +test_that("ZIPLN is working with exact and variational inference for the conditional distribution of the ZI component", { + + approx <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(config_optim = list(approx_ZI = TRUE))) + exact <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(config_optim = list(approx_ZI = FALSE))) + + expect_equal(approx$loglik, exact$loglik, tolerance = 1e-1) ## Almost equivalent + expect_equal(approx$model_par$B, exact$model_par$B, tolerance = 1e-1) ## Almost equivalent + expect_equal(approx$model_par$Sigma, exact$model_par$Sigma, tolerance = 1e-1) ## Almost equivalent + +}) + test_that("ZIPLN: Check that univariate ZIPLN models works, with matrix of numeric format", { expect_no_error(uniZIPLN <- ZIPLN(Abundance[,1,drop=FALSE] ~ 1, data = trichoptera)) expect_no_error(uniZIPLN <- ZIPLN(Abundance[,1] ~ 1, data = trichoptera)) diff --git a/tests/testthat/test-ziplnfit.R b/tests/testthat/test-ziplnfit.R index 9ba0ffc5..dbc5bfd5 100644 --- a/tests/testthat/test-ziplnfit.R +++ b/tests/testthat/test-ziplnfit.R @@ -118,3 +118,16 @@ test_that("ZIPLN fit: Check number of parameters", { expect_equal(model$vcov_model, "fixed") }) + +test_that("ZIPLN fit: check sparse output and plot", { + + myPLNfit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(trace = 1, penalty = 0.1)) + + expect_is(myPLNfit, "ZIPLNfit_sparse") + expect_is(myPLNfit, "ZIPLNfit") + + expect_equal(myPLNfit$vcov_model, "sparse") + expect_true(igraph::is.igraph(myPLNfit$plot_network(output = "igraph", plot = FALSE))) + expect_true(inherits(myPLNfit$plot_network(output = "corrplot", plot = FALSE), "Matrix")) + +}) diff --git a/tests/testthat/test-ziplnnetworkfamily.R b/tests/testthat/test-ziplnnetworkfamily.R new file mode 100644 index 00000000..b5223dd3 --- /dev/null +++ b/tests/testthat/test-ziplnnetworkfamily.R @@ -0,0 +1,170 @@ +context("test-plnnetworkfamily") + +data(trichoptera) +## use a subset t save some time +trichoptera <- prepare_data(trichoptera$Abundance[1:20, 1:5], trichoptera$Covariate[1:20, ]) + +models <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera) + +test_that("ZIPLNnetwork: main function, fields access and methods", { + + expect_equal(getBestModel(models), getBestModel(models, "BIC")) + + Y <- as.matrix(trichoptera$Abundance) + data = list( + Y = Y, + X = model.matrix(Abundance ~ 1, data = trichoptera), + X0 = matrix(NA,0,0), + O = matrix(0, nrow(Y),ncol(Y)), + w = rep(1, nrow(Y)), + formula = Abundance ~ 1 + ) + + ## extract the data matrices and weights + ctrl <- PLNmodels:::ZIPLNnetwork_param(trace = 0) + ctrl$ziparam <- "single" + + ## instantiate + myPLN <- PLNmodels:::ZIPLNnetworkfamily$new(NULL, data, ctrl) + + ## optimize + myPLN$optimize(data, ctrl$config_optim) + + expect_equal(myPLN$criteria$BIC, models$criteria$BIC) + + ## S3 methods + expect_true(PLNmodels:::isNetworkfamily(myPLN)) + expect_is(plot(myPLN), "ggplot") + expect_is(plot(myPLN, reverse = TRUE), "ggplot") + expect_is(plot(myPLN, type = "diagnostic"), "ggplot") + expect_is(getBestModel(myPLN), "ZIPLNfit_sparse") + expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") + + ## Field access + expect_true(all(myPLN$penalties > 0)) + expect_null(myPLN$stability_path) + expect_true(anyNA(myPLN$stability)) + + ## Other R6 methods + expect_true(is.data.frame(myPLN$coefficient_path())) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + myPLN$stability_selection(subsamples = subs) + expect_is(plot(myPLN, type = "stability"), "ggplot") + expect_true(!is.null(myPLN$stability_path)) + expect_true(inherits(myPLN$plot(), "ggplot")) + expect_true(inherits(myPLN$plot_objective(), "ggplot")) + expect_true(inherits(myPLN$plot_stars(), "ggplot")) +}) + +test_that("ZIPLNnetwork computes the stability path only once.", { + + ## extract_probs fails if stability selection has not been performed. + expect_error(extract_probs(models), + "Please perform stability selection using stability_selection(Robject) first", fixed = TRUE) + set.seed(1077) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + stability_selection(models, subsamples = subs, force = TRUE) + models$stability_selection(subsamples = subs) + ## Stability_path has correct dimensions + p <- getModel(models, index = 1)$p + expect_equal(dim(models$stability_path), + c(length(models$penalties) * p*(p-1)/2L, 5)) + ## try to compute it again + expect_message(stability_selection(models), + "Previous stability selection detected. Use \"force = TRUE\" to recompute it.") + ## extracts the inclusion frequencies + expect_equal(dim(extract_probs(models, index = 1, format = "matrix")), + c(p, p)) + expect_length(extract_probs(models, index = 1, format = "vector"), + p*(p-1)/2) +}) + +test_that("ZIPLNnetwork: matrix of penalties work", { + + p <- ncol(trichoptera$Abundance) + W <- diag(1, p, p) + W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) + W[lower.tri(W)] <- t(W)[lower.tri(W)] + myPLN <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, control = ZIPLNnetwork_param(penalty_weights = W)) + + ## S3 methods + expect_true(PLNmodels:::isNetworkfamily(myPLN)) + expect_is(plot(myPLN), "ggplot") + expect_is(plot(myPLN, reverse = TRUE), "ggplot") + expect_is(plot(myPLN, type = "diagnostic"), "ggplot") + expect_is(getBestModel(myPLN), "ZIPLNfit_sparse") + expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") + + ## Field access + expect_true(all(myPLN$penalties > 0)) + expect_null(myPLN$stability_path) + expect_true(anyNA(myPLN$stability)) + + ## Other R6 methods + expect_true(is.data.frame(myPLN$coefficient_path())) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + myPLN$stability_selection(subsamples = subs, control = ZIPLNnetwork_param(penalty_weights = W)) + expect_is(plot(myPLN, type = "stability"), "ggplot") + expect_true(!is.null(myPLN$stability_path)) + expect_true(inherits(myPLN$plot(), "ggplot")) + expect_true(inherits(myPLN$plot_objective(), "ggplot")) + expect_true(inherits(myPLN$plot_stars(), "ggplot")) + + ## misspecification of penalty weights should induce errors + ## not symmetric + W <- diag(1, p, p) + W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) + expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) + + ## not square + W <- matrix(1, p + 1, p) + expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) + + ## nonpositive entries + W <- matrix(0, p, p) + expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) + +}) + +test_that("PLNnetwork: list of matrices of penalties work", { + + p <- ncol(trichoptera$Abundance) + W <- diag(1, p, p) + W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) + W[lower.tri(W)] <- t(W)[lower.tri(W)] + list_W <- lapply(seq(1, 1e-2, len = 30), function(rho) rho * W) + + myPLN <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, control = ZIPLNnetwork_param(penalty_weights = list_W)) + + ## S3 methods + expect_true(PLNmodels:::isNetworkfamily(myPLN)) + expect_is(plot(myPLN), "ggplot") + expect_is(plot(myPLN, reverse = TRUE), "ggplot") + expect_is(plot(myPLN, type = "diagnostic"), "ggplot") + expect_is(getBestModel(myPLN), "ZIPLNfit_sparse") + expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") + + ## Field access + expect_true(all(myPLN$penalties > 0)) + expect_null(myPLN$stability_path) + expect_true(anyNA(myPLN$stability)) + + ## Other R6 methods + expect_true(is.data.frame(myPLN$coefficient_path())) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + myPLN$stability_selection(subsamples = subs, control = ZIPLNnetwork_param(penalty_weights = W)) + expect_is(plot(myPLN, type = "stability"), "ggplot") + expect_true(!is.null(myPLN$stability_path)) + expect_true(inherits(myPLN$plot(), "ggplot")) + expect_true(inherits(myPLN$plot_objective(), "ggplot")) + expect_true(inherits(myPLN$plot_stars(), "ggplot")) + +})