diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 1474613d..ade15537 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -19,17 +19,18 @@ jobs: matrix: config: - {os: macOS-latest, r: 'release'} + - {os: macOS-latest, r: 'oldrel-1'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: windows-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/DESCRIPTION b/DESCRIPTION index bfa54fe0..b203c763 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ BugReports: https://github.com/pln-team/PLNmodels/issues License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 3.4) LazyData: true biocViews: @@ -90,6 +90,7 @@ Collate: 'ZIPLNfit-class.R' 'ZIPLN.R' 'ZIPLNfit-S3methods.R' + 'ZIPLNnetwork.R' 'barents.R' 'import_utils.R' 'mollusk.R' diff --git a/NAMESPACE b/NAMESPACE index ae446525..debf5692 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,12 +7,17 @@ S3method(coef,ZIPLNfit) S3method(fitted,PLNfit) S3method(fitted,PLNmixturefit) S3method(fitted,ZIPLNfit) +S3method(getBestModel,Networkfamily) S3method(getBestModel,PLNPCAfamily) S3method(getBestModel,PLNmixturefamily) S3method(getBestModel,PLNnetworkfamily) +S3method(getBestModel,ZIPLNnetworkfamily) +S3method(getModel,Networkfamily) S3method(getModel,PLNPCAfamily) S3method(getModel,PLNmixturefamily) S3method(getModel,PLNnetworkfamily) +S3method(getModel,ZIPLNnetworkfamily) +S3method(plot,Networkfamily) S3method(plot,PLNLDAfit) S3method(plot,PLNPCAfamily) S3method(plot,PLNPCAfit) @@ -21,6 +26,8 @@ S3method(plot,PLNmixturefamily) S3method(plot,PLNmixturefit) S3method(plot,PLNnetworkfamily) S3method(plot,PLNnetworkfit) +S3method(plot,ZIPLNfit_sparse) +S3method(plot,ZIPLNnetworkfamily) S3method(predict,PLNLDAfit) S3method(predict,PLNfit) S3method(predict,PLNmixturefit) @@ -48,6 +55,8 @@ export(PLNnetwork) export(PLNnetwork_param) export(ZIPLN) export(ZIPLN_param) +export(ZIPLNnetwork) +export(ZIPLNnetwork_param) export(coefficient_path) export(compute_PLN_starting_point) export(compute_offset) @@ -77,6 +86,7 @@ importFrom(corrplot,corrplot) importFrom(future.apply,future_lapply) importFrom(future.apply,future_sapply) importFrom(glassoFast,glassoFast) +importFrom(grDevices,rgb) importFrom(grid,nullGrob) importFrom(grid,textGrob) importFrom(gridExtra,arrangeGrob) diff --git a/NEWS.md b/NEWS.md index 01adb95d..b0ea56d7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ -# Current (2024-01-23) +# PLNmodels 1.2.0 (2024-02-24) -* Addition of ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the (for now) standard PLN model (merge PR #116) +* new feature: ZIPLN (PLN with zero inflation) + * ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the standard PLN model (merge PR #116) + * ZIPLNnetwork() and ZIPLNfit_sparse-class to allow for zero-inflation in the PLNnetwork model (merge PR #118) + * Code factorization between PLNnetwork and ZIPLNnetwork (and associated classes) +* fix inconsistency between fitted and predict (merge PR #115) # PLNmodels 1.1.0 (2024-01-08) diff --git a/R/PLNnetwork.R b/R/PLNnetwork.R index a6f70c0e..372357be 100644 --- a/R/PLNnetwork.R +++ b/R/PLNnetwork.R @@ -1,18 +1,19 @@ -#' Poisson lognormal model towards sparse network inference +#' Sparse Poisson lognormal model for network inference #' -#' Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets). +#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +#' Use the (g)lm syntax to specify the model (including covariates and offsets). #' #' @param formula an object of class "formula": a symbolic description of the model to be fitted. #' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called. #' @param subset an optional vector specifying a subset of observations to be used in the fitting process. #' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See \code{PLNnetwork_param()} for additional tuning of the penalty. +#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See `PLNnetwork_param()` for additional tuning of the penalty. #' @param control a list-like structure for controlling the optimization, with default generated by [PLNnetwork_param()]. See the corresponding documentation for details; #' #' @return an R6 object with class [`PLNnetworkfamily`], which contains #' a collection of models with class [`PLNnetworkfit`] #' -#' @rdname PLNnetwork #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -28,16 +29,16 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control replace 'list(my_arg = xx)' by PLN_param(my_arg = xx) and see the documentation of PLNnetwork_param().") ## extract the data matrices and weights - args <- extract_model(match.call(expand.dots = FALSE), parent.frame()) + data_ <- extract_model(match.call(expand.dots = FALSE), parent.frame()) ## Instantiate the collection of models if (control$trace > 0) cat("\n Initialization...") - myPLN <- PLNnetworkfamily$new(penalties, args$Y, args$X, args$O, args$w, args$formula, control) + myPLN <- PLNnetworkfamily$new(penalties, data_, control) ## Optimization if (control$trace > 0) cat("\n Adjusting", length(myPLN$penalties), "PLN with sparse inverse covariance estimation\n") if (control$trace) cat("\tJoint optimization alternating gradient descent and graphical-lasso\n") - myPLN$optimize(control$config_optim) + myPLN$optimize(data_, control$config_optim) ## Post-treatments if (control$trace > 0) cat("\n Post-treatments") @@ -59,7 +60,7 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control #' @param n_penalties an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non `NULL` #' @param min_ratio the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by `min_ratio`. Default is 0.1. #' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE} -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. #' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on #' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), #' which sometimes speeds up the inference. @@ -67,7 +68,7 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control #' @return list of parameters configuring the fit. #' @inherit PLN_param details #' @details See [PLN_param()] for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-6 #' * "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 #' #' @seealso [PLN_param()] diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index ca2648cf..8a8fd7bc 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -6,23 +6,21 @@ ## Auxiliary functions to check the given class of an objet -isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")} +isNetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} -#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) -#' -#' @name plot.PLNnetworkfamily +#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]) #' #' @inheritParams plot.PLNfamily #' @inherit plot.PLNfamily return details #' -#' @param x an R6 object with class [`PLNnetworkfamily`] +#' @param x an R6 object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @param type a character, either "criteria", "stability" or "diagnostic" for the type of plot. -#' @param criteria vector of characters. The criteria to plot in c("loglik", "BIC", "ICL", "R_squared", "EBIC", "pen_loglik"). -#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only relevant when `type = "criteria"`. +#' @param criteria Vector of criteria to plot, to be selected among "loglik" (log-likelihood), +#' "BIC", "ICL", "R_squared", "EBIC" and "pen_loglik" (penalized log-likelihood). +#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only used when `type = "criteria"`. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @param stability scalar: the targeted level of stability in stability plot. Default is .9. #' -#' #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -30,18 +28,17 @@ isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")} #' \dontrun{ #' plot(fits) #' } -#' @return Produces either a diagnostic plot (with \code{type = 'diagnostic'}), a stability plot -#' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered -#' (with \code{type = 'criteria'}, the default). +#' @return Produces either a diagnostic plot (with `type = 'diagnostic'`), a stability plot +#' (with `type = 'stability'`) or the evolution of the criteria of the different models considered +#' (with `type = 'criteria'`, the default). #' @export -plot.PLNnetworkfamily <- - function(x, +plot.Networkfamily <- function(x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE, stability = 0.9, ...) { - stopifnot(isPLNnetworkfamily(x)) + stopifnot(isNetworkfamily(x)) type <- match.arg(type) if (type == "criteria") p <- x$plot(criteria, reverse) @@ -53,27 +50,50 @@ plot.PLNnetworkfamily <- p } -#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] +#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits #' @export -getModel.PLNnetworkfamily <- function(Robject, var, index = NULL) { - stopifnot(isPLNnetworkfamily(Robject)) +plot.PLNnetworkfamily <- plot.Networkfamily + +#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits +#' @export +plot.ZIPLNnetworkfamily <- plot.Networkfamily + +#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] +#' @export +getModel.Networkfamily <- function(Robject, var, index = NULL) { + stopifnot(isNetworkfamily(Robject)) Robject$getModel(var, index) } -#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] +#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] +#' @export +getModel.PLNnetworkfamily <- getModel.Networkfamily + +#' @describeIn getModel Model extraction for [`ZIPLNnetworkfamily`] #' @export -getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { - stopifnot(isPLNnetworkfamily(Robject)) +getModel.ZIPLNnetworkfamily <- getModel.Networkfamily + +#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] +#' @export +getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { + stopifnot(isNetworkfamily(Robject)) stability <- list(...)[["stability"]] if (is.null(stability)) stability <- 0.9 Robject$getBestModel(match.arg(crit), stability) } +#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] +#' @export +getBestModel.PLNnetworkfamily <- getBestModel.Networkfamily + +#' @describeIn getBestModel Model extraction for [`ZIPLNnetworkfamily`] +#' @export +getBestModel.ZIPLNnetworkfamily <- getBestModel.Networkfamily #' Extract the regularization path of a PLNnetwork fit #' #' @name coefficient_path -#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()] +#' @param Robject an object with class [`Networkfamily`], i.e. an output from [PLNnetwork()] #' @param precision a logical, should the coefficients of the precision matrix Omega or the covariance matrix Sigma be sent back. Default is `TRUE`. #' @param corr a logical, should the correlation (partial in case `precision = TRUE`) be sent back. Default is `TRUE`. #' @@ -85,7 +105,7 @@ getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StAR #' head(coefficient_path(fits)) #' @export coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) Robject$coefficient_path(precision, corr) } @@ -95,12 +115,12 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { #' #' @description This function computes the StARS stability criteria over a path of penalties. If a path has already been computed, the functions stops with a message unless `force = TRUE` has been specified. #' -#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()] -#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. -#' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. +#' @param Robject an object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`], i.e. an output from [PLNnetwork()] or [ZIPLNnetwork()] +#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. +#' @param control a list controlling the main optimization process in each call to [PLNnetwork()] or [ZIPLNnetwork()]. See [PLN_param()] or [ZIPLN_param()] for details. #' @param force force computation of the stability path, even if a previous one has been detected. #' -#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`PLNnetworkfamily`] +#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`Networkfamily`] #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -112,7 +132,8 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { #' } #' @export stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork_param(), force = FALSE) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) + if (inherits(Robject, "ZIPLNnetworkfamily")) control <- ZIPLNnetwork_param() if (force || anyNA(Robject$stability)) { Robject$stability_selection(subsamples, control) } else { @@ -120,8 +141,6 @@ stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork } } - - #' Extract edge selection frequency in bootstrap subsamples #' #' @description Extracts edge selection frequency in networks reconstructed from bootstrap subsamples @@ -162,7 +181,7 @@ extract_probs <- function(Robject, penalty = NULL, index = NULL, crit = c("StARS", "BIC", "EBIC"), format = c("matrix", "vector"), tol = 1e-5) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) ## Check if stability selection has been performed stab_path <- Robject$stability_path if (is.null(stab_path)) { diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 00f2cec8..20f607a7 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -1,33 +1,28 @@ -#' An R6 Class to represent a collection of PLNnetworkfit +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS Networkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to virtually represent a collection of network fits #' -#' @description The function [PLNnetwork()] produces an instance of this class. +#' @description The functions [PLNnetwork()] and [ZIPLNnetwork()] both produce an instance of this class, which can be thought of as a vector of [`PLNnetworkfit`]s [`ZIPLNfit_sparse`]s (indexed by penalty parameter) #' -#' This class comes with a set of methods, some of them being useful for the user: +#' This class comes with a set of methods mostly used to compare +#' network fits (in terms of goodness of fit) or extract one from +#' the family (based on penalty parameter and/or goodness of it). #' See the documentation for [getBestModel()], -#' [getModel()] and [plot()][plot.PLNnetworkfamily()] +#' [getModel()] and [plot()][plot.Networkfamily()] for the user-facing ones. #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. -#' @param responses the matrix of responses common to every models -#' @param covariates the matrix of covariates common to every models -#' @param offsets the matrix of offsets common to every models -#' @param weights the vector of observation weights -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. -#' @param index Integer index of the model to be returned. Only the first value is taken into account #' #' @include PLNfamily-class.R #' @importFrom R6 R6Class #' @importFrom glassoFast glassoFast -#' @examples -#' data(trichoptera) -#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) -#' class(fits) -#' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] -PLNnetworkfamily <- R6Class( - classname = "PLNnetworkfamily", +#' @seealso The functions [PLNnetwork()], [ZIPLNnetwork()] and the classes [`PLNnetworkfit`], [`ZIPLNfit_sparse`] +Networkfamily <- R6Class( + classname = "Networkfamily", inherit = PLNfamily, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ------ @@ -36,33 +31,14 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize all models in the collection - #' @return Update current [`PLNnetworkfit`] with smart starting values - initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + #' @return Update all network fits in the family with smart starting values + initialize = function(penalties, data, control) { ## Initialize fields shared by the super class - super$initialize(responses, covariates, offsets, weights, control) - - ## A basic model for inception, useless one is defined by the user -### TODO check if it is useful - if (is.null(control$inception)) { - - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - myPLN <- switch( - control$inception_cov, - "spherical" = PLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), - "diagonal" = PLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), - PLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full - ) - ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov - ## for the inner-outer loop of PLNnetwork. - myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) - control$inception <- myPLN - } + super$initialize(data$Y, data$X, data$O, data$w, control) if (is.null(control$penalty_weights)) - control$penalty_weights <- matrix(1, ncol(responses), ncol(responses)) + control$penalty_weights <- matrix(1, private$p, private$p) ## Get the number of penalty if (is.null(penalties)) { if (is.list(control$penalty_weights)) @@ -76,41 +52,37 @@ PLNnetworkfamily <- R6Class( else list_penalty_weights <- control$penalty_weights + ## Check consistency of weights and optionally silent diagonal penalties + list_penalty_weights <- + map(list_penalty_weights, function(penalty_weights) { + stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(penalty_weights) <- 0 + penalty_weights + }) + ## Get an appropriate grid of penalties if (is.null(penalties)) { - if (control$trace > 1) cat("\n Recovering an appropriate grid of penalties.") - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - # changed it to other one + if (control$trace > 1) cat("\nComputing an appropriate grid of penalties.") max_pen <- list_penalty_weights %>% - map(~ as.matrix(myPLN$model_par$Sigma) / .x) %>% - # map(~ control$inception$model_par$Sigma / .x) %>% + map(~ as.matrix(control$inception$model_par$Sigma) / .x) %>% map_dbl(~ max(abs(.x[upper.tri(.x, diag = control$penalize_diagonal)]))) %>% max() penalties <- 10^seq(log10(max_pen), log10(max_pen*control$min_ratio), len = control$n_penalties) } else { - if (control$trace > 1) cat("\nPenalties already set by the user") + if (control$trace > 1) cat("\nUsing penalties penalties provided by the user.") stopifnot(all(penalties > 0)) } ## Sort the penalty in decreasing order o <- order(penalties, decreasing = TRUE) private$params <- penalties[o] - list_penalty_weights <- list_penalty_weights[o] - - ## instantiate as many models as penalties - control$trace <- 0 - self$models <- map2(private$params, list_penalty_weights, function(penalty, penalty_weights) { - PLNnetworkfit$new(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) - }) - + private$penalties_weights <- list_penalty_weights[o] }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Optimization ---------------------- #' @description Call to the C++ optimizer on all models of the collection #' @param config a list for controlling the optimization. - optimize = function(config) { + optimize = function(data, config) { ## Go along the penalty grid (i.e the models) for (m in seq_along(self$models)) { @@ -121,7 +93,7 @@ PLNnetworkfamily <- R6Class( if (config$trace > 1) { cat("\tsparsifying penalty =", self$models[[m]]$penalty, "- iteration:") } - self$models[[m]]$optimize(self$responses, self$covariates, self$offsets, self$weights, config) + self$models[[m]]$optimize(data, config) ## Save time by starting the optimization of model m + 1 with optimal parameters of model m if (m < length(self$penalties)) self$models[[m + 1]]$update( @@ -139,71 +111,9 @@ PLNnetworkfamily <- R6Class( }, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Stability ------------------------- - #' @description Compute the stability path by stability selection - #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. - #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. - stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { - - ## select default subsamples according - if (is.null(subsamples)) { - subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) - subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) - } - - ## got for stability selection - cat("\nStability Selection for PLNnetwork: ") - cat("\nsubsampling: ") - - stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { - cat("+") - inception_ <- self$getModel(self$penalties[1]) - inception_$update( - M = inception_$var_par$M[subsample, ], - S = inception_$var_par$S[subsample, ] - ) - - ## force some control parameters - control$inception = inception_ - control$penalty_weights = map(self$models, "penalty_weights") - control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) - control$trace <- 0 - control$config_optim$trace <- 0 - - myPLN <- PLNnetworkfamily$new(penalties = self$penalties, - responses = self$responses [subsample, , drop = FALSE], - covariates = self$covariates[subsample, , drop = FALSE], - offsets = self$offsets [subsample, , drop = FALSE], - formula = private$formula, - weights = self$weights [subsample], control = control) - - myPLN$optimize(control$config_optim) - nets <- do.call(cbind, lapply(myPLN$models, function(model) { - as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] - })) - nets - }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) - - prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) - ## formatting/tyding - node_set <- colnames(self$getModel(index = 1)$model_par$B) - colnames(prob) <- self$penalties - private$stab_path <- prob %>% - as.data.frame() %>% - mutate(Edge = 1:n()) %>% - gather(key = "Penalty", value = "Prob", -Edge) %>% - mutate(Penalty = as.numeric(Penalty), - Node1 = node_set[edge_to_node(Edge)$node1], - Node2 = node_set[edge_to_node(Edge)$node2], - Edge = paste0(Node1, "|", Node2)) - - invisible(subsamples) - }, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Extractors ------------------------ - #' @description Extract the regularization path of a [`PLNnetworkfamily`] + #' @description Extract the regularization path of a [`Networkfamily`] #' @param precision Logical. Should the regularization path be extracted from the precision matrix Omega (`TRUE`, default) or from the variance matrix Sigma (`FALSE`) #' @param corr Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to `TRUE` coefficient_path = function(precision = TRUE, corr = TRUE) { @@ -231,8 +141,10 @@ PLNnetworkfamily <- R6Class( }, #' @description Extract the best network in the family according to some criteria - #' @param crit character. Criterion used to perform the selection. Is "StARS" is chosen but `$stability` field is empty, will compute stability path. + #' @param crit character. Criterion used to perform the selection. If "StARS" is chosen but `$stability` field is empty, will compute stability path. #' @param stability Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is `0.9`. + #' @details + #' For BIC and EBIC criteria, higher is better. getBestModel = function(crit = c("BIC", "EBIC", "StARS"), stability = 0.9){ crit <- match.arg(crit) if (crit == "StARS") { @@ -255,11 +167,11 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Graphical methods ----------------- - #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) + #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (a [`Networkfamily`]) #' @param criteria vector of characters. The criteria to plot in `c("loglik", "pen_loglik", "BIC", "EBIC")`. Defaults to all of them. #' @param reverse A logical indicating whether to plot the value of the criteria in the "natural" direction #' (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the - #' natural direction, on the same scale as the log-likelihood.. + #' natural direction, on the same scale as the log-likelihood. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @return a [`ggplot`] graph plot = function(criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE) { @@ -269,61 +181,61 @@ PLNnetworkfamily <- R6Class( p }, - #' @description Plot stability path - #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. - #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. - #' @return a [`ggplot`] graph - plot_stars = function(stability = 0.9, log.x = TRUE) { - if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") - dplot <- self$criteria %>% select(param, density, stability) %>% - rename(Penalty = param) %>% - gather(key = "Metric", value = "Value", stability:density) - penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% - pull(Penalty) %>% min() - - p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + - geom_point() + geom_line() + theme_bw() + - ## Add information correspinding to best lambda - geom_vline(xintercept = penalty_stars, linetype = 2) + - geom_hline(yintercept = stability, linetype = 2) + - annotate(x = penalty_stars, y = 0, - label = paste("lambda == ", round(penalty_stars, 5)), - parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + - annotate(x = penalty_stars, y = stability, - label = paste("stability == ", stability), - parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") - if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") - p - }, - - #' @description Plot objective value of the optimization problem along the penalty path - #' @return a [`ggplot`] graph - plot_objective = function() { - objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) - changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$outer_iterations))) - dplot <- data.frame(iteration = 1:length(objective), objective = objective) - p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + - geom_vline(xintercept = changes, linetype="dashed", alpha = 0.25) + - ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + - annotate("text", x = changes, y = min(dplot$objective), angle = 90, - label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() - p - }, - - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Print methods --------------------- - #' @description User friendly print method - show = function() { - super$show() - cat(" Task: Network Inference \n") - cat("========================================================\n") - cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") - cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") - cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") - if (!anyNA(self$criteria$stability)) - cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") - } + #' @description Plot stability path + #' @param stability scalar: the targeted level of stability using stability selection. Default is `0.9`. + #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. + #' @return a [`ggplot`] graph + plot_stars = function(stability = 0.9, log.x = TRUE) { + if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") + dplot <- self$criteria %>% select(param, density, stability) %>% + rename(Penalty = param) %>% + gather(key = "Metric", value = "Value", stability:density) + penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% + pull(Penalty) %>% min() + + p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + + geom_point() + geom_line() + theme_bw() + + ## Add information corresponding to best lambda + geom_vline(xintercept = penalty_stars, linetype = 2) + + geom_hline(yintercept = stability, linetype = 2) + + annotate(x = penalty_stars, y = 0, + label = paste("lambda == ", round(penalty_stars, 5)), + parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + + annotate(x = penalty_stars, y = stability, + label = paste("stability == ", stability), + parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") + if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") + p + }, + + #' @description Plot objective value of the optimization problem along the penalty path + #' @return a [`ggplot`] graph + plot_objective = function() { + objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) + changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$iterations))) + dplot <- data.frame(iteration = 1:length(objective), objective = objective) + p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + + geom_vline(xintercept = changes, linetype = "dashed", alpha = 0.25) + + ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + + annotate("text", x = changes, y = min(dplot$objective), angle = 90, + label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() + p + }, + + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Print methods --------------------- + #' @description User friendly print method + show = function() { + super$show() + cat(" Task: Network Inference \n") + cat("========================================================\n") + cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") + cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") + cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") + if (!anyNA(self$criteria$stability)) + cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") + } ), ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -331,6 +243,7 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% private = list( + penalties_weights = NULL, # a field to store the weights for each penalty, stab_path = NULL # a field to store the stability path, ), @@ -347,16 +260,16 @@ PLNnetworkfamily <- R6Class( if (!is.null(private$stab_path)) { stability <- self$stability_path %>% dplyr::select(Penalty, Prob) %>% - group_by(Penalty) %>% - summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% - arrange(desc(Penalty)) %>% - pull(Stability) + dplyr::group_by(Penalty) %>% + dplyr::summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% + dplyr::arrange(desc(Penalty)) %>% + dplyr::pull(Stability) } else { stability <- rep(NA, length(self$penalties)) } stability }, - #' @field criteria a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits + #' @field criteria a data frame with the values of some criteria (variational log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits #' BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty criteria = function() {mutate(super$criteria, stability = self$stability)} ) @@ -366,3 +279,279 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ) +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS PLNnetworkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to represent a collection of [`PLNnetworkfit`]s +#' +#' @description The function [PLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods mostly used to compare +#' network fits (in terms of goodness of fit) or extract one from +#' the family (based on penalty parameter and/or goodness of it). +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.Networkfamily()] for the user-facing ones. +#' +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param data a named list used internally to carry the data matrices +#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param control a list for controlling the optimization. +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] +PLNnetworkfamily <- R6Class( + classname = "PLNnetworkfamily", + inherit = Networkfamily, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, data, control) { + + ## A basic model (constrained model) for inception, ignored if inception is provided by the user + if (is.null(control$inception)) { + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN <- switch( + control$inception_cov, + "spherical" = PLNfit_spherical$new(data$Y, data$X, data$O, data$w, data$formula, control), + "diagonal" = PLNfit_diagonal$new(data$Y, data$X, data$O, data$w, data$formula, control), + PLNfit$new(data$Y, data$X, data$O, data$w, data$formula, control) # defaults to full + ) + myPLN$optimize(data$Y, data$X, data$O, data$w, control$config_optim) + control$inception <- myPLN + } + + ## Initialize fields shared by the super class + super$initialize(penalties, data, control) + + ## instantiate one model per penalty + control$trace <- 0 + self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { + control$penalty <- penalty + control$penalty_weights <- penalty_weights + PLNnetworkfit$new(data, control) + }) + + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to [`PLNnetwork()`]. See [PLNnetwork()] and [PLN_param()] for details. + stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { + + ## select default subsamples according to Liu et al. (2010) recommendations. + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for PLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + + data <- list( + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates[subsample, , drop = FALSE], + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample]) + + myPLN <- PLNnetworkfamily$new(self$penalties, data, control) + myPLN$optimize(data, control$config_optim) + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + } + ) + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) + +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS PLNnetworkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to represent a collection of ZIPLNnetwork +#' +#' @description The function [ZIPLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods, some of them being useful for the user: +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.ZIPLNnetworkfamily()] +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param data a named list used internally to carry the data matrices +#' @param control a list for controlling the optimization. +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [ZIPLNnetwork()], the class [`ZIPLNfit_sparse`] +ZIPLNnetworkfamily <- R6Class( + classname = "ZIPLNnetworkfamily", + inherit = Networkfamily, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + #' @field covariates0 the matrix of covariates included in the ZI component + covariates0 = NULL, # covariates used in the ZI component + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, data, control) { + + ## A basic model for inception, useless one is defined by the user + if (is.null(control$inception)) { + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN <- switch( + control$inception_cov, + "spherical" = ZIPLNfit_spherical$new(data, control), + "diagonal" = ZIPLNfit_diagonal$new(data, control), + ZIPLNfit$new(data, control) # defaults to full + ) + myPLN$optimize(data, control$config_optim) + control$inception <- myPLN + } + + ## Initialize fields shared by the super class + super$initialize(penalties, data, control) + self$covariates0 <- data$X0 + + ## instantiate one model per penalty + control$trace <- 0 + self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { + control$penalty <- penalty + control$penalty_weights <- penalty_weights + ZIPLNfit_sparse$new(data, control) + }) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to [`PLNnetwork()`]. See [ZIPLNnetwork()] and [ZIPLN_param()] for details. + stability_selection = function(subsamples = NULL, control = ZIPLNnetwork_param()) { + + ## select default subsamples according to Liu et al. (2010) recommendations. + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for ZIPLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + R = inception_$var_par$R[subsample, ], + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + control$ziparam <- inception_$zi_model + X0 <- self$covariates0 + if (nrow(X0) > 0) X0 <- X0[subsample, , drop = FALSE] + data <- list( + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates [subsample, , drop = FALSE], + X0 = X0, + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample]) + + myPLN <- ZIPLNnetworkfamily$new(self$penalties, data, control) + myPLN$optimize(data, control$config_optim) + + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + } + ) + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 1dff42da..312c16df 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -5,20 +5,16 @@ #' See the documentation for [`plot()`][plot.PLNnetworkfit()] and methods inherited from [`PLNfit`]. #' ## Parameters common to all PLN-xx-fit methods (shared with PLNfit but inheritance does not work) -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but not latent variable. -#' @param B matrix of regression matrix +#' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but no latent variable. +#' @param B matrix of regression coefficients #' @param Sigma variance-covariance matrix of the latent variables #' @param Omega precision matrix of the latent variables. Inverse of Sigma. #' ## Parameters specific to PLNnetwork-fit methods #' @param penalty a positive real number controlling the level of sparsity of the underlying network. -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pair of node. Must be symmetric with positive values. #' #' @include PLNnetworkfit-class.R #' @examples @@ -41,40 +37,27 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize a [`PLNnetworkfit`] object - initialize = function(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) { - stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) - super$initialize(responses, covariates, offsets, weights, formula, control) - private$lambda <- penalty - private$rho <- penalty_weights - if (!control$penalize_diagonal) diag(private$rho) <- 0 - }, - #' @description Update fields of a [`PLNnetworkfit`] object - #' @param B matrix of regression matrix - #' @param Sigma variance-covariance matrix of the latent variables - #' @param Omega precision matrix of the latent variables. Inverse of Sigma. - #' @param M matrix of mean vectors for the variational approximation - #' @param S matrix of variance vectors for the variational approximation - #' @param Z matrix of latent vectors (includes covariates and offset effects) - #' @param A matrix of fitted values - #' @param Ji vector of variational lower bounds of the log-likelihoods (one value per sample) - #' @param R2 approximate R^2 goodness-of-fit criterion - #' @param monitoring a list with optimization monitoring quantities - update = function(penalty=NA, B=NA, Sigma=NA, Omega=NA, M=NA, S=NA, Z=NA, A=NA, Ji=NA, R2=NA, monitoring=NA) { - super$update(B = B, Sigma = Sigma, Omega = Omega, M, S = S, Z = Z, A = A, Ji = Ji, R2 = R2, monitoring = monitoring) - if (!anyNA(penalty)) private$lambda <- penalty + initialize = function(data, control) { + super$initialize(data$Y, data$X, data$O, data$w, data$formula, control) + ## Default for penalty weights (if not already set) + if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) + stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(control$penalty_weights) <- 0 + private$lambda <- control$penalty + private$rho <- control$penalty_weights }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Optimization ---------------------- #' @description Call to the C++ optimizer and update of the relevant fields #' @param config a list for controlling the optimization - optimize = function(responses, covariates, offsets, weights, config) { + optimize = function(data, config) { cond <- FALSE; iter <- 0 objective <- numeric(config$maxit_out) convergence <- numeric(config$maxit_out) ## start from the standard PLN at initialization objective.old <- -self$loglik - args <- list(data = list(Y = responses, X = covariates, O = offsets, w = weights), + args <- list(data = data, params = list(B = private$B, M = private$M, S = private$S), config = config) while (!cond) { @@ -90,7 +73,7 @@ PLNnetworkfit <- R6Class( do.call(self$update, optim_out) ## Check convergence - objective[iter] <- -self$loglik + self$penalty * sum(abs(private$Omega)) + objective[iter] <- -self$loglik # + self$penalty * sum(abs(private$Omega)) convergence[iter] <- abs(objective[iter] - objective.old)/abs(objective[iter]) if ((convergence[iter] < config$ftol_out) | (iter >= config$maxit_out)) cond <- TRUE @@ -102,9 +85,9 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## OUTPUT private$Sigma <- Matrix::symmpart(glasso_out$w) - private$monitoring$objective <- objective[1:iter] - private$monitoring$convergence <- convergence[1:iter] - private$monitoring$outer_iterations <- iter + private$monitoring$objective <- objective[1:iter] + private$monitoring$convergence <- convergence[1:iter] + private$monitoring$iterations <- iter }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -145,51 +128,14 @@ PLNnetworkfit <- R6Class( node.labels = NULL, layout = layout_in_circle, plot = TRUE) { - - type <- match.arg(type) - output <- match.arg(output) - - net <- self$latent_network(type) - - if (output == "igraph") { - - G <- graph_from_adjacency_matrix(net, mode = "undirected", weighted = TRUE, diag = FALSE) - - if (!is.null(node.labels)) { - igraph::V(G)$label <- node.labels - } else { - igraph::V(G)$label <- colnames(net) - } - ## Nice nodes - V.deg <- degree(G)/sum(degree(G)) - igraph::V(G)$label.cex <- V.deg / max(V.deg) + .5 - igraph::V(G)$size <- V.deg * 100 - igraph::V(G)$label.color <- rgb(0, 0, .2, .8) - igraph::V(G)$frame.color <- NA - ## Nice edges - igraph::E(G)$color <- ifelse(igraph::E(G)$weight > 0, edge.color[1], edge.color[2]) - if (type == "support") - igraph::E(G)$width <- abs(igraph::E(G)$weight) - else - igraph::E(G)$width <- 15*abs(igraph::E(G)$weight) - - if (remove.isolated) { - G <- delete.vertices(G, which(degree(G) == 0)) - } - if (plot) plot(G, layout = layout) - } - if (output == "corrplot") { - if (plot) { - if (ncol(net) > 100) - colnames(net) <- rownames(net) <- rep(" ", ncol(net)) - G <- net - diag(net) <- 0 - corrplot(as.matrix(net), method = "color", is.corr = FALSE, tl.pos = "td", cl.pos = "n", tl.cex = 0.5, type = "upper") - } else { - G <- net - } - } - invisible(G) + .plot_network(self$latent_network(match.arg(type)), + type = match.arg(type), + output = match.arg(output), + edge.color = edge.color, + remove.isolated = remove.isolated, + node.labels = node.labels, + layout = layout, + plot = plot) }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/RcppExports.R b/R/RcppExports.R index c2f80753..d1053c03 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -69,8 +69,12 @@ optim_zipln_zipar_covar <- function(R, init_B0, X0, configuration) { .Call('_PLNmodels_optim_zipln_zipar_covar', PACKAGE = 'PLNmodels', R, init_B0, X0, configuration) } -optim_zipln_R <- function(Y, X, O, M, S, Pi) { - .Call('_PLNmodels_optim_zipln_R', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi) +optim_zipln_R_var <- function(Y, X, O, M, S, Pi, B) { + .Call('_PLNmodels_optim_zipln_R_var', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi, B) +} + +optim_zipln_R_exact <- function(Y, X, O, M, S, Pi, B) { + .Call('_PLNmodels_optim_zipln_R_exact', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi, B) } optim_zipln_M <- function(init_M, Y, X, O, R, S, B, Omega, configuration) { diff --git a/R/ZIPLN.R b/R/ZIPLN.R index f5fe288e..7b84ddf2 100644 --- a/R/ZIPLN.R +++ b/R/ZIPLN.R @@ -43,23 +43,23 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control = ZIPLN_param()) { ## extract the data matrices and weights - args <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) - control$ziparam <- ifelse((args$zicovar), "covar", match.arg(zi)) + data_ <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((data_$zicovar), "covar", match.arg(zi)) ## initialization if (control$trace > 0) cat("\n Initialization...") myPLN <- switch(control$covariance, - "diagonal" = ZIPLNfit_diagonal$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "spherical" = ZIPLNfit_spherical$new(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "fixed" = ZIPLNfit_fixed$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "sparse" = ZIPLNfit_sparse$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - ZIPLNfit$new(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control)) # default: full covariance + "diagonal" = ZIPLNfit_diagonal$new(data_, control), + "spherical" = ZIPLNfit_spherical$new(data_, control), + "fixed" = ZIPLNfit_fixed$new(data_, control), + "sparse" = ZIPLNfit_sparse$new(data_, control), + ZIPLNfit$new(data_, control)) # default: full covariance ## optimization if (control$trace > 0) cat("\n Adjusting a ZI-PLN model with", control$covariance,"covariance model and", control$ziparam, "specific parameter(s) in Zero inflation component.") - myPLN$optimize(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, control$config_optim) + myPLN$optimize(data_, control$config_optim) if (control$trace > 0) cat("\n DONE!\n") myPLN @@ -73,14 +73,16 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control #' Helper to define list of parameters to control the PLN fit. All arguments have defaults. #' #' @inheritParams PLN_param +#' @inheritParams PLNnetwork_param #' @param penalty a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity). #' @return list of parameters used during the fit and post-processing steps #' #' @inherit PLN_param details -#' @details See [PLN_param()] for a full description of the generic optimization parameters. ZIPLN_param() also has two additional parameters controlling the optimization due -#' the inner-outer loop structure of the optimizer: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-8 +#' @details See [PLN_param()] and [PLNnetwork_param()] for a full description of the generic optimization parameters. Like [PLNnetwork_param()], ZIPLN_param() has two parameters controlling the optimization due the inner-outer loop structure of the optimizer: +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-6 #' * "maxit_out" outer solver stops when the number of iteration exceeds `maxit_out`. Default is 100 +#' and one additional parameter controlling the form of the variational approximation of the zero inflation: +#' * "approx_ZI" either uses an exact or approximated conditional distribution for the zero inflation. Default is FALSE #' #' @export ZIPLN_param <- function( @@ -89,6 +91,8 @@ ZIPLN_param <- function( covariance = c("full", "diagonal", "spherical", "fixed", "sparse"), Omega = NULL, penalty = 0, + penalize_diagonal = TRUE , + penalty_weights = NULL , config_post = list(), config_optim = list(), inception = NULL # pretrained ZIPLNfit used as initialization @@ -113,6 +117,7 @@ ZIPLN_param <- function( config_opt$trace <- trace config_opt$ftol_out <- 1e-6 config_opt$maxit_out <- 100 + config_opt$approx_ZI <- FALSE config_opt[names(config_optim)] <- config_optim structure(list( @@ -121,6 +126,8 @@ ZIPLN_param <- function( covariance = covariance, Omega = Omega , penalty = penalty , + penalize_diagonal = penalize_diagonal, + penalty_weights = penalty_weights , config_post = config_pst, config_optim = config_opt, inception = inception), class = "PLNmodels_param") diff --git a/R/ZIPLNfit-S3methods.R b/R/ZIPLNfit-S3methods.R index 6fd24af8..d6aabb84 100644 --- a/R/ZIPLNfit-S3methods.R +++ b/R/ZIPLNfit-S3methods.R @@ -95,3 +95,40 @@ sigma.ZIPLNfit <- function(object, ...) { object$model_par$Sigma } +## ========================================================================================= +## +## PUBLIC S3 METHODS FOR ZIPLNfit_sparse +## +## ========================================================================================= + +## Auxiliary functions to check the given class of an objet +isZIPLNfit_sparse <- function(Robject) {inherits(Robject, "ZIPLNfit_sparse")} + +#' Extract and plot the network (partial correlation, support or inverse covariance) from a [`ZIPLNfit_sparse`] object +#' +#' @name plot.ZIPLNfit_sparse +#' @inheritParams plot.PLNnetworkfit +#' @param x an R6 object with class [`ZIPLNfit_sparse`] +#' +#' @inherit plot.PLNnetworkfit return +#' +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) +#' \dontrun{ +#' plot(fit) +#' } +#' @export +plot.ZIPLNfit_sparse <- + function(x, + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE, ...) { + stopifnot(isZIPLNfit_sparse(x)) + invisible(x$plot_network(type, output, edge.color, remove.isolated, node.labels, layout, plot)) + } diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 3ab9e286..567e7a95 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -1,3 +1,7 @@ +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS ZIPLNfit ----- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + #' An R6 Class to represent a ZIPLNfit #' #' @description The function [ZIPLN()] fits a model which is an instance of an object with class [`ZIPLNfit`]. @@ -7,11 +11,7 @@ #' #' Fields are accessed via active binding and cannot be changed by the user. #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @inherit ZIPLN details @@ -69,59 +69,66 @@ ZIPLNfit <- R6Class( #' @description Initialize a [`ZIPLNfit`] model #' @importFrom stats glm.fit residuals poisson fitted coef #' @importFrom pscl zeroinfl - initialize = function(responses, covariates, offsets, weights, formula, control) { + initialize = function(data, control) { ## problem dimensions - n <- nrow(responses); p <- ncol(responses); d <- ncol(covariates$PLN); d0 <- ncol(covariates$ZI) + n <- nrow(data$Y); p <- ncol(data$Y); d <- ncol(data$X); d0 <- ncol(data$X0) ## save the formula call as specified by the user - private$formula <- formula - private$X <- covariates$PLN - private$X0 <- covariates$ZI + private$formula <- data$formula + private$X <- data$X + private$X0 <- data$X0 ## initialize the covariance model private$covariance <- control$covariance - private$ziparam <- control$ziparam - - R <- matrix(0, n, p) - M <- matrix(0, n, p) - B <- matrix(0, d , p) - B0 <- matrix(0, d0, p) - ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN - for (j in 1:p) { - y = responses[, j] - if (min(y) == 0) { - suppressWarnings( - zip_out <- switch(control$ziparam, - "row" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + factor(1:n), offset = offsets[, j]), - "covar" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + covariates$ZI , offset = offsets[, j]), - pscl::zeroinfl(y ~ 0 + covariates$PLN | 1, offset = offsets[, j])) # offset only for the count model - ) - B0[,j] <- coef(zip_out, "zero") - B[,j] <- coef(zip_out, "count") - R[, j] <- predict(zip_out, type = "zero") - M[,j] <- residuals(zip_out) + covariates$PLN %*% coef(zip_out, "count") - } else { - p_out <- glm(y ~ 0 + covariates$PLN, family = 'poisson', offset = offsets[, j]) - B0[,j] <- rep(-10, d) - B[,j] <- coef(p_out) - R[, j] <- 0 - M[,j] <- residuals(p_out) + covariates$PLN %*% coef(p_out) + private$ziparam <- control$ziparam + + if (isZIPLNfit(control$inception)) { + private$R <- control$inception$var_par$R + private$M <- control$inception$var_par$M + private$S <- control$inception$var_par$S + private$B <- control$inception$model_par$B + private$B0 <- control$inception$model_par$B0 + } else { + R <- matrix(0, n, p) + M <- matrix(0, n, p) + B <- matrix(0, d , p) + B0 <- matrix(0, d0, p) + ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN + for (j in 1:p) { + y = data$Y[, j] + if (min(y) == 0) { + suppressWarnings( + zip_out <- switch(control$ziparam, + "row" = pscl::zeroinfl(y ~ 0 + data$X | 0 + factor(1:n), offset = data$O[, j]), + "covar" = pscl::zeroinfl(y ~ 0 + data$X | 0 + data$X0 , offset = data$O[, j]), + pscl::zeroinfl(y ~ 0 + data$X | 1, offset = data$O[, j])) # offset only for the count model + ) + B0[,j] <- coef(zip_out, "zero") + B[,j] <- coef(zip_out, "count") + R[, j] <- predict(zip_out, type = "zero") + M[,j] <- residuals(zip_out) + data$X %*% coef(zip_out, "count") + } else { + p_out <- glm(y ~ 0 + data$X, family = 'poisson', offset = data$O[, j]) + B0[,j] <- rep(-10, d) + B[,j] <- coef(p_out) + R[, j] <- 0 + M[,j] <- residuals(p_out) + data$X %*% coef(p_out) + } } - } - ## Initialization of the ZI component - private$R <- R + ## Initialization of the ZI component + private$R <- R + private$B0 <- B0 + ## Initialization of the PLN component + private$B <- B + private$M <- M + private$S <- matrix(.1, n, p) + } private$Pi <- switch(control$ziparam, - "single" = matrix( mean(R), n, p) , - "row" = matrix(rowMeans(R), n, p) , - "col" = matrix(colMeans(R), n, p, byrow = TRUE), - "covar" = R) - private$B0 <- B0 - private$zeros <- 1 * (responses == 0) - - ## Initialization of the PLN component - private$B <- B - private$M <- M - private$S <- matrix(.1, n, p) + "single" = matrix( mean(private$R), n, p) , + "row" = matrix(rowMeans(private$R), n, p) , + "col" = matrix(colMeans(private$R), n, p, byrow = TRUE), + "covar" = private$R) + private$zeros <- 1 * (data$Y == 0) ## Link to functions performing the optimization private$optimizer$B <- function(M, X) optim_zipln_B_dense(M, X) @@ -132,15 +139,15 @@ ZIPLNfit <- R6Class( "col" = function(R, ...) list(Pi = matrix(colMeans(R), nrow(R), p, byrow = TRUE), B0 = matrix(NA, d0, p)), "covar" = optim_zipln_zipar_covar ) + private$optimizer$R <- ifelse(control$config_optim$approx_ZI, optim_zipln_R_var, optim_zipln_R_exact) private$optimizer$Omega <- optim_zipln_Omega_full }, #' @description Call to the Cpp optimizer and update of the relevant fields #' @param control a list for controlling the optimization. See details. - optimize = function(responses, covariates, offsets, weights, control) { + optimize = function(data, control) { - data <- list(Y = responses, X = covariates$PLN, X0 = covariates$ZI, O = offsets) parameters <- list(Omega = NA, B0 = private$B0, B = private$B, Pi = private$Pi, M = private$M, S = private$S, R = private$R) @@ -179,9 +186,8 @@ ZIPLNfit <- R6Class( ### VE Step # ZI part - new_R <- optim_zipln_R( - Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = new_Pi - ) + new_R <- private$optimizer$R(Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = new_Pi, B = new_B) + # PLN part new_M <- optim_zipln_M( init_M = parameters$M, @@ -238,8 +244,8 @@ ZIPLNfit <- R6Class( M = parameters$M, S = parameters$S, R = parameters$R, - Z = offsets + parameters$M, - A = exp(offsets + parameters$M + .5 * parameters$S^2), + Z = data$O + parameters$M, + A = exp(data$O + parameters$M + .5 * parameters$S^2), Ji = vloglik, monitoring = list( iterations = nb_iter, @@ -249,12 +255,14 @@ ZIPLNfit <- R6Class( ) ### TODO: Should be in post-treatment - if (is.null(colnames(responses))) colnames(responses) <- paste0("Y", 1:self$p) - colnames(private$B0) <- colnames(private$B) <- colnames(responses) - rownames(private$B0) <- rownames(private$B) <- colnames(covariates) - rownames(private$Omega) <- colnames(private$Omega) <- colnames(private$Pi) <- colnames(responses) + colnames_Y <- colnames(data$Y) + if (is.null(colnames_Y)) colnames_Y <- paste0("Y", 1:self$p) + colnames(private$B0) <- colnames(private$B) <- colnames_Y + rownames(private$B) <- colnames(data$X) + rownames(private$B0) <- colnames(data$X0) + rownames(private$Omega) <- colnames(private$Omega) <- colnames(private$Pi) <- colnames_Y dimnames(private$Sigma) <- dimnames(private$Omega) - rownames(private$M) <- rownames(private$S) <- rownames(private$R) <- rownames(private$Pi) <- rownames(responses) + rownames(private$M) <- rownames(private$S) <- rownames(private$R) <- rownames(private$Pi) <- rownames(data$Y) }, @@ -268,13 +276,12 @@ ZIPLNfit <- R6Class( #' * the matrix `R` of variational ZI probabilities #' * the vector `Ji` of (variational) log-likelihood of each new observation #' * a list `monitoring` with information about convergence status - optimize_vestep = function(covariates, offsets, responses, weights, + optimize_vestep = function(data, B = self$model_par$B, B0 = self$model_par$B0, Omega = self$model_par$Omega, control = ZIPLN_param(backend = "nlopt")$config_optim) { - n <- nrow(responses) - data <- list(Y = responses, X = covariates$PLN, X0 = covariates$ZI, O = offsets) + n <- nrow(data$Y) parameters <- list(M = matrix(0, n, self$p), S = matrix(0.1, n, self$p), R = matrix(0, n, self$p)) @@ -300,8 +307,8 @@ ZIPLNfit <- R6Class( )$Pi # VE Step - new_R <- optim_zipln_R( - Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = Pi + new_R <- private$optimizer$R( + Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = Pi, B = B ) new_M <- optim_zipln_M( init_M = parameters$M, @@ -399,10 +406,9 @@ ZIPLNfit <- R6Class( ## Optimize M and S if responses are provided, if (level == 1) { VE <- self$optimize_vestep( - covariates = list(PLN = X, ZI = X0), - offsets = O, - responses = as.matrix(responses), - weights = rep(1, n_new), + data = list( + Y = as.matrix(responses), X = X, X0 = X0, O = O, w = rep(1, n_new) + ), B = private$B, B0 = private$B0, Omega = private$Omega @@ -495,16 +501,21 @@ ZIPLNfit <- R6Class( d = function() {nrow(private$B)}, #' @field d0 number of covariates in the ZI part d0 = function() {nrow(private$B0)}, - #' @field nb_param number of parameters in the current PLN model + #' @field nb_param_zi number of parameters in the ZI part of the model + nb_param_zi = function() { + as.integer(switch(private$ziparam, + "single" = 1L, + "row" = self$n, + "col" = self$p, + "covar" = self$p * self$d)) + }, + #' @field nb_param_pln number of parameters in the PLN part of the model + nb_param_pln = function() { + as.integer(self$p * self$d + self$p * (self$p + 1L) / 2L) + }, + #' @field nb_param number of parameters in the ZIPLN model nb_param = function() { - as.integer( - self$p * self$d + self$p * (self$p + 1L)/2L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - ) + self$nb_param_zi + self$nb_param_pln }, #' @field model_par a list with the matrices of parameters found in the model (B, Sigma, plus some others depending on the variant) model_par = function() {list(B = private$B, B0 = private$B0, Pi = private$Pi, Omega = private$Omega, Sigma = private$Sigma)}, @@ -552,11 +563,7 @@ ZIPLNfit <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with diagonal residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -575,21 +582,15 @@ ZIPLNfit_diagonal <- R6Class( inherit = ZIPLNfit, public = list( #' @description Initialize a [`ZIPLNfit_diagonal`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$optimizer$Omega <- optim_zipln_Omega_diagonal } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + self$p + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + self$p) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"diagonal"} @@ -605,11 +606,7 @@ ZIPLNfit_diagonal <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with spherical residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -628,21 +625,15 @@ ZIPLNfit_spherical <- R6Class( inherit = ZIPLNfit, public = list( #' @description Initialize a [`ZIPLNfit_spherical`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$optimizer$Omega <- optim_zipln_Omega_spherical } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + 1L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + 1L) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"spherical"} @@ -658,11 +649,7 @@ ZIPLNfit_spherical <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with fixed (inverse) residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -685,22 +672,16 @@ ZIPLNfit_fixed <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( #' @description Initialize a [`ZIPLNfit_fixed`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$Omega <- control$Omega private$optimizer$Omega <- function(M, X, B, S) {private$Omega} } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + 0L) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"fixed"} @@ -716,11 +697,7 @@ ZIPLNfit_fixed <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with sparse inverse residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -730,40 +707,113 @@ ZIPLNfit_fixed <- R6Class( #' # See other examples in function ZIPLN #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 0.2)) +#' myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 1)) #' class(myPLN) #' print(myPLN) +#' plot(myPLN) #' } ZIPLNfit_sparse <- R6Class( classname = "ZIPLNfit_sparse", inherit = ZIPLNfit, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PRIVATE MEMBERS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + private = list( + lambda = NA, # the sparsity tuning parameter + rho = NA # the p x p penalty weight + ), + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( #' @description Initialize a [`ZIPLNfit_fixed`] model #' @importFrom glassoFast glassoFast - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) + ## Default for penalty weights (if not already set) + if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) + stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(control$penalty_weights) <- 0 + private$lambda <- control$penalty + private$rho <- control$penalty_weights private$optimizer$Omega <- function(M, X, B, S) { - glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), rho = control$penalty )$wi + glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), + rho = private$lambda * private$rho )$wi + } + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Extractors ------------------------ + #' @description Extract interaction network in the latent space + #' @param type edge value in the network. Can be "support" (binary edges), "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species) + #' @importFrom Matrix Matrix + #' @return a square matrix of size `ZIPLNfit_sparse$n` + latent_network = function(type = c("partial_cor", "support", "precision")) { + net <- switch( + match.arg(type), + "support" = 1 * (private$Omega != 0 & !diag(TRUE, ncol(private$Omega))), + "precision" = private$Omega, + "partial_cor" = { + tmp <- -private$Omega / tcrossprod(sqrt(diag(private$Omega))); diag(tmp) <- 1 + tmp } + ) + ## Enforce sparse Matrix encoding to avoid downstream problems with igraph::graph_from_adjacency_matrix + ## as it fails when given dsyMatrix objects + Matrix(net, sparse = TRUE) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Graphical methods------------------ + #' @description plot the latent network. + #' @param type edge value in the network. Either "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species). + #' @param output Output type. Either `igraph` (for the network) or `corrplot` (for the adjacency matrix) + #' @param edge.color Length 2 color vector. Color for positive/negative edges. Default is `c("#F8766D", "#00BFC4")`. Only relevant for igraph output. + #' @param node.labels vector of character. The labels of the nodes. The default will use the column names ot the response matrix. + #' @param remove.isolated if `TRUE`, isolated node are remove before plotting. Only relevant for igraph output. + #' @param layout an optional igraph layout. Only relevant for igraph output. + #' @param plot logical. Should the final network be displayed or only sent back to the user. Default is `TRUE`. + plot_network = function(type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE) { + .plot_network(self$latent_network(match.arg(type)), + type = match.arg(type), + output = match.arg(output), + edge.color = edge.color, + remove.isolated = remove.isolated, + node.labels = node.labels, + layout = layout, + plot = plot) } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + (sum(private$Omega != 0) - self$p)/2L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field penalty the global level of sparsity in the current model + penalty = function() {private$lambda}, + #' @field penalty_weights a matrix of weights controlling the amount of penalty element-wise. + penalty_weights = function() {private$rho}, + #' @field n_edges number of edges if the network (non null coefficient of the sparse precision matrix) + n_edges = function() {sum(private$Omega[upper.tri(private$Omega, diag = FALSE)] != 0)}, + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + self$n_edges + self$p) }, #' @field vcov_model character: the model used for the residual covariance - vcov_model = function() {"sparse"} + vcov_model = function() {"sparse"}, + #' @field pen_loglik variational lower bound of the l1-penalized loglikelihood + pen_loglik = function() {self$loglik - private$lambda * sum(abs(private$Omega))}, + #' @field EBIC variational lower bound of the EBIC + EBIC = function() {self$BIC - .5 * ifelse(self$n_edges > 0, self$n_edges * log(.5 * self$p*(self$p - 1)/self$n_edges), 0)}, + #' @field density proportion of non-null edges in the network + density = function() {mean(self$latent_network("support"))}, + #' @field criteria a vector with loglik, penalized loglik, BIC, EBIC, ICL, R_squared, number of parameters, number of edges and graph density + criteria = function() {data.frame(super$criteria, n_edges = self$n_edges, EBIC = self$EBIC, pen_loglik = self$pen_loglik, density = self$density)} ) ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## END OF THE CLASS ZIPLNfit_sparse diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R new file mode 100644 index 00000000..3f06a417 --- /dev/null +++ b/R/ZIPLNnetwork.R @@ -0,0 +1,108 @@ +#' Zero Inflated Sparse Poisson lognormal model for network inference +#' +#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +#' Use the (g)lm syntax to specify the model (including covariates and offsets). +#' +#' @inheritParams PLNnetwork +#' @param control a list-like structure for controlling the optimization, with default generated by [ZIPLNnetwork_param()]. See the associated documentation +#' for details. +#' @param zi a character describing the model used for zero inflation, either of +#' - "single" (default, one parameter shared by all counts) +#' - "col" (one parameter per variable / feature) +#' - "row" (one parameter per sample / individual). +#' If covariates are specified in the formula RHS (see details) this parameter is ignored. +#' +#' @details +#' Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +#' (`~ PLN effect | ZI effect`) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +#' Note that different covariates can be used for each part. +#' +#' @return an R6 object with class [`ZIPLNnetworkfamily`] +#' +#' @include PLNnetworkfamily-class.R +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' myZIPLNs <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, zi = "single") +#' @seealso The classes [`ZIPLNfit`] and [`ZIPLNnetworkfamily`] +#' @export +ZIPLNnetwork <- function(formula, data, subset, weights, zi = c("single", "row", "col"), penalties = NULL, control = ZIPLNnetwork_param()) { + + ## extract the data matrices and weights + data_ <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((data_$zicovar), "covar", match.arg(zi)) + + ## initialization + if (control$trace > 0) cat("\n Initialization...") + myPLN <- ZIPLNnetworkfamily$new(penalties, data_, control) + + ## optimization + if (control$trace > 0) cat("\n Adjusting", + length(myPLN$penalties), "ZI-PLN with sparse inverse covariance estimation and", + control$ziparam, "specific parameter(s) in Zero inflation component.\n") + myPLN$optimize(data_, control$config_optim) + + if (control$trace > 0) cat("\n DONE!\n") + myPLN +} + +#' Control of ZIPLNnetwork fit +#' +#' Helper to define list of parameters to control the ZIPLNnetwork fit. All arguments have defaults. +#' +#' @inheritParams PLNnetwork_param +#' +#' @inherit PLN_param details return +#' @details See [PLNnetwork_param()] for a full description of the optimization parameters. Note that some defaults values are different than those used in [PLNnetwork_param()]: +#' * "ftol_out" (outer loop convergence tolerance the objective function) is set by default to 1e-6 +#' * "maxit_out" (max number of iterations for the outer loop) is set by default to 100 +#' +#' @seealso [PLNnetwork_param()] and [PLN_param()] +#' @export +ZIPLNnetwork_param <- function( + backend = c("nlopt"), + inception_cov = c("full", "spherical", "diagonal"), + trace = 1 , + n_penalties = 30 , + min_ratio = 0.1 , + penalize_diagonal = TRUE , + penalty_weights = NULL , + config_post = list(), + config_optim = list(), + inception = NULL +) { + + if (!is.null(inception)) stopifnot(isZIPLNfit(inception)) + + ## post-treatment config + config_pst <- config_post_default_PLNnetwork + config_pst[names(config_post)] <- config_post + config_pst$trace <- trace + + ## optimization config + stopifnot(backend %in% c("nlopt")) + stopifnot(config_optim$algorithm %in% available_algorithms_nlopt) + config_opt <- config_default_nlopt + config_opt$trace <- trace + config_opt$ftol_out <- 1e-6 + config_opt$maxit_out <- 100 + config_opt$approx_ZI <- FALSE + config_opt[names(config_optim)] <- config_optim + inception_cov <- match.arg(inception_cov) + + structure(list( + backend = backend , + trace = trace , + inception_cov = inception_cov , + n_penalties = n_penalties , + min_ratio = min_ratio , + penalize_diagonal = penalize_diagonal, + penalty_weights = penalty_weights , + jackknife = FALSE , + bootstrap = 0 , + variance = FALSE , + config_post = config_pst , + config_optim = config_opt , + inception = inception ), class = "ZIPLNmodels_param") +} diff --git a/R/plot_utils.R b/R/plot_utils.R index 647dea8c..51df0dbf 100644 --- a/R/plot_utils.R +++ b/R/plot_utils.R @@ -185,3 +185,54 @@ plot_matrix = function(Mat, rowFG = "sample", colFG = "variable", clustering = N } g } + +#' @importFrom grDevices rgb +.plot_network = function(net , + type , + output , + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE) { + + if (output == "igraph") { + + G <- graph_from_adjacency_matrix(net, mode = "undirected", weighted = TRUE, diag = FALSE) + + if (!is.null(node.labels)) { + igraph::V(G)$label <- node.labels + } else { + igraph::V(G)$label <- colnames(net) + } + ## Nice nodes + V.deg <- degree(G)/sum(degree(G)) + igraph::V(G)$label.cex <- V.deg / max(V.deg) + .5 + igraph::V(G)$size <- V.deg * 100 + igraph::V(G)$label.color <- rgb(0, 0, .2, .8) + igraph::V(G)$frame.color <- NA + ## Nice edges + igraph::E(G)$color <- ifelse(igraph::E(G)$weight > 0, edge.color[1], edge.color[2]) + if (type == "support") + igraph::E(G)$width <- abs(igraph::E(G)$weight) + else + igraph::E(G)$width <- 15*abs(igraph::E(G)$weight) + + if (remove.isolated) { + G <- delete.vertices(G, which(degree(G) == 0)) + } + if (plot) plot(G, layout = layout) + } + if (output == "corrplot") { + if (plot) { + if (ncol(net) > 100) + colnames(net) <- rownames(net) <- rep(" ", ncol(net)) + G <- net + diag(net) <- 0 + corrplot(as.matrix(net), method = "color", is.corr = FALSE, tl.pos = "td", cl.pos = "n", tl.cex = 0.5, type = "upper") + } else { + G <- net + } + } + invisible(G) +} diff --git a/README.Rmd b/README.Rmd index 2516078d..3ce56d45 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/PLNmodels)](https://cran.r-project.org/package=PLNmodels) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![](https://img.shields.io/github/last-commit/pln-team/PLNmodels.svg)](https://github.com/pln-team/PLNmodels/commits/master) +[![R-CMD-check](https://github.com/PLN-team/PLNmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/PLN-team/PLNmodels/actions/workflows/R-CMD-check.yaml) > The Poisson lognormal model and variants can be used for a variety of multivariate problems when count data are at play (including PCA, LDA and network inference for count data). This package implements efficient algorithms to fit such models accompanied with a set of functions for visualization and diagnostic. See [this deck of slides](https://pln-team.github.io/slideshow/slides) for a comprehensive introduction. diff --git a/_pkgdown.yml b/_pkgdown.yml index 3a19da12..a1fc70e4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,6 +44,7 @@ reference: - '`PLNLDA`' - '`PLNPCA`' - '`PLNnetwork`' + - '`ZIPLNnetwork`' - '`PLNmixture`' - title: 'Poisson lognormal fit' desc: > @@ -69,6 +70,7 @@ reference: - '`sigma.ZIPLNfit`' - '`predict.ZIPLNfit`' - '`fitted.ZIPLNfit`' + - '`plot.ZIPLNfit_sparse`' - title: 'Linear discriminant analysis via a Poisson lognormal fit' desc: > Description of the PLNLDAfit object and methods for its manipulation. @@ -104,13 +106,17 @@ reference: - '`fitted.PLNmixturefit`' - '`getBestModel.PLNmixturefamily`' - '`getModel.PLNmixturefamily`' -- title: 'Sparse Poisson lognormal fit and network' +- title: 'Sparse Poisson lognormal fit and network, w/o Zero Inflated component' desc: > - Description of the PLNnetworkfit and PLNnetworkfamily objects and methods for their manipulation. + Description of the (ZI)PLNnetworkfit and (ZI)PLNnetworkfamily objects and methods for their manipulation. contents: - starts_with('PLNnetworkfit') - '`PLNnetwork_param`' + - '`ZIPLNnetwork_param`' - '`plot.PLNnetworkfit`' + - '`plot.ZIPLNfit_sparse`' + - '`Networkfamily`' + - '`ZIPLNnetworkfamily`' - '`PLNnetworkfamily`' - '`plot.PLNnetworkfamily`' - '`getBestModel.PLNnetworkfamily`' diff --git a/inst/case_studies/scRNA.R b/inst/case_studies/scRNA.R index ddb89b2d..659be1fb 100644 --- a/inst/case_studies/scRNA.R +++ b/inst/case_studies/scRNA.R @@ -5,7 +5,7 @@ data(scRNA) # data subsample: only 500 random cell and the 200 most varying transcript scRNA <- scRNA[sample.int(nrow(scRNA), 500), ] scRNA$counts <- scRNA$counts[, 1:200] -myZIPLN <- ZIPLN(counts ~ 1 + offset(log(total_counts)), data = scRNA) +myZIPLN <- ZIPLN(counts ~ 1 + offset(log(total_counts)), zi = "col", data = scRNA) myPLN <- PLN(counts ~ 1 + offset(log(total_counts)), data = scRNA) data.frame( diff --git a/inst/simus_ZIPLN/essai_ZIPLN.R b/inst/simus_ZIPLN/essai_ZIPLN.R index 1cb9bdcf..bf494ff2 100644 --- a/inst/simus_ZIPLN/essai_ZIPLN.R +++ b/inst/simus_ZIPLN/essai_ZIPLN.R @@ -133,8 +133,8 @@ p <- ggplot(res) + aes(x = factor(n), y = pred_Y, fill = factor(method)) + geom_ scale_y_log10() + ylim(c(0,2)) p -p <- ggplot(res) + aes(x = factor(n), y = rmse_B, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(2.75,3)) +p <- ggplot(res) + aes(x = factor(n), y = rmse_B, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(2,5)) p -p <- ggplot(res) + aes(x = factor(n), y = rmse_Omega, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(0,0.5)) +p <- ggplot(res) + aes(x = factor(n), y = rmse_Omega, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(0.1,.3)) p diff --git a/man/Networkfamily.Rd b/man/Networkfamily.Rd new file mode 100644 index 00000000..31b53c27 --- /dev/null +++ b/man/Networkfamily.Rd @@ -0,0 +1,235 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNnetworkfamily-class.R +\name{Networkfamily} +\alias{Networkfamily} +\title{An R6 Class to virtually represent a collection of network fits} +\description{ +The functions \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} both produce an instance of this class, which can be thought of as a vector of \code{\link{PLNnetworkfit}}s \code{\link{ZIPLNfit_sparse}}s (indexed by penalty parameter) + +This class comes with a set of methods mostly used to compare +network fits (in terms of goodness of fit) or extract one from +the family (based on penalty parameter and/or goodness of it). +See the documentation for \code{\link[=getBestModel]{getBestModel()}}, +\code{\link[=getModel]{getModel()}} and \link[=plot.Networkfamily]{plot()} for the user-facing ones. +} +\seealso{ +The functions \code{\link[=PLNnetwork]{PLNnetwork()}}, \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and the classes \code{\link{PLNnetworkfit}}, \code{\link{ZIPLNfit_sparse}} +} +\section{Super class}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{Networkfamily} +} +\section{Active bindings}{ +\if{html}{\out{
PLNmodels::PLNfamily$getModel()
PLNmodels::PLNfamily$postTreatment()
PLNmodels::PLNfamily$print()
PLNmodels::Networkfamily$coefficient_path()
PLNmodels::Networkfamily$getBestModel()
PLNmodels::Networkfamily$optimize()
PLNmodels::Networkfamily$plot()
PLNmodels::Networkfamily$plot_objective()
PLNmodels::Networkfamily$plot_stars()
PLNmodels::Networkfamily$show()
PLNmodels::PLNfamily$getModel()
PLNmodels::PLNfamily$postTreatment()
PLNmodels::PLNfamily$print()
PLNmodels::Networkfamily$coefficient_path()
PLNmodels::Networkfamily$getBestModel()
PLNmodels::Networkfamily$optimize()
PLNmodels::Networkfamily$plot()
PLNmodels::Networkfamily$plot_objective()
PLNmodels::Networkfamily$plot_stars()
PLNmodels::Networkfamily$show()