From c2922b38ca5ba46f7ac6a3d5cda55ae9707cd000 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 20 Dec 2024 12:12:36 -0800 Subject: [PATCH] Removed the deprecated combineCommonResults function. --- R/classifySingleR.R | 2 +- R/combineCommonResults.R | 97 +-------------------------------- R/combineRecomputedResults.R | 5 +- R/plotMarkerHeatmap.R | 3 +- R/plotScoreDistribution.R | 3 +- R/plotScoreHeatmap.R | 3 +- inst/NEWS.Rd | 2 + man/classifySingleR.Rd | 2 +- man/combineCommonResults.Rd | 74 ------------------------- man/combineRecomputedResults.Rd | 5 +- man/plotDeltaDistribution.Rd | 3 +- man/plotMarkerHeatmap.Rd | 3 +- man/plotScoreDistribution.Rd | 3 +- man/plotScoreHeatmap.Rd | 3 +- 14 files changed, 14 insertions(+), 194 deletions(-) delete mode 100644 man/combineCommonResults.Rd diff --git a/R/classifySingleR.R b/R/classifySingleR.R index 969afe1..8ce9da0 100644 --- a/R/classifySingleR.R +++ b/R/classifySingleR.R @@ -87,7 +87,7 @@ #' #' \code{\link{pruneScores}}, to remove low-quality labels based on the scores. #' -#' \code{\link{combineCommonResults}}, to combine results from multiple references. +#' \code{\link{combineRecomputedResults}}, to combine results from multiple references. #' #' @export #' @importFrom BiocParallel bpnworkers diff --git a/R/combineCommonResults.R b/R/combineCommonResults.R index 34eac20..3374ce1 100644 --- a/R/combineCommonResults.R +++ b/R/combineCommonResults.R @@ -1,99 +1,4 @@ -#' Combine SingleR results with common genes -#' -#' Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}. -#' This assumes that each run of \code{\link{classifySingleR}} was performed using a common set of marker genes. -#' -#' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately. -#' -#' @return A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row). -#' This mimics the output of \code{\link{classifySingleR}} and contains the following fields: -#' \itemize{ -#' \item \code{scores}, a numeric matrix of correlations formed by combining the equivalent matrices from \code{results}. -#' \item \code{labels}, a character vector containing the per-cell combined label across references. -#' \item \code{references}, an integer vector specifying the reference from which the combined label was derived. -#' \item \code{orig.results}, a DataFrame containing \code{results}. -#' } -#' It may also contain \code{pruned.labels} if these were also present in \code{results}. -#' -#' The \code{\link{metadata}} contains \code{common.genes}, -#' a character vector of the common genes that were used across all references in \code{results}; -#' and \code{label.origin}, a DataFrame specifying the reference of origin for each label in \code{scores}. -#' -#' @details -#' Here, the strategy is to performed classification separately within each reference, -#' then collating the results to choose the label with the highest score across references. -#' For each cell, we identify the reference with the highest score across all of its labels. -#' The \dQuote{combined label} is then defined as the label assigned to that cell in the highest-scoring reference. -#' (The same logic is also applied to the first and pruned labels, if those are available.) -#' -#' Each result should be generated from training sets that use a common set of genes during classification, -#' i.e., \code{common.genes} should be the same in the \code{trained} argument to each \code{\link{classifySingleR}} call. -#' This is because the scores are not comparable across results if they were generated from different sets of genes. -#' It is also for this reason that we use the highest score prior to fine-tuning, -#' even if it does not correspond to the score of the fine-tuned label. -#' -#' It is highly unlikely that this function will be called directly by the end-user. -#' Users are advised to use the multi-reference mode of \code{\link{SingleR}} and related functions, -#' which will take care of the use of a common set of genes before calling this function to combine results across references. -#' -#' @author -#' Jared Andrews, -#' Aaron Lun -#' -#' @examples -#' # Making up data (using one reference to seed another). -#' ref <- .mockRefData(nreps=8) -#' ref1 <- ref[,1:2%%2==0] -#' ref2 <- ref[,1:2%%2==1] -#' ref2$label <- tolower(ref2$label) -#' -#' test <- .mockTestData(ref1) -#' -#' # Applying classification with SingleR's multi-reference mode. -#' ref1 <- scuttle::logNormCounts(ref1) -#' ref2 <- scuttle::logNormCounts(ref2) -#' test <- scuttle::logNormCounts(test) -#' -#' pred <- SingleR(test, list(ref1, ref2), labels=list(ref1$label, ref2$label)) -#' pred[,1:5] # Only viewing the first 5 columns for visibility. -#' -#' @seealso -#' \code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}. -#' -#' \code{\link{combineRecomputedResults}}, for another approach to combining predictions. -#' #' @export -#' @importFrom S4Vectors DataFrame metadata metadata<- combineCommonResults <- function(results) { - .Deprecated(new = "combineRecomputedResults") - - if (length(unique(lapply(results, rownames))) != 1) { - stop("cell/cluster names in 'results' are not identical") - } - if (length(unique(vapply(results, nrow, 0L)))!=1) { - stop("numbers of cells/clusters in 'results' are not identical") - } - - all.common <- lapply(results, function (x) sort(metadata(x)$common.genes)) - if (length(unique(all.common)) != 1) { - # This should be changed to 'stop' before release/after merge with PR #60. - warning("common genes are not identical") - } - - ncells <- nrow(results[[1]]) - collected.scores <- collected.best <- vector("list", length(results)) - for (i in seq_along(results)) { - scores <- results[[i]]$scores - collected.best[[i]] <- scores[cbind(seq_len(ncells), max.col(scores))] - collected.scores[[i]] <- scores - } - - all.scores <- do.call(cbind, collected.scores) - output <- DataFrame(scores = I(all.scores), row.names=rownames(results[[1]])) - - metadata(output)$common.genes <- all.common[[1]] - metadata(output)$label.origin <- .create_label_origin(collected.scores) - - chosen <- max.col(do.call(cbind, collected.best)) - cbind(output, .combine_result_frames(chosen, results)) + .Defunct(new = "combineRecomputedResults") } diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R index f83d4d4..e29ed3a 100644 --- a/R/combineRecomputedResults.R +++ b/R/combineRecomputedResults.R @@ -1,8 +1,7 @@ #' Combine SingleR results with recomputation #' #' Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}. -#' The label from the results with the highest score for each cell is retained. -#' Unlike \code{\link{combineCommonResults}}, this does not assume that each run of \code{\link{classifySingleR}} was performed using the same set of common genes, instead recomputing the scores for comparison across references. +#' This involves recomputing the scores so that they are comparable across references. #' #' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately. #' @inheritParams SingleR @@ -67,8 +66,6 @@ #' @seealso #' \code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}. #' -#' \code{\link{combineCommonResults}}, for another approach to combining predictions. -#' #' @references #' Lun A, Bunis D, Andrews J (2020). #' Thoughts on a more scalable algorithm for multiple references. diff --git a/R/plotMarkerHeatmap.R b/R/plotMarkerHeatmap.R index 4298f77..6fd9d40 100644 --- a/R/plotMarkerHeatmap.R +++ b/R/plotMarkerHeatmap.R @@ -2,8 +2,7 @@ #' #' Create a heatmap of the log-normalized expression for the most interesting markers of a particular label. #' -#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}. +#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}. #' @param test A numeric matrix of log-normalized expression values where rows are genes and columns are cells. #' Each row should be named with the same gene name that was used to compute \code{results}. #' diff --git a/R/plotScoreDistribution.R b/R/plotScoreDistribution.R index ac8e863..87f0892 100644 --- a/R/plotScoreDistribution.R +++ b/R/plotScoreDistribution.R @@ -2,8 +2,7 @@ #' #' Plot the distribution of assignment scores across all cells assigned to each reference label. #' -#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}. +#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}. #' @param show Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}. #' @param labels.use Character vector specifying the labels to show in the plot facets. #' Defaults to all labels in \code{results}. diff --git a/R/plotScoreHeatmap.R b/R/plotScoreHeatmap.R index 9f8a6a9..229ae32 100644 --- a/R/plotScoreHeatmap.R +++ b/R/plotScoreHeatmap.R @@ -2,8 +2,7 @@ #' #' Create a heatmap of the \code{\link{SingleR}} assignment scores across all cell-label combinations. #' -#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}. +#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}. #' @param cells.use Integer or string vector specifying the single cells (i.e., rows of \code{results}) to show. #' If \code{NULL}, all cells are shown. #' @param labels.use Character vector specifying the labels to show in the heatmap rows. diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index a91c3fb..f27e032 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -23,6 +23,8 @@ This is simpler and more efficient than the previous "expanded with NA" format. \item Separate the missingness check arguments in \code{SingleR()} with the new \code{check.missing.test=} and \code{check.missing.ref=} options. The former is disabled by default, to avoid an unnecessary missingness check in the vast majority of test cases. + +\item Removed the deprecated \code{combineCommonResults()} function. }} \section{Version 2.8.0}{\itemize{ diff --git a/man/classifySingleR.Rd b/man/classifySingleR.Rd index 13b8b9e..ce3df82 100644 --- a/man/classifySingleR.Rd +++ b/man/classifySingleR.Rd @@ -117,7 +117,7 @@ table(predicted=pred$labels, truth=test$label) \code{\link{pruneScores}}, to remove low-quality labels based on the scores. -\code{\link{combineCommonResults}}, to combine results from multiple references. +\code{\link{combineRecomputedResults}}, to combine results from multiple references. } \author{ Aaron Lun, based on the original \code{SingleR} code by Dvir Aran. diff --git a/man/combineCommonResults.Rd b/man/combineCommonResults.Rd deleted file mode 100644 index 4739a32..0000000 --- a/man/combineCommonResults.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/combineCommonResults.R -\name{combineCommonResults} -\alias{combineCommonResults} -\title{Combine SingleR results with common genes} -\usage{ -combineCommonResults(results) -} -\arguments{ -\item{results}{A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.} -} -\value{ -A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row). -This mimics the output of \code{\link{classifySingleR}} and contains the following fields: -\itemize{ -\item \code{scores}, a numeric matrix of correlations formed by combining the equivalent matrices from \code{results}. -\item \code{labels}, a character vector containing the per-cell combined label across references. -\item \code{references}, an integer vector specifying the reference from which the combined label was derived. -\item \code{orig.results}, a DataFrame containing \code{results}. -} -It may also contain \code{pruned.labels} if these were also present in \code{results}. - -The \code{\link{metadata}} contains \code{common.genes}, -a character vector of the common genes that were used across all references in \code{results}; -and \code{label.origin}, a DataFrame specifying the reference of origin for each label in \code{scores}. -} -\description{ -Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}. -This assumes that each run of \code{\link{classifySingleR}} was performed using a common set of marker genes. -} -\details{ -Here, the strategy is to performed classification separately within each reference, -then collating the results to choose the label with the highest score across references. -For each cell, we identify the reference with the highest score across all of its labels. -The \dQuote{combined label} is then defined as the label assigned to that cell in the highest-scoring reference. -(The same logic is also applied to the first and pruned labels, if those are available.) - -Each result should be generated from training sets that use a common set of genes during classification, -i.e., \code{common.genes} should be the same in the \code{trained} argument to each \code{\link{classifySingleR}} call. -This is because the scores are not comparable across results if they were generated from different sets of genes. -It is also for this reason that we use the highest score prior to fine-tuning, -even if it does not correspond to the score of the fine-tuned label. - -It is highly unlikely that this function will be called directly by the end-user. -Users are advised to use the multi-reference mode of \code{\link{SingleR}} and related functions, -which will take care of the use of a common set of genes before calling this function to combine results across references. -} -\examples{ -# Making up data (using one reference to seed another). -ref <- .mockRefData(nreps=8) -ref1 <- ref[,1:2\%\%2==0] -ref2 <- ref[,1:2\%\%2==1] -ref2$label <- tolower(ref2$label) - -test <- .mockTestData(ref1) - -# Applying classification with SingleR's multi-reference mode. -ref1 <- scuttle::logNormCounts(ref1) -ref2 <- scuttle::logNormCounts(ref2) -test <- scuttle::logNormCounts(test) - -pred <- SingleR(test, list(ref1, ref2), labels=list(ref1$label, ref2$label)) -pred[,1:5] # Only viewing the first 5 columns for visibility. - -} -\seealso{ -\code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}. - -\code{\link{combineRecomputedResults}}, for another approach to combining predictions. -} -\author{ -Jared Andrews, -Aaron Lun -} diff --git a/man/combineRecomputedResults.Rd b/man/combineRecomputedResults.Rd index 6674b4e..46d41a2 100644 --- a/man/combineRecomputedResults.Rd +++ b/man/combineRecomputedResults.Rd @@ -69,8 +69,7 @@ a DataFrame specifying the reference of origin for each label in \code{scores}. } \description{ Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}. -The label from the results with the highest score for each cell is retained. -Unlike \code{\link{combineCommonResults}}, this does not assume that each run of \code{\link{classifySingleR}} was performed using the same set of common genes, instead recomputing the scores for comparison across references. +This involves recomputing the scores so that they are comparable across references. } \details{ Here, the strategy is to perform classification separately within each reference, @@ -140,8 +139,6 @@ Thoughts on a more scalable algorithm for multiple references. } \seealso{ \code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}. - -\code{\link{combineCommonResults}}, for another approach to combining predictions. } \author{ Aaron Lun diff --git a/man/plotDeltaDistribution.Rd b/man/plotDeltaDistribution.Rd index d8b1a82..0eb4a0f 100644 --- a/man/plotDeltaDistribution.Rd +++ b/man/plotDeltaDistribution.Rd @@ -19,8 +19,7 @@ plotDeltaDistribution( ) } \arguments{ -\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.} +\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.} \item{show}{String specifying whether to show the difference from the median (\code{"delta.med"}) or the difference from the next-best score (\code{"delta.next"}).} diff --git a/man/plotMarkerHeatmap.Rd b/man/plotMarkerHeatmap.Rd index ce9a4b5..514d620 100644 --- a/man/plotMarkerHeatmap.Rd +++ b/man/plotMarkerHeatmap.Rd @@ -34,8 +34,7 @@ configureMarkerHeatmap( ) } \arguments{ -\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.} +\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.} \item{test}{A numeric matrix of log-normalized expression values where rows are genes and columns are cells. Each row should be named with the same gene name that was used to compute \code{results}. diff --git a/man/plotScoreDistribution.Rd b/man/plotScoreDistribution.Rd index 4f0402c..d620ad8 100644 --- a/man/plotScoreDistribution.Rd +++ b/man/plotScoreDistribution.Rd @@ -24,8 +24,7 @@ plotScoreDistribution( ) } \arguments{ -\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.} +\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.} \item{show}{Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}.} diff --git a/man/plotScoreHeatmap.Rd b/man/plotScoreHeatmap.Rd index c1e4d69..871826f 100644 --- a/man/plotScoreHeatmap.Rd +++ b/man/plotScoreHeatmap.Rd @@ -32,8 +32,7 @@ plotScoreHeatmap( ) } \arguments{ -\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, -\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.} +\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.} \item{cells.use}{Integer or string vector specifying the single cells (i.e., rows of \code{results}) to show. If \code{NULL}, all cells are shown.}