From c2922b38ca5ba46f7ac6a3d5cda55ae9707cd000 Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 20 Dec 2024 12:12:36 -0800
Subject: [PATCH] Removed the deprecated combineCommonResults function.

---
 R/classifySingleR.R             |  2 +-
 R/combineCommonResults.R        | 97 +--------------------------------
 R/combineRecomputedResults.R    |  5 +-
 R/plotMarkerHeatmap.R           |  3 +-
 R/plotScoreDistribution.R       |  3 +-
 R/plotScoreHeatmap.R            |  3 +-
 inst/NEWS.Rd                    |  2 +
 man/classifySingleR.Rd          |  2 +-
 man/combineCommonResults.Rd     | 74 -------------------------
 man/combineRecomputedResults.Rd |  5 +-
 man/plotDeltaDistribution.Rd    |  3 +-
 man/plotMarkerHeatmap.Rd        |  3 +-
 man/plotScoreDistribution.Rd    |  3 +-
 man/plotScoreHeatmap.Rd         |  3 +-
 14 files changed, 14 insertions(+), 194 deletions(-)
 delete mode 100644 man/combineCommonResults.Rd

diff --git a/R/classifySingleR.R b/R/classifySingleR.R
index 969afe1..8ce9da0 100644
--- a/R/classifySingleR.R
+++ b/R/classifySingleR.R
@@ -87,7 +87,7 @@
 #'
 #' \code{\link{pruneScores}}, to remove low-quality labels based on the scores.
 #'
-#' \code{\link{combineCommonResults}}, to combine results from multiple references.
+#' \code{\link{combineRecomputedResults}}, to combine results from multiple references.
 #'
 #' @export
 #' @importFrom BiocParallel bpnworkers
diff --git a/R/combineCommonResults.R b/R/combineCommonResults.R
index 34eac20..3374ce1 100644
--- a/R/combineCommonResults.R
+++ b/R/combineCommonResults.R
@@ -1,99 +1,4 @@
-#' Combine SingleR results with common genes
-#'
-#' Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}.
-#' This assumes that each run of \code{\link{classifySingleR}} was performed using a common set of marker genes.
-#'
-#' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.
-#'
-#' @return A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row).
-#' This mimics the output of \code{\link{classifySingleR}} and contains the following fields:
-#' \itemize{
-#' \item \code{scores}, a numeric matrix of correlations formed by combining the equivalent matrices from \code{results}.
-#' \item \code{labels}, a character vector containing the per-cell combined label across references.
-#' \item \code{references}, an integer vector specifying the reference from which the combined label was derived.
-#' \item \code{orig.results}, a DataFrame containing \code{results}.
-#' }
-#' It may also contain \code{pruned.labels} if these were also present in \code{results}.
-#'
-#' The \code{\link{metadata}} contains \code{common.genes},
-#' a character vector of the common genes that were used across all references in \code{results};
-#' and \code{label.origin}, a DataFrame specifying the reference of origin for each label in \code{scores}.
-#' 
-#' @details
-#' Here, the strategy is to performed classification separately within each reference, 
-#' then collating the results to choose the label with the highest score across references.
-#' For each cell, we identify the reference with the highest score across all of its labels.
-#' The \dQuote{combined label} is then defined as the label assigned to that cell in the highest-scoring reference.
-#' (The same logic is also applied to the first and pruned labels, if those are available.)
-#' 
-#' Each result should be generated from training sets that use a common set of genes during classification, 
-#' i.e., \code{common.genes} should be the same in the \code{trained} argument to each \code{\link{classifySingleR}} call.
-#' This is because the scores are not comparable across results if they were generated from different sets of genes.
-#' It is also for this reason that we use the highest score prior to fine-tuning, 
-#' even if it does not correspond to the score of the fine-tuned label.
-#'
-#' It is highly unlikely that this function will be called directly by the end-user.
-#' Users are advised to use the multi-reference mode of \code{\link{SingleR}} and related functions,
-#' which will take care of the use of a common set of genes before calling this function to combine results across references.
-#'
-#' @author 
-#' Jared Andrews,
-#' Aaron Lun
-#'
-#' @examples
-#' # Making up data (using one reference to seed another).
-#' ref <- .mockRefData(nreps=8)
-#' ref1 <- ref[,1:2%%2==0]
-#' ref2 <- ref[,1:2%%2==1]
-#' ref2$label <- tolower(ref2$label)
-#'
-#' test <- .mockTestData(ref1)
-#'
-#' # Applying classification with SingleR's multi-reference mode.
-#' ref1 <- scuttle::logNormCounts(ref1)
-#' ref2 <- scuttle::logNormCounts(ref2)
-#' test <- scuttle::logNormCounts(test)
-#'
-#' pred <- SingleR(test, list(ref1, ref2), labels=list(ref1$label, ref2$label))
-#' pred[,1:5] # Only viewing the first 5 columns for visibility.
-#'
-#' @seealso
-#' \code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}.
-#'
-#' \code{\link{combineRecomputedResults}}, for another approach to combining predictions.
-#'
 #' @export
-#' @importFrom S4Vectors DataFrame metadata metadata<-
 combineCommonResults <- function(results) {
-    .Deprecated(new = "combineRecomputedResults")
-
-    if (length(unique(lapply(results, rownames))) != 1) {
-        stop("cell/cluster names in 'results' are not identical")
-    }
-    if (length(unique(vapply(results, nrow, 0L)))!=1) {
-        stop("numbers of cells/clusters in 'results' are not identical")
-    }
-
-    all.common <- lapply(results, function (x) sort(metadata(x)$common.genes))
-    if (length(unique(all.common)) != 1) {
-        # This should be changed to 'stop' before release/after merge with PR #60.
-        warning("common genes are not identical")
-    }
-
-    ncells <- nrow(results[[1]])
-    collected.scores <- collected.best <- vector("list", length(results))
-    for (i in seq_along(results)) {
-        scores <- results[[i]]$scores
-        collected.best[[i]] <- scores[cbind(seq_len(ncells), max.col(scores))]
-        collected.scores[[i]] <- scores
-    }
-
-    all.scores <- do.call(cbind, collected.scores)
-    output <- DataFrame(scores = I(all.scores), row.names=rownames(results[[1]]))
-
-    metadata(output)$common.genes <- all.common[[1]]
-    metadata(output)$label.origin <- .create_label_origin(collected.scores)
-
-    chosen <- max.col(do.call(cbind, collected.best))
-    cbind(output, .combine_result_frames(chosen, results))
+    .Defunct(new = "combineRecomputedResults")
 }
diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R
index f83d4d4..e29ed3a 100644
--- a/R/combineRecomputedResults.R
+++ b/R/combineRecomputedResults.R
@@ -1,8 +1,7 @@
 #' Combine SingleR results with recomputation
 #'
 #' Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}.
-#' The label from the results with the highest score for each cell is retained.
-#' Unlike \code{\link{combineCommonResults}}, this does not assume that each run of \code{\link{classifySingleR}} was performed using the same set of common genes, instead recomputing the scores for comparison across references.
+#' This involves recomputing the scores so that they are comparable across references.
 #'
 #' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.
 #' @inheritParams SingleR
@@ -67,8 +66,6 @@
 #' @seealso
 #' \code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}.
 #'
-#' \code{\link{combineCommonResults}}, for another approach to combining predictions.
-#'
 #' @references
 #' Lun A, Bunis D, Andrews J (2020).
 #' Thoughts on a more scalable algorithm for multiple references.
diff --git a/R/plotMarkerHeatmap.R b/R/plotMarkerHeatmap.R
index 4298f77..6fd9d40 100644
--- a/R/plotMarkerHeatmap.R
+++ b/R/plotMarkerHeatmap.R
@@ -2,8 +2,7 @@
 #'
 #' Create a heatmap of the log-normalized expression for the most interesting markers of a particular label.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}},
-#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.
+#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
 #' @param test A numeric matrix of log-normalized expression values where rows are genes and columns are cells.
 #' Each row should be named with the same gene name that was used to compute \code{results}.
 #'
diff --git a/R/plotScoreDistribution.R b/R/plotScoreDistribution.R
index ac8e863..87f0892 100644
--- a/R/plotScoreDistribution.R
+++ b/R/plotScoreDistribution.R
@@ -2,8 +2,7 @@
 #'
 #' Plot the distribution of assignment scores across all cells assigned to each reference label.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, 
-#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.
+#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
 #' @param show Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}.
 #' @param labels.use Character vector specifying the labels to show in the plot facets.
 #' Defaults to all labels in \code{results}.
diff --git a/R/plotScoreHeatmap.R b/R/plotScoreHeatmap.R
index 9f8a6a9..229ae32 100644
--- a/R/plotScoreHeatmap.R
+++ b/R/plotScoreHeatmap.R
@@ -2,8 +2,7 @@
 #'
 #' Create a heatmap of the \code{\link{SingleR}} assignment scores across all cell-label combinations.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}},
-#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.
+#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
 #' @param cells.use Integer or string vector specifying the single cells (i.e., rows of \code{results}) to show.
 #' If \code{NULL}, all cells are shown.
 #' @param labels.use Character vector specifying the labels to show in the heatmap rows.
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index a91c3fb..f27e032 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -23,6 +23,8 @@ This is simpler and more efficient than the previous "expanded with NA" format.
 
 \item Separate the missingness check arguments in \code{SingleR()} with the new \code{check.missing.test=} and \code{check.missing.ref=} options.
 The former is disabled by default, to avoid an unnecessary missingness check in the vast majority of test cases.
+
+\item The deprecated \code{combineCommonResults()} function is now defunct; use \code{combineRecomputedResults()} instead.
 }}
 
 \section{Version 2.8.0}{\itemize{
diff --git a/man/classifySingleR.Rd b/man/classifySingleR.Rd
index 13b8b9e..ce3df82 100644
--- a/man/classifySingleR.Rd
+++ b/man/classifySingleR.Rd
@@ -117,7 +117,7 @@ table(predicted=pred$labels, truth=test$label)
 
 \code{\link{pruneScores}}, to remove low-quality labels based on the scores.
 
-\code{\link{combineCommonResults}}, to combine results from multiple references.
+\code{\link{combineRecomputedResults}}, to combine results from multiple references.
 }
 \author{
 Aaron Lun, based on the original \code{SingleR} code by Dvir Aran.
diff --git a/man/combineCommonResults.Rd b/man/combineCommonResults.Rd
deleted file mode 100644
index 4739a32..0000000
--- a/man/combineCommonResults.Rd
+++ /dev/null
@@ -1,74 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/combineCommonResults.R
-\name{combineCommonResults}
-\alias{combineCommonResults}
-\title{Combine SingleR results with common genes}
-\usage{
-combineCommonResults(results)
-}
-\arguments{
-\item{results}{A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.}
-}
-\value{
-A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row).
-This mimics the output of \code{\link{classifySingleR}} and contains the following fields:
-\itemize{
-\item \code{scores}, a numeric matrix of correlations formed by combining the equivalent matrices from \code{results}.
-\item \code{labels}, a character vector containing the per-cell combined label across references.
-\item \code{references}, an integer vector specifying the reference from which the combined label was derived.
-\item \code{orig.results}, a DataFrame containing \code{results}.
-}
-It may also contain \code{pruned.labels} if these were also present in \code{results}.
-
-The \code{\link{metadata}} contains \code{common.genes},
-a character vector of the common genes that were used across all references in \code{results};
-and \code{label.origin}, a DataFrame specifying the reference of origin for each label in \code{scores}.
-}
-\description{
-Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}.
-This assumes that each run of \code{\link{classifySingleR}} was performed using a common set of marker genes.
-}
-\details{
-Here, the strategy is to performed classification separately within each reference, 
-then collating the results to choose the label with the highest score across references.
-For each cell, we identify the reference with the highest score across all of its labels.
-The \dQuote{combined label} is then defined as the label assigned to that cell in the highest-scoring reference.
-(The same logic is also applied to the first and pruned labels, if those are available.)
-
-Each result should be generated from training sets that use a common set of genes during classification, 
-i.e., \code{common.genes} should be the same in the \code{trained} argument to each \code{\link{classifySingleR}} call.
-This is because the scores are not comparable across results if they were generated from different sets of genes.
-It is also for this reason that we use the highest score prior to fine-tuning, 
-even if it does not correspond to the score of the fine-tuned label.
-
-It is highly unlikely that this function will be called directly by the end-user.
-Users are advised to use the multi-reference mode of \code{\link{SingleR}} and related functions,
-which will take care of the use of a common set of genes before calling this function to combine results across references.
-}
-\examples{
-# Making up data (using one reference to seed another).
-ref <- .mockRefData(nreps=8)
-ref1 <- ref[,1:2\%\%2==0]
-ref2 <- ref[,1:2\%\%2==1]
-ref2$label <- tolower(ref2$label)
-
-test <- .mockTestData(ref1)
-
-# Applying classification with SingleR's multi-reference mode.
-ref1 <- scuttle::logNormCounts(ref1)
-ref2 <- scuttle::logNormCounts(ref2)
-test <- scuttle::logNormCounts(test)
-
-pred <- SingleR(test, list(ref1, ref2), labels=list(ref1$label, ref2$label))
-pred[,1:5] # Only viewing the first 5 columns for visibility.
-
-}
-\seealso{
-\code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}.
-
-\code{\link{combineRecomputedResults}}, for another approach to combining predictions.
-}
-\author{
-Jared Andrews,
-Aaron Lun
-}
diff --git a/man/combineRecomputedResults.Rd b/man/combineRecomputedResults.Rd
index 6674b4e..46d41a2 100644
--- a/man/combineRecomputedResults.Rd
+++ b/man/combineRecomputedResults.Rd
@@ -69,8 +69,7 @@ a DataFrame specifying the reference of origin for each label in \code{scores}.
 }
 \description{
 Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}.
-The label from the results with the highest score for each cell is retained.
-Unlike \code{\link{combineCommonResults}}, this does not assume that each run of \code{\link{classifySingleR}} was performed using the same set of common genes, instead recomputing the scores for comparison across references.
+This involves recomputing the scores so that they are comparable across references.
 }
 \details{
 Here, the strategy is to perform classification separately within each reference, 
@@ -140,8 +139,6 @@ Thoughts on a more scalable algorithm for multiple references.
 }
 \seealso{
 \code{\link{SingleR}} and \code{\link{classifySingleR}}, for generating predictions to use in \code{results}.
-
-\code{\link{combineCommonResults}}, for another approach to combining predictions.
 }
 \author{
 Aaron Lun
diff --git a/man/plotDeltaDistribution.Rd b/man/plotDeltaDistribution.Rd
index d8b1a82..0eb4a0f 100644
--- a/man/plotDeltaDistribution.Rd
+++ b/man/plotDeltaDistribution.Rd
@@ -19,8 +19,7 @@ plotDeltaDistribution(
 )
 }
 \arguments{
-\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, 
-\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.}
+\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.}
 
 \item{show}{String specifying whether to show the difference from the median (\code{"delta.med"}) 
 or the difference from the next-best score (\code{"delta.next"}).}
diff --git a/man/plotMarkerHeatmap.Rd b/man/plotMarkerHeatmap.Rd
index ce9a4b5..514d620 100644
--- a/man/plotMarkerHeatmap.Rd
+++ b/man/plotMarkerHeatmap.Rd
@@ -34,8 +34,7 @@ configureMarkerHeatmap(
 )
 }
 \arguments{
-\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}},
-\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.}
+\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.}
 
 \item{test}{A numeric matrix of log-normalized expression values where rows are genes and columns are cells.
 Each row should be named with the same gene name that was used to compute \code{results}.
diff --git a/man/plotScoreDistribution.Rd b/man/plotScoreDistribution.Rd
index 4f0402c..d620ad8 100644
--- a/man/plotScoreDistribution.Rd
+++ b/man/plotScoreDistribution.Rd
@@ -24,8 +24,7 @@ plotScoreDistribution(
 )
 }
 \arguments{
-\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, 
-\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.}
+\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.}
 
 \item{show}{Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}.}
 
diff --git a/man/plotScoreHeatmap.Rd b/man/plotScoreHeatmap.Rd
index c1e4d69..871826f 100644
--- a/man/plotScoreHeatmap.Rd
+++ b/man/plotScoreHeatmap.Rd
@@ -32,8 +32,7 @@ plotScoreHeatmap(
 )
 }
 \arguments{
-\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}},
-\code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.}
+\item{results}{A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.}
 
 \item{cells.use}{Integer or string vector specifying the single cells (i.e., rows of \code{results}) to show.
 If \code{NULL}, all cells are shown.}