Skip to content

Commit

Permalink
Moved marker heatmap configuration into a separate function.
Browse files Browse the repository at this point in the history
This gives users a chance to switch to a different visualization (e.g., dot
plots) while still using the ranking of markers that we identified.
  • Loading branch information
LTLA committed Nov 18, 2024
1 parent a5d73f6 commit 0a49952
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 35 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export(aggregateReference)
export(classifySingleR)
export(combineCommonResults)
export(combineRecomputedResults)
export(configureMarkerHeatmap)
export(getClassicMarkers)
export(getDeltaFromMedian)
export(matchReferences)
Expand Down
112 changes: 81 additions & 31 deletions R/plotMarkerHeatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,29 @@
#' @param ... Additional parameters for heatmap control passed to \code{\link[pheatmap]{pheatmap}}.
#'
#' @return
#' The output of \code{\link[pheatmap]{pheatmap}} is returned showing the heatmap on the current graphics device.
#' For \code{plotMarkerHeatmap}, the output of \code{\link[pheatmap]{pheatmap}} is returned showing the heatmap on the current graphics device.
#'
#' For \code{configureMarkerHeatmap}, a list is returned containing:
#' \itemize{
#' \item \code{rows}, an integer vector of row indices for the markers of \code{label}, ordered from most to least interesting.
#' \item \code{columns}, an integer vector of column indices to show in the heatmap.
#' This is ordered by the predicted labels so that cells assigned to the same label are contiguous.
#' \item \code{predictions}, a character vector of predicted labels for cells to be shown in the heatmap.
#' Each entry corresponds to an entry of \code{columns}.
#' The labels in this vector are guaranteed to be sorted.
#' }
#'
#' @details
#' This function creates a heatmap where each row is a marker gene for \code{label} and each column is a cell in the test dataset.
#' The \code{plotMarkerHeatmap} function creates a heatmap where each row is a marker gene for \code{label} and each column is a cell in the test dataset.
#' The aim is to check the effectiveness of the reference-derived markers for distinguishing between labels in the test dataset.
#' Useful markers from the reference should show strong upregulation in \code{label} compared to all \code{other.labels}.
#' \dQuote{Interesting} markers should show strong upregulation in cells assigned to \code{label} compared to cells assigned to all \code{other.labels}.
#' We identify such markers by scoring all reference-derived markers with \code{\link[scrapper]{scoreMarkers}} on the \code{test} expression.
#' The \code{top} markers based on the specified \code{order.by.*} fields are shown in the heatmap.
#' If only one label is present, markers are ranked by average abundance intead.
#'
#' The \code{configureMarkerHeatmap} function performs all the calculations underlying \code{plotMarkerHeatmap}.
#' This can be used to apply the same general approach with other plots, e.g., using functions from \pkg{scuttle} or \pkg{dittoSeq}.
#'
#' @author Aaron Lun
#' @examples
#' # Running the SingleR() example.
Expand All @@ -42,9 +55,14 @@
#' plotMarkerHeatmap(pred, test, pred$labels[1])
#' plotMarkerHeatmap(pred, test, pred$labels[1], use.pruned=TRUE)
#' plotMarkerHeatmap(pred, test, pred$labels[1], order.by.effect="auc")
#'
#' # Manually configuring a simpler heatmap by label:
#' config <- configureMarkerHeatmap(pred, test, pred$labels[1])
#' mat <- assay(test, "logcounts")[head(config$rows, 20), config$columns]
#' aggregated <- scuttle::summarizeAssayByGroup(mat, config$predictions)
#' pheatmap::pheatmap(assay(aggregated), cluster_col=FALSE)
#'
#' @export
#' @importFrom S4Vectors metadata
#' @importFrom BiocParallel SerialParam bpnworkers
#' @importFrom Matrix rowMeans
#' @importFrom utils head
Expand All @@ -62,6 +80,54 @@ plotMarkerHeatmap <- function(
num.threads=bpnworkers(BPPARAM),
BPPARAM = SerialParam(),
...)
{
test <- .to_clean_matrix(test, assay.type, check.missing=FALSE)
config <- configureMarkerHeatmap(
results,
test,
label=label,
other.labels=other.labels,
assay.type=assay.type,
use.pruned=use.pruned,
order.by.effect=order.by.effect,
order.by.summary=order.by.summary,
num.threads=num.threads
)

to.show <- head(config$rows, top)
predictions <- config$predictions
test <- test[to.show, config$columns, drop=FALSE]
if (!is.null(display.row.names)) {
rownames(test) <- display.row.names[to.show]
}

limits <- range(test, na.rm=TRUE)
colnames(test) <- seq_len(ncol(test))
pheatmap::pheatmap(
test,
breaks=seq(limits[1], limits[2], length.out=26),
color=viridis::viridis(25),
annotation_col=data.frame(labels=predictions, row.names=colnames(test)),
cluster_col=FALSE,
show_colnames=FALSE,
...
)
}

#' @export
#' @rdname plotMarkerHeatmap
#' @importFrom S4Vectors metadata
#' @importFrom Matrix rowMeans
configureMarkerHeatmap <- function(
results,
test,
label,
other.labels=NULL,
assay.type="logcounts",
use.pruned=FALSE,
order.by.effect="cohens.d",
order.by.summary="min.rank",
num.threads=1)
{
test <- .to_clean_matrix(test, assay.type, check.missing=FALSE)
all.markers <- metadata(results)$de.genes[[label]]
Expand All @@ -75,19 +141,19 @@ plotMarkerHeatmap <- function(

ckeep <- seq_len(ncol(test))
if (!is.null(other.labels)) {
ckeep <- predictions %in% other.labels
ckeep <- which(predictions %in% other.labels)
predictions <- predictions[ckeep]
for (n in names(all.markers)) {
if (!(n %in% other.labels) && n != label) {
all.markers[[n]] <- NULL
}
}
} else if (anyNA(predictions)) {
ckeep <- !is.na(predictions)
ckeep <- which(!is.na(predictions))
predictions <- predictions[ckeep]
}

rkeep <- rownames(test) %in% unlist(all.markers)
rkeep <- which(rownames(test) %in% unlist(all.markers, use.names=FALSE))
test <- test[rkeep,ckeep,drop=FALSE]

# Prioritize the markers with interesting variation in the test data for
Expand All @@ -102,32 +168,16 @@ plotMarkerHeatmap <- function(
compute.delta.mean=(order.by.effect=="delta.mean"),
compute.delta.detected=(order.by.effect=="delta.detected")
)
stats <- interesting[[order.by.effect]][[label]][[order.by.summary]]
o <- order(stats, decreasing=(order.by.summary!="min.rank"))
to.show <- rownames(test)[o]
} else {
abundance <- rowMeans(test)
to.show <- names(abundance)[order(abundance, decreasing=TRUE)]
}

to.show <- match(head(to.show, top), rownames(test))
colnames(test) <- seq_len(ncol(test))
col.order <- order(predictions)
test <- test[to.show,col.order,drop=FALSE]
predictions <- predictions[col.order]
stats <- interesting[[order.by.effect]][[label]][[order.by.summary]]
decreasing <- (order.by.summary!="min.rank")

if (!is.null(display.row.names)) {
rownames(test) <- display.row.names[rkeep][to.show]
} else {
stats <- rowMeans(test)
decreasing <- TRUE
}

limits <- range(test, na.rm=TRUE)
pheatmap::pheatmap(
test,
breaks=seq(limits[1], limits[2], length.out=26),
color=viridis::viridis(25),
annotation_col=data.frame(labels=predictions, row.names=colnames(test)),
cluster_col=FALSE,
show_colnames=FALSE,
...
)
ro <- order(stats, decreasing=decreasing)
co <- order(predictions)
list(rows=rkeep[ro], columns=ckeep[co], predictions=predictions[co])
}
3 changes: 3 additions & 0 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ This should give similar results to but is faster than the previous \pkg{scran}

\item Switch to \pkg{scrapper} for variance calculation, PCA and clustering in \code{aggregateReference()}.
This should be faster than the previous \pkg{BiocSingular} and \code{stats::kmeans} functions, and avoids the need to consider the seed.

\item Added a \code{configureMarkerHeatmap()} function to perform all the calculations used by \code{plotMarkerHeatmap()}.
This allows users to re-use the calculations with a custom visualization for the expression values.
}}

\section{Version 2.8.0}{\itemize{
Expand Down
40 changes: 36 additions & 4 deletions man/plotMarkerHeatmap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0a49952

Please sign in to comment.