Skip to content

Commit

Permalink
Filter out duplicated gene names in each reference dataset.
Browse files Browse the repository at this point in the history
This ensures that we don't grab the wrong gene when mapping from marker names
back to row indices for entry into the C++ code.
  • Loading branch information
LTLA committed Dec 15, 2024
1 parent c23c2e6 commit 320d4bb
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
9 changes: 8 additions & 1 deletion R/trainSingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@
#' @importFrom beachmat initializeCpp
#' @importFrom S4Vectors List
#' @importFrom SummarizedExperiment assay
#' @importFrom DelayedArray DelayedArray
trainSingleR <- function(
ref,
labels,
Expand Down Expand Up @@ -248,11 +249,17 @@ trainSingleR <- function(
for (l in seq_along(ref)) {
curref <- .to_clean_matrix(ref[[l]], assay.type, check.missing, msg="ref", num.threads=num.threads)

# Removing duplicated names and missing labels.
if (anyDuplicated(rownames(curref))) {
keep <- !duplicated(rownames(curref))
curref <- DelayedArray(curref)[keep,,drop=FALSE]
}

curlabels <- as.character(labels[[l]])
stopifnot(length(curlabels) == ncol(curref))
keep <- !is.na(curlabels)
if (!all(keep)) {
curref <- curref[,keep,drop=FALSE]
curref <- DelayedArray(curref)[,keep,drop=FALSE]
curlabels <- curlabels[keep]
}

Expand Down
14 changes: 13 additions & 1 deletion tests/testthat/test-train.R
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,17 @@ test_that("trainSingleR auto-eliminates NA labels", {
out <- trainSingleR(training, training$label)
ref <- trainSingleR(training[,!populate], training$label[!populate])
expect_identical(out$labels, ref$labels)
expect_identical(out$ref, ref$ref)
expect_identical(as.matrix(out$ref), ref$ref)
})

test_that("trainSingleR auto-eliminates duplicate row names", {
    # Sanity check: the fixture must start out with unique row names,
    # otherwise the comparison below would not exercise deduplication.
    expect_identical(anyDuplicated(rownames(training)), 0L)

    # Repeat every name twice and truncate to the original number of rows,
    # so that consecutive rows end up sharing a name.
    doubled <- rep(rownames(training), each=2)
    rownames(training) <- head(doubled, nrow(training))
    observed <- trainSingleR(training, training$label)

    # Expected result: training on only the first occurrence of each name.
    first <- !duplicated(rownames(training))
    expected <- trainSingleR(training[first,], training$label)

    expect_identical(observed$labels, expected$labels)
    # The deduplicated reference is compared after as.matrix() coercion.
    expect_identical(as.matrix(observed$ref), expected$ref)
})

0 comments on commit 320d4bb

Please sign in to comment.