Expanded tests to check for correct handling of gene intersections.

SingleR-inc · Oct 11, 2024 · 8dc0350 · 8dc0350
1 parent d158981
commit 8dc0350
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 6 deletions.
diff --git a/tests/testthat/test-classify.R b/tests/testthat/test-classify.R
@@ -46,6 +46,21 @@ test_that("classifySingleR behaves sensibly with very low 'quantile' settings",
     expect_identical(colnames(collected)[max.col(collected)], out$labels)
 })
 
+test_that("classifySingleR behaves correctly with gene intersections", {
+    random.training <- sample(rownames(training), 500)
+    random.test <- sample(rownames(test), 500)
+
+    trained <- trainSingleR(training[rownames(training) %in% random.training], training$label, test.genes=random.test)
+    out <- classifySingleR(test[random.test,], trained)
+
+    common <- intersect(random.test, random.training) # order-preserving intersection.
+    ref.trained <- trainSingleR(training[rownames(training) %in% common,], training$label)
+    ref.out <- classifySingleR(test[rownames(test) %in% common,], ref.trained)
+
+    expect_equal(out$scores, ref.out$scores)
+    expect_identical(out$labels, ref.out$labels)
+})
+
 test_that("classifySingleR behaves sensibly with very large 'quantile' settings", {
     Q <- 1
     out <- classifySingleR(test, trained, fine.tune=FALSE, quantile=Q)

diff --git a/tests/testthat/test-recomputed.R b/tests/testthat/test-recomputed.R
@@ -109,21 +109,57 @@ test_that("combineRecomputedResults handles mismatches to rows and cells", {
 })
 
 test_that("combineRecomputedResults emits warnings when missing genes are present", {
-    half <- nrow(test) / 2
+    half <- nrow(ref) / 2
 
     # Spiking in some missing genes.
-    ref1b <- ref1[seq_len(half),,drop=FALSE]
-    train1b <- trainSingleR(ref1b, labels=ref1$label, test.genes=rownames(test))
+    train1b <- trainSingleR(ref1[seq_len(half),,drop=FALSE], labels=ref1$label, test.genes=rownames(test))
+    pred1b <- classifySingleR(test, train1b)
 
-    ref2b <- ref2[half + seq_len(half),]
-    train2b <- trainSingleR(ref2b, labels=ref2$label, test.genes=rownames(test))
+    train2b <- trainSingleR(ref2[half + seq_len(half),,drop=FALSE], labels=ref2$label, test.genes=rownames(test))
+    pred2b <- classifySingleR(test, train2b)
 
     expect_warning(out <- combineRecomputedResults(
-        results=list(pred1, pred2), 
+        results=list(pred1b, pred2b), 
         test=test,
         trained=list(train1b, train2b)), "available in each reference")
 })
 
+test_that("combineRecomputedResults works with intersections", {
+    tkeep <- sample(rownames(test), 500)
+    rkeep <- sample(rownames(ref), 500)
+
+    subtest <- test[tkeep,]
+    train1b <- trainSingleR(ref1[rownames(ref1) %in% rkeep,,drop=FALSE], labels=ref1$label, test.genes=tkeep)
+    pred1b <- classifySingleR(subtest, train1b)
+
+    train2b <- trainSingleR(ref2[rownames(ref2) %in% rkeep,,drop=FALSE], labels=ref2$label, test.genes=tkeep)
+    pred2b <- classifySingleR(subtest, train2b)
+
+    out <- combineRecomputedResults(
+        results=list(pred1b, pred2b), 
+        test=subtest,
+        trained=list(train1b, train2b)
+    )
+
+    # Comparing to some explicit subsets to the intersection.
+    common <- intersect(tkeep, rkeep)
+    subtest <- test[rownames(test) %in% common,]
+    train1c <- trainSingleR(ref1[rownames(ref1) %in% common,], labels=ref1$label)
+    pred1c <- classifySingleR(subtest, train1c)
+
+    train2c <- trainSingleR(ref2[rownames(ref2) %in% common,], labels=ref2$label)
+    pred2c <- classifySingleR(subtest, train2c)
+
+    ref.out <- combineRecomputedResults(
+        results=list(pred1c, pred2c), 
+        test=subtest,
+        trained=list(train1c, train2c)
+    )
+
+    expect_equal(out$scores, ref.out$scores)
+    expect_identical(out$labels, ref.out$labels)
+})
+
 test_that("combineRecomputedResults is invariant to ordering", {
     ref3 <- .mockRefData(nreps=8)
     ref3 <- scuttle::logNormCounts(ref3)

diff --git a/tests/testthat/test-train.R b/tests/testthat/test-train.R
@@ -97,6 +97,16 @@ test_that("trainSingleR is robust to non-character labels", {
     expect_equal(out$labels, ref$labels)
 })
 
+test_that("trainSingleR behaves correctly with gene intersections", {
+    random.test <- sample(rownames(test), 500)
+    out <- trainSingleR(training, training$label, test.genes=random.test)
+    ref <- trainSingleR(training[rownames(training) %in% random.test,], training$label)
+    expect_identical(out$labels, ref$labels)
+    expect_identical(out$markers$full, ref$markers$full)
+    expect_identical(sort(out$markers$unique), sort(ref$markers$unique))
+    expect_identical(out$ref[rownames(training) %in% random.test,], ref$ref)
+})
+
 test_that("trainSingleR works on various expression matrices", {
     out <- trainSingleR(training, training$label)
     alt <- trainSingleR(logcounts(training), training$label)