Merge pull request #127 from Nanostring-Biostats/dev

Update master to 1.1.3
Nanostring-Biostats · Sep 14, 2021 · baa74c9 · baa74c9
2 parents 3177f2a + d26af29
commit baa74c9
Show file tree

Hide file tree

Showing 23 changed files with 634 additions and 152 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: GeomxTools
 Title: NanoString GeoMx Tools
 Description: Tools for NanoString Technologies GeoMx Technology.  Package provides functions for reading in DCC and
          PKC files based on an ExpressionSet derived object.  Normalization and QC functions are also included.
-Version: 1.1.2
+Version: 1.1.3
 Encoding: UTF-8
 Authors@R: c(person("Nicole", "Ortogero", email = "[email protected]", role = c("cre", "aut")), 
              person("Zhi", "Yang", email = "[email protected]", role = c("aut")))

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,14 @@
+# GeomxTools 1.1.3
+
+## Revisions:
+* Allow for multipanel background correction in `subtractBackground()`
+* Allow user to re-run `summarizeNegatives()`
+* Enable optional parameters in `readNanoStringGeoMxSet()` for annotation file reading
+* Added and revised test cases throughout
+
+## Bug fixes:
+* Fixed `writeNanoStringGeoMxSet` to work with current DCC file format
+
 # GeomxTools 1.1.2
 
 ## Revisions:
@@ -22,7 +33,7 @@
 * Fix build warning from knitr update
 
 # GeomxTools 0.99.4 - concomittant development branch version
-# Includes changes beyond 1.0.0
+v0.99.4 includes changes beyond 1.0.0
 
 ## New features:
 * New slot FeatureType to indicate if data is probe- or target-level
@@ -47,6 +58,11 @@
 ## User notes:
 * This version was included in Bioconductor release 3.13
 
+## Citation:
+Ortogero, N.; Yang, Z.; Vitancol, R.; Griswold, M.; Henderson, D. 
+GeomxTools: NanoString GeoMx Tools. R Package Version 1.0.0. 
+NanoString Technologies Inc.; Seattle, WA 98109, USA. 2021. 
+
 # GeomxTools 0.99.0
 
 * Package template creation
diff --git a/R/NanoStringGeoMxSet-accessors.R b/R/NanoStringGeoMxSet-accessors.R
@@ -51,3 +51,20 @@ setReplaceMethod("featureType", c("NanoStringGeoMxSet", "character"),
                    return(object)
                  })
 
+#' Accessor to check if "exprs" \code{assDataElement} was shifted by one
+#' 
+#' @param object name of the NanoStringGeoMxSet object
+#' 
+#' @return boolean indicating if counts in default matrix were shifted by one
+#' 
+#' @examples
+#' datadir <- system.file("extdata", "DSP_NGS_Example_Data",
+#'                        package="GeomxTools")
+#' demoData <- readRDS(file.path(datadir, "/demoData.rds"))
+#' countsShiftedByOne(demoData)
+#' 
+#' @export
+#' 
+countsShiftedByOne <- function(object) {
+    return(experimentData(object)@other$shiftedByOne)
+}
diff --git a/R/NanoStringGeoMxSet-aggregate.R b/R/NanoStringGeoMxSet-aggregate.R
@@ -118,6 +118,8 @@ summarizeNegatives <-
         summaryDF <- do.call(cbind, summaryList)
         colnames(summaryDF) <- summaryListNames
         summaryDF <- summaryDF[sampleNames(object), ]
+        pData(object) <- pData(object)[, 
+                             !colnames(pData(object)) %in% summaryListNames]
         pData(object) <- cbind(pData(object), summaryDF)
         return(object)
 }
diff --git a/R/NanoStringGeoMxSet-de.R b/R/NanoStringGeoMxSet-de.R
@@ -70,14 +70,14 @@ mixedModelDE <- function(object, elt = "exprs", modelFormula = NULL,
       } else {
         lsm <- lmerTest::ls_means(lmOut, which = groupVar, pairwise = TRUE)
       }
-      lmOut <- matrix(anova(lmOut)[groupVar, "Pr(>F)"], ncol = 1, dimnames = list(groupVar, "Pr(>F)"))
+      lmOut <- matrix(stats::anova(lmOut)[groupVar, "Pr(>F)"], ncol = 1, dimnames = list(groupVar, "Pr(>F)"))
       lsmOut <- matrix(cbind(lsm[,"Estimate"], lsm[,"Pr(>|t|)"]), ncol = 2, dimnames = list(gsub(groupVar, "", rownames(lsm)), c("Estimate", "Pr(>|t|)")))
 
       return(list(anova = lmOut, lsmeans = lsmOut))
     }
     exprs <- new.env()
     exprs$exprs <- assayDataElement(object, elt = elt)
-    if (multiCore) {
+    if (multiCore & Sys.info()['sysname'] != "Windows") {
       mixedOut <- parallel::mclapply(featureNames(object), deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), exprs, mc.cores = nCores)
     }
     else {
@@ -101,6 +101,7 @@ mixedModelDE <- function(object, elt = "exprs", modelFormula = NULL,
       }
       lmOut <- matrix(stats::anova(lmOut)[groupVar, "Pr(>F)"], ncol = 1, dimnames = list(groupVar, "Pr(>F)"))
       lsmOut <- matrix(cbind(lsm[,"Estimate"], lsm[,"Pr(>|t|)"]), ncol = 2, dimnames = list(gsub(groupVar, "", rownames(lsm)), c("Estimate", "Pr(>|t|)")))
+
       return(list(anova = lmOut, lsmeans = lsmOut))
     }
     mixedOut <- assayDataApply(object, 1, deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), pairwise,  elt = elt)

diff --git a/R/NanoStringGeoMxSet-normalize.R b/R/NanoStringGeoMxSet-normalize.R
@@ -10,6 +10,8 @@ HOUSEKEEPERS <- c(
 #' @param fromElt name of the assayDataElement to normalize
 #' @param toElt name of the assayDataElement to store normalized values
 #' @param housekeepers optional vector of housekeeper target names
+#' @param byPanel = TRUE, TRUE background subtraction done within panel, 
+#'        FALSE background aggregated from all negatives regardless of panel
 #' @param ... optional arguments
 #' @return a NanoStringGeoMxSet object with normalized counts and normalized factors
 #' @examples
@@ -149,19 +151,47 @@ hkNorm <- function(object, data_type, toElt, fromElt, housekeepers) {
 
 
 # subtract background
-subtractBackground <- function(object, data_type, toElt, fromElt) {
-  if (!featureType(object) == "Target") {
-    negsubset <- subset(object, subset = CodeClass %in% c("Negative01", "Negative"))
-    negs <- apply(exprs(negsubset), 2, function(x) ngeoMean(x))
-    assayDataElement(object, toElt) <-
-      t(assayDataApply(object, MARGIN = 1L, FUN = `-`, t(negs), elt = fromElt))
-  } else {
-    assayDataElement(object, toElt) <-
-      t(assayDataApply(object, MARGIN = 1L, FUN = `-`, t(exprs(object)["Negative Probe", ]), elt = fromElt))
-  }
-  return(object)
+subtractBackground <- function(object, data_type, toElt, fromElt, byPanel=TRUE) {
+    if (featureType(object) == "Target") {
+      if(!any(fData(object)$CodeClass == "Negative")){
+        stop("Error: No negative could be located for probe pool(s)")
+      }
+        negSet <- negativeControlSubset(object)
+        if (byPanel) {
+            correctedByPanel <- 
+                lapply(unique(fData(object)[["Module"]]), function(currPanel) {
+                    panelSet <- subset(object, subset=Module == currPanel)
+                    panelNegSet <- subset(negSet, subset=Module == currPanel)
+                    panelNegGeo <- 
+                        apply(assayDataElement(panelNegSet, elt=fromElt), 
+                              2L, ngeoMean)
+                    panelCorrectExprs <- 
+                        t(assayDataApply(panelSet, MARGIN=1L, 
+                          FUN=`-`, t(panelNegGeo), elt = fromElt))
+                    colnames(panelCorrectExprs) <- sampleNames(object)
+                    return(panelCorrectExprs)
+                })
+            correctedMatrix <- do.call(rbind, correctedByPanel)
+            assayDataElement(object, elt=toElt) <- 
+                correctedMatrix[featureNames(object), sampleNames(object)]
+        } else {
+            allNegGeo <- 
+                apply(assayDataElement(negSet, elt=fromElt), 2L, ngeoMean)
+            assayDataElement(object, toElt) <-
+                t(assayDataApply(object, MARGIN = 1L, 
+                  FUN = `-`, t(allNegGeo), elt = fromElt))
+        }
+        assayDataElement(object, elt=toElt)[
+            assayDataElement(object, elt=toElt) < 0] <- 0
+    } else {
+        warning("Data on probe-level; no background subtraction performed. ",
+                "Aggregate counts prior to background subtraction.\n")
+    }
+    return(object)
 }
 
+############  NOT USED OR TESTED IN DEV  ############
+
 #' Check QC Flags in the GeoMxSet and removes the probe or sample from the object
 #' @rdname checkQCFlags
 #' @param object name of the NanoStringGeoMxSet object to check the QC Flags
@@ -181,6 +211,7 @@ setGeneric("checkQCFlags",
   }
 )
 
+############  NOT USED OR TESTED IN DEV  ############
 
 #' checkQCFlags
 #' @param object name of the NanoStringGeoMxSet object to check the QC Flags

diff --git a/R/readNanoStringGeoMxSet.R b/R/readNanoStringGeoMxSet.R
@@ -6,7 +6,8 @@ function(dccFiles,
          phenoDataDccColName = "Sample_ID",
          phenoDataColPrefix = "",
          protocolDataColNames = NULL,
-         experimentDataColNames = NULL)
+         experimentDataColNames = NULL,
+         ...)
 {
   # check inputs
   if (!(sum(grepl("\\.dcc$",dccFiles)) == length(dccFiles) && length(dccFiles) > 0L)){
@@ -27,7 +28,7 @@ function(dccFiles,
   if (is.null(phenoDataFile)) {
     stop("Please specify an input for phenoDataFile.")
   } else {
-    pheno <- readxl::read_xlsx(phenoDataFile, col_names = TRUE, sheet = phenoDataSheet)
+    pheno <- readxl::read_xlsx(phenoDataFile, col_names = TRUE, sheet = phenoDataSheet, ...)
     pheno <- data.frame(pheno, stringsAsFactors = FALSE, check.names = FALSE)
     j <- colnames(pheno)[colnames(pheno) == phenoDataDccColName]
     if (length(j) == 0L){

diff --git a/R/utils.R b/R/utils.R
@@ -102,24 +102,8 @@ shiftCountsOne <- function(object, elt="exprs", useDALogic=FALSE) {
     return(object)
 }
 
-#' Accessor to check if "exprs" \code{assDataElement} was shifted by one
-#' 
-#' @param object name of the NanoStringGeoMxSet object
-#' 
-#' @return boolean indicating if counts in default matrix were shifted by one
-#' 
-#' @examples
-#' datadir <- system.file("extdata", "DSP_NGS_Example_Data",
-#'                        package="GeomxTools")
-#' demoData <- readRDS(file.path(datadir, "/demoData.rds"))
-#' countsShiftedByOne(demoData)
-#' 
-#' @export
-#' 
-countsShiftedByOne <- function(object) {
-    return(experimentData(object)@other$shiftedByOne)
-}
 
+#### NOT TESTED OR USED ####
 collapseCounts <- function(object) {
     probeCounts <- data.table(cbind(fData(object)[, c("TargetName", "Module")],
                                     assayDataElement(object, elt="exprs")))

diff --git a/R/writeNanoStringGeoMxSet.R b/R/writeNanoStringGeoMxSet.R
@@ -1,13 +1,14 @@
 writeNanoStringGeoMxSet <- function(x, dir = getwd()) {
   stopifnot(is(x, "NanoStringGeoMxSet"))
+  if (featureType(x) == "Target") {
+    stop("featureType must be 'Probe'")
+  }
   if (!dir.exists(dir)) 
     dir.create(dir)
   features <- pData(featureData(x))[, c("RTS_ID")]
   header <- "<Header>\nFileVersion,%s\nSoftwareVersion,%s\nDate,%s\n</Header>\n"
   scanAttr <- paste0("<Scan_Attributes>\nID,%s\nPlate_ID,%s\nWell,%s\n</Scan_Attributes>\n")
-  laneAttr <- paste0("<NGS_Processing_Attributes>\nSeqSetId,%s\ntamperedIni,%s\ntrimGaloreOpts,%s\n", 
-                     "flash2Opts,%s\numiExtractOpts,%s\nbowtie2Opts,%s\n", 
-                     "umiDedupOpts,%s\nRaw,%d\nTrimmed,%d\n", 
+  laneAttr <- paste0("<NGS_Processing_Attributes>\nSeqSetId,%s\nRaw,%d\nTrimmed,%d\n", 
                      "Stitched,%d\nAligned,%d\numiQ30,%.4f\n", 
                      "rtsQ30,%.4f\n</NGS_Processing_Attributes>\n")
   for (i in seq_len(dim(x)[["Samples"]])) {
@@ -23,10 +24,7 @@ writeNanoStringGeoMxSet <- function(x, dir = getwd()) {
     writeLines(sprintf(scanAttr, protocolRow[["SampleID"]], protocolRow[["Plate_ID"]], 
                        protocolRow[["Well"]]), con)
 
-    writeLines(sprintf(laneAttr, protocolRow[["SeqSetId"]], protocolRow[["tamperedIni"]], 
-                       protocolRow[["trimGaloreOpts"]], protocolRow[["flash2Opts"]], 
-                       protocolRow[["umiExtractOpts"]], protocolRow[["bowtie2Opts"]], 
-                       protocolRow[["umiDedupOpts"]], protocolRow[["Raw"]],
+    writeLines(sprintf(laneAttr, protocolRow[["SeqSetId"]], protocolRow[["Raw"]],
                        protocolRow[["Trimmed"]], protocolRow[["Stitched"]], 
                        protocolRow[["Aligned"]], protocolRow[["umiQ30"]],
                        protocolRow[["rtsQ30"]]), 
@@ -35,8 +33,7 @@ writeNanoStringGeoMxSet <- function(x, dir = getwd()) {
     write.table(cbind(features[which(exprs(x)[, i]>0)], 
                       Count = exprs(x)[which(exprs(x)[, i]>0), i]), file = con, quote = FALSE, 
                 sep = ",", row.names = FALSE, col.names = FALSE)
-    writeLines("</Code_Summary>\n", con)
-    writeLines("SOMEHASH100000000000", con)
+    writeLines("</Code_Summary>", con)
     close(con)
   }
   invisible(file.path(dir, sampleNames(x)))

diff --git a/README.md b/README.md
@@ -53,3 +53,8 @@ at any given time.
 
 The dev branch on GitHub is under active development and no guarantee 
 is made on usability at any given time.
+
+## Citation:
+Ortogero, N.; Yang, Z.; Vitancol, R.; Griswold, M.; Henderson, D. 
+GeomxTools: NanoString GeoMx Tools. R Package Version 1.0.0. 
+NanoString Technologies Inc.; Seattle, WA 98109, USA. 2021. 
diff --git a/inst/extdata/DSP_NGS_Example_Data/demoData.rds b/inst/extdata/DSP_NGS_Example_Data/demoData.rds
diff --git a/man/countsShiftedByOne.Rd b/man/countsShiftedByOne.Rd
diff --git a/man/readNanoStringGeoMxSet.Rd b/man/readNanoStringGeoMxSet.Rd
@@ -13,7 +13,7 @@
 readNanoStringGeoMxSet(dccFiles, pkcFiles, phenoDataFile, 
                      phenoDataSheet, phenoDataDccColName = "Sample_ID",
                      phenoDataColPrefix = "", protocolDataColNames = NULL,
-                     experimentDataColNames = NULL)
+                     experimentDataColNames = NULL, ...)
 }
 
 \arguments{
@@ -31,6 +31,7 @@ readNanoStringGeoMxSet(dccFiles, pkcFiles, phenoDataFile,
     featureData columns, and protocolData columns.}
   \item{protocolDataColNames}{Character list of column names from \code{phenoDataFile} containing data about the experimental protocol or sequencing data.}
   \item{experimentDataColNames}{Character list of column names from \code{phenoDataFile} containing data about the experiment's meta-data.}
+  \item{...}{Optional parameters to pass to \code{readxl::read_xlsx} function for annotation read in}
 }
 
 \value{

diff --git a/tests/testthat/test_GeoMxSet_accessors.R b/tests/testthat/test_GeoMxSet_accessors.R
@@ -9,23 +9,15 @@ library(testthat)
 datadir <- system.file("extdata", "DSP_NGS_Example_Data",
                        package="GeomxTools")
 DCCFiles <- dir(datadir, pattern=".dcc$", full.names=TRUE)
-PKCFiles <- unzip(zipfile = file.path(datadir,  "/pkcs.zip"))
-SampleAnnotationFile <- file.path(datadir, "annotations.xlsx")
 
 protocolDataColNames <- c("aoi",
                           "cell_line",
                           "roi_rep",
                           "pool_rep",
                           "slide_rep")
 
-testData <-
-  suppressWarnings(readNanoStringGeoMxSet(dccFiles = DCCFiles, # QuickBase: readNanoStringGeomxSet, need to change it
-                                          pkcFiles = PKCFiles,
-                                          phenoDataFile = SampleAnnotationFile,
-                                          phenoDataSheet = "CW005",
-                                          phenoDataDccColName = "Sample_ID",
-                                          protocolDataColNames = protocolDataColNames,
-                                          experimentDataColNames = c("panel")))
+testData <- readRDS(file= system.file("extdata", "DSP_NGS_Example_Data", 
+                                      "demoData.rds", package = "GeomxTools"))
 
 
 testData_agg <- aggregateCounts(testData)
@@ -40,8 +32,6 @@ testthat::test_that("test that the rownames and column names in sData are correc
 })
 
 
-
-
 # req 2: test that the svarLabels method gives the correct results:------
 testthat::test_that("test that the svarLabels method gives the correct results", {
   expect_true(all(svarLabels(testData) == colnames(sData(testData))))