Merge pull request #106 from Nanostring-Biostats/dev

Update master to 1.1.2
Nanostring-Biostats · Sep 8, 2021 · 3177f2a · 3177f2a
2 parents 731cd3d + 24e9b6f
commit 3177f2a
Show file tree

Hide file tree

Showing 27 changed files with 427 additions and 252 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,17 +2,19 @@ Package: GeomxTools
 Title: NanoString GeoMx Tools
 Description: Tools for NanoString Technologies GeoMx Technology.  Package provides functions for reading in DCC and
          PKC files based on an ExpressionSet derived object.  Normalization and QC functions are also included.
-Version: 1.1.1
+Version: 1.1.2
 Encoding: UTF-8
 Authors@R: c(person("Nicole", "Ortogero", email = "[email protected]", role = c("cre", "aut")), 
              person("Zhi", "Yang", email = "[email protected]", role = c("aut")))
-Depends: R (>= 3.6), NanoStringNCTools
-Imports: Biobase, S4Vectors, rjson, readxl, EnvStats, reshape2, methods, 
-         utils, stats, data.table, outliers, BiocGenerics
+Depends: R (>= 3.6), Biobase, NanoStringNCTools, S4Vectors
+Imports: BiocGenerics, rjson, readxl, EnvStats, reshape2, methods, 
+         utils, stats, data.table, outliers, lmerTest, dplyr
 Suggests: 
     rmarkdown,
     knitr,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    parallel,
+    ggiraph
 License: Artistic-2.0
 Collate: DccMetadata.R
          NanoStringGeoMxSet-class.R

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,24 +2,33 @@
 ### Imports
 import(S4Vectors)
 import(Biobase)
-import(data.table)
+import(NanoStringNCTools)
+importClassesFrom(S4Vectors, DataFrame)
 importClassesFrom(NanoStringNCTools, SignatureSet)
-importClassesFrom(S4Vectors,DataFrame)
-importFrom(NanoStringNCTools, SignatureSet)
 importFrom(rjson, fromJSON)
 importFrom(readxl, read_xlsx)
 importFrom(EnvStats, geoMean)
 importFrom(reshape2, dcast)
 importFrom(utils, read.csv)
 importFrom(stats, as.formula)
 importFrom(stats, quantile)
-importFrom(stats, weights)
+importFrom(stats, anova)
+importFrom(stats, formula)
+importFrom(stats, p.adjust)
 importFrom(methods, callGeneric)
 importFrom(methods, callNextMethod)
 importFrom(methods, is)
 importFrom(methods, validObject)
+importFrom(dplyr, bind_rows)
 importFrom(utils, write.table)
 importFrom(outliers, grubbs.test)
+importFrom(data.table, data.table, .SD)
+importFrom(lmerTest, lmer)
+importFrom(lmerTest, ls_means)
+importFrom(parallel, mclapply)
+importFrom(parallel, parLapply)
+importFrom(parallel, makeCluster)
+importFrom(parallel, stopCluster)
 importFrom(BiocGenerics, design)
 importFrom(BiocGenerics, "design<-")
 

diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,52 @@
+# GeomxTools 1.1.2
+
+## Revisions:
+* Allow users to use more than one DCC version
+* Speed improvements in `setBioProbeQC()` and `aggregateCounts()`
+
+## Bug fixes:
+* Fix error in `setBioProbeQC()` with single panel objects
+* Fix mixed model output to reference correct p-value
+* Fix thresholding in utility functions to keep format matrix format inputs
+
+# GeomxTools 1.1.1
+
+## New features:
+* Differential expression with linear mixed model method `mixedModelDE()`
+
+## Revisions:
+* Handle multi-panel normalization
+
+## Bug fixes:
+* Fix skipping of vectors that don't meet Grubbs requirements
+* Fix build warning from knitr update
+
+# GeomxTools 0.99.4 - concomittant development branch version
+# Includes changes beyond 1.0.0
+
+## New features:
+* New slot FeatureType to indicate if data is probe- or target-level
+* Segment QC `setSegmentQCFlags()` and probe QC `setBioProbeQC()`
+* Count aggregation method `aggregateCounts()`
+* Common GeoMx normalizations `normalize()`
+* Log and count thresholding methods added to utils
+
+## Revisions:
+* Updated `readDccFile()` to expand dcc file versions accepted
+* Allow user to  without auto-aggregating counts to target-level
+* Probe annotations attached to featureData with readPKCFile
+
+# GeomxTools 1.1.0
+
+* No changes from 1.0.0
+
+# GeomxTools 1.0.0
+
+* Initial release, includes load with automatic aggregation to target-level
+
+## User notes:
+* This version was included in Bioconductor release 3.13
+
+# GeomxTools 0.99.0
+
+* Package template creation
diff --git a/R/NanoStringGeoMxSet-aggregate.R b/R/NanoStringGeoMxSet-aggregate.R
@@ -16,19 +16,53 @@
 #' @export
 #' 
 aggregateCounts <- function(object, FUN=ngeoMean) {
-    object <- summarizeNegatives(object)
-    targetCounts <- do.call(rbind, esBy(object, GROUP = "TargetName", 
+    if(featureType(object) == "Target") {
+        stop("GeoMxSet object feature type is already target-level. ",
+             "No further aggregation can be performed.")
+    }
+
+    # Skip targets with single probe
+    multiProbeTable <- with(object, table(TargetName)) > 1L
+    multiProbeTargs <- 
+        names(multiProbeTable)[which(multiProbeTable, arr.ind=TRUE)]
+    if (length(multiProbeTargs) > 0) {
+        multiObject <- 
+            object[fData(object)[["TargetName"]] %in% multiProbeTargs, ]
+        if ("Negative" %in% unique(fData(object)[["CodeClass"]])) {
+            object <- summarizeNegatives(object)
+        } else {
+            warning("Object has no negatives. ",
+                "No summary statistics for negatives will be calculated.")
+        }
+    } else {
+        warning("Object has no multiprobe targets. ",
+                "No aggregation was performed.")
+        featureNames(object) <- fData(object)[["TargetName"]]
+        featureType(object) <- "Target"
+        return(object)
+    }
+
+    targetCounts <- do.call(rbind, esBy(multiObject, GROUP = "TargetName", 
         FUN=function(x) {esApply(x, 2, FUN)}, simplify=FALSE))
-    targetFeats <- featureData(object)@data
+    singleProbeObject <- subset(object, 
+                                subset=!TargetName %in% multiProbeTargs)
+    singleProbeCounts <- exprs(singleProbeObject)
+    rownames(singleProbeCounts) <- fData(singleProbeObject)[["TargetName"]]
+    targetCounts <- rbind(targetCounts, singleProbeCounts)
+    targetCounts <- targetCounts[unique(fData(object)[["TargetName"]]), ]
+
+    targetFeats <- fData(object)
     targetFeats <- 
         targetFeats[!duplicated(targetFeats[["TargetName"]]), ]
     rownames(targetFeats) <- targetFeats[, "TargetName"]
-    probeColumns <- c("RTS_ID", "QCFlags", "ProbeID")
+    probeColumns <- c("RTS_ID", "QCFlags", "ProbeID", 
+                      "ProbeRatio", "OutlierFrequency")
     targetFeats <- 
         targetFeats[, !colnames(targetFeats) %in% probeColumns]
     targetFeats <- 
          AnnotatedDataFrame(targetFeats[rownames(targetCounts), ], 
                             dimLabels = c("featureNames", "featureColumns"))
+
     targetObject <- NanoStringGeoMxSet(
         assayData = targetCounts,
         phenoData = phenoData(object),

diff --git a/R/NanoStringGeoMxSet-class.R b/R/NanoStringGeoMxSet-class.R
@@ -21,6 +21,7 @@ function(object) {
     methods::callNextMethod(object)
     cat("feature: ")
     cat(featureType(object))
+    cat("\n")
 })
 
 # Constructors

diff --git a/R/NanoStringGeoMxSet-de.R b/R/NanoStringGeoMxSet-de.R
@@ -10,6 +10,7 @@
 #' @param nCores = 1, number of cores to use, set to 1 if running in serial mode
 #' @param multiCore = TRUE, set to TRUE to use multiCore, FALSE to run in cluster mode
 #' @param pAdjust = "BY" method for p-value adjustment
+#' @param pairwise boolean to calculate least-square means pairwise differences
 #'
 #' @return mixed model output list
 #'
@@ -61,7 +62,6 @@ mixedModelDE <- function(object, elt = "exprs", modelFormula = NULL,
     }
   }
   if (nCores > 1) {
-    require(parallel)
     deFunc <- function(i, groupVar, pDat, modelFormula, exprs, pairwise = TRUE) {
       dat <- data.frame(expr = exprs$exprs[i, ], pDat)
       lmOut <- suppressWarnings(lmerTest::lmer(modelFormula, dat))
@@ -71,19 +71,19 @@ mixedModelDE <- function(object, elt = "exprs", modelFormula = NULL,
         lsm <- lmerTest::ls_means(lmOut, which = groupVar, pairwise = TRUE)
       }
       lmOut <- matrix(anova(lmOut)[groupVar, "Pr(>F)"], ncol = 1, dimnames = list(groupVar, "Pr(>F)"))
-      lsmOut <- matrix(cbind(lsm[,1], lsm[,2]), ncol = 2, dimnames = list(gsub(groupVar, "", rownames(lsm)), c("Estimate", "PR(>|t|)")))
+      lsmOut <- matrix(cbind(lsm[,"Estimate"], lsm[,"Pr(>|t|)"]), ncol = 2, dimnames = list(gsub(groupVar, "", rownames(lsm)), c("Estimate", "Pr(>|t|)")))
 
       return(list(anova = lmOut, lsmeans = lsmOut))
     }
     exprs <- new.env()
     exprs$exprs <- assayDataElement(object, elt = elt)
     if (multiCore) {
-      mixedOut <- mclapply(featureNames(object), deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), exprs, mc.cores = nCores)
+      mixedOut <- parallel::mclapply(featureNames(object), deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), exprs, mc.cores = nCores)
     }
     else {
-      cl <- makeCluster(getOption("cl.cores", nCores))
-      mixedOut <- parLapply(cl, featureNames(object), deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), exprs, pairwise)
-      suppressWarnings(stopCluster(cl))
+      cl <- parallel::makeCluster(getOption("cl.cores", nCores))
+      mixedOut <- parallel::parLapply(cl, featureNames(object), deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), exprs, pairwise)
+      suppressWarnings(parallel::stopCluster(cl))
     }
     mixedOut <- rbind(array(lapply(mixedOut, function(x) x[["anova"]])),
                       array(lapply(mixedOut, function(x) x[["lsmeans"]])))
@@ -99,8 +99,8 @@ mixedModelDE <- function(object, elt = "exprs", modelFormula = NULL,
       } else {
         lsm <- lmerTest::ls_means(lmOut, which = groupVar, pairwise = TRUE)
       }
-      lmOut <- matrix(anova(lmOut)[groupVar, "Pr(>F)"], ncol = 1, dimnames = list(groupVar, "Pr(>F)"))
-      lsmOut <- matrix(cbind(lsm[,1], lsm[,2]), ncol = 2, dimnames = list(gsub(groupVar, "", rownames(lsm)), c("Estimate", "PR(>|t|)")))
+      lmOut <- matrix(stats::anova(lmOut)[groupVar, "Pr(>F)"], ncol = 1, dimnames = list(groupVar, "Pr(>F)"))
+      lsmOut <- matrix(cbind(lsm[,"Estimate"], lsm[,"Pr(>|t|)"]), ncol = 2, dimnames = list(gsub(groupVar, "", rownames(lsm)), c("Estimate", "Pr(>|t|)")))
       return(list(anova = lmOut, lsmeans = lsmOut))
     }
     mixedOut <- assayDataApply(object, 1, deFunc, groupVar, pDat, formula(paste("expr", as.character(modelFormula)[2], sep = " ~ ")), pairwise,  elt = elt)

diff --git a/R/NanoStringGeoMxSet-normalize.R b/R/NanoStringGeoMxSet-normalize.R
@@ -9,6 +9,8 @@ HOUSEKEEPERS <- c(
 #' @param data_type the data type of the object. Values maybe RNA, protein.
 #' @param fromElt name of the assayDataElement to normalize
 #' @param toElt name of the assayDataElement to store normalized values
+#' @param housekeepers optional vector of housekeeper target names
+#' @param ... optional arguments
 #' @return a NanoStringGeoMxSet object with normalized counts and normalized factors
 #' @examples
 #' datadir <- system.file("extdata", "DSP_NGS_Example_Data",
@@ -149,7 +151,7 @@ hkNorm <- function(object, data_type, toElt, fromElt, housekeepers) {
 # subtract background
 subtractBackground <- function(object, data_type, toElt, fromElt) {
   if (!featureType(object) == "Target") {
-    negsubset <- subset(object, subset = Codeclass %in% c("Negative01", "Negative"))
+    negsubset <- subset(object, subset = CodeClass %in% c("Negative01", "Negative"))
     negs <- apply(exprs(negsubset), 2, function(x) ngeoMean(x))
     assayDataElement(object, toElt) <-
       t(assayDataApply(object, MARGIN = 1L, FUN = `-`, t(negs), elt = fromElt))
@@ -181,8 +183,9 @@ setGeneric("checkQCFlags",
 
 
 #' checkQCFlags
-#' @param NanoStringGeoMxSet
+#' @param object name of the NanoStringGeoMxSet object to check the QC Flags
 #' @param removeLowLocalOutliers logical, if TRUE it sets outlier counts to zero,  default is FALSE,
+#' @param ... optional arguments
 #' @return NanoStringGeoMxSet
 #' @export
 #'