diff --git a/NEWS.md b/NEWS.md index 604ac39..255115e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ * Add center.median.shared and center.mean.shared normalization methods * Add maxComplexSimilarity argument to plotVolcano * Update PomBase and WormBase conversion tables +* Add contamination filtering to Spectronaut (presence of contam_ prefix) # einprot 0.9.5 diff --git a/R/doFilter.R b/R/doFilter.R index 75351a1..689e361 100644 --- a/R/doFilter.R +++ b/R/doFilter.R @@ -550,6 +550,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, #' expression) used to identify decoys (reverse hits). The pattern is #' matched against the IDs in the Spectronaut \code{PG.ProteinGroups} #' column. +#' @param contamPattern Character scalar providing the pattern (a regular +#' expression) used to identify contaminants. The pattern is +#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups} +#' column. #' @param exclFile Character scalar, the path to a text file where the #' features that are filtered out are written. If \code{NULL} (default), #' excluded features are not recorded. @@ -562,22 +566,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, #' @importFrom rlang .data #' filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE, - revPattern = "_Decoy$", exclFile = NULL) { + revPattern = "_Decoy$", + contamPattern = "^contam_", exclFile = NULL) { .assertVector(x = sce, type = "SummarizedExperiment") .assertScalar(x = minScore, type = "numeric", allowNULL = TRUE) .assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE) .assertScalar(x = plotUpset, type = "logical") .assertScalar(x = revPattern, type = "character") + .assertScalar(x = contamPattern, type = "character") .assertScalar(x = exclFile, type = "character", allowNULL = TRUE) ## Make sure that the columns used for filtering later are character vectors rowData(sce)$Reverse <- ifelse(grepl(revPattern, rowData(sce)$PG.ProteinGroups), "+", "") + rowData(sce)$Contaminant <- ifelse(grepl(contamPattern, + rowData(sce)$PG.ProteinGroups), + "+", "") filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>% dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide", - "PG.Cscore"))) %>% - dplyr::mutate(across(dplyr::any_of(c("Reverse")), + "PG.Cscore", "Contaminant"))) %>% + dplyr::mutate(across(dplyr::any_of(c("Reverse", "Contaminant")), function(x) as.numeric(x == "+"))) if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) && !is.null(minPeptides)) { @@ -601,9 +610,9 @@ filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE, if ("Reverse" %in% colnames(rowData(sce))) { keep <- intersect(keep, which(rowData(sce)$Reverse == "")) } - # if ("Potential.contaminant" %in% colnames(rowData(sce))) { - # keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == "")) - # } + if ("Contaminant" %in% colnames(rowData(sce))) { + keep <- intersect(keep, which(rowData(sce)$Contaminant == "")) + } if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) && !is.null(minPeptides)) { keep <- intersect( diff --git a/inst/extdata/process_basic_template.Rmd b/inst/extdata/process_basic_template.Rmd index 598d844..1ccabbf 100644 --- a/inst/extdata/process_basic_template.Rmd +++ b/inst/extdata/process_basic_template.Rmd @@ -447,6 +447,7 @@ if (expType == "MaxQuant") { sce <- filterSpectronaut(sce = sce, minScore = minScore, minPeptides = minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", + contamPattern = "^contam_", exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE))) } else if (expType == "DIANN") { diff --git a/man/filterSpectronaut.Rd b/man/filterSpectronaut.Rd index 589178e..9069330 100644 --- a/man/filterSpectronaut.Rd +++ b/man/filterSpectronaut.Rd @@ -10,6 +10,7 @@ filterSpectronaut( minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", + contamPattern = "^contam_", exclFile = NULL ) } @@ -32,6 +33,11 @@ expression) used to identify decoys (reverse hits). The pattern is matched against the IDs in the Spectronaut \code{PG.ProteinGroups} column.} +\item{contamPattern}{Character scalar providing the pattern (a regular +expression) used to identify contaminants. The pattern is +matched against the IDs in the Spectronaut \code{PG.ProteinGroups} +column.} + \item{exclFile}{Character scalar, the path to a text file where the features that are filtered out are written. If \code{NULL} (default), excluded features are not recorded.}