Skip to content

Commit

Permalink
Add contamination filtering to Spectronaut
Browse files Browse the repository at this point in the history
  • Loading branch information
csoneson committed Dec 4, 2024
1 parent 5b13b9a commit 436524e
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 6 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* Add center.median.shared and center.mean.shared normalization methods
* Add maxComplexSimilarity argument to plotVolcano
* Update PomBase and WormBase conversion tables
* Add contaminant filtering for Spectronaut data (features whose protein group ID matches the contam_ prefix are removed)

# einprot 0.9.5

Expand Down
21 changes: 15 additions & 6 deletions R/doFilter.R
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
#' expression) used to identify decoys (reverse hits). The pattern is
#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
#' column.
#' @param contamPattern Character scalar providing the pattern (a regular
#' expression) used to identify contaminants. The pattern is
#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
#' column, and matching features are flagged as contaminants and
#' filtered out.
#' @param exclFile Character scalar, the path to a text file where the
#' features that are filtered out are written. If \code{NULL} (default),
#' excluded features are not recorded.
Expand All @@ -562,22 +566,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
#' @importFrom rlang .data
#'
filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
revPattern = "_Decoy$", exclFile = NULL) {
revPattern = "_Decoy$",
contamPattern = "^contam_", exclFile = NULL) {
.assertVector(x = sce, type = "SummarizedExperiment")
.assertScalar(x = minScore, type = "numeric", allowNULL = TRUE)
.assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE)
.assertScalar(x = plotUpset, type = "logical")
.assertScalar(x = revPattern, type = "character")
.assertScalar(x = contamPattern, type = "character")
.assertScalar(x = exclFile, type = "character", allowNULL = TRUE)

## Make sure that the columns used for filtering later are character vectors
rowData(sce)$Reverse <- ifelse(grepl(revPattern, rowData(sce)$PG.ProteinGroups),
"+", "")
rowData(sce)$Contaminant <- ifelse(grepl(contamPattern,
rowData(sce)$PG.ProteinGroups),
"+", "")

filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>%
dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide",
"PG.Cscore"))) %>%
dplyr::mutate(across(dplyr::any_of(c("Reverse")),
"PG.Cscore", "Contaminant"))) %>%
dplyr::mutate(across(dplyr::any_of(c("Reverse", "Contaminant")),
function(x) as.numeric(x == "+")))
if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) &&
!is.null(minPeptides)) {
Expand All @@ -601,9 +610,9 @@ filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
if ("Reverse" %in% colnames(rowData(sce))) {
keep <- intersect(keep, which(rowData(sce)$Reverse == ""))
}
# if ("Potential.contaminant" %in% colnames(rowData(sce))) {
# keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == ""))
# }
if ("Contaminant" %in% colnames(rowData(sce))) {
keep <- intersect(keep, which(rowData(sce)$Contaminant == ""))
}
if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) &&
!is.null(minPeptides)) {
keep <- intersect(
Expand Down
1 change: 1 addition & 0 deletions inst/extdata/process_basic_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ if (expType == "MaxQuant") {
sce <- filterSpectronaut(sce = sce, minScore = minScore,
minPeptides = minPeptides, plotUpset = TRUE,
revPattern = "_Decoy$",
contamPattern = "^contam_",
exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"),
knitr::current_input(dir = TRUE)))
} else if (expType == "DIANN") {
Expand Down
6 changes: 6 additions & 0 deletions man/filterSpectronaut.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 436524e

Please sign in to comment.