Skip to content

Commit

Permalink
Merge pull request #23 from fmicompbio/add-filtering
Browse files Browse the repository at this point in the history
Add filtering for Spectronaut
  • Loading branch information
csoneson authored Jul 21, 2024
2 parents cafc672 + 4077dac commit d429fd6
Show file tree
Hide file tree
Showing 19 changed files with 198 additions and 85 deletions.
11 changes: 5 additions & 6 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
config:
- { os: macOS-latest, bioc: 'release', curlConfigPath: '/usr/bin/'}
- { os: windows-latest, bioc: 'release'}
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_18", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_19", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
Expand Down Expand Up @@ -68,11 +68,10 @@ jobs:
if: runner.os == 'Linux'
env:
RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
run: |
Rscript -e "remotes::install_github('r-hub/sysreqs')"
sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
sudo -s eval "$sysreqs"
sudo apt-get update && sudo apt-get -y install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev
uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
pak-version: devel

- name: Install system dependencies (macOS)
if: runner.os == 'macOS'
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: einprot
Type: Package
Title: A collection of proteomics analysis utilities and workflows
Version: 0.9.4
Version: 0.9.5
Authors@R: c(
person("Charlotte", "Soneson", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3833-2169")),
Expand Down Expand Up @@ -41,7 +41,7 @@ Imports:
MsCoreUtils,
msigdbr,
plotly,
QFeatures,
QFeatures (>= 1.14.0),
readr,
rlang,
rmarkdown,
Expand Down Expand Up @@ -78,7 +78,7 @@ Imports:
grid,
Biostrings,
motifStack
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Suggests:
BiocManager,
testthat (>= 3.0.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(doNormalization)
export(doPCA)
export(emptySampleText)
export(expDesignText)
export(filterByModText)
export(filterFragPipe)
export(filterMaxQuant)
export(filterPDTMT)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# einprot 0.9.5

* Add filtering by score and number of peptides to Spectronaut workflow
* Filter by modifications after the normalization in PD-TMT peptide groups workflow

# einprot 0.9.4

* Add details about DIA-NN command line to report
Expand Down
60 changes: 39 additions & 21 deletions R/doFilter.R
Original file line number Diff line number Diff line change
Expand Up @@ -529,21 +529,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,

#' Filter out features in Spectronaut data
#'
#' Exclude features where the 'PG.ProteinGroups' column ends with the
#' Exclude features with 'PG.Cscore' below \code{minScore},
#' 'PG.NrOfStrippedSequencesIdentified.Experiment.wide' below
#' \code{minPeptides}, or where the 'PG.ProteinGroups' column contains the
#' specified \code{revPattern}.
#'
#' @author Charlotte Soneson
#' @export
#'
#' @param sce A \code{SummarizedExperiment} object (or a derivative).
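#' @param minScore Numeric scalar, the minimum allowed value in the 'PG.Cscore'
#' column in order to retain the feature. Features with a missing value in
#' this column are also excluded. Set to \code{NULL} to skip filtering on
#' the score.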
#' @param minPeptides Numeric scalar, the minimum allowed value in the
#' 'Combined.Total.Peptides' column in order to retain the feature.
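#' 'PG.NrOfStrippedSequencesIdentified.Experiment.wide' column in order to
#' retain the feature. Features with a missing value in this column are
#' also excluded. Set to \code{NULL} to skip filtering on the number of
#' peptides.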
#' @param plotUpset Logical scalar, whether to generate an UpSet plot
#' detailing the reasons for features being filtered out. Only
#' generated if any feature is in fact filtered out.
#' @param revPattern Character scalar providing the pattern (a regular
#' expression) used to identify decoys (reverse hits). The pattern is
#' matched against the IDs in the FragPipe \code{Protein} column.
#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
#' column.
#' @param exclFile Character scalar, the path to a text file where the
#' features that are filtered out are written. If \code{NULL} (default),
#' excluded features are not recorded.
Expand All @@ -555,9 +561,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
#' @importFrom ComplexUpset upset
#' @importFrom rlang .data
#'
filterSpectronaut <- function(sce, minPeptides, plotUpset = TRUE,
filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
revPattern = "_Decoy$", exclFile = NULL) {
.assertVector(x = sce, type = "SummarizedExperiment")
.assertScalar(x = minScore, type = "numeric", allowNULL = TRUE)
.assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE)
.assertScalar(x = plotUpset, type = "logical")
.assertScalar(x = revPattern, type = "character")
Expand All @@ -568,19 +575,27 @@ filterSpectronaut <- function(sce, minPeptides, plotUpset = TRUE,
"+", "")

filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>%
dplyr::select(dplyr::any_of(c("Reverse"))) %>%
dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide",
"PG.Cscore"))) %>%
dplyr::mutate(across(dplyr::any_of(c("Reverse")),
function(x) as.numeric(x == "+")))
# if ("Combined.Total.Peptides" %in% colnames(filtdf) &&
# !is.null(minPeptides)) {
# filtdf <- filtdf %>%
# dplyr::mutate(
# Combined.Total.Peptides = as.numeric(
# (.data$Combined.Total.Peptides < minPeptides) |
# is.na(.data$Combined.Total.Peptides)))
# } else {
# filtdf$Combined.Total.Peptides <- NULL
# }
if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) &&
!is.null(minPeptides)) {
filtdf <- filtdf %>%
dplyr::mutate(
PG.NrOfStrippedSequencesIdentified.Experiment.wide = as.numeric(
(.data$PG.NrOfStrippedSequencesIdentified.Experiment.wide < minPeptides) |
is.na(.data$PG.NrOfStrippedSequencesIdentified.Experiment.wide)))
} else {
filtdf$PG.NrOfStrippedSequencesIdentified.Experiment.wide <- NULL
}
if ("PG.Cscore" %in% colnames(filtdf) && !is.null(minScore)) {
filtdf <- filtdf %>%
dplyr::mutate(PG.Cscore = as.numeric((.data$PG.Cscore < minScore) |
is.na(.data$PG.Cscore)))
} else {
filtdf$PG.Cscore <- NULL
}

keep <- seq_len(nrow(sce))
if ("Reverse" %in% colnames(rowData(sce))) {
Expand All @@ -589,12 +604,15 @@ filterSpectronaut <- function(sce, minPeptides, plotUpset = TRUE,
# if ("Potential.contaminant" %in% colnames(rowData(sce))) {
# keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == ""))
# }
# if ("Combined.Total.Peptides" %in% colnames(rowData(sce)) &&
# !is.null(minPeptides)) {
# keep <- intersect(
# keep, which(rowData(sce)$Combined.Total.Peptides >= minPeptides)
# )
# }
if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) &&
!is.null(minPeptides)) {
keep <- intersect(
keep, which(rowData(sce)$PG.NrOfStrippedSequencesIdentified.Experiment.wide >= minPeptides)
)
}
if ("PG.Cscore" %in% colnames(rowData(sce)) && !is.null(minScore)) {
keep <- intersect(keep, which(rowData(sce)$PG.Cscore >= minScore))
}
exclude <- rowData(sce[setdiff(seq_len(nrow(sce)), keep), ])
sce <- sce[keep, ]

Expand Down
2 changes: 1 addition & 1 deletion R/importDIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ importDIANN <- function(inFile, fileType = "pg_matrix", outLevel = "pg",
stopIfEmpty = stopIfEmpty)

sce <- QFeatures::readSummarizedExperiment(
inFile, ecol = iCols, sep = "\t", check.names = FALSE, ...
inFile, quantCols = iCols, sep = "\t", check.names = FALSE, ...
)

SummarizedExperiment::assayNames(sce) <- aName
Expand Down
2 changes: 1 addition & 1 deletion R/importExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ importExperiment <- function(inFile, iColPattern, includeOnlySamples = "",

if (length(icols) > 0) {
se <- QFeatures::readSummarizedExperiment(
inFile, ecol = icols, sep = "\t", ...
inFile, quantCols = icols, sep = "\t", ...
)
## Add list of columns to metadata
S4Vectors::metadata(se)$cols <- icols
Expand Down
6 changes: 4 additions & 2 deletions R/runMaxQuantAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@
#' retained in the analysis. Set to \code{NULL} if no filtering on the
#' number of peptides is desired.
#' @param imputeMethod Character string defining the imputation method to use.
#' Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported.
#' Currently, \code{"impSeqRob"}, \code{"MinProb"}, and
#' \code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for
#' more details about the methods.
#' @param assaysForExport Character vector defining the name(s) of the assays
#' to use for exported abundances and barplots. This could, for example,
#' be set to an assay containing 'absolute' abundances, if available, even
Expand Down Expand Up @@ -161,7 +163,7 @@
#' @param seed Numeric, random seed to use for any non-deterministic
#' calculations.
#' @param includeFeatureCollections Character vector, a subset of
#' \code{c("complexes", "GO")}.
#' \code{c("complexes", "GO", "pathways")}.
#' @param minSizeToKeepSet Numeric scalar indicating the smallest number of
#' features that have to overlap with the current data set in order to
#' retain a feature set for testing.
Expand Down
18 changes: 18 additions & 0 deletions R/textSnippets.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,24 @@ introText <- function(expType) {
}
}

#' @rdname textSnippets
#' @export
filterByModText <- function(excludeUnmodifiedPeptides, keepModifications) {
    ## Filtering on specific modifications is requested if and only if a
    ## non-NULL set of modifications to keep was provided
    modsGiven <- !is.null(keepModifications)

    ## Sentence ending shared by the two modification-related snippets;
    ## wrapped in a function so that it is only assembled when needed
    modClause <- function() {
        paste0("peptides without any of the requested modifications (",
               paste(keepModifications, collapse = ", "), ").")
    }

    ## Both filters active
    if (excludeUnmodifiedPeptides && modsGiven) {
        return(paste0("Next, we filter out unmodified peptides and ",
                      modClause()))
    }
    ## Only unmodified peptides are removed
    if (excludeUnmodifiedPeptides) {
        return("Next, we filter out unmodified peptides.")
    }
    ## Only peptides lacking the requested modifications are removed
    if (modsGiven) {
        return(paste0("Next, we filter out ", modClause()))
    }
    ## Neither filter is active -> empty snippet
    ""
}

#' @rdname textSnippets
#' @export
inputText <- function(expTypeLevel) {
Expand Down
25 changes: 19 additions & 6 deletions inst/extdata/einprot_bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ @ARTICLE{AhlmannEltze2020proda
title = "{proDA}: Probabilistic Dropout Analysis for Identifying
Differentially Abundant Proteins in {Label-Free} Mass
Spectrometry",
author = "Ahlmann-Eltze, Constantin and Anders, Simon",
author = "Ahlmann-Eltze, C and Anders, S",
journal = "bioRxiv doi:https://doi.org/10.1101/661496",
year = 2020
}
Expand Down Expand Up @@ -140,6 +140,19 @@ @ARTICLE{Cox2008maxquant
url = "https://www.nature.com/articles/nbt.1511"
}

@ARTICLE{Soneson2023einprot,
title = "einprot: Flexible, easy-to-use, reproducible workflows for
statistical analysis of quantitative proteomics data",
author = "Soneson, C and Iesmantavicius, V and Hess, D
and Stadler, MB and Seebacher, J",
journal = "J. Open Source Softw.",
volume = 8,
number = 89,
pages = 5750,
url = "https://doi.org/10.21105/joss.05750",
year = 2023
}

@ARTICLE{Orsburn2021pd,
title = "Proteome {Discoverer-A} Community Enhanced Data Processing Suite
for Protein Informatics",
Expand All @@ -153,8 +166,8 @@ @ARTICLE{Orsburn2021pd

@ARTICLE{Rue-Albrecht2018isee,
title = "{iSEE}: Interactive {SummarizedExperiment} Explorer",
author = "Rue-Albrecht, Kevin and Marini, Federico and Soneson, Charlotte
and Lun, Aaron T L",
author = "Rue-Albrecht, K and Marini, F and Soneson, C
and Lun, ATL",
journal = "F1000Res.",
volume = 7,
pages = 741,
Expand All @@ -165,7 +178,7 @@ @ARTICLE{Rue-Albrecht2018isee
@ARTICLE{BenjaminiHochberg1995fdr,
title = "Controlling the false discovery rate: a practical and powerful
approach to multiple testing",
author = "Benjamini, Yoav and Hochberg, Yosef",
author = "Benjamini, Y and Hochberg, Y",
journal = "J. R. Stat. Soc. Series B Stat. Methodol.",
volume = 57,
number = 1,
Expand All @@ -176,8 +189,8 @@ @ARTICLE{BenjaminiHochberg1995fdr
@ARTICLE{Demichev2020diann,
title = "{DIA-NN}: neural networks and interference correction enable deep
proteome coverage in high throughput",
author = "Demichev, Vadim and Messner, Christoph B and Vernardis, Spyros I
and Lilley, Kathryn S and Ralser, Markus",
author = "Demichev, V and Messner, CB and Vernardis, SI
and Lilley, KS and Ralser, M",
journal = "Nat. Methods",
volume = 17,
number = 1,
Expand Down
3 changes: 3 additions & 0 deletions inst/extdata/process_PD_TMT_PTM_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -821,10 +821,13 @@ That will open up an iSEE session where you can interactively explore your data.
imptd <- sub("imputed_", "",
grep("imputed_", SummarizedExperiment::assayNames(scePeptides),
value = TRUE))
hmFeature <- rownames(scePeptides)[
min(which(rowSums(!is.na(assay(scePeptides, assayForTests))) > 0))]
makeiSEEScript(iSEEScript = iSEEScript, sceFile = sceFile,
aName = imptd, tests = tests,
assayForPlots = assayForTests,
assayForHeatmaps = assayForTests,
featureForHeatmaps = hmFeature,
includeFeatureSetTable = FALSE)
```

Expand Down
Loading

0 comments on commit d429fd6

Please sign in to comment.