diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a7e2940..13b91da 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -21,7 +21,7 @@ jobs: config: - { os: macOS-latest, bioc: 'release', curlConfigPath: '/usr/bin/'} - { os: windows-latest, bioc: 'release'} - - { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_18", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} + - { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_19", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true @@ -68,11 +68,10 @@ jobs: if: runner.os == 'Linux' env: RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" - sudo apt-get update && sudo apt-get -y install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + pak-version: devel - name: Install system dependencies (macOS) if: runner.os == 'macOS' diff --git a/DESCRIPTION b/DESCRIPTION index 6b6abd1..1237f01 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: einprot Type: Package Title: A collection of proteomics analysis utilities and workflows -Version: 0.9.4 +Version: 0.9.5 Authors@R: c( person("Charlotte", "Soneson", email = "charlotte.soneson@fmi.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3833-2169")), @@ -41,7 +41,7 @@ Imports: MsCoreUtils, msigdbr, plotly, - QFeatures, + QFeatures (>= 1.14.0), readr, rlang, rmarkdown, @@ -78,7 +78,7 @@ Imports: grid, Biostrings, motifStack -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Suggests: BiocManager, testthat (>= 3.0.0), diff --git a/NAMESPACE b/NAMESPACE index 8bedf4e..8cabe74 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(doNormalization) export(doPCA) export(emptySampleText) export(expDesignText) +export(filterByModText) export(filterFragPipe) export(filterMaxQuant) export(filterPDTMT) diff --git a/NEWS.md b/NEWS.md index 0a974f9..0dc9062 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# einprot 0.9.5 + +* Add filtering by score and number of peptides to Spectronaut workflow +* Filter by modifications after the normalization in PD-TMT peptide groups workflow + # einprot 0.9.4 * Add details about DIA-NN command line to report diff --git a/R/doFilter.R b/R/doFilter.R index 3e9bf89..75351a1 100644 --- a/R/doFilter.R +++ b/R/doFilter.R @@ -529,21 +529,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, #' Filter out features in Spectronaut data #' -#' Exclude features where the 'PG.ProteinGroups' column ends with the +#' Exclude features with 'PG.Cscore' below \code{minScore}, +#' 'PG.NrOfStrippedSequencesIdentified.Experiment.wide' below +#' \code{minPeptides}, or where the 'PG.ProteinGroups' column contains the #' specified \code{revPattern}. #' #' @author Charlotte Soneson #' @export #' #' @param sce A \code{SummarizedExperiment} object (or a derivative). +#' @param minScore Numeric scalar, the minimum allowed value in the 'PG.Cscore' +#' column in order to retain the feature. #' @param minPeptides Numeric scalar, the minimum allowed value in the -#' 'Combined.Total.Peptides' column in order to retain the feature. +#' 'PG.NrOfStrippedSequencesIdentified.Experiment.wide' column in order to +#' retain the feature. #' @param plotUpset Logical scalar, whether to generate an UpSet plot #' detailing the reasons for features being filtered out. Only #' generated if any feature is in fact filtered out. #' @param revPattern Character scalar providing the pattern (a regular #' expression) used to identify decoys (reverse hits). The pattern is -#' matched against the IDs in the FragPipe \code{Protein} column. +#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups} +#' column. #' @param exclFile Character scalar, the path to a text file where the #' features that are filtered out are written. If \code{NULL} (default), #' excluded features are not recorded. @@ -555,9 +561,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, #' @importFrom ComplexUpset upset #' @importFrom rlang .data #' -filterSpectronaut <- function(sce, minPeptides, plotUpset = TRUE, +filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", exclFile = NULL) { .assertVector(x = sce, type = "SummarizedExperiment") + .assertScalar(x = minScore, type = "numeric", allowNULL = TRUE) .assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE) .assertScalar(x = plotUpset, type = "logical") .assertScalar(x = revPattern, type = "character") @@ -568,19 +575,27 @@ filterSpectronaut <- function(sce, minPeptides, plotUpset = TRUE, "+", "") filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>% - dplyr::select(dplyr::any_of(c("Reverse"))) %>% + dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide", + "PG.Cscore"))) %>% dplyr::mutate(across(dplyr::any_of(c("Reverse")), function(x) as.numeric(x == "+"))) - # if ("Combined.Total.Peptides" %in% colnames(filtdf) && - # !is.null(minPeptides)) { - # filtdf <- filtdf %>% - # dplyr::mutate( - # Combined.Total.Peptides = as.numeric( - # (.data$Combined.Total.Peptides < minPeptides) | - # is.na(.data$Combined.Total.Peptides))) - # } else { - # filtdf$Combined.Total.Peptides <- NULL - # } + if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) && + !is.null(minPeptides)) { + filtdf <- filtdf %>% + dplyr::mutate( + PG.NrOfStrippedSequencesIdentified.Experiment.wide = as.numeric( + (.data$PG.NrOfStrippedSequencesIdentified.Experiment.wide < minPeptides) | + is.na(.data$PG.NrOfStrippedSequencesIdentified.Experiment.wide))) + } else { + filtdf$PG.NrOfStrippedSequencesIdentified.Experiment.wide <- NULL + } + if ("PG.Cscore" %in% colnames(filtdf) && !is.null(minScore)) { + filtdf <- filtdf %>% + dplyr::mutate(PG.Cscore = as.numeric((.data$PG.Cscore < minScore) | + is.na(.data$PG.Cscore))) + } else { + filtdf$PG.Cscore <- NULL + } keep <- seq_len(nrow(sce)) if ("Reverse" %in% colnames(rowData(sce))) { @@ -589,12 +604,15 @@ filterSpectronaut <- function(sce, minPeptides, plotUpset = TRUE, # if ("Potential.contaminant" %in% colnames(rowData(sce))) { # keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == "")) # } - # if ("Combined.Total.Peptides" %in% colnames(rowData(sce)) && - # !is.null(minPeptides)) { - # keep <- intersect( - # keep, which(rowData(sce)$Combined.Total.Peptides >= minPeptides) - # ) - # } + if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) && + !is.null(minPeptides)) { + keep <- intersect( + keep, which(rowData(sce)$PG.NrOfStrippedSequencesIdentified.Experiment.wide >= minPeptides) + ) + } + if ("PG.Cscore" %in% colnames(rowData(sce)) && !is.null(minScore)) { + keep <- intersect(keep, which(rowData(sce)$PG.Cscore >= minScore)) + } exclude <- rowData(sce[setdiff(seq_len(nrow(sce)), keep), ]) sce <- sce[keep, ] diff --git a/R/importDIANN.R b/R/importDIANN.R index b91cb5c..c515ffa 100644 --- a/R/importDIANN.R +++ b/R/importDIANN.R @@ -72,7 +72,7 @@ importDIANN <- function(inFile, fileType = "pg_matrix", outLevel = "pg", stopIfEmpty = stopIfEmpty) sce <- QFeatures::readSummarizedExperiment( - inFile, ecol = iCols, sep = "\t", check.names = FALSE, ... + inFile, quantCols = iCols, sep = "\t", check.names = FALSE, ... ) SummarizedExperiment::assayNames(sce) <- aName diff --git a/R/importExperiment.R b/R/importExperiment.R index b47f5ca..22a1403 100644 --- a/R/importExperiment.R +++ b/R/importExperiment.R @@ -294,7 +294,7 @@ importExperiment <- function(inFile, iColPattern, includeOnlySamples = "", if (length(icols) > 0) { se <- QFeatures::readSummarizedExperiment( - inFile, ecol = icols, sep = "\t", ... + inFile, quantCols = icols, sep = "\t", ... ) ## Add list of columns to metadata S4Vectors::metadata(se)$cols <- icols diff --git a/R/runMaxQuantAnalysis.R b/R/runMaxQuantAnalysis.R index 6df77d3..5efa711 100644 --- a/R/runMaxQuantAnalysis.R +++ b/R/runMaxQuantAnalysis.R @@ -71,7 +71,9 @@ #' retained in the analysis. Set to \code{NULL} if no filtering on the #' number of peptides is desired. #' @param imputeMethod Character string defining the imputation method to use. -#' Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported. +#' Currently, \code{"impSeqRob"}, \code{"MinProb"}, and +#' \code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for +#' more details about the methods. #' @param assaysForExport Character vector defining the name(s) of the assays #' to use for exported abundances and barplots. This could, for example, #' be set to an assay containing 'absolute' abundances, if available, even @@ -161,7 +163,7 @@ #' @param seed Numeric, random seed to use for any non-deterministic #' calculations. #' @param includeFeatureCollections Character vector, a subset of -#' \code{c("complexes", "GO")}. +#' \code{c("complexes", "GO", "pathways")}. #' @param minSizeToKeepSet Numeric scalar indicating the smallest number of #' features that have to overlap with the current data set in order to #' retain a feature set for testing. diff --git a/R/textSnippets.R b/R/textSnippets.R index 8c43dff..30493e5 100644 --- a/R/textSnippets.R +++ b/R/textSnippets.R @@ -231,6 +231,24 @@ introText <- function(expType) { } } +#' @rdname textSnippets +#' @export +filterByModText <- function(excludeUnmodifiedPeptides, keepModifications) { + if (excludeUnmodifiedPeptides && !is.null(keepModifications)) { + paste0("Next, we filter out unmodified peptides and peptides ", + "without any of the requested modifications ", + "(", paste(keepModifications, collapse = ", "), ").") + } else if (excludeUnmodifiedPeptides) { + paste0("Next, we filter out unmodified peptides.") + } else if (!is.null(keepModifications)) { + paste0("Next, we filter out peptides ", + "without any of the requested modifications ", + "(", paste(keepModifications, collapse = ", "), ").") + } else { + "" + } +} + #' @rdname textSnippets #' @export inputText <- function(expTypeLevel) { diff --git a/inst/extdata/einprot_bibliography.bib b/inst/extdata/einprot_bibliography.bib index d55c8cc..31f63ae 100644 --- a/inst/extdata/einprot_bibliography.bib +++ b/inst/extdata/einprot_bibliography.bib @@ -41,7 +41,7 @@ @ARTICLE{AhlmannEltze2020proda title = "{proDA}: Probabilistic Dropout Analysis for Identifying Differentially Abundant Proteins in {Label-Free} Mass Spectrometry", - author = "Ahlmann-Eltze, Constantin and Anders, Simon", + author = "Ahlmann-Eltze, C and Anders, S", journal = "bioRxiv doi:https://doi.org/10.1101/661496", year = 2020 } @@ -140,6 +140,19 @@ @ARTICLE{Cox2008maxquant url = "https://www.nature.com/articles/nbt.1511" } +@ARTICLE{Soneson2023einprot, + title = "einprot: Flexible, easy-to-use, reproducible workflows for + statistical analysis of quantitative proteomics data", + author = "Soneson, C and Iesmantavicius, V and Hess, D + and Stadler, MB and Seebacher, J", + journal = "J. Open Source Softw.", + volume = 8, + number = 89, + pages = 5750, + url = "https://doi.org/10.21105/joss.05750", + year = 2023 +} + @ARTICLE{Orsburn2021pd, title = "Proteome {Discoverer-A} Community Enhanced Data Processing Suite for Protein Informatics", @@ -153,8 +166,8 @@ @ARTICLE{Orsburn2021pd @ARTICLE{Rue-Albrecht2018isee, title = "{iSEE}: Interactive {SummarizedExperiment} Explorer", - author = "Rue-Albrecht, Kevin and Marini, Federico and Soneson, Charlotte - and Lun, Aaron T L", + author = "Rue-Albrecht, K and Marini, F and Soneson, C + and Lun, ATL", journal = "F1000Res.", volume = 7, pages = 741, @@ -165,7 +178,7 @@ @ARTICLE{Rue-Albrecht2018isee @ARTICLE{BenjaminiHochberg1995fdr, title = "Controlling the false discovery rate: a practical and powerful approach to multiple testing", - author = "Benjamini, Yoav and Hochberg, Yosef", + author = "Benjamini, Y and Hochberg, Y", journal = "J. R. Stat. Soc. Series B Stat. Methodol.", volume = 57, number = 1, @@ -176,8 +189,8 @@ @ARTICLE{BenjaminiHochberg1995fdr @ARTICLE{Demichev2020diann, title = "{DIA-NN}: neural networks and interference correction enable deep proteome coverage in high throughput", - author = "Demichev, Vadim and Messner, Christoph B and Vernardis, Spyros I - and Lilley, Kathryn S and Ralser, Markus", + author = "Demichev, V and Messner, CB and Vernardis, SI + and Lilley, KS and Ralser, M", journal = "Nat. Methods", volume = 17, number = 1, diff --git a/inst/extdata/process_PD_TMT_PTM_template.Rmd b/inst/extdata/process_PD_TMT_PTM_template.Rmd index efc693f..ba3b825 100644 --- a/inst/extdata/process_PD_TMT_PTM_template.Rmd +++ b/inst/extdata/process_PD_TMT_PTM_template.Rmd @@ -821,10 +821,13 @@ That will open up an iSEE session where you can interactively explore your data. imptd <- sub("imputed_", "", grep("imputed_", SummarizedExperiment::assayNames(scePeptides), value = TRUE)) +hmFeature <- rownames(scePeptides)[ + min(which(rowSums(!is.na(assay(scePeptides, assayForTests))) > 0))] makeiSEEScript(iSEEScript = iSEEScript, sceFile = sceFile, aName = imptd, tests = tests, assayForPlots = assayForTests, assayForHeatmaps = assayForTests, + featureForHeatmaps = hmFeature, includeFeatureSetTable = FALSE) ``` diff --git a/inst/extdata/process_basic_template.Rmd b/inst/extdata/process_basic_template.Rmd index b09f19a..023afef 100644 --- a/inst/extdata/process_basic_template.Rmd +++ b/inst/extdata/process_basic_template.Rmd @@ -69,6 +69,10 @@ differentially abundant features. A [summary table](#linktable) provides direct links to external resources, and an additional global overview of the data is provided via [principal component analysis](#run-pca). +If you are using the results from this report in published work, please cite +@Soneson2023einprot, as well as the underlying packages that are used for the +analysis (indicated in the text). + ```{r get-basic-info} ## Get species info and define STRINGdb object speciesInfo <- getSpeciesInfo(species) @@ -156,7 +160,8 @@ if (expType == "MaxQuant") { stop("Unknown 'expType'") } -makeTableFromList(tb) +makeTableFromList(c(list("einprot version" = packageVersion("einprot")), + tb)) ``` ```{r diann-cmd, echo=FALSE, eval = (expType == "DIANN"), results="asis"} @@ -398,10 +403,10 @@ makeIntensityBoxplots(sce = sce, assayName = aNames$assayInput, doLog = TRUE, maxNGroups = 25) ``` -# Filter out contaminants, reverse hits, and features with low confidence +# Filter out contaminants, decoy hits, and features with low confidence Next, we filter out any features classified as potential -contaminants or reverse (decoy) hits, and features identified only by site (which +contaminants or decoy hits, and features identified only by site (which removes proteins that are only identified by peptides carrying one or more modified amino acids). In addition, we may remove protein identifications based on score and number of peptides (i.e. to exclude one-hit wonders). @@ -415,25 +420,30 @@ nbrFeaturesBefore <- nrow(sce) if (expType == "MaxQuant") { sce <- filterMaxQuant(sce = sce, minScore = minScore, minPeptides = minPeptides, plotUpset = TRUE, - exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE))) + exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), + knitr::current_input(dir = TRUE))) } else if (expType == "FragPipe") { sce <- filterFragPipe(sce = sce, minPeptides = minPeptides, plotUpset = TRUE, revPattern = paste0("^", tb[["Database decoy tag"]]), - exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE))) + exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), + knitr::current_input(dir = TRUE))) } else if (expType == "ProteomeDiscoverer") { + ## At this point, don't filter by modification type - this will be done later sce <- filterPDTMT(sce = sce, inputLevel = inputLevel, minScore = minScore, minPeptides = minPeptides, minDeltaScore = minDeltaScore, minPSMs = minPSMs, masterProteinsOnly = masterProteinsOnly, modificationsCol = modificationsCol, - excludeUnmodifiedPeptides = excludeUnmodifiedPeptides, - keepModifications = keepModifications, plotUpset = TRUE, - exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE))) + excludeUnmodifiedPeptides = FALSE, + keepModifications = NULL, plotUpset = TRUE, + exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), + knitr::current_input(dir = TRUE))) } else if (expType == "Spectronaut") { - sce <- filterSpectronaut(sce = sce, minPeptides = 0, - plotUpset = TRUE, + sce <- filterSpectronaut(sce = sce, minScore = minScore, + minPeptides = minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", - exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE))) + exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), + knitr::current_input(dir = TRUE))) } else if (expType == "DIANN") { sce <- sce } else { @@ -528,19 +538,59 @@ assay(sce, aNames$assayLog2WithNA) <- log2(assay(sce, aNames$assayInput)) ## Add assay indicating missing values, which will be imputed assay(sce, aNames$assayImputIndic) <- !is.finite(assay(sce, aNames$assayLog2WithNA)) + +## Replace zeros/-Inf values by explicit NA values in the assays +assay(sce, aNames$assayInput)[assay(sce, aNames$assayInput) == 0] <- NA +assay(sce, aNames$assayLog2WithNA)[!is.finite(assay(sce, aNames$assayLog2WithNA))] <- NA + sce ``` +# Normalize + +```{r text-norm, results="asis", echo=FALSE} +cat(normText(normMethod = normMethod)) +``` + +```{r normalize, echo=(normMethod != "none"), eval=(normMethod != "none"), message=FALSE, fig.width = min(14, max(7, 0.5 * ncol(sce))), fig.height = 5/7 * min(14, max(7, 0.5 * ncol(sce)))} +sce <- doNormalization(sce, method = normMethod, + assayName = aNames$assayLog2WithNA, + normalizedAssayName = aNames$assayLog2NormWithNA, + spikeFeatures = spikeFeatures) + +makeIntensityBoxplots(sce = sce, assayName = aNames$assayLog2NormWithNA, + doLog = FALSE, + ylab = aNames$assayLog2NormWithNA, + maxNGroups = 25) +``` + +```{r text-filter-by-mod, results="asis", echo=FALSE, eval=(expType == "ProteomeDiscoverer" && inputLevel == "PeptideGroups")} +cat(filterByModText(excludeUnmodifiedPeptides = excludeUnmodifiedPeptides, + keepModifications = keepModifications)) +``` + +```{r filter-by-mod} +if (expType == "ProteomeDiscoverer" && inputLevel == "PeptideGroups") { + ## Filter by modification type + sce <- filterPDTMT(sce = sce, inputLevel = inputLevel, minScore = minScore, + minPeptides = minPeptides, minDeltaScore = minDeltaScore, + minPSMs = minPSMs, masterProteinsOnly = masterProteinsOnly, + modificationsCol = modificationsCol, + excludeUnmodifiedPeptides = excludeUnmodifiedPeptides, + keepModifications = keepModifications, plotUpset = TRUE, + exclFile = sub("\\.Rmd$", paste0("_filtered_out_features_modifications.txt"), + knitr::current_input(dir = TRUE))) +} +``` + +At this stage, the `sce` object contains `r nrow(sce)` features. + # Visualize missing value patterns The plot below shows the fraction of the total set of features that are detected (with a non-missing value) in each of the samples. ```{r missing-values, fig.width = min(14, max(7, 0.5 * ncol(sce))), fig.height = 5/7 * min(14, max(7, 0.5 * ncol(sce)))} -## Replace zeros/-Inf values by explicit NA values in the assays -assay(sce, aNames$assayInput)[assay(sce, aNames$assayInput) == 0] <- NA -assay(sce, aNames$assayLog2WithNA)[!is.finite(assay(sce, aNames$assayLog2WithNA))] <- NA - ## Count number of NA values and add to SCE colData(sce)$nNA <- colSums(is.na(assay(sce, aNames$assayInput))) colData(sce)$pNA <- 100 * sce$nNA/nrow(sce) @@ -603,23 +653,6 @@ names(Lobs) <- c(paste0("Number of features observed in at least ", makeTableFromList(Lobs) ``` - -```{r text-norm, results="asis", echo=FALSE} -cat(normText(normMethod = normMethod)) -``` - -```{r normalize, echo=(normMethod != "none"), eval=(normMethod != "none"), message=FALSE, fig.width = min(14, max(7, 0.5 * ncol(sce))), fig.height = 5/7 * min(14, max(7, 0.5 * ncol(sce)))} -sce <- doNormalization(sce, method = normMethod, - assayName = aNames$assayLog2WithNA, - normalizedAssayName = aNames$assayLog2NormWithNA, - spikeFeatures = spikeFeatures) - -makeIntensityBoxplots(sce = sce, assayName = aNames$assayLog2NormWithNA, - doLog = FALSE, - ylab = aNames$assayLog2NormWithNA, - maxNGroups = 25) -``` - # Imputation Next, we apply the `r imputeMethod` method to perform imputation diff --git a/man/filterSpectronaut.Rd b/man/filterSpectronaut.Rd index 9d2720e..589178e 100644 --- a/man/filterSpectronaut.Rd +++ b/man/filterSpectronaut.Rd @@ -6,6 +6,7 @@ \usage{ filterSpectronaut( sce, + minScore, minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", @@ -15,8 +16,12 @@ filterSpectronaut( \arguments{ \item{sce}{A \code{SummarizedExperiment} object (or a derivative).} +\item{minScore}{Numeric scalar, the minimum allowed value in the 'PG.Cscore' +column in order to retain the feature.} + \item{minPeptides}{Numeric scalar, the minimum allowed value in the -'Combined.Total.Peptides' column in order to retain the feature.} +'PG.NrOfStrippedSequencesIdentified.Experiment.wide' column in order to +retain the feature.} \item{plotUpset}{Logical scalar, whether to generate an UpSet plot detailing the reasons for features being filtered out. Only @@ -24,7 +29,8 @@ generated if any feature is in fact filtered out.} \item{revPattern}{Character scalar providing the pattern (a regular expression) used to identify decoys (reverse hits). The pattern is -matched against the IDs in the FragPipe \code{Protein} column.} +matched against the IDs in the Spectronaut \code{PG.ProteinGroups} +column.} \item{exclFile}{Character scalar, the path to a text file where the features that are filtered out are written. If \code{NULL} (default), @@ -34,7 +40,9 @@ excluded features are not recorded.} A filtered object of the same type as \code{sce}. } \description{ -Exclude features where the 'PG.ProteinGroups' column ends with the +Exclude features with 'PG.Cscore' below \code{minScore}, +'PG.NrOfStrippedSequencesIdentified.Experiment.wide' below +\code{minPeptides}, or where the 'PG.ProteinGroups' column contains the specified \code{revPattern}. } \author{ diff --git a/man/runDIANNAnalysis.Rd b/man/runDIANNAnalysis.Rd index aabdc09..70f26ed 100644 --- a/man/runDIANNAnalysis.Rd +++ b/man/runDIANNAnalysis.Rd @@ -166,7 +166,9 @@ retained in the analysis. Set to \code{NULL} if no filtering on the number of peptides is desired.} \item{imputeMethod}{Character string defining the imputation method to use. -Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported.} +Currently, \code{"impSeqRob"}, \code{"MinProb"}, and +\code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for +more details about the methods.} \item{assaysForExport}{Character vector defining the name(s) of the assays to use for exported abundances and barplots. This could, for example, @@ -285,7 +287,7 @@ complexes for which to make separate volcano plots. Defaults to calculations.} \item{includeFeatureCollections}{Character vector, a subset of -\code{c("complexes", "GO")}.} +\code{c("complexes", "GO", "pathways")}.} \item{minSizeToKeepSet}{Numeric scalar indicating the smallest number of features that have to overlap with the current data set in order to diff --git a/man/runFragPipeAnalysis.Rd b/man/runFragPipeAnalysis.Rd index 0d8c29d..34c5536 100644 --- a/man/runFragPipeAnalysis.Rd +++ b/man/runFragPipeAnalysis.Rd @@ -150,7 +150,9 @@ retained in the analysis. Set to \code{NULL} if no filtering on the number of peptides is desired.} \item{imputeMethod}{Character string defining the imputation method to use. -Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported.} +Currently, \code{"impSeqRob"}, \code{"MinProb"}, and +\code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for +more details about the methods.} \item{assaysForExport}{Character vector defining the name(s) of the assays to use for exported abundances and barplots. This could, for example, @@ -269,7 +271,7 @@ complexes for which to make separate volcano plots. Defaults to calculations.} \item{includeFeatureCollections}{Character vector, a subset of -\code{c("complexes", "GO")}.} +\code{c("complexes", "GO", "pathways")}.} \item{minSizeToKeepSet}{Numeric scalar indicating the smallest number of features that have to overlap with the current data set in order to diff --git a/man/runMaxQuantAnalysis.Rd b/man/runMaxQuantAnalysis.Rd index 3aa51a6..b737027 100644 --- a/man/runMaxQuantAnalysis.Rd +++ b/man/runMaxQuantAnalysis.Rd @@ -157,7 +157,9 @@ retained in the analysis. Set to \code{NULL} if no filtering on the number of peptides is desired.} \item{imputeMethod}{Character string defining the imputation method to use. -Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported.} +Currently, \code{"impSeqRob"}, \code{"MinProb"}, and +\code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for +more details about the methods.} \item{assaysForExport}{Character vector defining the name(s) of the assays to use for exported abundances and barplots. This could, for example, @@ -276,7 +278,7 @@ complexes for which to make separate volcano plots. Defaults to calculations.} \item{includeFeatureCollections}{Character vector, a subset of -\code{c("complexes", "GO")}.} +\code{c("complexes", "GO", "pathways")}.} \item{minSizeToKeepSet}{Numeric scalar indicating the smallest number of features that have to overlap with the current data set in order to diff --git a/man/runPDTMTAnalysis.Rd b/man/runPDTMTAnalysis.Rd index 8ba5ced..3be902b 100644 --- a/man/runPDTMTAnalysis.Rd +++ b/man/runPDTMTAnalysis.Rd @@ -196,7 +196,9 @@ proteins (where the \code{Master} column value is \code{inputLevel} is \code{"Proteins"}.} \item{imputeMethod}{Character string defining the imputation method to use. -Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported.} +Currently, \code{"impSeqRob"}, \code{"MinProb"}, and +\code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for +more details about the methods.} \item{assaysForExport}{Character vector defining the name(s) of the assays to use for exported abundances and barplots. This could, for example, @@ -315,7 +317,7 @@ complexes for which to make separate volcano plots. Defaults to calculations.} \item{includeFeatureCollections}{Character vector, a subset of -\code{c("complexes", "GO")}.} +\code{c("complexes", "GO", "pathways")}.} \item{minSizeToKeepSet}{Numeric scalar indicating the smallest number of features that have to overlap with the current data set in order to diff --git a/man/runSpectronautAnalysis.Rd b/man/runSpectronautAnalysis.Rd index 22bdfca..ce361b7 100644 --- a/man/runSpectronautAnalysis.Rd +++ b/man/runSpectronautAnalysis.Rd @@ -167,7 +167,9 @@ retained in the analysis. Set to \code{NULL} if no filtering on the number of peptides is desired.} \item{imputeMethod}{Character string defining the imputation method to use. -Currently, \code{"impSeqRob"} and \code{"MinProb"} are supported.} +Currently, \code{"impSeqRob"}, \code{"MinProb"}, and +\code{"MinProbGlobal"} are supported. See \code{\link{doImputation}} for +more details about the methods.} \item{assaysForExport}{Character vector defining the name(s) of the assays to use for exported abundances and barplots. This could, for example, @@ -286,7 +288,7 @@ complexes for which to make separate volcano plots. Defaults to calculations.} \item{includeFeatureCollections}{Character vector, a subset of -\code{c("complexes", "GO")}.} +\code{c("complexes", "GO", "pathways")}.} \item{minSizeToKeepSet}{Numeric scalar indicating the smallest number of features that have to overlap with the current data set in order to diff --git a/man/textSnippets.Rd b/man/textSnippets.Rd index 0767432..66d6cc9 100644 --- a/man/textSnippets.Rd +++ b/man/textSnippets.Rd @@ -8,6 +8,7 @@ \alias{saText} \alias{expDesignText} \alias{introText} +\alias{filterByModText} \alias{inputText} \title{Text snippets for use in analysis reports} \usage{ @@ -23,6 +24,8 @@ expDesignText(testType) introText(expType) +filterByModText(excludeUnmodifiedPeptides, keepModifications) + inputText(expTypeLevel) } \arguments{