Skip to content

Commit

Permalink
Merge pull request #5 from fmicompbio/add-fragpipe
Browse files Browse the repository at this point in the history
Add FragPipe
  • Loading branch information
csoneson authored Mar 5, 2023
2 parents 2910869 + 7c05481 commit adc1c8a
Show file tree
Hide file tree
Showing 65 changed files with 5,489 additions and 832 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: einprot
Type: Package
Title: A collection of proteomics analysis utilities and workflows
Version: 0.6.5
Version: 0.6.8
Authors@R: c(
person("Charlotte", "Soneson", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3833-2169")),
Expand Down Expand Up @@ -74,6 +74,7 @@ RoxygenNote: 7.2.3
Suggests:
BiocManager,
testthat (>= 3.0.0),
iSEE
iSEE,
matrixStats
Config/testthat/edition: 3
VignetteBuilder: knitr
10 changes: 7 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ export(getConvTable)
export(getFirstId)
export(getIntensityColumns)
export(getMatSubtractedBaseline)
export(getMaxMissedCleavagesFrompdAnalysis)
export(getNthId)
export(getPSMValidationInfoFrompdAnalysis)
export(getQuantInfoFrompdAnalysis)
export(getQuantOrderFrompdAnalysis)
Expand Down Expand Up @@ -48,8 +50,10 @@ export(plotPDTMTqc)
export(plotVolcano)
export(prepareFeatureCollections)
export(prepareFinalSCE)
export(readFragPipeInfo)
export(readMaxQuantXML)
export(readProteomeDiscovererInfo)
export(runFragPipeAnalysis)
export(runMaxQuantAnalysis)
export(runPDTMTAnalysis)
export(runPDTMTptmAnalysis)
Expand Down Expand Up @@ -96,7 +100,6 @@ importFrom(dplyr,arrange)
importFrom(dplyr,between)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,contains)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
Expand Down Expand Up @@ -134,7 +137,9 @@ importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_line)
importFrom(ggplot2,element_text)
importFrom(ggplot2,expand_limits)
importFrom(ggplot2,facet_grid)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_abline)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_boxplot)
importFrom(ggplot2,geom_col)
Expand All @@ -153,6 +158,7 @@ importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,layer_data)
importFrom(ggplot2,position_dodge)
importFrom(ggplot2,position_jitterdodge)
importFrom(ggplot2,rel)
importFrom(ggplot2,scale_fill_continuous)
importFrom(ggplot2,scale_fill_gradient2)
Expand All @@ -165,8 +171,6 @@ importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,theme_minimal)
importFrom(ggrepel,geom_text_repel)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
importFrom(htmltools,tagList)
importFrom(iSEEu,registerAveAbFields)
importFrom(iSEEu,registerFeatureSetCollections)
Expand Down
35 changes: 35 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,43 @@
# einprot 0.6.8

* Add idCol and labelCol arguments to PTM workflow
* Change default behaviour of fixFeatureIds when column name is given to not make output unique
* Add stringVersion and stringDir arguments, allowing the use of local STRING files
* Make filter functions more robust to missing columns
* Add possibility to write excluded features to a file
* Represent link table columns as integers/factors when appropriate
* Allow displaying any column in rowData(sce) in the interactive volcano plot tooltip
* In the case of long labels, attempt to auto-adapt text size in PCA coefficient and logFC plots
* Add overview and crosslinks at the beginning of the reports
* Include bar plot of significant features in pdf output
* Allow iColPattern without Sample for PDTMT
* Make plot axis labels less ambiguous

# einprot 0.6.7

* Add signifDigits argument to makeDbLinkTable, and round to 4 significant digits in the templates
* Include the maximum number of missed cleavages in PDTMT tables
* Let maxNbrComplexesToPlot determine also the maximum number of top feature sets displayed in the reports
* Bugfix for sample plot ordering in complex bar plots
* Add bar plot for significant features

# einprot 0.6.6

* Bring FragPipe workflows up to date
* Add individual PTM volcano plots to table of contents
* Add modificationsCol and keepModifications arguments to PTM workflow
* Increase control level in deparsing to allow e.g. multi-line functions
* Fill feature sets plot by direction
* Suppress legend in PCA plot if there are too many groups
* Add option to only retain master proteins in PDTMT Proteins workflow
* Allow inclusion of extra columns in the link table

# einprot 0.6.5

* Change correlation heatmap appearance
* Provide the possibility to run the workflows without statistical tests
* Allow specifying the iColPattern without escaped periods
* Add interactiveDisplayColumns arguments to volcano plots

# einprot 0.6.4

Expand Down
201 changes: 201 additions & 0 deletions R/checkArgumentsFragPipe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#' Check validity of arguments for FragPipe analysis
#'
#' Validates the arguments of the FragPipe workflow one by one and stops with
#' an error at the first violation. The checks cover, in order: the report
#' template and output settings, the experiment info and species, the
#' presence of \code{combined_protein.tsv} (and at most one config/log file)
#' in \code{fragpipeDir}, the sample inclusion/exclusion settings, the
#' intensity column pattern and sample annotation, the ID column
#' specifications, the filtering thresholds, the method choices, the test and
#' plot parameters, the seed, the feature collection settings, the STRING
#' settings and the custom yml file.
#'
#' The function is called for its side effect (raising an error). If all
#' checks pass, it returns \code{NULL} invisibly.
#'
#' @keywords internal
#' @noRd
#' @author Charlotte Soneson
#'
#' @importFrom MsCoreUtils normalizeMethods
.checkArgumentsFragPipe <- function(
    templateRmd, outputDir, outputBaseName, reportTitle, reportAuthor,
    forceOverwrite, experimentInfo, species, fragpipeDir,
    idCol, labelCol, geneIdCol, proteinIdCol, stringIdCol,
    iColPattern, sampleAnnot, includeOnlySamples,
    excludeSamples, minScore, minPeptides, imputeMethod, mergeGroups,
    comparisons, ctrlGroup, allPairwiseComparisons, singleFit,
    subtractBaseline, baselineGroup, normMethod, spikeFeatures, stattest,
    minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
    volcanoLog2FCThr, volcanoMaxFeatures, volcanoS0, volcanoFeaturesToLabel,
    addInteractiveVolcanos, interactiveDisplayColumns, complexFDRThr,
    maxNbrComplexesToPlot, seed,
    includeFeatureCollections, minSizeToKeepSet, customComplexes,
    complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
    customYml, doRender
) {
    ## templateRmd
    .assertScalar(x = templateRmd, type = "character")
    if (!file.exists(templateRmd)) {
        stop("'templateRmd' must point to an existing file")
    }

    ## Output specifications
    .assertScalar(x = outputDir, type = "character")
    .assertScalar(x = outputBaseName, type = "character")
    .assertScalar(x = reportTitle, type = "character")
    .assertScalar(x = reportAuthor, type = "character")
    .assertScalar(x = forceOverwrite, type = "logical")
    .assertScalar(x = doRender, type = "logical")

    ## Experiment info
    .assertVector(x = experimentInfo, type = "list")
    if (length(experimentInfo) > 0) {
        .assertVector(x = names(experimentInfo), type = "character")
    }
    ## Called only for its error on unsupported species; the value is unused
    getSpeciesInfo(species)

    ## FP files
    .assertScalar(x = fragpipeDir, type = "character")
    proteinFile <- file.path(fragpipeDir, "combined_protein.tsv")
    if (!file.exists(proteinFile)) {
        stop("The file ",
             proteinFile,
             " doesn't exist")
    }
    ## The dot before the file extension is escaped so that it only matches a
    ## literal "." (an unescaped dot would match any character)
    fpConfigFile <- list.files(fragpipeDir,
                               pattern = "^fragpipe.+\\.config$",
                               full.names = TRUE)
    if (length(fpConfigFile) > 1) {
        stop("There are more than one config file in the FragPipe directory")
    }
    fpLogFile <- list.files(fragpipeDir, pattern = "^log_.+\\.txt$",
                            full.names = TRUE)
    if (length(fpLogFile) > 1) {
        stop("There are more than one log file in the FragPipe directory")
    }

    ## Samples to include or exclude
    .assertVector(x = includeOnlySamples, type = "character")
    .assertVector(x = excludeSamples, type = "character")
    ## A sample selection counts as specified if it has more than one element,
    ## or a single non-empty value. isTRUE() keeps the condition a proper
    ## TRUE/FALSE also for zero-length or NA input, which would otherwise make
    ## the if() below fail with an unrelated error message.
    isSpecified <- function(samples) {
        length(samples) > 1 || isTRUE(samples != "")
    }
    if (isSpecified(includeOnlySamples) && isSpecified(excludeSamples)) {
        stop("Please specify max one of includeOnlySamples and excludeSamples")
    }

    ## Names and patterns
    validPatterns <- c("\\\\.Unique\\\\.Spectral\\\\.Count$",
                       "\\\\.Total\\\\.Spectral\\\\.Count$",
                       "\\\\.Unique\\\\.Intensity$",
                       "\\\\.MaxLFQ\\\\.Unique\\\\.Intensity$",
                       "\\\\.MaxLFQ\\\\.Total\\\\.Intensity$",
                       "\\\\.MaxLFQ\\\\.Intensity$")
    ## Each pattern is accepted both with and without the escaping backslashes
    .assertScalar(x = iColPattern, type = "character",
                  validValues = c(validPatterns,
                                  gsub("\\\\", "", validPatterns,
                                       fixed = TRUE)))
    .assertVector(x = sampleAnnot, type = "data.frame")
    .assertVector(x = colnames(sampleAnnot), type = "character")
    stopifnot(all(c("sample", "group") %in% colnames(sampleAnnot)))
    .assertVector(x = sampleAnnot$group, type = "character")
    ## Collapse doubled backslashes to single ones before using the pattern
    ## as a regular expression
    iColRegex <- gsub("\\\\", "\\", iColPattern, fixed = TRUE)
    ics <- getIntensityColumns(inFile = proteinFile,
                               iColPattern = iColRegex,
                               includeOnlySamples = includeOnlySamples,
                               excludeSamples = excludeSamples,
                               stopIfEmpty = TRUE)
    ## Remove the pattern from the intensity column names; the remainder must
    ## be present in the sample annotation
    ics <- gsub(iColRegex, "", ics$iCols)
    msg <- setdiff(ics, sampleAnnot$sample)
    if (length(msg) > 0) {
        stop("Not all sample names are available in the sample annotation. ",
             "Missing samples: ", paste(msg, collapse = ","))
    }

    ## ID columns: either a function with exactly one argument, or a
    ## character vector (NULL is additionally allowed for geneIdCol and
    ## stringIdCol)
    if (is(idCol, "function")) {
        stopifnot(length(formals(idCol)) == 1)
    } else {
        .assertVector(x = idCol, type = "character")
    }
    if (is(labelCol, "function")) {
        stopifnot(length(formals(labelCol)) == 1)
    } else {
        .assertVector(x = labelCol, type = "character")
    }
    if (is(geneIdCol, "function")) {
        stopifnot(length(formals(geneIdCol)) == 1)
    } else {
        .assertVector(x = geneIdCol, type = "character", allowNULL = TRUE)
    }
    if (is(proteinIdCol, "function")) {
        stopifnot(length(formals(proteinIdCol)) == 1)
    } else {
        .assertVector(x = proteinIdCol, type = "character")
    }
    if (is(stringIdCol, "function")) {
        stopifnot(length(formals(stringIdCol)) == 1)
    } else {
        .assertVector(x = stringIdCol, type = "character", allowNULL = TRUE)
    }

    .assertVector(x = linkTableColumns, type = "character", allowNULL = TRUE)

    ## Score thresholds
    .assertScalar(x = minScore, type = "numeric")
    .assertScalar(x = minPeptides, type = "numeric")

    ## Method choices
    .assertScalar(x = imputeMethod, type = "character",
                  validValues = c("impSeqRob", "MinProb"))
    .assertScalar(x = normMethod, type = "character",
                  validValues = c(MsCoreUtils::normalizeMethods(), "none"))
    .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
    .assertScalar(x = stattest, type = "character",
                  validValues = c("limma", "ttest", "proDA", "none"))

    ## Test parameters
    .assertScalar(x = minNbrValidValues, type = "numeric",
                  rngIncl = c(0, Inf))
    .assertScalar(x = minlFC, type = "numeric", rngIncl = c(0, Inf))
    .assertScalar(x = samSignificance, type = "logical")
    .assertScalar(x = nperm, type = "numeric", rngIncl = c(1, Inf))
    .assertScalar(x = volcanoAdjPvalThr, type = "numeric",
                  rngIncl = c(0, 1))
    .assertScalar(x = volcanoLog2FCThr, type = "numeric",
                  rngIncl = c(0, Inf))
    .assertScalar(x = volcanoMaxFeatures, type = "numeric",
                  rngIncl = c(0, Inf))
    .assertScalar(x = volcanoS0, type = "numeric", rngIncl = c(0, Inf))
    .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
    .assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
                  rngIncl = c(0, Inf))
    .assertScalar(x = minSizeToKeepSet, type = "numeric",
                  rngIncl = c(0, Inf))
    .assertVector(x = volcanoFeaturesToLabel, type = "character")
    .assertVector(x = mergeGroups, type = "list")
    .assertVector(x = comparisons, type = "list")
    .assertScalar(x = ctrlGroup, type = "character")
    .assertScalar(x = allPairwiseComparisons, type = "logical")
    .assertScalar(x = addInteractiveVolcanos, type = "logical")
    .assertVector(x = interactiveDisplayColumns, type = "character",
                  allowNULL = TRUE)
    .assertScalar(x = singleFit, type = "logical")
    .assertScalar(x = subtractBaseline, type = "logical")
    .assertScalar(x = baselineGroup, type = "character")

    if (length(mergeGroups) > 0) {
        if (is.null(names(mergeGroups)) || any(names(mergeGroups) == "") ||
            any(duplicated(names(mergeGroups)))) {
            stop("'mergeGroups' must be a named list, without duplicated names")
        }
    }

    if (length(comparisons) > 0) {
        if (any(lengths(comparisons) != 2)) {
            stop("Each entry in 'comparisons' must have exactly two elements")
        }
    }

    ## seed
    .assertScalar(x = seed, type = "numeric", rngIncl = c(1, Inf))

    ## Complexes
    .assertVector(x = includeFeatureCollections, type = "character",
                  validValues = c("complexes", "GO"), allowNULL = TRUE)
    .assertVector(x = customComplexes, type = "list")
    if (length(customComplexes) > 0) {
        .assertVector(x = names(customComplexes), type = "character")
    }
    .assertScalar(x = complexSpecies, type = "character",
                  validValues = c("current", "all"), allowNULL = TRUE)
    .assertScalar(x = complexDbPath, type = "character", allowNULL = TRUE)
    if (!is.null(complexDbPath) && !file.exists(complexDbPath)) {
        stop("'complexDbPath' must point to an existing file")
    }

    ## STRING settings
    .assertScalar(x = stringVersion, type = "character")
    .assertScalar(x = stringDir, type = "character", allowNULL = TRUE)

    ## Custom yml
    .assertScalar(x = customYml, type = "character", allowNULL = TRUE)
    if (!is.null(customYml) && !file.exists(customYml)) {
        stop("'customYml' must point to an existing file")
    }
}
8 changes: 7 additions & 1 deletion R/checkArgumentsMaxQuant.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
addInteractiveVolcanos, interactiveDisplayColumns, complexFDRThr,
maxNbrComplexesToPlot, seed,
includeFeatureCollections, minSizeToKeepSet, customComplexes,
complexSpecies, complexDbPath, customYml, doRender
complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
customYml, doRender
) {
## templateRmd
.assertScalar(x = templateRmd, type = "character")
Expand Down Expand Up @@ -112,6 +113,8 @@
.assertVector(x = stringIdCol, type = "character", allowNULL = TRUE)
}

.assertVector(x = linkTableColumns, type = "character", allowNULL = TRUE)

## Score thresholds
.assertScalar(x = minScore, type = "numeric")
.assertScalar(x = minPeptides, type = "numeric")
Expand Down Expand Up @@ -178,6 +181,9 @@
stop("'complexDbPath' must point to an existing file")
}

.assertScalar(x = stringVersion, type = "character")
.assertScalar(x = stringDir, type = "character", allowNULL = TRUE)

.assertScalar(x = customYml, type = "character", allowNULL = TRUE)
if (!is.null(customYml) && !file.exists(customYml)) {
stop("'customYml' must point to an existing file")
Expand Down
Loading

0 comments on commit adc1c8a

Please sign in to comment.