Skip to content

Commit

Permalink
Upgrade gsc_mannwhitney_de tool (#651)
Browse files Browse the repository at this point in the history
* Update .shed.yml

* reorganize test and check conda upgrade

* generate valid new tests

* Lint R code

* remove deployment on testtoolshed
  • Loading branch information
drosofff authored Nov 30, 2023
1 parent a3dc683 commit c394391
Show file tree
Hide file tree
Showing 16 changed files with 4,272 additions and 47,107 deletions.
8 changes: 0 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -385,14 +385,6 @@ jobs:
with:
path: ~/.cache/pip
key: pip_cache_py_${{ matrix.python-version }}_gxy_${{ needs.setup.outputs.galaxy-head-sha }}
- name: Deploy on testtoolshed
uses: galaxyproject/planemo-ci-action@v1
with:
mode: deploy
repository-list: ${{ needs.setup.outputs.repository-list }}
shed-target: testtoolshed
shed-key: ${{ secrets.TTS_API_KEY }}
continue-on-error: true
- name: Deploy on toolshed
uses: galaxyproject/planemo-ci-action@v1
with:
Expand Down
2 changes: 1 addition & 1 deletion tools/gsc_mannwhitney_de/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ long_description:
categories:
- Transcriptomics
homepage_url: http://artbio.fr
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_mannwhitney_de
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de
toolshed:
- toolshed
104 changes: 54 additions & 50 deletions tools/gsc_mannwhitney_de/MannWhitney_DE.R
Original file line number Diff line number Diff line change
@@ -1,97 +1,102 @@
####################
# Differential #
# analysis #
####################

# Perform a differential analysis between 2
# groups of cells.
# Perform a differential analysis between 2 groups of cells.

# Example of command
# Rscript MannWhitney_DE.R --input <input.tsv> --sep <tab> --colnames <TRUE> --metadata <signature.tsv> --column_name <rate> --fdr <0.01> --output <diff_analysis.tsv>

# load packages that are provided in the conda env
options( show.error.messages=F,
error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
options(show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)

loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
warnings()
library(optparse)

#Arguments
option_list = list(
suppressPackageStartupMessages({
library(optparse)
})

sessionInfo()

option_list <- list(
make_option(
"--input",
default = NA,
type = 'character',
type = "character",
help = "Input file that contains log2(CPM +1) values"
),
make_option(
"--sep",
default = '\t',
type = 'character',
default = "\t",
type = "character",
help = "File separator [default : '%default' ]"
),
make_option(
"--colnames",
default = TRUE,
type = 'logical',
type = "logical",
help = "Consider first line as header ? [default : '%default' ]"
),
),
make_option(
"--comparison_factor_file",
default = NA,
type = 'character',
type = "character",
help = " A two column table : cell identifiers and a comparison factor that split cells in two categories (high/low, HOM/HET,...)"
),
make_option(
"--factor1",
type = 'character',
type = "character",
help = "level associated to the control condition in the factor file"
),
),
make_option(
"--factor2",
type = 'character',
type = "character",
help = "level associated to the test condition in the factor file"
),
make_option(
"--fdr",
default = 0.01,
type = 'numeric',
type = "numeric",
help = "FDR threshold [default : '%default' ]"
),
make_option(
"--log",
default=FALSE,
action="store_true",
type = 'logical',
default = FALSE,
action = "store_true",
type = "logical",
help = "Expression data are log-transformed [default : '%default' ]"
),
make_option(
"--output",
default = "results.tsv",
type = 'character',
type = "character",
help = "Output name [default : '%default' ]"
)
)

opt = parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))
opt <- parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))

if (opt$sep == "tab") {opt$sep = "\t"}
if (opt$sep == "comma") {opt$sep = ","}
if (opt$sep == "tab") {
opt$sep <- "\t"
}
if (opt$sep == "comma") {
opt$sep <- ","
}

#Open files
data.counts <- read.table(
opt$input,
h = opt$colnames,
row.names = 1,
sep = opt$sep,
check.names = F
check.names = FALSE
)

metadata <- read.table(
opt$comparison_factor_file,
header = TRUE,
stringsAsFactors = F,
stringsAsFactors = FALSE,
sep = "\t",
check.names = FALSE,
row.names = 1
Expand All @@ -100,35 +105,34 @@ metadata <- read.table(
metadata <- subset(metadata, rownames(metadata) %in% colnames(data.counts))

# Create one named logical vector per factor level, flagging the cells (named by cell identifier) that belong to that level
factor1_cells <- setNames(metadata[,1] == opt$factor1, rownames(metadata))
factor2_cells <- setNames(metadata[,1] == opt$factor2, rownames(metadata))
factor1_cells <- setNames(metadata[, 1] == opt$factor1, rownames(metadata))
factor2_cells <- setNames(metadata[, 1] == opt$factor2, rownames(metadata))

## Mann-Whitney test (Two-sample Wilcoxon test)
MW_test <- data.frame(t(apply(data.counts, 1, function(x) {
do.call("cbind", wilcox.test(x[names(factor1_cells)[factor1_cells]], x[names(factor2_cells)[factor2_cells]]))[, 1:2]
})), stringsAsFactors = F)
})), stringsAsFactors = FALSE)

# Benjamini-Hochberg correction and significance call
MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH" , n = nrow(MW_test))
# MW_test$Critical.value <- (rank(MW_test$p.value) / nrow(MW_test)) * opt$fdr
MW_test$p.adjust <- p.adjust(as.numeric(MW_test$p.value), method = "BH", n = nrow(MW_test))
MW_test$Significant <- MW_test$p.adjust < opt$fdr

## Descriptive Statistics Function
#
# Compute per-gene (per-row) summary statistics of an expression table.
#
# Args:
#   InputData: expression table with one row per gene and one named column
#     per cell (data.frame or matrix of numeric values).
#
# Reads from the enclosing environment:
#   factor1_cells, factor2_cells: named logical vectors (names = cell
#     identifiers) flagging the cells of the control / test condition.
#   opt$log: TRUE when InputData is already log-transformed.
#
# Returns:
#   A data.frame with one row per gene and the columns mean, SD, Variance,
#   Percentage_Detection, mean_condition2, mean_condition1 and log2FC.
descriptive_stats <- function(InputData) {
  # Select the cells of each condition by identifier rather than with the raw
  # logical vectors: those vectors follow the row order of the metadata table,
  # which is not guaranteed to match the column order (or number) of
  # InputData. This mirrors the name-based selection used for the
  # Mann-Whitney test. which() discards NA factor values.
  cells_condition1 <- names(factor1_cells)[which(factor1_cells)]
  cells_condition2 <- names(factor2_cells)[which(factor2_cells)]

  SummaryData <- data.frame(
    mean = rowMeans(InputData),
    SD = apply(InputData, 1, sd),
    Variance = apply(InputData, 1, var),
    # Percentage of cells in which the gene has a non-zero value
    Percentage_Detection = apply(InputData, 1, function(x) {
      (sum(x != 0) / ncol(InputData)) * 100
    }),
    # drop = FALSE keeps a two-dimensional object even for a one-cell group,
    # which rowMeans() requires
    mean_condition2 = rowMeans(InputData[, cells_condition2, drop = FALSE]),
    mean_condition1 = rowMeans(InputData[, cells_condition1, drop = FALSE])
  )
  if (opt$log) {
    # Values are already on a log scale: the fold change is a difference
    SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1
  } else {
    SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1)
  }
  return(SummaryData)
}
Expand All @@ -139,16 +143,16 @@ results <- merge(gene_stats, MW_test, by = "row.names")
colnames(results)[1] <- "genes"

## Annotate Significant column
results$Significant[results$Significant == T & !is.na(results$Significant)] <- ifelse(subset(results, Significant == T)$log2FC > 0, "UP", "DOWN")
results$Significant[results$Significant == F & !is.na(results$Significant)] <- "NS"
results$Significant[results$Significant == TRUE & !is.na(results$Significant)] <- ifelse(subset(results, Significant == TRUE)$log2FC > 0, "UP", "DOWN")
results$Significant[results$Significant == FALSE & !is.na(results$Significant)] <- "NS"


# Save files
write.table(
results[order(results$p.adjust),],
results[order(results$p.adjust), ],
opt$output,
sep = "\t",
quote = F,
col.names = T,
row.names = F
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)
33 changes: 11 additions & 22 deletions tools/gsc_mannwhitney_de/mannwhitney_de.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<tool id="mannwhitney_de" name="Perform a differential analysis" version="0.9.4">
<tool id="mannwhitney_de" name="Perform a differential analysis" version="4.1.3+galaxy0">
<description>using a Mann-Whitney test</description>
<requirements>
<requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
<requirement type="package" version="1.7.1">r-optparse</requirement>
</requirements>
<stdio>
<exit_code range="1:" level="fatal" description="Tool exception" />
Expand Down Expand Up @@ -48,37 +48,26 @@
</outputs>
<tests>
<test>
<param name="input" value="filtered-0.05.tab" ftype="txt"/>
<param name="input" value="input.tsv" ftype="tabular"/>
<param name="sep" value="tab" />
<param name="colnames" value="TRUE"/>
<param name="comparison_factor_file" value="signature_2columns.tsv" ftype="tabular"/>
<param name="comparison_factor_file" value="factor_2col.tsv" ftype="tabular"/>
<param name="factor1" value="LOW"/>
<param name="factor2" value="HIGH"/>
<param name="fdr" value="0.01"/>
<param name="fdr" value="0.05"/>
<param name="log" value="true"/>
<output name="output" file="geneMetadata_log.tsv" ftype="tabular"/>
<output name="output" file="result.tsv" ftype="tabular"/>
</test>
<test>
<param name="input" value="filtered-0.05.tab" ftype="txt"/>
<param name="sep" value="tab" />
<param name="input" value="input.csv" ftype="txt"/>
<param name="sep" value="comma" />
<param name="colnames" value="TRUE"/>
<param name="comparison_factor_file" value="signature_3columns.tsv" ftype="tabular"/>
<param name="comparison_factor_file" value="factor_3col.tsv" ftype="tabular"/>
<param name="factor1" value="LOW"/>
<param name="factor2" value="HIGH"/>
<param name="fdr" value="0.01"/>
<param name="fdr" value="0.05"/>
<param name="log" value="true"/>
<output name="output" file="geneMetadata_log.tsv" ftype="tabular"/>
</test>
<test>
<param name="input" value="filterCells_100.tsv" ftype="txt"/>
<param name="sep" value="tab" />
<param name="colnames" value="TRUE"/>
<param name="comparison_factor_file" value="signature_2columns.tsv" ftype="tabular"/>
<param name="factor1" value="LOW"/>
<param name="factor2" value="HIGH"/>
<param name="fdr" value="0.01"/>
<param name="log" value="false"/>
<output name="output" file="geneMetadata_nolog.tsv" ftype="tabular"/>
<output name="output" file="result_from_csv.tsv" ftype="tabular"/>
</test>
</tests>
<help>
Expand Down
101 changes: 101 additions & 0 deletions tools/gsc_mannwhitney_de/test-data/factor_2col.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
cell rate
c173.B4 LOW
c173.A2 LOW
c173.E2 LOW
c173.F6 LOW
c173.B12 LOW
c173.H1 HIGH
c173.E4 HIGH
c173.C2 LOW
c173.F4 LOW
c173.E10 HIGH
c173.E6 LOW
c173.C4 HIGH
c173.B10 LOW
c173.A7 LOW
c173.B8 LOW
c173.F9 LOW
c173.G6 HIGH
c173.A9 LOW
c173.D2 HIGH
c173.F2 LOW
c173.H9 LOW
c173.G1 LOW
c173.B3 LOW
c173.D7 HIGH
c173.A12 LOW
c173.C1 LOW
c173.H7 HIGH
c173.H5 LOW
c173.G2 LOW
c173.A3 LOW
c173.D3 LOW
c173.E1 LOW
c173.G12 HIGH
c173.C10 HIGH
c173.C3 LOW
c173.A5 LOW
c173.G4 LOW
c173.B11 LOW
c173.G9 HIGH
c173.F1 LOW
c173.D8 LOW
c173.E12 HIGH
c173.F11 LOW
c173.E5 LOW
c173.G10 LOW
c173.B9 LOW
c173.C7 HIGH
c173.B2 LOW
c173.H4 LOW
c174.G5 LOW
c174.H9 LOW
c174.A10 LOW
c174.G10 HIGH
c174.C8 LOW
c174.F6 LOW
c174.B4 LOW
c174.H7 LOW
c174.E10 HIGH
c174.D5 LOW
c174.E5 LOW
c174.B11 LOW
c174.F10 LOW
c174.A5 LOW
c174.B8 LOW
c174.F2 LOW
c174.F9 HIGH
c174.F4 HIGH
c174.B6 LOW
c174.E2 LOW
c174.G2 LOW
c174.D8 LOW
c174.H6 LOW
c174.C3 HIGH
c174.B7 LOW
c174.A7 LOW
c174.H10 LOW
c174.F7 HIGH
c174.A2 LOW
c174.E4 LOW
c174.D12 LOW
c174.H4 LOW
c174.C5 LOW
c174.E9 LOW
c174.D1 LOW
c174.G4 LOW
c174.G9 LOW
c174.D4 HIGH
c174.H11 HIGH
c174.A9 LOW
c174.A4 HIGH
c174.C7 LOW
c174.D11 LOW
c174.F5 LOW
c174.G6 LOW
c174.D6 LOW
c174.H3 LOW
c174.F12 LOW
c174.E6 LOW
c174.G3 LOW
c174.A8 LOW
Loading

0 comments on commit c394391

Please sign in to comment.