Skip to content

Commit

Permalink
upgrade gsc_signature_score tool (#653)
Browse files Browse the repository at this point in the history
* upgrade gsc_signature_score tool conda package (R 4.2 env)

* Update signature_score.R

* Update .shed.yml
  • Loading branch information
drosofff authored Dec 10, 2023
1 parent a14fb3d commit 84220e7
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 71 deletions.
2 changes: 1 addition & 1 deletion tools/gsc_signature_score/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ long_description: |
categories:
- Transcriptomics
homepage_url: http://artbio.fr
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_signature_score
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_signature_score
toolshed:
- toolshed
132 changes: 67 additions & 65 deletions tools/gsc_signature_score/signature_score.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
#########################
# Signature score #
#########################

# Signature score
# Compute the signature score based on the geometric mean of the target gene expression
# and split cells in 2 groups (high/low) using this signature score.

# Example of command
# Rscript 4-signature_score.R --input <input.tsv> --genes ARNT2,SALL2,SOX9,OLIG2,POU3F2
# --output <output.tab> --pdf <output.pdf>

# load packages that are provided in the conda env
options( show.error.messages=F,
error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
options(show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
warnings()

Expand All @@ -21,110 +17,118 @@ library(ggplot2)
library(gridExtra)

# Arguments
option_list = list(
option_list <- list(
make_option(
"--input",
default = NA,
type = 'character',
type = "character",
help = "Input file that contains log2(CPM +1) values"
),
make_option(
"--sep",
default = '\t',
type = 'character',
default = "\t",
type = "character",
help = "File separator [default : '%default' ]"
),
make_option(
"--colnames",
default = TRUE,
type = 'logical',
type = "logical",
help = "Consider first line as header ? [default : '%default' ]"
),
),
make_option(
"--genes",
default = NA,
type = 'character',
type = "character",
help = "List of genes comma separated"
),
make_option(
"--percentile_threshold",
default = 20,
type = 'integer',
type = "integer",
help = "detection threshold to keep a gene in signature set [default : '%default' ]"
),
make_option(
"--output",
default = "./output.tab",
type = 'character',
type = "character",
help = "Output path [default : '%default' ]"
),
make_option(
"--stats",
default = "./statistics.tab",
type = 'character',
type = "character",
help = "statistics path [default : '%default' ]"
),
make_option(
"--correlations",
default = "./correlations.tab",
type = 'character',
type = "character",
help = "Correlations between signature genes [default : '%default' ]"
),
make_option(
"--covariances",
default = "./statistics.tab",
type = 'character',
type = "character",
help = "Covariances between signature genes [default : '%default' ]"
),
make_option(
"--pdf",
default = "~/output.pdf",
type = 'character',
type = "character",
help = "pdf path [default : '%default' ]"
)
)

opt = parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))
opt <- parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))

if (opt$sep == "tab") {opt$sep = "\t"}
if (opt$sep == "comma") {opt$sep = ","}
if (opt$sep == "tab") {
opt$sep <- "\t"
}
if (opt$sep == "comma") {
opt$sep <- ","
}

# Take input data
data.counts <- read.table(
opt$input,
h = opt$colnames,
row.names = 1,
sep = opt$sep,
check.names = F
check.names = FALSE
)

# Get vector of target genes
genes <- unlist(strsplit(opt$genes, ","))

if (length(unique(genes %in% rownames(data.counts))) == 1) {
if (unique(genes %in% rownames(data.counts)) == F)
if (unique(genes %in% rownames(data.counts)) == FALSE)
stop("None of these genes are in your dataset: ", opt$genes)
}

logical_genes <- rownames(data.counts) %in% genes

# Retrieve target genes in counts data
signature.counts <- subset(data.counts, logical_genes)

# compute covariance
signature.covariances <- as.data.frame(cov(t(signature.counts)))
signature.covariances <- cbind(gene=rownames(signature.covariances), signature.covariances)
write.table(signature.covariances, file=opt$covariances, quote=F, row.names=F, sep="\t")
signature.covariances <- cbind(gene = rownames(signature.covariances), signature.covariances)
write.table(signature.covariances,
file = opt$covariances,
quote = FALSE,
row.names = FALSE,
sep = "\t")

# compute signature.correlations
signature.correlations <- as.data.frame(cor(t(signature.counts)))
signature.correlations <- cbind(gene=rownames(signature.correlations), signature.correlations)
write.table(signature.correlations, file=opt$correlations, quote=F, row.names=F, sep="\t")
signature.correlations <- cbind(gene = rownames(signature.correlations), signature.correlations)
write.table(signature.correlations, file = opt$correlations, quote = FALSE, row.names = FALSE, sep = "\t")

## Descriptive Statistics Function
descriptive_stats = function(InputData) {
SummaryData = data.frame(
descriptive_stats <- function(InputData) {
SummaryData <- data.frame(
mean = rowMeans(InputData),
SD = apply(InputData, 1, sd),
Variance = apply(InputData, 1, var),
Expand All @@ -137,7 +141,7 @@ descriptive_stats = function(InputData) {

signature_stats <- descriptive_stats(signature.counts)

# Find poorly detected genes from the signature
# Find poorly detected genes from the signature
kept_genes <- signature_stats$Percentage_Detection >= opt$percentile_threshold

# Add warnings
Expand All @@ -148,7 +152,7 @@ if (length(unique(kept_genes)) > 1) {
"\n"
)
} else {
if (unique(kept_genes) == F) {
if (unique(kept_genes) == FALSE) {
stop(
"None of these genes are detected in ",
opt$percent,
Expand All @@ -160,46 +164,44 @@ if (length(unique(kept_genes)) > 1) {
}

# Remove genes poorly detected in the dataset
signature.counts <- signature.counts[kept_genes,]
signature.counts <- signature.counts[kept_genes, ]

# Replace 0 by 1 counts
signature.counts[signature.counts == 0] <- 1

# Geometric mean by cell
score <- apply(signature.counts, 2, geometric.mean) # geometric.mean requires psych

# Add results in signature_output
signature_output <- data.frame(
cell = names(score),
score = score,
rate = ifelse(score > mean(score), "HIGH", "LOW"),
nGenes = colSums(data.counts != 0),
total_counts = colSums(data.counts)
)
signature_output <- data.frame(cell = names(score),
score = score,
rate = ifelse(score > mean(score), "HIGH", "LOW"),
nGenes = colSums(data.counts != 0),
total_counts = colSums(data.counts))

# statistics of input genes, signature genes first lines
statistics.counts <- rbind(subset(data.counts, logical_genes),
subset(data.counts, !logical_genes))
subset(data.counts, !logical_genes))
statistics <- descriptive_stats(statistics.counts)
statistics <- cbind(gene=rownames(statistics), statistics)
statistics <- cbind(gene = rownames(statistics), statistics)



# Re-arrange score matrix for plots
score <- data.frame(score = score,
order = rank(score, ties.method = "first"),
signature = signature_output$rate,
stringsAsFactors = F)
stringsAsFactors = FALSE)

pdf(file = opt$pdf)
myplot <- ggplot(signature_output, aes(x=rate, y=score)) +
geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F, cex=0.5) +
geom_abline(slope=0, intercept=mean(score$score), lwd=.5, color="red") +
scale_fill_manual(values=c("#ff0000", "#08661e")) +
geom_jitter(size=0.2) + labs(y = "Score", x = "Rate") +
annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust=1.5,
color="black", label = mean(score$score), parse = TRUE) +
labs(title = "Violin plots of Cell signature scores")
myplot <- ggplot(signature_output, aes(x = rate, y = score)) +
geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
scale_fill_manual(values = c("#ff0000", "#08661e")) +
geom_jitter(size = 0.2) + labs(y = "Score", x = "Rate") +
annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
color = "black", label = mean(score$score), parse = TRUE) +
labs(title = "Violin plots of Cell signature scores")

print(myplot)
dev.off()
Expand All @@ -209,16 +211,16 @@ write.table(
signature_output,
opt$output,
sep = "\t",
quote = F,
col.names = T,
row.names = F
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)

write.table(
statistics,
opt$stats,
sep = "\t",
quote = F,
col.names = T,
row.names = F
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)
10 changes: 5 additions & 5 deletions tools/gsc_signature_score/signature_score.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<tool id="signature_score" name="Compute signature scores" version="0.9.1">
<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy0">
<description>in single cell RNAseq</description>
<requirements>
<requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
<requirement type="package" version="2.2.1=r3.3.2_0">r-ggplot2</requirement>
<requirement type="package" version="2.3=r3.3.2_0">r-gridextra</requirement>
<requirement type="package" version="1.6.9=r3.3.2_0">r-psych</requirement>
<requirement type="package" version="1.7.3">r-optparse</requirement>
<requirement type="package" version="3.4.4">r-ggplot2</requirement>
<requirement type="package" version="2.3">r-gridextra</requirement>
<requirement type="package" version="2.3.9">r-psych</requirement>
</requirements>
<stdio>
<exit_code range="1:" level="fatal" description="Tool exception" />
Expand Down
Binary file modified tools/gsc_signature_score/test-data/signature.pdf
Binary file not shown.

0 comments on commit 84220e7

Please sign in to comment.