Skip to content

Commit

Permalink
Merge pull request #711 from ARTbio/gsc_signature_score
Browse files Browse the repository at this point in the history
Update Gsc_signature_score
  • Loading branch information
drosofff authored Nov 7, 2024
2 parents 7343be2 + 3a8b848 commit ae36c63
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 133 deletions.
1 change: 1 addition & 0 deletions tools/gsc_signature_score/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ long_description: |
Compute signature scores from single cell RNAseq data
categories:
- Transcriptomics
- Single Cell
homepage_url: http://artbio.fr
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_signature_score
toolshed:
Expand Down
277 changes: 145 additions & 132 deletions tools/gsc_signature_score/signature_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
# Compute the signature score based on the geometric mean of the target gene expression
# and split cells in 2 groups (high/low) using this signature score.

options(show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
options(
show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
warnings()
Expand All @@ -18,93 +19,95 @@ library(gridExtra)

# Arguments
option_list <- list(
make_option(
"--input",
default = NA,
type = "character",
help = "Input file that contains log2(CPM +1) values"
),
make_option(
"--sep",
default = "\t",
type = "character",
help = "File separator [default : '%default' ]"
),
make_option(
"--colnames",
default = TRUE,
type = "logical",
help = "Consider first line as header ? [default : '%default' ]"
),
make_option(
"--genes",
default = NA,
type = "character",
help = "List of genes comma separated"
),
make_option(
"--percentile_threshold",
default = 20,
type = "integer",
help = "detection threshold to keep a gene in signature set [default : '%default' ]"
),
make_option(
"--output",
default = "./output.tab",
type = "character",
help = "Output path [default : '%default' ]"
),
make_option(
"--stats",
default = "./statistics.tab",
type = "character",
help = "statistics path [default : '%default' ]"
),
make_option(
"--correlations",
default = "./correlations.tab",
type = "character",
help = "Correlations between signature genes [default : '%default' ]"
),
make_option(
"--covariances",
default = "./statistics.tab",
type = "character",
help = "Covariances between signature genes [default : '%default' ]"
),
make_option(
"--pdf",
default = "~/output.pdf",
type = "character",
help = "pdf path [default : '%default' ]"
)
make_option(
"--input",
default = NA,
type = "character",
help = "Input file that contains log2(CPM +1) values"
),
make_option(
"--sep",
default = "\t",
type = "character",
help = "File separator [default : '%default' ]"
),
make_option(
"--colnames",
default = TRUE,
type = "logical",
help = "Consider first line as header ? [default : '%default' ]"
),
make_option(
"--genes",
default = NA,
type = "character",
help = "List of genes comma separated"
),
make_option(
"--percentile_threshold",
default = 20,
type = "integer",
help = "detection threshold to keep a gene in signature set [default : '%default' ]"
),
make_option(
"--output",
default = "./output.tab",
type = "character",
help = "Output path [default : '%default' ]"
),
make_option(
"--stats",
default = "./statistics.tab",
type = "character",
help = "statistics path [default : '%default' ]"
),
make_option(
"--correlations",
default = "./correlations.tab",
type = "character",
help = "Correlations between signature genes [default : '%default' ]"
),
make_option(
"--covariances",
default = "./statistics.tab",
type = "character",
help = "Covariances between signature genes [default : '%default' ]"
),
make_option(
"--pdf",
default = "~/output.pdf",
type = "character",
help = "pdf path [default : '%default' ]"
)
)

opt <- parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))
args = commandArgs(trailingOnly = TRUE)
)

if (opt$sep == "tab") {
opt$sep <- "\t"
opt$sep <- "\t"
}
if (opt$sep == "comma") {
opt$sep <- ","
opt$sep <- ","
}

# Take input data
data.counts <- read.table(
opt$input,
h = opt$colnames,
row.names = 1,
sep = opt$sep,
check.names = FALSE
opt$input,
h = opt$colnames,
row.names = 1,
sep = opt$sep,
check.names = FALSE
)

# Get vector of target genes
genes <- unlist(strsplit(opt$genes, ","))

if (length(unique(genes %in% rownames(data.counts))) == 1) {
if (unique(genes %in% rownames(data.counts)) == FALSE)
stop("None of these genes are in your dataset: ", opt$genes)
if (unique(genes %in% rownames(data.counts)) == FALSE) {
stop("None of these genes are in your dataset: ", opt$genes)
}
}

logical_genes <- rownames(data.counts) %in% genes
Expand All @@ -116,10 +119,11 @@ signature.counts <- subset(data.counts, logical_genes)
signature.covariances <- as.data.frame(cov(t(signature.counts)))
signature.covariances <- cbind(gene = rownames(signature.covariances), signature.covariances)
write.table(signature.covariances,
file = opt$covariances,
quote = FALSE,
row.names = FALSE,
sep = "\t")
file = opt$covariances,
quote = FALSE,
row.names = FALSE,
sep = "\t"
)

# compute signature.correlations
signature.correlations <- as.data.frame(cor(t(signature.counts)))
Expand All @@ -128,15 +132,15 @@ write.table(signature.correlations, file = opt$correlations, quote = FALSE, row.

## Descriptive Statistics Function
descriptive_stats <- function(InputData) {
SummaryData <- data.frame(
mean = rowMeans(InputData),
SD = apply(InputData, 1, sd),
Variance = apply(InputData, 1, var),
Percentage_Detection = apply(InputData, 1, function(x, y = InputData) {
(sum(x != 0) / ncol(y)) * 100
})
)
return(SummaryData)
SummaryData <- data.frame(
mean = rowMeans(InputData),
SD = apply(InputData, 1, sd),
Variance = apply(InputData, 1, var),
Percentage_Detection = apply(InputData, 1, function(x, y = InputData) {
(sum(x != 0) / ncol(y)) * 100
})
)
return(SummaryData)
}

signature_stats <- descriptive_stats(signature.counts)
Expand All @@ -146,21 +150,21 @@ kept_genes <- signature_stats$Percentage_Detection >= opt$percentile_threshold

# Add warnings
if (length(unique(kept_genes)) > 1) {
cat(
"WARNINGS ! Following genes were removed from further analysis due to low gene expression :",
paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "),
"\n"
)
} else {
if (unique(kept_genes) == FALSE) {
stop(
"None of these genes are detected in ",
opt$percent,
"% of your cells: ",
paste(rownames(signature_stats), collapse = ", "),
". You can be less stringent thanks to --percent parameter."
cat(
"WARNINGS ! Following genes were removed from further analysis due to low gene expression :",
paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "),
"\n"
)
}
} else {
if (unique(kept_genes) == FALSE) {
stop(
"None of these genes are detected in ",
opt$percent,
"% of your cells: ",
paste(rownames(signature_stats), collapse = ", "),
". You can be less stringent thanks to --percent parameter."
)
}
}

# Remove genes poorly detected in the dataset
Expand All @@ -173,54 +177,63 @@ signature.counts[signature.counts == 0] <- 1
score <- apply(signature.counts, 2, geometric.mean) # geometric.mean requires psych

# Add results in signature_output
signature_output <- data.frame(cell = names(score),
score = score,
rate = ifelse(score > mean(score), "HIGH", "LOW"),
nGenes = colSums(data.counts != 0),
total_counts = colSums(data.counts))
signature_output <- data.frame(
cell = names(score),
score = score,
rate = ifelse(score > mean(score), "HIGH", "LOW"),
nGenes = colSums(data.counts != 0),
total_counts = colSums(data.counts)
)

# statistics of input genes, signature genes first lines
statistics.counts <- rbind(subset(data.counts, logical_genes),
subset(data.counts, !logical_genes))
statistics.counts <- rbind(
subset(data.counts, logical_genes),
subset(data.counts, !logical_genes)
)
statistics <- descriptive_stats(statistics.counts)
statistics <- cbind(gene = rownames(statistics), statistics)



# Re-arrange score matrix for plots
score <- data.frame(score = score,
order = rank(score, ties.method = "first"),
signature = signature_output$rate,
stringsAsFactors = FALSE)
score <- data.frame(
score = score,
order = rank(score, ties.method = "first"),
signature = signature_output$rate,
stringsAsFactors = FALSE
)

pdf(file = opt$pdf)
myplot <- ggplot(signature_output, aes(x = rate, y = score)) +
geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
scale_fill_manual(values = c("#ff0000", "#08661e")) +
geom_jitter(size = 0.2) + labs(y = "Score", x = "Rate") +
annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
color = "black", label = mean(score$score), parse = TRUE) +
labs(title = "Violin plots of Cell signature scores")
geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
scale_fill_manual(values = c("#ff0000", "#08661e")) +
geom_jitter(size = 0.2) +
labs(y = "Score", x = "Rate") +
annotate("text",
x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
color = "black", label = mean(score$score), parse = TRUE
) +
labs(title = "Violin plots of Cell signature scores")

print(myplot)
dev.off()

# Save file
write.table(
signature_output,
opt$output,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
signature_output,
opt$output,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)

write.table(
statistics,
opt$stats,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
statistics,
opt$stats,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)
5 changes: 4 additions & 1 deletion tools/gsc_signature_score/signature_score.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy0">
<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy1">
<description>in single cell RNAseq</description>
<xrefs>
<xref type="bio.tools">galaxy_single_cell_suite</xref>
</xrefs>
<requirements>
<requirement type="package" version="1.7.3">r-optparse</requirement>
<requirement type="package" version="3.4.4">r-ggplot2</requirement>
Expand Down

0 comments on commit ae36c63

Please sign in to comment.