From 311cb2ca97b08ae67a80c8765bd19d603cbf1fac Mon Sep 17 00:00:00 2001 From: drosofff Date: Thu, 7 Nov 2024 23:28:14 +0100 Subject: [PATCH 1/2] update gsc_signature_score tool --- tools/gsc_signature_score/.shed.yml | 1 + tools/gsc_signature_score/signature_score.xml | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/gsc_signature_score/.shed.yml b/tools/gsc_signature_score/.shed.yml index 662d3a4bc..c518b5562 100644 --- a/tools/gsc_signature_score/.shed.yml +++ b/tools/gsc_signature_score/.shed.yml @@ -6,6 +6,7 @@ long_description: | Compute signature scores from single cell RNAseq data categories: - Transcriptomics + - Single Cell homepage_url: http://artbio.fr remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_signature_score toolshed: diff --git a/tools/gsc_signature_score/signature_score.xml b/tools/gsc_signature_score/signature_score.xml index fe63c3c52..2c932a9a5 100644 --- a/tools/gsc_signature_score/signature_score.xml +++ b/tools/gsc_signature_score/signature_score.xml @@ -1,5 +1,8 @@ - + in single cell RNAseq + + galaxy_single_cell_suite + r-optparse r-ggplot2 From 3a8b8485c7b6927510c076ab9c544af2215032b6 Mon Sep 17 00:00:00 2001 From: Christophe Antoniewski Date: Thu, 7 Nov 2024 23:29:31 +0100 Subject: [PATCH 2/2] reindent R code --- tools/gsc_signature_score/signature_score.R | 277 ++++++++++---------- 1 file changed, 145 insertions(+), 132 deletions(-) diff --git a/tools/gsc_signature_score/signature_score.R b/tools/gsc_signature_score/signature_score.R index ccbd26dbc..d18dd842a 100644 --- a/tools/gsc_signature_score/signature_score.R +++ b/tools/gsc_signature_score/signature_score.R @@ -2,11 +2,12 @@ # Compute the signature score based on the geometric mean of the target gene expression # and split cells in 2 groups (high/low) using this signature score. -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") warnings() @@ -18,93 +19,95 @@ library(gridExtra) # Arguments option_list <- list( - make_option( - "--input", - default = NA, - type = "character", - help = "Input file that contains log2(CPM +1) values" - ), - make_option( - "--sep", - default = "\t", - type = "character", - help = "File separator [default : '%default' ]" - ), - make_option( - "--colnames", - default = TRUE, - type = "logical", - help = "Consider first line as header ? [default : '%default' ]" - ), - make_option( - "--genes", - default = NA, - type = "character", - help = "List of genes comma separated" - ), - make_option( - "--percentile_threshold", - default = 20, - type = "integer", - help = "detection threshold to keep a gene in signature set [default : '%default' ]" - ), - make_option( - "--output", - default = "./output.tab", - type = "character", - help = "Output path [default : '%default' ]" - ), - make_option( - "--stats", - default = "./statistics.tab", - type = "character", - help = "statistics path [default : '%default' ]" - ), - make_option( - "--correlations", - default = "./correlations.tab", - type = "character", - help = "Correlations between signature genes [default : '%default' ]" - ), - make_option( - "--covariances", - default = "./statistics.tab", - type = "character", - help = "Covariances between signature genes [default : '%default' ]" - ), - make_option( - "--pdf", - default = "~/output.pdf", - type = "character", - help = "pdf path [default : '%default' ]" - ) + make_option( + "--input", + default = NA, + type = "character", + help = "Input file that contains log2(CPM +1) values" + ), + make_option( + "--sep", + default = "\t", + type = "character", + help = "File separator [default : '%default' ]" + ), + make_option( + "--colnames", + default = TRUE, + type = "logical", + help = "Consider first line as header ? [default : '%default' ]" + ), + make_option( + "--genes", + default = NA, + type = "character", + help = "List of genes comma separated" + ), + make_option( + "--percentile_threshold", + default = 20, + type = "integer", + help = "detection threshold to keep a gene in signature set [default : '%default' ]" + ), + make_option( + "--output", + default = "./output.tab", + type = "character", + help = "Output path [default : '%default' ]" + ), + make_option( + "--stats", + default = "./statistics.tab", + type = "character", + help = "statistics path [default : '%default' ]" + ), + make_option( + "--correlations", + default = "./correlations.tab", + type = "character", + help = "Correlations between signature genes [default : '%default' ]" + ), + make_option( + "--covariances", + default = "./statistics.tab", + type = "character", + help = "Covariances between signature genes [default : '%default' ]" + ), + make_option( + "--pdf", + default = "~/output.pdf", + type = "character", + help = "pdf path [default : '%default' ]" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) if (opt$sep == "tab") { - opt$sep <- "\t" + opt$sep <- "\t" } if (opt$sep == "comma") { - opt$sep <- "," + opt$sep <- "," } # Take input data data.counts <- read.table( - opt$input, - h = opt$colnames, - row.names = 1, - sep = opt$sep, - check.names = FALSE + opt$input, + h = opt$colnames, + row.names = 1, + sep = opt$sep, + check.names = FALSE ) # Get vector of target genes genes <- unlist(strsplit(opt$genes, ",")) if (length(unique(genes %in% rownames(data.counts))) == 1) { - if (unique(genes %in% rownames(data.counts)) == FALSE) - stop("None of these genes are in your dataset: ", opt$genes) + if (unique(genes %in% rownames(data.counts)) == FALSE) { + stop("None of these genes are in your dataset: ", opt$genes) + } } logical_genes <- rownames(data.counts) %in% genes @@ -116,10 +119,11 @@ signature.counts <- subset(data.counts, logical_genes) signature.covariances <- as.data.frame(cov(t(signature.counts))) signature.covariances <- cbind(gene = rownames(signature.covariances), signature.covariances) write.table(signature.covariances, - file = opt$covariances, - quote = FALSE, - row.names = FALSE, - sep = "\t") + file = opt$covariances, + quote = FALSE, + row.names = FALSE, + sep = "\t" +) # compute signature.correlations signature.correlations <- as.data.frame(cor(t(signature.counts))) @@ -128,15 +132,15 @@ write.table(signature.correlations, file = opt$correlations, quote = FALSE, row. ## Descriptive Statistics Function descriptive_stats <- function(InputData) { - SummaryData <- data.frame( - mean = rowMeans(InputData), - SD = apply(InputData, 1, sd), - Variance = apply(InputData, 1, var), - Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { - (sum(x != 0) / ncol(y)) * 100 - }) - ) - return(SummaryData) + SummaryData <- data.frame( + mean = rowMeans(InputData), + SD = apply(InputData, 1, sd), + Variance = apply(InputData, 1, var), + Percentage_Detection = apply(InputData, 1, function(x, y = InputData) { + (sum(x != 0) / ncol(y)) * 100 + }) + ) + return(SummaryData) } signature_stats <- descriptive_stats(signature.counts) @@ -146,21 +150,21 @@ kept_genes <- signature_stats$Percentage_Detection >= opt$percentile_threshold # Add warnings if (length(unique(kept_genes)) > 1) { - cat( - "WARNINGS ! Following genes were removed from further analysis due to low gene expression :", - paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "), - "\n" - ) -} else { - if (unique(kept_genes) == FALSE) { - stop( - "None of these genes are detected in ", - opt$percent, - "% of your cells: ", - paste(rownames(signature_stats), collapse = ", "), - ". You can be less stringent thanks to --percent parameter." + cat( + "WARNINGS ! Following genes were removed from further analysis due to low gene expression :", + paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "), + "\n" ) - } +} else { + if (unique(kept_genes) == FALSE) { + stop( + "None of these genes are detected in ", + opt$percent, + "% of your cells: ", + paste(rownames(signature_stats), collapse = ", "), + ". You can be less stringent thanks to --percent parameter." + ) + } } # Remove genes poorly detected in the dataset @@ -173,54 +177,63 @@ signature.counts[signature.counts == 0] <- 1 score <- apply(signature.counts, 2, geometric.mean) # geometric.mean requires psych # Add results in signature_output -signature_output <- data.frame(cell = names(score), - score = score, - rate = ifelse(score > mean(score), "HIGH", "LOW"), - nGenes = colSums(data.counts != 0), - total_counts = colSums(data.counts)) +signature_output <- data.frame( + cell = names(score), + score = score, + rate = ifelse(score > mean(score), "HIGH", "LOW"), + nGenes = colSums(data.counts != 0), + total_counts = colSums(data.counts) +) # statistics of input genes, signature genes first lines -statistics.counts <- rbind(subset(data.counts, logical_genes), - subset(data.counts, !logical_genes)) +statistics.counts <- rbind( + subset(data.counts, logical_genes), + subset(data.counts, !logical_genes) +) statistics <- descriptive_stats(statistics.counts) statistics <- cbind(gene = rownames(statistics), statistics) # Re-arrange score matrix for plots -score <- data.frame(score = score, - order = rank(score, ties.method = "first"), - signature = signature_output$rate, - stringsAsFactors = FALSE) +score <- data.frame( + score = score, + order = rank(score, ties.method = "first"), + signature = signature_output$rate, + stringsAsFactors = FALSE +) pdf(file = opt$pdf) myplot <- ggplot(signature_output, aes(x = rate, y = score)) + - geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) + - geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") + - scale_fill_manual(values = c("#ff0000", "#08661e")) + - geom_jitter(size = 0.2) + labs(y = "Score", x = "Rate") + - annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5, - color = "black", label = mean(score$score), parse = TRUE) + - labs(title = "Violin plots of Cell signature scores") + geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) + + geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") + + scale_fill_manual(values = c("#ff0000", "#08661e")) + + geom_jitter(size = 0.2) + + labs(y = "Score", x = "Rate") + + annotate("text", + x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5, + color = "black", label = mean(score$score), parse = TRUE + ) + + labs(title = "Violin plots of Cell signature scores") print(myplot) dev.off() # Save file write.table( - signature_output, - opt$output, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + signature_output, + opt$output, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE ) write.table( - statistics, - opt$stats, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + statistics, + opt$stats, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE )