Skip to content

Commit

Permalink
Merge pull request #710 from ARTbio/gsc_scran_normalize
Browse files Browse the repository at this point in the history
update Gsc_scran_normalize
  • Loading branch information
drosofff authored Nov 7, 2024
2 parents 9286176 + 0d1ea42 commit 9ab8243
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 60 deletions.
1 change: 1 addition & 0 deletions tools/gsc_scran_normalize/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ long_description: |
Normalizes raw counts expression matrix using deconvolution size factors
categories:
- Transcriptomics
- Single Cell
homepage_url: http://artbio.fr
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_scran_normalize
toolshed:
Expand Down
109 changes: 55 additions & 54 deletions tools/gsc_scran_normalize/scran-normalize.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
options(show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
options(
show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
warnings()
Expand All @@ -13,63 +14,63 @@ library(dynamicTreeCut)

# Arguments
option_list <- list(
make_option(
c("-d", "--data"),
default = NA,
type = "character",
help = "Input file that contains count values to transform"
),
make_option(
"--cluster",
default = FALSE,
action = "store_true",
type = "logical",
help = "Whether to calculate the size factor per cluster or on all cell"
),
make_option(
c("-m", "--method"),
default = "hclust",
type = "character",
help = "The clustering method to use for grouping cells into cluster : hclust or igraph [default : '%default' ]"
),
make_option(
"--size",
default = 100,
type = "integer",
help = "Minimal number of cells in each cluster : hclust or igraph [default : '%default' ]"
),
make_option(
c("-o", "--out"),
default = "res.tab",
type = "character",
help = "Output name [default : '%default' ]"
)
make_option(
c("-d", "--data"),
default = NA,
type = "character",
help = "Input file that contains count values to transform"
),
make_option(
"--cluster",
default = FALSE,
action = "store_true",
type = "logical",
help = "Whether to calculate the size factor per cluster or on all cell"
),
make_option(
c("-m", "--method"),
default = "hclust",
type = "character",
help = "The clustering method to use for grouping cells into cluster : hclust or igraph [default : '%default' ]"
),
make_option(
"--size",
default = 100,
type = "integer",
help = "Minimal number of cells in each cluster : hclust or igraph [default : '%default' ]"
),
make_option(
c("-o", "--out"),
default = "res.tab",
type = "character",
help = "Output name [default : '%default' ]"
)
)

opt <- parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))
args = commandArgs(trailingOnly = TRUE)
)


data <- read.table(
opt$data,
check.names = FALSE,
header = TRUE,
row.names = 1,
sep = "\t"
opt$data,
check.names = FALSE,
header = TRUE,
row.names = 1,
sep = "\t"
)

## Import data as a SingleCellExperiment object
sce <- SingleCellExperiment(list(counts = as.matrix(data)))

if (opt$cluster) {
clusters <- quickCluster(sce, min.size = opt$size, method = opt$method)
clusters <- quickCluster(sce, min.size = opt$size, method = opt$method)

## Compute sum factors
sce <- computeSumFactors(sce, cluster = clusters)
## Compute sum factors
sce <- computeSumFactors(sce, cluster = clusters)
} else {

## Compute sum factors
sce <- computeSumFactors(sce)
## Compute sum factors
sce <- computeSumFactors(sce)
}

sce <- logNormCounts(sce)
Expand All @@ -78,10 +79,10 @@ logcounts <- data.frame(genes = rownames(sce), round(logcounts(sce), digits = 5)


write.table(
logcounts,
opt$out,
col.names = TRUE,
row.names = FALSE,
quote = FALSE,
sep = "\t"
logcounts,
opt$out,
col.names = TRUE,
row.names = FALSE,
quote = FALSE,
sep = "\t"
)
16 changes: 10 additions & 6 deletions tools/gsc_scran_normalize/scran_normalize.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<tool id="scran_normalize" name="scran_normalize" version="1.28.1+galaxy0">
<tool id="scran_normalize" name="scran_normalize" version="1.28.1+galaxy1">
<description>Normalize raw counts expression values using deconvolution size factors</description>
<xrefs>
<xref type="bio.tools">galaxy_single_cell_suite</xref>
</xrefs>
<requirements>
<requirement type="package" version="1.28.1">bioconductor-scran</requirement>
<requirement type="package" version="1.63_1">r-dynamictreecut</requirement>
Expand Down Expand Up @@ -71,18 +74,19 @@ expression across the majority of genes represents some technical bias that shou

Cell-specific biases are normalized using the computeSumFactors method, which implements the
deconvolution strategy for scaling normalization (A. T. Lun, Bach, and Marioni 2016). It creates a reference :
- if no clustering step : the average count of all transcriptomes
- if you choose to cluster your cells : the average count of each cluster.

- if no clustering step : the average count of all transcriptomes
- if you choose to cluster your cells : the average count of each cluster.

It then pools cells and sums their expression profiles. The size factor of each pool is defined as the median ratio
between the count sums and the average across all genes. Finally, it builds a linear system from the size factors of
multiple pools of cells and solves it (deconvolution method) to obtain a size factor for each cell.

You can apply this method within cell clusters instead of on your whole set of cells by using quickCluster.
It defines clusters using distances based on Spearman correlation of counts between cells; two methods are available:

- *hclust* : hierarchical clustering on the distance matrix and dynamic tree cut.
- *igraph* : constructs a Shared Nearest Neighbor graph (SNN) on the distance matrix and identifies highly connected communities.

- *hclust* : hierarchical clustering on the distance matrix and dynamic tree cut.
- *igraph* : constructs a Shared Nearest Neighbor graph (SNN) on the distance matrix and identifies highly connected communities.

Note: First header row must NOT start with a '#' comment character

Expand Down

0 comments on commit 9ab8243

Please sign in to comment.