Skip to content

Commit

Permalink
Replace SatuRn with DEXSeq (#47)
Browse files Browse the repository at this point in the history
* switch saturn for dexseq in differential_apa

* remove mentions to saturn and associated scripts

* add dexseq_apa to cluster.yaml
  • Loading branch information
SamBryce-Smith authored Apr 25, 2023
1 parent 66a77e7 commit aa03455
Show file tree
Hide file tree
Showing 14 changed files with 663 additions and 585 deletions.
33 changes: 14 additions & 19 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ if config["run_identification"] and config["use_precomputed_salmon_index"]:
sys.stderr.write("run_identification will be overridden and pre-provided salmon index, id and info tables will be used\n")


# If running differential usage analysis, the sample table must contain exactly
# two distinct conditions. The base/contrast keys and the contrast name are
# inferred from the order in which conditions appear in the sample table.
if config["run_differential"]:
    assert sample_tbl["condition"].nunique() == 2, f"condition column in sample table must only contain two distinct conditions, following n found - {sample_tbl['condition'].nunique()}"
    # First value (by position) in the sample table condition column = base key.
    # .iloc is used so the lookup is always positional: plain `series[0]` is a
    # label lookup when the index holds integers (and would raise KeyError on
    # the filtered series below, where the row labelled 0 has been dropped),
    # and is a deprecated positional fallback for other index types.
    BASE_KEY = sample_tbl["condition"].iloc[0]
    # First condition value that differs from the base key = contrast key
    CONTRAST_KEY = sample_tbl.loc[sample_tbl["condition"] != BASE_KEY, "condition"].iloc[0]
    CONTRAST_NAME = CONTRAST_KEY + "vs" + BASE_KEY
    sys.stderr.write(f"Inferred base key for condition - {BASE_KEY}\n")
    sys.stderr.write(f"Inferred contrast key for condition - {CONTRAST_KEY}\n")
    sys.stderr.write(f"Constructed contrast name - {CONTRAST_NAME}\n")


include: "rules/filter_gtf.smk"
include: "rules/stringtie.smk"
include: "rules/tx_filtering.smk"
Expand All @@ -60,15 +72,15 @@ include: "rules/differential_apa.smk"

# sys.stderr.write(OPTIONS + "\n")

localrules: all, gtf_list_by_condition, gtf_list_all_tpm_filtered, check_per_sample_mean_tpm_filtered
localrules: all, gtf_list_by_condition, gtf_list_all_tpm_filtered, check_per_sample_mean_tpm_filtered, make_formulas_txt

wildcard_constraints:
sample = "|".join(SAMPLES),
condition = "|".join(CONDITIONS)

rule all:
input:
rules.process_saturn_tbl.output.processed_tbl if config["run_differential"] else rules.tx_to_le_quant.output.ppau,
rules.process_dexseq_tbl.output if config["run_differential"] else rules.tx_to_le_quant.output.ppau,
rules.tx_to_le_quant.output.counts,
os.path.join(DAPA_SUBDIR,
"summarised_pas_quantification.tpm.tsv"),
Expand All @@ -78,20 +90,3 @@ rule all:
# sample=SAMPLES,
# ),



# def get_stringtie_assembled(sample, output_dir):
# '''
# Return path to target StringTie transcriptome assembly
#
# Want functionality to provide a range of parameter values in same pipeline
# and Snakemake's Paramspace docs aren't quite cutting it right now...
#
# If provide a list for given parameter, will perform assembly for each combo of values
# min_isoform_fraction_abundance (-f)
# min_junction_reads (-j)
# min_transcript_coverage (-c) (minimum reads per bp coverage)
# To be added: disable_end_trimming (-t), point-features (--ptf)
# '''
#
# if isinstance(list(), config["min_isoform_fraction_abundance"]):
5 changes: 5 additions & 0 deletions config/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,8 @@ tx_to_le_quant:
h_rt: 4:00:00
submission_string: " "

dexseq_apa:
h_vmem: 10G
tmem: 10G
submission_string: "-pe smp 2 -R y"
h_rt: 12:00:00
17 changes: 13 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ pre_stringtie_processing: "none"
# If False, pipeline jumps to generating a GTF of last exons from the reference input only
run_identification: True

# Whether to run differential usage analysis with SatuRn - bool
# Whether to run differential usage analysis with DEXSeq - bool
# If False, pipeline terminates after generating matrices of summarised quantification for each 'last exon isoform'
run_differential: True

Expand All @@ -79,7 +79,7 @@ use_provided_novel_les: False
# Whether to use a pre-computed salmon index to quantify samples. Useful if you wish to quantify multiple datasets with a common annotation and avoid unnecessary computation
# Note: If set to True, run_identification will be overridden
# Note: You will also need to provide the tx2le, tx2gene, le2gene and 'info' tables produced for the same run.
use_precomputed_salmon_index: True
use_precomputed_salmon_index: False


###
Expand Down Expand Up @@ -308,8 +308,17 @@ precomputed_info_tbl: "tests/dryruns/novel_ref_combined.info.tsv"
# Default: 10
min_mean_count: 10

# Number of threads to use for SatuRn analysis
saturn_threads: 2
# Minimum mean relative usage of an isoform in either base/contrast condition for it to be retained for differential usage analysis
# see https://doi.org/10.1186/s13059-015-0862-3
# Note: provide value in fractional terms (i.e. between 0 & 0.5)
min_relative_usage: 0.05


dexseq_formula_full: "~ sample_name + exon + condition:exon"
dexseq_formula_reduced: "~ sample_name + exon"

# Number of threads to use for DEXSeq analysis
dexseq_threads: 2

#####-----------------------------------------
# OUTPUT SUBDIRECTORY NAMES
Expand Down
Loading

0 comments on commit aa03455

Please sign in to comment.