diff --git a/acanthophis/template/workflow/config.schema.yml b/acanthophis/template/workflow/config.schema.yml index 2121aec..a6a3f17 100644 --- a/acanthophis/template/workflow/config.schema.yml +++ b/acanthophis/template/workflow/config.schema.yml @@ -32,10 +32,10 @@ properties: additionalProperties: type: object properties: - nodes: - type: string - fmi: + dir: type: string + bracken: + type: integer required: - dir - bracken diff --git a/acanthophis/template/workflow/rules/align.rules b/acanthophis/template/workflow/rules/align.rules index 4eed406..a0abd95 100644 --- a/acanthophis/template/workflow/rules/align.rules +++ b/acanthophis/template/workflow/rules/align.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -22,6 +22,7 @@ rule ngmap_idx: L("{path}-index.log"), resources: **rule_resources(config, "ngmap_idx", runtime=40, mem_gb=2, cores=1) conda: "envs/align.yml" + container: "docker://ghcr.io/kdm9/align:latest" shell: "( ngm" " -r {input.ref}" @@ -39,6 +40,7 @@ rule ngmap: resources: **rule_resources(config, "ngmap", runtime=240, mem_gb=16, cores=8) params: sensitivity=config["tool_settings"]["ngm"]["sensitivity"], + container: "docker://ghcr.io/kdm9/align:latest" conda: "envs/align.yml" shell: "( ngm" @@ -66,6 +68,7 @@ rule bwaidx: R("{path}.pac"), R("{path}.sa"), conda: "envs/align.yml" + container: "docker://ghcr.io/kdm9/align:latest" log: L("{path}_index.log"), resources: **rule_resources(config, "bwaidx", runtime=20, mem_gb=8) shell: @@ -85,6 +88,7 @@ rule bwamem: bam=temp(T("alignments/byrun.raw/bwa/{ref}/{run}~{lib}~{sample}.bam")), log: L("alignments/byrun.raw/bwa/{ref}/{run}~{lib}~{sample}.bam.log") resources: **rule_resources(config, "bwamem", runtime=240, mem_gb=10, cores=8) + container: "docker://ghcr.io/kdm9/align:latest" conda: "envs/align.yml" shell: "( bwa mem" @@ -112,6 +116,7 @@ rule bam_merge_markdups_sort: resources: **rule_resources(config, "bam_merge_markdups_sort", runtime=240, mem_gb=16, disk_gb=50, cores=8) log: L("alignments/samples/{aligner}~{ref}~{sample}.bam.log") conda: "envs/align.yml" + container: "docker://ghcr.io/kdm9/align:latest" priority: 2 params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), @@ -169,6 +174,7 @@ rule mergebam_set: log: L("alignments/sets/{aligner}~{ref}~{sampleset}.bam.log"), resources: **rule_resources(config, "mergebam_set", runtime=2880, mem_gb=16, disk_gb=1000, cores=64) + container: "docker://ghcr.io/kdm9/align:latest" conda: "envs/align.yml" params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), @@ -195,6 +201,7 @@ rule bamstat_sample: L("alignments/bamstats/sample/{aligner}~{ref}~{sample}.samtools.stats.log") resources: **rule_resources(config, "bamstat_sample", runtime=120, mem_gb=4, cores=1) conda: "envs/align.yml" + container: "docker://ghcr.io/kdm9/align:latest" shell: "(samtools stats -i 5000 -x {input} >{output}) >{log} 2>&1" @@ -208,6 +215,7 @@ rule multiqc_samstats: log=L("stats/multiqc/bamstats_{aligner}~{ref}~{sampleset}_multiqc.log"), resources: **rule_resources(config, "multiqc_samstats", runtime=30, mem_gb=2, cores=1) conda: "envs/qcstats.yml" + container: "docker://multiqc/multiqc:v1.20" shell: "multiqc" " --no-megaqc-upload" @@ -251,6 +259,7 @@ rule multiqc_qualimap: log=L("stats/multiqc/qualimap_{aligner}~{ref}~{sampleset}_multiqc.log"), resources: **rule_resources(config, "multiqc_qualimap", runtime=30, mem_gb=2, cores=1) conda: "envs/qcstats.yml" + container: "docker://multiqc/multiqc:v1.20" shell: "multiqc" " --no-megaqc-upload" @@ -276,6 +285,7 @@ rule extract_unmapped: L("alignments/unmapped_reads/{aligner}~{ref}~{sample}.fastq.gz.log"), resources: **rule_resources(config, "extract_unmapped", runtime=120, mem_gb=1, cores=8) conda: "envs/align.yml" + container: "docker://ghcr.io/kdm9/align:latest" params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), shell: diff --git a/acanthophis/template/workflow/rules/base.rules b/acanthophis/template/workflow/rules/base.rules index f3c4a62..5a4a63f 100644 --- a/acanthophis/template/workflow/rules/base.rules +++ b/acanthophis/template/workflow/rules/base.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -101,12 +101,11 @@ def parse_metadata(s2rl_file): if s2rl_file.endswith(".tsv"): dialect = "excel-tab" for run in csv.DictReader(fh, dialect=dialect): - if not run["library"] or run["library"].lower().startswith("blank"): - # Skip blanks - continue if run.get("include", "Y").upper() != "Y" or run.get("exclude", "N").upper() == "Y": # Remove non-sequenced ones continue + if run.get("exclude_why", ""): + continue meta.append({k.lower(): v for k, v in run.items()}) return meta @@ -119,6 +118,7 @@ def make_runlib2samp(rl2s_meta): samp = run["sample"] rl2s[rl] = samp s2rl[samp].append(rl) + print(f"Parsed {len(rl2s)} run-libs from {len(s2rl)} samples") return dict(rl2s), dict(s2rl) diff --git a/acanthophis/template/workflow/rules/deepvariant.rules b/acanthophis/template/workflow/rules/deepvariant.rules index b50e3d0..a3e353c 100644 --- a/acanthophis/template/workflow/rules/deepvariant.rules +++ b/acanthophis/template/workflow/rules/deepvariant.rules @@ -1,3 +1,13 @@ +# These rules are part of Acanthophis. See https://github.com/kdm9/Acanthophis. +# This file *could* be modified, but then be careful when you update them. And +# please, if you find a bug, raise an issue on github so the fix gets shared +# with everyone. +# +# Copyright 2020-2024 Kevin Murray/Gekkonid Consulting +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at http://mozilla.org/MPL/2.0/. rule deepvariant_gvcf: input: @@ -17,7 +27,7 @@ rule deepvariant_gvcf: model=lambda wc: config["tool_settings"]["varcall"].get("deepvariant_model", "WGS"), extra="", shadow: "shallow" - resources: **rule_resources(config, "deepvariant_gvcf", runtime=600, mem_gb=96, cores=48, disk_mb=400_000) + resources: **rule_resources(config, "deepvariant_gvcf", runtime=600, mem_gb=96, cores=32, disk_mb=400_000) shell: "( /opt/deepvariant/bin/run_deepvariant" " --model_type={params.model}" @@ -29,29 +39,6 @@ rule deepvariant_gvcf: " --intermediate_results_dir=$TMPDIR" " --num_shards={threads}" ") &> {log}" - #"( mkdir -p {params.tmp_dir}" - #" && dv_make_examples.py" - #" --cores {threads}" - #" --ref {input.ref}" - #" --reads {input.bam}" - #" --sample {wildcards.sample}" - #" --examples {params.tmp_dir}" - #" --logdir {params.tmp_dir}" - #" --gvcf {params.tmp_dir}" - #" {params.extra}" - #" && dv_call_variants.py" - #" --cores {threads}" - #" --outfile {params.tmp_dir}/{wc.sample}.calls" - #" --sample {wildcards.sample} " - #" --examples {params.tmp_dir}" - #" --model {params.model}" - #"&& dv_postprocess_variants.py " - #" --ref {input.ref} " - #" --gvcf_infile {params.tmp_dir}/{wc.sample}.gvcf.tfrecord@{threads}.gz" - #" --gvcf_outfile {output.gvcf} " - #" --infile {params.tmp_dir}/{wc.sample}.calls" - #" --outfile {output.vcf}" - #") &> {log}" localrules: glnexus_fofn @@ -79,10 +66,10 @@ rule glnexus_call: T("deepvariant/{aligner}~{ref}~{sampleset}.vcf.gz.log"), conda: "envs/glnexus.yml", - #container: - # "docker://ghcr.io/dnanexus-rnd/glnexus:v1.4.1" + container: + "docker://ghcr.io/kdm9/glnexus-bcftools:latest" shadow: "shallow" - resources: **rule_resources(config, "glnexus_call", runtime=180, mem_gb=128, cores=128) + resources: **rule_resources(config, "glnexus_call", runtime=180, mem_gb=512, cores=128) shell: "( glnexus_cli" " --config DeepVariant" diff --git a/acanthophis/template/workflow/rules/denovo.rules b/acanthophis/template/workflow/rules/denovo.rules index 2d6efb2..9ee3947 100644 --- a/acanthophis/template/workflow/rules/denovo.rules +++ b/acanthophis/template/workflow/rules/denovo.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -65,6 +65,7 @@ rule mash_sketch_set: log: L("mash/{set}~k{ksize}~s{sketchsize}.sketch.msh.log") resources: **rule_resources(config, "mash_sketch_set", runtime=2880, mem_gb=16, cores=48) conda: "envs/mash.yml" + container: "docker://ghcr.io/kdm9/mash:latest" shell: " mash sketch" " -k {wildcards.ksize}" @@ -84,6 +85,7 @@ rule mash_dist_set: L("mash/{set}~k{ksize}~s{sketchsize}.dist.log") resources: **rule_resources(config, "mash_dist_set", runtime=2880, mem_gb=16, cores=48) conda: "envs/mash.yml" + container: "docker://ghcr.io/kdm9/mash:latest" shell: "mash dist" " -p {threads}" diff --git a/acanthophis/template/workflow/rules/metagenome.rules b/acanthophis/template/workflow/rules/metagenome.rules index 9396c01..73ea965 100644 --- a/acanthophis/template/workflow/rules/metagenome.rules +++ b/acanthophis/template/workflow/rules/metagenome.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -53,12 +53,13 @@ rule diamondx_reads2db: db=lambda wc: R(config["data_paths"]["diamond"][wc.db]), reads=diamondx_reads2db_input, output: - tsv=P("metagenome/diamondx/{type}/{sample}~{db}.tsv.xz"), + tsv=P("metagenome/diamondx/{type}/{sample}~{db}.tsv.zstd"), log: L("metagenome/diamondx/{type}/{sample}~{db}.tsv.log"), benchmark: L("metagenome/diamondx/{type}/{sample}~{db}.tsv.bench.csv"), conda: "envs/diamond.yml" + container: "docker://ghcr.io/kdm9/acanthophis-diamond:latest" resources: **rule_resources(config, "diamondx_reads2db", runtime=7200, mem_gb=72, disk_gb=5, cores=24) shell: 'T=/tmp/holopipe_$RANDOM; mkdir -p $T; trap "rm -rf $T" INT EXIT TERM;' @@ -73,7 +74,7 @@ rule diamondx_reads2db: " --index-chunks 4" " --tmpdir $T" " --ignore-warnings" - " --out >(xz -T{threads} >{output.tsv})" + " --out >(zstd -T{threads} >{output.tsv})" " &> {log}" rule humann: @@ -196,6 +197,7 @@ rule plass_quant_diamond: L("metagenome/plass/{type}/{samplelike}~renamed.faa.quant.blast.tsv.log"), benchmark: L("metagenome/plass/{type}/{samplelike}~renamed.faa.quant.blast.tsv.bench.csv"), conda: "envs/diamond.yml" + container: "docker://ghcr.io/kdm9/acanthophis-diamond:latest" resources: **rule_resources(config, "plass_quant_diamond", runtime=1440, mem_gb=90, disk_gb=16, cores=32) shell: "diamond blastx" @@ -220,6 +222,7 @@ rule plass_diamond: L("metagenome/plass/{path}.{db}.blasttab.log") benchmark: P("metagenome/plass/{path}.{db}.blasttab.bench.csv") conda: "envs/diamond.yml" + container: "docker://ghcr.io/kdm9/acanthophis-diamond:latest" resources: **rule_resources(config, "plass_diamond", runtime=1440, mem_gb=90, disk_gb=16, cores=32) shell: "diamond blastp" @@ -298,7 +301,7 @@ rule all_megahit: rule all_diamondx: input: - [P(f"metagenome/diamondx/{type}/{sample}~{db}.tsv.xz") + [P(f"metagenome/diamondx/{type}/{sample}~{db}.tsv.zstd") for sampleset in config["samplesets"] for sample in config["SAMPLESETS"][sampleset] for type in config["samplesets"][sampleset].get("diamondx", {}).get("types", []) diff --git a/acanthophis/template/workflow/rules/reads.rules b/acanthophis/template/workflow/rules/reads.rules index a118c20..076126b 100644 --- a/acanthophis/template/workflow/rules/reads.rules +++ b/acanthophis/template/workflow/rules/reads.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -33,6 +33,7 @@ rule qcreads_paired_il: maxqualval=lambda wc: _qcparam(wc, "maxqualval"), ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), conda: "envs/reads.yml" + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" shell: "( AdapterRemoval" " --file1 {input.reads}" @@ -68,6 +69,7 @@ rule qcreads_paired_r12: maxqualval=lambda wc: _qcparam(wc, "maxqualval"), ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), conda: "envs/reads.yml" + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" shell: "( AdapterRemoval" " --file1 {input.r1}" @@ -104,6 +106,7 @@ rule qcreads_se: maxqualval=lambda wc: _qcparam(wc, "maxqualval"), ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), conda: "envs/reads.yml" + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" shell: "( AdapterRemoval" " --file1 {input.se}" @@ -133,6 +136,7 @@ rule merge_qcd_reads: L("reads/runs/{run}~{lib}.fastq.gz.log"), resources: **rule_resources(config, "merge_qcd_reads", runtime=30, mem_gb=1, disk_gb=1, cores=1) conda: "envs/reads.yml" + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" shell: "(cat {input} >{output} ) >{log} 2>&1" @@ -145,6 +149,7 @@ rule read_count_librun_indiv: log: L("stats/reads/readnum_librun/{run}~{lib}.tsv.log"), conda: "envs/reads.yml" + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" resources: **rule_resources(config, "read_count_librun_indiv", runtime=10, mem_gb=1, disk_gb=1) shell: "( seqhax stats" @@ -206,6 +211,7 @@ rule split_pair_sample: L("reads/samples/{sample}_split.log"), resources: **rule_resources(config, "split_pair_sample", runtime=30, mem_gb=1, disk_gb=1, cores=8) conda: "envs/reads.yml" + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), shell: @@ -236,6 +242,7 @@ rule fastqc_preqc: fqczip=P("stats/fastqc/preqc/{run}~{lib}_fastqc.zip"), log: L("stats/fastqc/preqc/{run}~{lib}_fastqc.log"), resources: **rule_resources(config, "fastqc_preqc", runtime=30, mem_gb=1, cores=1) + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" conda: "envs/qcstats.yml" shell: "(T=$(mktemp -d);" @@ -256,6 +263,7 @@ rule fastqc_postqc: fqczip=P("stats/fastqc/postqc/{run}~{lib}_fastqc.zip"), log: L("stats/fastqc/postqc/{run}~{lib}_fastqc.zip.log"), resources: **rule_resources(config, "fastqc_postqc", runtime=30, mem_gb=1, cores=1) + container: "docker://ghcr.io/kdm9/acanthophis-qc:latest" conda: "envs/qcstats.yml" shell: "set -x; (T=$(mktemp -d);" @@ -279,6 +287,7 @@ rule multiqc_fastqc: log=L("stats/multiqc/reads-{prepost}~{sampleset}_multiqc.log"), resources: **rule_resources(config, "multiqc_fastqc", runtime=30, mem_gb=2) conda: "envs/qcstats.yml" + container: "docker://multiqc/multiqc:v1.20" shell: "multiqc" " --no-megaqc-upload" diff --git a/acanthophis/template/workflow/rules/sampleset.rules b/acanthophis/template/workflow/rules/sampleset.rules index 8df82b6..8143089 100644 --- a/acanthophis/template/workflow/rules/sampleset.rules +++ b/acanthophis/template/workflow/rules/sampleset.rules @@ -1,3 +1,14 @@ +# These rules are part of Acanthophis. See https://github.com/kdm9/Acanthophis. +# This file *could* be modified, but then be careful when you update them. And +# please, if you find a bug, raise an issue on github so the fix gets shared +# with everyone. +# +# Copyright 2020-2024 Kevin Murray/Gekkonid Consulting +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at http://mozilla.org/MPL/2.0/. + def PL(fns): ret = P(fns) @@ -16,7 +27,7 @@ def all_sampleset_files(wc): if config["samplesets"][wc.sampleset].get("varcall", {}).get("snpeff", False): print(" - snpeff") everything.update( - PL(expand("variants/annotated/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}~snpEff.vcf.gz", + PL(expand("variants/annotated/{caller}~{aligner}~{ref}~{sampleset}~{filter}~snpEff.vcf.gz", caller=config["samplesets"][wc.sampleset]["varcall"]["callers"], aligner=config["samplesets"][wc.sampleset]["varcall"]["aligners"], ref=config["samplesets"][wc.sampleset]["varcall"]["refs"], @@ -27,16 +38,24 @@ def all_sampleset_files(wc): if "varcall" in config["samplesets"][wc.sampleset]: print(" - varcall") everything.update( - PL(expand("variants/final/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}.{ext}", + PL(expand("variants/final/{caller}~{aligner}~{ref}~{sampleset}~{filter}.{ext}", ext=["bcf", "bcf.csi", "vcf.gz", "vcf.gz.csi", "vcf.gz.stats"] if config["tool_settings"].get("varcall", {}).get("make_bcfs", False) else ["vcf.gz", "vcf.gz.csi", "vcf.gz.stats"], - caller=config["samplesets"][wc.sampleset]["varcall"]["callers"], + caller=filter(lambda x: x in ["mpileup", "freebayes"], config["samplesets"][wc.sampleset]["varcall"]["callers"]), aligner=config["samplesets"][wc.sampleset]["varcall"]["aligners"], ref=config["samplesets"][wc.sampleset]["varcall"]["refs"], filter=config["samplesets"][wc.sampleset]["varcall"]["filters"], sampleset=wc.sampleset)) ) + if "deepvariant" in config["samplesets"][wc.sampleset].get("varcall", {}).get("callers", []): + everything.update( + TL(expand("deepvariant/{aligner}~{ref}~{sampleset}.vcf.gz", + aligner=config["samplesets"][wc.sampleset]["varcall"]["aligners"], + ref=config["samplesets"][wc.sampleset]["varcall"]["refs"], + sampleset=wc.sampleset)) + ) + if "graftm" in config["samplesets"][wc.sampleset]: print(" - graftm") everything.update( @@ -176,6 +195,8 @@ def all_sampleset_files(wc): aligner=config["samplesets"][wc.sampleset].get("align", {}).get("aligners", []), sampleset=wc.sampleset)) ) + if config["samplesets"][wc.sampleset].get("align", {}).get("qualimap", False): + print(" - qualimap") everything.update( PL(expand("stats/multiqc/qualimap_{aligner}~{ref}~{sampleset}_multiqc.html", ref=config["samplesets"][wc.sampleset].get("align", {}).get("references", []), diff --git a/acanthophis/template/workflow/rules/taxonid.rules b/acanthophis/template/workflow/rules/taxonid.rules index e30bd44..efe414e 100644 --- a/acanthophis/template/workflow/rules/taxonid.rules +++ b/acanthophis/template/workflow/rules/taxonid.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -26,6 +26,7 @@ rule kraken_noreads: log: L("taxonid/kraken/{db}~{sample}.log"), benchmark: L("taxonid/kraken/{db}~{sample}.bench.csv"), resources: **rule_resources(config, "kraken_noreads", runtime=90, mem_gb=100, disk_gb=24, cores=4) + container: "docker://ghcr.io/kdm9/kraken2:latest" conda: "envs/kraken.yml" params: ziplevel=int(config.get("tool_settings", {}).get('ziplevel', 6)) + 2, @@ -56,6 +57,7 @@ rule kraken_reads: benchmark: L("taxonid/kraken/{db}~{sample}.bench.csv"), resources: **rule_resources(config, "kraken_reads", runtime=90, mem_gb=100, disk_gb=24, cores=4) conda: "envs/kraken.yml" + container: "docker://ghcr.io/kdm9/kraken2:latest" params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), shell: @@ -83,6 +85,7 @@ rule bracken: benchmark: L("taxonid/bracken/{db}~{sample}~k{len}.bench.csv"), resources: **rule_resources(config, "bracken", runtime=10, mem_gb=20, disk_gb=24, cores=1) conda: "envs/kraken.yml" + container: "docker://ghcr.io/kdm9/kraken2:latest" shell: "bracken" " -d {input.db}" @@ -106,6 +109,7 @@ rule multiqc_kraken: log: L("stats/multiqc/kraken_{db}~{sampleset}_multiqc.log"), benchmark: L("stats/multiqc/kraken_{db}~{sampleset}_multiqc.bench.csv"), conda: "envs/qcstats.yml" + container: "docker://multiqc/multiqc:v1.20" resources: **rule_resources(config, "multiqc_kraken", runtime=30, mem_gb=2) shell: "multiqc" diff --git a/acanthophis/template/workflow/rules/varcall.rules b/acanthophis/template/workflow/rules/varcall.rules index 2e53622..1f6d717 100644 --- a/acanthophis/template/workflow/rules/varcall.rules +++ b/acanthophis/template/workflow/rules/varcall.rules @@ -3,7 +3,7 @@ # please, if you find a bug, raise an issue on github so the fix gets shared # with everyone. # -# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting +# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting # # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can @@ -118,6 +118,7 @@ rule mpileup: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), resources: **rule_resources(config, "mpileup", runtime=120, mem_gb=4, cores=1) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" group: "varcall_oneregion" shell: "( bcftools mpileup" @@ -168,6 +169,7 @@ rule freebayes: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), resources: **rule_resources(config, "freebayes", runtime=240, mem_gb=8, cores=1) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" shell: "( freebayes" " --theta {params.theta}" @@ -199,14 +201,15 @@ rule bcffilter: ref=lambda wc: R(config["data_paths"]["references"][wc.ref]["fasta"], keep_local=True), output: # Not a pipe! can't run all regions separately if this is a pipe into merge - bcf=temp(T("variants/filter_split/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}/{region}.bcf")), + bcf=temp(T("variants/filter_split/{caller}~{aligner}~{ref}~{sampleset}~{filter}/{region}.bcf")), log: - L("variants/filter_split/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}/{region}.bcf.log"), + L("variants/filter_split/{caller}~{aligner}~{ref}~{sampleset}~{filter}/{region}.bcf.log"), params: filtarg=lambda wc: config["tool_settings"]["varcall"]["filters"][wc.filter].replace('\n', ' ') group: "varcall_oneregion" resources: **rule_resources(config, "bcffilter", runtime=120, mem_gb=4, cores=1) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" shell: "( bcftools norm" " --fasta-ref {input.ref}" @@ -228,13 +231,14 @@ rule bcffilter: rule premergevariantidx: input: - T("variants/filter_split/{path}") + T("variants/{stage}_split/{path}") output: - temp(T("variants/filter_split/{path}.csi")) + temp(T("variants/{stage}_split/{path}.csi")) log: - T("variants/filter_split/{path}.csi.log") + T("variants/{stage}_split/{path}.csi.log") resources: **rule_resources(config, "variantidx", runtime=720, mem_gb=8, cores=1) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" group: "varcall_oneregion" shell: "bcftools index -f {input}" @@ -265,7 +269,7 @@ def get_regions(wc, chunked=False): regions = [] with open(regbed) as fh: for line in fh: - region_chr, region_start, region_stop, _ = line.rstrip("\n").split("\t") + region_chr, region_start, region_stop, = line.rstrip("\n").split("\t")[:3] region_start = int(region_start) + 1 regions.append(f"{region_chr}:{region_start}-{region_stop}") if chunked: @@ -275,20 +279,25 @@ def get_regions(wc, chunked=False): rule bcfmerge2group: input: - bcf=lambda wc: expand(T("variants/filter_split/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}/{region}.bcf"), - caller=wc.caller, aligner=wc.aligner, ref=wc.ref, sampleset=wc.sampleset, filter=wc.filter, + bcf=lambda wc: expand(T("variants/{stage}_split/{caller}~{aligner}~{ref}~{sampleset}{filter}/{region}.bcf"), + caller=wc.caller, aligner=wc.aligner, ref=wc.ref, sampleset=wc.sampleset, + filter=(f"~{wc.filter}" if wc.filter != "raw" else ""), + stage=("filter" if wc.filter != "raw" else "raw"), region=get_regions(wc, chunked=True)[wc.group]), - bcfi=lambda wc: expand(T("variants/filter_split/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}/{region}.bcf.csi"), - caller=wc.caller, aligner=wc.aligner, ref=wc.ref, sampleset=wc.sampleset, filter=wc.filter, + bcfi=lambda wc: expand(T("variants/{stage}_split/{caller}~{aligner}~{ref}~{sampleset}{filter}/{region}.bcf.csi"), + caller=wc.caller, aligner=wc.aligner, ref=wc.ref, sampleset=wc.sampleset, + filter=(f"~{wc.filter}" if wc.filter != "raw" else ""), + stage=("filter" if wc.filter != "raw" else "raw"), region=get_regions(wc, chunked=True)[wc.group]), regbed=lambda wc: config["samplesets"][wc.sampleset].get("varcall", {}).get("region_beds", {}).get(wc.ref, {}).get(wc.caller, []) output: - bcf=T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}~{group}.bcf"), - bcfi=T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}~{group}.bcf.csi"), + bcf=T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~{filter}~{group}.bcf"), + bcfi=T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~{filter}~{group}.bcf.csi"), log: - T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}~{group}.bcf.log"), + T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~{filter}~{group}.bcf.log"), resources: **rule_resources(config, "bcfmerge2group", runtime=1440, mem_gb=12, cores=4) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" params: shell: "( bcftools concat" @@ -304,16 +313,17 @@ rule bcfmerge2group: rule bcfmerge: input: - bcf=lambda wc: expand(T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}~{group}.bcf"), + bcf=lambda wc: expand(T("variants/group_merged/{caller}~{aligner}~{ref}~{sampleset}~{filter}~{group}.bcf"), caller=wc.caller, aligner=wc.aligner, ref=wc.ref, sampleset=wc.sampleset, filter=wc.filter, group=get_regions(wc, chunked=True)), regbed=lambda wc: config["samplesets"][wc.sampleset].get("varcall", {}).get("region_beds", {}).get(wc.ref, {}).get(wc.caller, []) output: - vcf=P("variants/final/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}.vcf.gz"), + vcf=P("variants/final/{caller}~{aligner}~{ref}~{sampleset}~{filter}.vcf.gz"), log: - L("variants/final/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}.vcf.gz.log"), + L("variants/final/{caller}~{aligner}~{ref}~{sampleset}~{filter}.vcf.gz.log"), resources: **rule_resources(config, "bcfmerge", runtime=1440, mem_gb=12, cores=64) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), shell: @@ -336,6 +346,7 @@ rule vcfstats: P("variants/{path}.vcf.gz.stats") log: L("variants/{path}.vcf.gz.stats.log") conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" resources: **rule_resources(config, "vcfstats", runtime=720, mem_gb=4, cores=4) shell: "bcftools stats -s - -d 0,1000,1 --threads {threads} {input} >{output}" @@ -352,6 +363,7 @@ rule vcf2bcf: log: L("{path}.vcf.gz.log"), resources: **rule_resources(config, "vcf2bcf", runtime=720, mem_gb=8, cores=8) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" params: ziplevel=config.get("tool_settings", {}).get('ziplevel', 6), shell: @@ -370,6 +382,7 @@ rule finalvarianttbi: P("variants/final/{path}.vcf.gz.tbi"), resources: **rule_resources(config, "finalvarianttbi", runtime=720, mem_gb=8, cores=1) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" shell: "bcftools index -t -f {input}" @@ -381,6 +394,7 @@ rule finalvariantidx: P("variants/final/{path}.csi") resources: **rule_resources(config, "finalvariantidx", runtime=720, mem_gb=8, cores=1) conda: "envs/varcall.yml" + container: "docker://ghcr.io/kdm9/varcall:latest" shell: "bcftools index -f {input}" @@ -390,7 +404,7 @@ rule finalvariantidx: ####################################################################### rule all_filtered_variants: input: - [P(expand("variants/final/{caller}~{aligner}~{ref}~{sampleset}~filtered-{filter}.{ext}", + [P(expand("variants/final/{caller}~{aligner}~{ref}~{sampleset}~{filter}.{ext}", ext=["bcf", "bcf.csi", "vcf.gz", "vcf.gz.csi", "vcf.gz.stats"] if config["tool_settings"].get("varcall", {}).get("make_bcfs", False) else ["vcf.gz", "vcf.gz.csi", "vcf.gz.stats"], caller=filter(lambda x: x in ["mpileup", "freebayes"], config["samplesets"][sampleset]["varcall"]["callers"]),