Skip to content

Commit

Permalink
allow for unfiltered variants; bump copyright, minor tweaks
Browse files (browse the repository at this point in the history)
  • Loading branch information
kdm9 committed Jul 3, 2024
1 parent bf15a39 commit f898be4
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 63 deletions.
6 changes: 3 additions & 3 deletions acanthophis/template/workflow/config.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ properties:
additionalProperties:
type: object
properties:
nodes:
type: string
fmi:
dir:
type: string
bracken:
type: integer
required:
- dir
- bracken
Expand Down
12 changes: 11 additions & 1 deletion acanthophis/template/workflow/rules/align.rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# please, if you find a bug, raise an issue on github so the fix gets shared
# with everyone.
#
# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting
# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
Expand All @@ -22,6 +22,7 @@ rule ngmap_idx:
L("{path}-index.log"),
resources: **rule_resources(config, "ngmap_idx", runtime=40, mem_gb=2, cores=1)
conda: "envs/align.yml"
container: "docker://ghcr.io/kdm9/align:latest"
shell:
"( ngm"
" -r {input.ref}"
Expand All @@ -39,6 +40,7 @@ rule ngmap:
resources: **rule_resources(config, "ngmap", runtime=240, mem_gb=16, cores=8)
params:
sensitivity=config["tool_settings"]["ngm"]["sensitivity"],
container: "docker://ghcr.io/kdm9/align:latest"
conda: "envs/align.yml"
shell:
"( ngm"
Expand Down Expand Up @@ -66,6 +68,7 @@ rule bwaidx:
R("{path}.pac"),
R("{path}.sa"),
conda: "envs/align.yml"
container: "docker://ghcr.io/kdm9/align:latest"
log: L("{path}_index.log"),
resources: **rule_resources(config, "bwaidx", runtime=20, mem_gb=8)
shell:
Expand All @@ -85,6 +88,7 @@ rule bwamem:
bam=temp(T("alignments/byrun.raw/bwa/{ref}/{run}~{lib}~{sample}.bam")),
log: L("alignments/byrun.raw/bwa/{ref}/{run}~{lib}~{sample}.bam.log")
resources: **rule_resources(config, "bwamem", runtime=240, mem_gb=10, cores=8)
container: "docker://ghcr.io/kdm9/align:latest"
conda: "envs/align.yml"
shell:
"( bwa mem"
Expand Down Expand Up @@ -112,6 +116,7 @@ rule bam_merge_markdups_sort:
resources: **rule_resources(config, "bam_merge_markdups_sort", runtime=240, mem_gb=16, disk_gb=50, cores=8)
log: L("alignments/samples/{aligner}~{ref}~{sample}.bam.log")
conda: "envs/align.yml"
container: "docker://ghcr.io/kdm9/align:latest"
priority: 2
params:
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
Expand Down Expand Up @@ -169,6 +174,7 @@ rule mergebam_set:
log:
L("alignments/sets/{aligner}~{ref}~{sampleset}.bam.log"),
resources: **rule_resources(config, "mergebam_set", runtime=2880, mem_gb=16, disk_gb=1000, cores=64)
container: "docker://ghcr.io/kdm9/align:latest"
conda: "envs/align.yml"
params:
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
Expand All @@ -195,6 +201,7 @@ rule bamstat_sample:
L("alignments/bamstats/sample/{aligner}~{ref}~{sample}.samtools.stats.log")
resources: **rule_resources(config, "bamstat_sample", runtime=120, mem_gb=4, cores=1)
conda: "envs/align.yml"
container: "docker://ghcr.io/kdm9/align:latest"
shell:
"(samtools stats -i 5000 -x {input} >{output}) >{log} 2>&1"

Expand All @@ -208,6 +215,7 @@ rule multiqc_samstats:
log=L("stats/multiqc/bamstats_{aligner}~{ref}~{sampleset}_multiqc.log"),
resources: **rule_resources(config, "multiqc_samstats", runtime=30, mem_gb=2, cores=1)
conda: "envs/qcstats.yml"
container: "docker://multiqc/multiqc:v1.20"
shell:
"multiqc"
" --no-megaqc-upload"
Expand Down Expand Up @@ -251,6 +259,7 @@ rule multiqc_qualimap:
log=L("stats/multiqc/qualimap_{aligner}~{ref}~{sampleset}_multiqc.log"),
resources: **rule_resources(config, "multiqc_qualimap", runtime=30, mem_gb=2, cores=1)
conda: "envs/qcstats.yml"
container: "docker://multiqc/multiqc:v1.20"
shell:
"multiqc"
" --no-megaqc-upload"
Expand All @@ -276,6 +285,7 @@ rule extract_unmapped:
L("alignments/unmapped_reads/{aligner}~{ref}~{sample}.fastq.gz.log"),
resources: **rule_resources(config, "extract_unmapped", runtime=120, mem_gb=1, cores=8)
conda: "envs/align.yml"
container: "docker://ghcr.io/kdm9/align:latest"
params:
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
shell:
Expand Down
8 changes: 4 additions & 4 deletions acanthophis/template/workflow/rules/base.rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# please, if you find a bug, raise an issue on github so the fix gets shared
# with everyone.
#
# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting
# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
Expand Down Expand Up @@ -101,12 +101,11 @@ def parse_metadata(s2rl_file):
if s2rl_file.endswith(".tsv"):
dialect = "excel-tab"
for run in csv.DictReader(fh, dialect=dialect):
if not run["library"] or run["library"].lower().startswith("blank"):
# Skip blanks
continue
if run.get("include", "Y").upper() != "Y" or run.get("exclude", "N").upper() == "Y":
# Remove non-sequenced ones
continue
if run.get("exclude_why", ""):
continue
meta.append({k.lower(): v for k, v in run.items()})
return meta

Expand All @@ -119,6 +118,7 @@ def make_runlib2samp(rl2s_meta):
samp = run["sample"]
rl2s[rl] = samp
s2rl[samp].append(rl)
print(f"Parsed {len(rl2s)} run-libs from {len(s2rl)} samples")
return dict(rl2s), dict(s2rl)


Expand Down
41 changes: 14 additions & 27 deletions acanthophis/template/workflow/rules/deepvariant.rules
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# These rules are part of Acanthophis. See https://github.com/kdm9/Acanthophis.
# This file *could* be modified, but then be careful when you update it. And
# please, if you find a bug, raise an issue on github so the fix gets shared
# with everyone.
#
# Copyright 2020-2024 Kevin Murray/Gekkonid Consulting
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at http://mozilla.org/MPL/2.0/.

rule deepvariant_gvcf:
input:
Expand All @@ -17,7 +27,7 @@ rule deepvariant_gvcf:
model=lambda wc: config["tool_settings"]["varcall"].get("deepvariant_model", "WGS"),
extra="",
shadow: "shallow"
resources: **rule_resources(config, "deepvariant_gvcf", runtime=600, mem_gb=96, cores=48, disk_mb=400_000)
resources: **rule_resources(config, "deepvariant_gvcf", runtime=600, mem_gb=96, cores=32, disk_mb=400_000)
shell:
"( /opt/deepvariant/bin/run_deepvariant"
" --model_type={params.model}"
Expand All @@ -29,29 +39,6 @@ rule deepvariant_gvcf:
" --intermediate_results_dir=$TMPDIR"
" --num_shards={threads}"
") &> {log}"
#"( mkdir -p {params.tmp_dir}"
#" && dv_make_examples.py"
#" --cores {threads}"
#" --ref {input.ref}"
#" --reads {input.bam}"
#" --sample {wildcards.sample}"
#" --examples {params.tmp_dir}"
#" --logdir {params.tmp_dir}"
#" --gvcf {params.tmp_dir}"
#" {params.extra}"
#" && dv_call_variants.py"
#" --cores {threads}"
#" --outfile {params.tmp_dir}/{wc.sample}.calls"
#" --sample {wildcards.sample} "
#" --examples {params.tmp_dir}"
#" --model {params.model}"
#"&& dv_postprocess_variants.py "
#" --ref {input.ref} "
#" --gvcf_infile {params.tmp_dir}/{wc.sample}.gvcf.tfrecord@{threads}.gz"
#" --gvcf_outfile {output.gvcf} "
#" --infile {params.tmp_dir}/{wc.sample}.calls"
#" --outfile {output.vcf}"
#") &> {log}"


localrules: glnexus_fofn
Expand Down Expand Up @@ -79,10 +66,10 @@ rule glnexus_call:
T("deepvariant/{aligner}~{ref}~{sampleset}.vcf.gz.log"),
conda:
"envs/glnexus.yml",
#container:
# "docker://ghcr.io/dnanexus-rnd/glnexus:v1.4.1"
container:
"docker://ghcr.io/kdm9/glnexus-bcftools:latest"
shadow: "shallow"
resources: **rule_resources(config, "glnexus_call", runtime=180, mem_gb=128, cores=128)
resources: **rule_resources(config, "glnexus_call", runtime=180, mem_gb=512, cores=128)
shell:
"( glnexus_cli"
" --config DeepVariant"
Expand Down
4 changes: 3 additions & 1 deletion acanthophis/template/workflow/rules/denovo.rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# please, if you find a bug, raise an issue on github so the fix gets shared
# with everyone.
#
# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting
# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
Expand Down Expand Up @@ -65,6 +65,7 @@ rule mash_sketch_set:
log: L("mash/{set}~k{ksize}~s{sketchsize}.sketch.msh.log")
resources: **rule_resources(config, "mash_sketch_set", runtime=2880, mem_gb=16, cores=48)
conda: "envs/mash.yml"
container: "docker://ghcr.io/kdm9/mash:latest"
shell:
" mash sketch"
" -k {wildcards.ksize}"
Expand All @@ -84,6 +85,7 @@ rule mash_dist_set:
L("mash/{set}~k{ksize}~s{sketchsize}.dist.log")
resources: **rule_resources(config, "mash_dist_set", runtime=2880, mem_gb=16, cores=48)
conda: "envs/mash.yml"
container: "docker://ghcr.io/kdm9/mash:latest"
shell:
"mash dist"
" -p {threads}"
Expand Down
11 changes: 7 additions & 4 deletions acanthophis/template/workflow/rules/metagenome.rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# please, if you find a bug, raise an issue on github so the fix gets shared
# with everyone.
#
# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting
# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
Expand Down Expand Up @@ -53,12 +53,13 @@ rule diamondx_reads2db:
db=lambda wc: R(config["data_paths"]["diamond"][wc.db]),
reads=diamondx_reads2db_input,
output:
tsv=P("metagenome/diamondx/{type}/{sample}~{db}.tsv.xz"),
tsv=P("metagenome/diamondx/{type}/{sample}~{db}.tsv.zstd"),
log:
L("metagenome/diamondx/{type}/{sample}~{db}.tsv.log"),
benchmark:
L("metagenome/diamondx/{type}/{sample}~{db}.tsv.bench.csv"),
conda: "envs/diamond.yml"
container: "docker://ghcr.io/kdm9/acanthophis-diamond:latest"
resources: **rule_resources(config, "diamondx_reads2db", runtime=7200, mem_gb=72, disk_gb=5, cores=24)
shell:
'T=/tmp/holopipe_$RANDOM; mkdir -p $T; trap "rm -rf $T" INT EXIT TERM;'
Expand All @@ -73,7 +74,7 @@ rule diamondx_reads2db:
" --index-chunks 4"
" --tmpdir $T"
" --ignore-warnings"
" --out >(xz -T{threads} >{output.tsv})"
" --out >(zstd -T{threads} >{output.tsv})"
" &> {log}"

rule humann:
Expand Down Expand Up @@ -196,6 +197,7 @@ rule plass_quant_diamond:
L("metagenome/plass/{type}/{samplelike}~renamed.faa.quant.blast.tsv.log"),
benchmark: L("metagenome/plass/{type}/{samplelike}~renamed.faa.quant.blast.tsv.bench.csv"),
conda: "envs/diamond.yml"
container: "docker://ghcr.io/kdm9/acanthophis-diamond:latest"
resources: **rule_resources(config, "plass_quant_diamond", runtime=1440, mem_gb=90, disk_gb=16, cores=32)
shell:
"diamond blastx"
Expand All @@ -220,6 +222,7 @@ rule plass_diamond:
L("metagenome/plass/{path}.{db}.blasttab.log")
benchmark: P("metagenome/plass/{path}.{db}.blasttab.bench.csv")
conda: "envs/diamond.yml"
container: "docker://ghcr.io/kdm9/acanthophis-diamond:latest"
resources: **rule_resources(config, "plass_diamond", runtime=1440, mem_gb=90, disk_gb=16, cores=32)
shell:
"diamond blastp"
Expand Down Expand Up @@ -298,7 +301,7 @@ rule all_megahit:

rule all_diamondx:
input:
[P(f"metagenome/diamondx/{type}/{sample}~{db}.tsv.xz")
[P(f"metagenome/diamondx/{type}/{sample}~{db}.tsv.zstd")
for sampleset in config["samplesets"]
for sample in config["SAMPLESETS"][sampleset]
for type in config["samplesets"][sampleset].get("diamondx", {}).get("types", [])
Expand Down
11 changes: 10 additions & 1 deletion acanthophis/template/workflow/rules/reads.rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# please, if you find a bug, raise an issue on github so the fix gets shared
# with everyone.
#
# Copyright 2016-2022 Kevin Murray/Gekkonid Consulting
# Copyright 2016-2024 Kevin Murray/Gekkonid Consulting
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
Expand Down Expand Up @@ -33,6 +33,7 @@ rule qcreads_paired_il:
maxqualval=lambda wc: _qcparam(wc, "maxqualval"),
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
conda: "envs/reads.yml"
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
shell:
"( AdapterRemoval"
" --file1 {input.reads}"
Expand Down Expand Up @@ -68,6 +69,7 @@ rule qcreads_paired_r12:
maxqualval=lambda wc: _qcparam(wc, "maxqualval"),
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
conda: "envs/reads.yml"
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
shell:
"( AdapterRemoval"
" --file1 {input.r1}"
Expand Down Expand Up @@ -104,6 +106,7 @@ rule qcreads_se:
maxqualval=lambda wc: _qcparam(wc, "maxqualval"),
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
conda: "envs/reads.yml"
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
shell:
"( AdapterRemoval"
" --file1 {input.se}"
Expand Down Expand Up @@ -133,6 +136,7 @@ rule merge_qcd_reads:
L("reads/runs/{run}~{lib}.fastq.gz.log"),
resources: **rule_resources(config, "merge_qcd_reads", runtime=30, mem_gb=1, disk_gb=1, cores=1)
conda: "envs/reads.yml"
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
shell:
"(cat {input} >{output} ) >{log} 2>&1"

Expand All @@ -145,6 +149,7 @@ rule read_count_librun_indiv:
log:
L("stats/reads/readnum_librun/{run}~{lib}.tsv.log"),
conda: "envs/reads.yml"
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
resources: **rule_resources(config, "read_count_librun_indiv", runtime=10, mem_gb=1, disk_gb=1)
shell:
"( seqhax stats"
Expand Down Expand Up @@ -206,6 +211,7 @@ rule split_pair_sample:
L("reads/samples/{sample}_split.log"),
resources: **rule_resources(config, "split_pair_sample", runtime=30, mem_gb=1, disk_gb=1, cores=8)
conda: "envs/reads.yml"
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
params:
ziplevel=config.get("tool_settings", {}).get('ziplevel', 6),
shell:
Expand Down Expand Up @@ -236,6 +242,7 @@ rule fastqc_preqc:
fqczip=P("stats/fastqc/preqc/{run}~{lib}_fastqc.zip"),
log: L("stats/fastqc/preqc/{run}~{lib}_fastqc.log"),
resources: **rule_resources(config, "fastqc_preqc", runtime=30, mem_gb=1, cores=1)
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
conda: "envs/qcstats.yml"
shell:
"(T=$(mktemp -d);"
Expand All @@ -256,6 +263,7 @@ rule fastqc_postqc:
fqczip=P("stats/fastqc/postqc/{run}~{lib}_fastqc.zip"),
log: L("stats/fastqc/postqc/{run}~{lib}_fastqc.zip.log"),
resources: **rule_resources(config, "fastqc_postqc", runtime=30, mem_gb=1, cores=1)
container: "docker://ghcr.io/kdm9/acanthophis-qc:latest"
conda: "envs/qcstats.yml"
shell:
"set -x; (T=$(mktemp -d);"
Expand All @@ -279,6 +287,7 @@ rule multiqc_fastqc:
log=L("stats/multiqc/reads-{prepost}~{sampleset}_multiqc.log"),
resources: **rule_resources(config, "multiqc_fastqc", runtime=30, mem_gb=2)
conda: "envs/qcstats.yml"
container: "docker://multiqc/multiqc:v1.20"
shell:
"multiqc"
" --no-megaqc-upload"
Expand Down
Loading

0 comments on commit f898be4

Please sign in to comment.