Skip to content
This repository has been archived by the owner on Nov 8, 2024. It is now read-only.

Adding in many staging wrappers #31

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,8 @@ recursive-include docs *.rst
recursive-include snakescale/wrappers Snakefile
recursive-include snakescale/wrappers *.yaml wrapper.py
recursive-include snakescale/wrappers *.bed *.genome
recursive-include snakescale/wrappers *.bam *.bai *vcf *vcf.gz
recursive-include snakescale/wrappers *.fa *.fasta *.fa.gz *.fasta.gz
recursive-include snakescale/wrappers *mutations.txt # dwgsim wrapper


5 changes: 5 additions & 0 deletions snakescale/wrappers/latest/bwa/mem/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- bioconda
dependencies:
- bwa
- picard
4 changes: 4 additions & 0 deletions snakescale/wrappers/latest/bwa/mem/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: bwa mem
description: Align reads from an unmapped BAM to a reference genome with bwa mem.
authors:
- Clint Valentine
26 changes: 26 additions & 0 deletions snakescale/wrappers/latest/bwa/mem/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from snakescale import scale

# Test rule for the ``bwa mem`` wrapper: takes a reference FASTA and an
# unmapped BAM and declares a single BAM plus a log file as its results.
# NOTE(review): only ``reference.fa`` itself is declared as input; bwa and
# Picard MergeBamAlignment typically also expect index / sequence-dictionary
# files next to the reference -- confirm these are available to the test.
rule align_with_bwa_mem:
input:
reference='data/reference.fa',
unmapped_bam='data/infile.bam'
output: 'data/result.bam'
params:
# Options for the ``picard SamToFastq`` stage (the wrapper reads the
# ``sam_to_fastq`` key and renders it with ``make_picard_params``).
sam_to_fastq={
'clipping_action': 'N',
'clipping_attribute': 'XT',
'clipping_min_length': 25,
'interleave': True,
'include_non_pf_reads': False
},
# Options for the ``bwa mem`` stage, rendered with ``make_bwa_params``.
# Presumably ``p`` becomes ``-p`` (interleaved paired-end input, matching
# ``interleave`` above) and ``v`` becomes ``-v 2`` -- confirm against the helper.
bwa_mem={'p': True, 'v': 2},
# Options for the ``picard MergeBamAlignment`` stage (``merge_bam_alignment``
# key, rendered with ``make_picard_params``).
merge_bam_alignment={
'aligner_proper_pair_flags': False,
'attributes_to_retain': ['X0', 'ZS', 'ZI', 'ZM', 'ZC', 'ZN'],
'clip_adapters': False,
'expected_orientations': 'FR',
'max_insertions_or_deletions': -1,
'sort_order': 'coordinate'
}
log: 'data/result.log'
# Resolve the wrapper location through snakescale's ``scale`` helper.
wrapper: scale('bwa', 'mem')
10 changes: 10 additions & 0 deletions snakescale/wrappers/latest/bwa/mem/test/data/reference.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>NC_001416.1 Enterobacteria phage lambda, complete genome
GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCGTTCTTCTTCG
TCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAACGACAGGTGCTGAAAGCGAGGC
TTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTTGTCCGTGGAATGAACAATGGAAGTCAACAAAAAGCA
GCTGGCTGACATTTTCGGTGCGAGTATCCGTACCATTCAGAACTGGCAGGAACAGGGAATGCCCGTTCTG
CGAGGCGGTGGCAAGGGTAATGAGGTGCTTTATGACTCTGCCGCCGTCATAAAATGGTATGCCGAAAGGG
ATGCTGAAATTGAGAACGAAAAGCTGCGCCGGGAGGTTGAAGAACTGCGGCAGGCCAGCGAGGCAGATCT
CCAGCCAGGAACTATTGAGTACGAACGCCATCGACTTACGCGTGCGCAGGCCGACGCACAGGAACTGAAG
AATGCCAGAGACTCCGCTGAAGTGGTGGAAACCGCATTCTGTACTTTCGTGCTGTCGCGGATCGCAGGTG
AAATTGCCAGTATTCTCGACGGGCTCCCCCTGTCGGTGCAGCGGCGTTTTCCGGAACTGGAAAACCGACA
44 changes: 44 additions & 0 deletions snakescale/wrappers/latest/bwa/mem/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Snakemake wrapper that aligns an unmapped BAM with bwa mem.

The reads of the unmapped BAM are converted to FASTQ with Picard SamToFastq,
streamed through ``bwa mem`` and merged back with the unmapped BAM by Picard
MergeBamAlignment, all in a single shell pipeline.
"""

__author__ = 'clintval'
__copyright__ = 'Copyright 2018, Clint Valentine'
__email__ = '[email protected]'
__license__ = 'MIT'

from snakemake.shell import shell

from snakescale.utils import collect_jvm_resources
from snakescale.utils import collect_picard_style_jvm_resources
from snakescale.utils import make_bwa_params
from snakescale.utils import make_picard_params

# Options handed to the ``picard`` launcher itself: the user supplied ``extra``
# string followed by whatever the two snakescale resource helpers return.
# NOTE(review): the pieces are concatenated without a separator, so the helpers
# presumably return strings that start with whitespace -- confirm in
# ``snakescale.utils``.
extra = snakemake.params.get('extra', '')
extra += collect_jvm_resources()
extra += collect_picard_style_jvm_resources()

# Per-tool option mappings taken from the rule's ``params`` and rendered as
# command line text; a missing key falls back to an empty mapping.
sam_to_fastq_params = make_picard_params(snakemake.params.get('sam_to_fastq', {}))
bwa_mem_params = make_bwa_params(snakemake.params.get('bwa_mem', {}))
merge_bam_alignment_params = make_picard_params(snakemake.params.get('merge_bam_alignment', {}))

# Redirect stderr only, so stdout stays free for the pipes below. Append mode
# is requested because the same log is reused by all three pipeline stages.
log = snakemake.log_fmt_shell(stdout=False, stderr=True, append=True)

shell(
    # Stage 1: write the unmapped BAM's reads as FASTQ to stdout.
    'picard {extra} SamToFastq'
    ' INPUT={snakemake.input.unmapped_bam}'
    ' FASTQ=/dev/stdout'
    ' {sam_to_fastq_params}'
    ' {log}'
    # Stage 2: align the streamed reads. bwa's synopsis is
    # ``bwa mem [options] <idxbase> <in.fq>``, so the user options must come
    # before the reference and the input; options placed after the positional
    # arguments are only parsed where getopt permutes argv.
    ' | bwa mem'
    ' -t {snakemake.threads}'
    ' {bwa_mem_params}'
    ' {snakemake.input.reference}'
    ' /dev/stdin'
    ' {log}'
    # Stage 3: merge the alignments read from stdin with the unmapped BAM.
    ' | picard {extra} MergeBamAlignment '
    ' UNMAPPED={snakemake.input.unmapped_bam}'
    ' REFERENCE_SEQUENCE={snakemake.input.reference}'
    ' ALIGNED=/dev/stdin'
    ' OUTPUT={snakemake.output}'
    ' {merge_bam_alignment_params}'
    ' {log}'
)
4 changes: 4 additions & 0 deletions snakescale/wrappers/latest/dwgsim/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
channels:
- bioconda
dependencies:
- dwgsim
4 changes: 4 additions & 0 deletions snakescale/wrappers/latest/dwgsim/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: dwgsim
description: Simulate sequencing reads.
authors:
- Clint Valentine
31 changes: 31 additions & 0 deletions snakescale/wrappers/latest/dwgsim/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from snakescale import scale

# Common path prefix shared by every declared output and also handed to the
# wrapper through ``params.output_prefix``.
output_prefix = 'data/result'

# Test rule for the ``dwgsim`` wrapper: simulates reads from the reference
# FASTA. The five declared outputs are all ``output_prefix`` plus a fixed
# suffix.
# NOTE(review): apart from ``output_prefix`` the params are presumably passed
# to dwgsim as single-letter flags (``r1``/``r2`` standing in for ``-1``/``-2``
# as noted inline) -- confirm against the wrapper, which is not visible here.
rule dwgsim:
input: 'data/reference.fa'
output:
output_prefix + '.bfast.fastq',
output_prefix + '.bwa.read1.fastq',
output_prefix + '.bwa.read2.fastq',
output_prefix + '.mutations.txt',
output_prefix + '.mutations.vcf',
params:
output_prefix=output_prefix,
r1=141, # Length of read one (flag `-1`)
r2=141, # Length of read two (flag `-2`)
c=0, # Generate reads for platform `Illumina`
d=150, # Outer distance between two ends for pairs
s=50, # Standard deviation of the distance between pairs
C=100, # Mean coverage across available positions
e=0.003, # Error rate per base in first read
E=0.004, # Error rate per base in second read
r=0.001, # Mutation rate
F=0.0001, # Somatic mutation rate
R=0, # Fraction of mutations that are indel
X=0, # Probability an indel is extended
y=0.01, # Probability of a random DNA read
n=10, # Maximum number of Ns allowed in a given read
z=42, # Random seed
log: 'data/result.log'
# Resolve the wrapper location through snakescale's ``scale`` helper.
wrapper: scale('dwgsim')
10 changes: 10 additions & 0 deletions snakescale/wrappers/latest/dwgsim/test/data/reference.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>NC_001416.1 Enterobacteria phage lambda, complete genome
GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCGTTCTTCTTCG
TCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAACGACAGGTGCTGAAAGCGAGGC
TTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTTGTCCGTGGAATGAACAATGGAAGTCAACAAAAAGCA
GCTGGCTGACATTTTCGGTGCGAGTATCCGTACCATTCAGAACTGGCAGGAACAGGGAATGCCCGTTCTG
CGAGGCGGTGGCAAGGGTAATGAGGTGCTTTATGACTCTGCCGCCGTCATAAAATGGTATGCCGAAAGGG
ATGCTGAAATTGAGAACGAAAAGCTGCGCCGGGAGGTTGAAGAACTGCGGCAGGCCAGCGAGGCAGATCT
CCAGCCAGGAACTATTGAGTACGAACGCCATCGACTTACGCGTGCGCAGGCCGACGCACAGGAACTGAAG
AATGCCAGAGACTCCGCTGAAGTGGTGGAAACCGCATTCTGTACTTTCGTGCTGTCGCGGATCGCAGGTG
AAATTGCCAGTATTCTCGACGGGCTCCCCCTGTCGGTGCAGCGGCGTTTTCCGGAACTGGAAAACCGACA
Loading