Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix HISAT2 strandedness parameter #1470

Merged
merged 12 commits into from
Sep 26, 2017
1,258 changes: 837 additions & 421 deletions tools/hisat2/hisat2.xml

Large diffs are not rendered by default.

160 changes: 21 additions & 139 deletions tools/hisat2/hisat2_macros.xml
Original file line number Diff line number Diff line change
@@ -1,148 +1,30 @@
<?xml version="1.0"?>
<macros>
<xml name="single_paired_selector">
<param name="paired_selector" type="select" label="Single end or paired reads?">
<option value="paired">Paired reads</option>
<option value="single">Unpaired reads</option>
<xml name="paired_end_options">

<!-- Select input for the library strand setting (flag: -\-rna-strandness).
     The empty option value represents an unstranded library. -->
<param
    name="rna_strandness"
    type="select"
    argument="--rna-strandness"
    label="Specify strand information"
    help="'FR' means a read corresponds to a transcript. 'RF' means a read corresponds to the reverse complemented counterpart of a transcript. With this option being used, every read alignment will have an XS attribute tag: '+' means a read belongs to a transcript on '+' strand of genome. '-' means a read belongs to a transcript on '-' strand of genome.">
    <option value="">Unstranded</option>
    <option value="FR">Forward (FR)</option>
    <option value="RF">Reverse (RF)</option>
</param>
</xml>
<!-- Input selection macro. The "paired" branch accepts either two individual
     datasets or a paired collection; the "single" branch accepts one dataset
     plus switches for writing aligned/unaligned reads. The @FTYPE@ placeholder
     in the format attributes corresponds to the `ftype` token declared on the
     macro. -->
<xml name="paired_input_conditional" tokens="ftype">
    <conditional name="paired">
        <expand macro="single_paired_selector"/>
        <when value="paired">
            <conditional name="collection" label="Data structure">
                <param name="collection_selector"
                       type="select"
                       label="Input is structured as"
                       help="If a list of pairs is selected, HISAT2 will run in batch mode over each pair in the list, producing a list of output bam files">
                    <option value="files">Individual files</option>
                    <option value="collection">Pair collection or list of pairs</option>
                </param>
                <when value="collection">
                    <param name="reads" type="data_collection" collection_type="paired" format="@FTYPE@" label="Paired reads"/>
                </when>
                <when value="files">
                    <param name="forward" type="data" format="@FTYPE@" label="Forward reads"/>
                    <param name="reverse" type="data" format="@FTYPE@" label="Reverse reads"/>
                </when>
            </conditional>
            <expand macro="paired_end_conditional"/>
            <expand macro="paired_end_output"/>
        </when>
        <when value="single">
            <param name="reads" type="data" format="@FTYPE@" label="Reads"/>
            <param name="unaligned_file"
                   argument="--un-gz"
                   type="boolean"
                   truevalue="true"
                   falsevalue="false"
                   checked="False"
                   label="Write unaligned reads (in fastq format) to separate file(s)"/>
            <param name="aligned_file"
                   argument="--al-gz"
                   type="boolean"
                   truevalue="true"
                   falsevalue="false"
                   checked="False"
                   label="Write aligned reads (in fastq format) to separate file(s)"/>
        </when>
    </conditional>
</xml>
<xml name="paired_end_conditional">
<conditional name="paired_end_options">
<param name="paired_end_options_selector" type="select" label="Paired-end options">

<!-- Paired-end tuning. "defaults" contributes no parameters; "advanced"
     exposes the mate-orientation choice and three boolean switches whose
     truevalue is the literal command-line flag (empty string when unchecked).
     Each <when> value and each param name is defined exactly once, and every
     label states what ticking the box does. -->
<conditional name="paired_options">
    <param name="paired_options_selector" type="select" label="Paired-end options" help="See &quot;Alignment Options&quot; section of Help below for information">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify paired-end parameters</option>
    </param>
    <when value="defaults"/>
    <when value="advanced">
        <param name="fr_rf_ff" argument="--fr/--rf/--ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand"
               help="E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
            <option value="--fr" selected="True">--fr</option>
            <option value="--rf">--rf</option>
            <option value="--ff">--ff</option>
        </param>
        <!-- Checking this box passes the flag, which turns OFF the per-mate fallback described in the help. -->
        <param argument="--no-mixed" name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable alignments of individual mates" help="By default, when `hisat2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default: False"/>
        <!-- Checking this box passes the flag, which turns OFF the discordant-alignment search described in the help. -->
        <param argument="--no-discordant" name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable discordant alignments" help="--no-discordant; By default, `hisat2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default: False"/>
        <param name="skip_reverse" type="boolean" truevalue="--norc" falsevalue="" label="Skip reverse strand of reference" help="If --norc is specified, hisat will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes hisat to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand"/>
    </when>
</conditional>
</xml>
<!-- Two unchecked-by-default boolean switches for paired input; each yields
     the string "true" or "false" rather than a command-line flag. -->
<xml name="paired_end_output">
    <param name="unaligned_file"
           argument="--un-conc-gz"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="False"
           label="Write unaligned reads (in fastq format) to separate file(s)"/>
    <param name="aligned_file"
           argument="--al-conc-gz"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="False"
           label="Write aligned reads (in fastq format) to separate file(s)"/>
</xml>
<!-- Cheetah fragment emitting paired-end output and alignment flags.
     For each of unaligned_file / aligned_file that equals "true", one
     un-conc / al-conc flag is written; the -gz / -bz2 / plain variant is
     picked from $compressed, which this token reads but does not set itself.
     When the paired-end selector is 'advanced', the no_mixed and
     no_discordant values are appended verbatim.
     Every directive ends with a colon so the two compression chains match. -->
<token name="@paired_end_options@">
#if str( $input_format.paired.unaligned_file ) == "true":
    #if $compressed == "GZ":
        --un-conc-gz '${output_unaligned_reads_l}'
    #else if $compressed == "BZ2":
        --un-conc-bz2 '${output_unaligned_reads_l}'
    #else:
        --un-conc '${output_unaligned_reads_l}'
    #end if
#end if
#if str( $input_format.paired.aligned_file ) == "true":
    #if $compressed == "GZ":
        --al-conc-gz '${output_aligned_reads_l}'
    #else if $compressed == "BZ2":
        --al-conc-bz2 '${output_aligned_reads_l}'
    #else:
        --al-conc '${output_aligned_reads_l}'
    #end if
#end if
#if str($input_format.paired.paired_end_options.paired_end_options_selector) == 'advanced':
    ${input_format.paired.paired_end_options.no_mixed}
    ${input_format.paired.paired_end_options.no_discordant}
#end if
</token>
<!-- Cheetah fragment: writes the rna-strandness flag only when the spliced
     options selector is "advanced" AND the chosen value is non-blank.
     `and` short-circuits, so rna_strandness is only looked up in the
     advanced case. -->
<token name="@strandedness_parameters@">
#if str($spliced_options.spliced_options_selector) == "advanced" and str($spliced_options.rna_strandness).strip() != '':
    --rna-strandness $spliced_options.rna_strandness
#end if
</token>
<!-- Cheetah fragment that symlinks the input reads into the working directory
     under a name whose extension reflects the dataset type (.fastq.gz,
     .fastq.bz2 or .fastq) and records the compression in $compressed
     ("GZ", "BZ2", or the initial string "False").
     Paired input sets $read1/$read2 from either a paired collection or two
     individual files; single-end input sets only $read1.
     Every link is created with `ln -f -s`, so an existing link of the same
     name is replaced in all branches alike. -->
<token name="@FASTQGZ_SETUP@">
<![CDATA[
#set compressed="False"
#if str($input_format.paired.paired_selector) == 'paired':
    #if str($input_format.paired.collection.collection_selector) == 'collection':
        #if $input_format.paired.collection.reads.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
            #set read1 = "input_f.fastq.gz"
            #set compressed = "GZ"
        #else if $input_format.paired.collection.reads.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
            #set read1 = "input_f.fastq.bz2"
            #set compressed = "BZ2"
        #else:
            #set read1 = "input_f.fastq"
        #end if
        ln -f -s '${input_format.paired.collection.reads.forward}' ${read1} &&

        #if $input_format.paired.collection.reads.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
            #set read2 = "input_r.fastq.gz"
            #set compressed = "GZ"
        #else if $input_format.paired.collection.reads.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
            #set read2 = "input_r.fastq.bz2"
            #set compressed = "BZ2"
        #else:
            #set read2 = "input_r.fastq"
        #end if
        ln -f -s '${input_format.paired.collection.reads.reverse}' ${read2} &&
    #else:
        #if $input_format.paired.collection.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
            #set read1 = "input_f.fastq.gz"
            #set compressed = "GZ"
        #else if $input_format.paired.collection.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
            #set read1 = "input_f.fastq.bz2"
            #set compressed = "BZ2"
        #else:
            #set read1 = "input_f.fastq"
        #end if
        ln -f -s '${input_format.paired.collection.forward}' ${read1} &&

        #if $input_format.paired.collection.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
            #set read2 = "input_r.fastq.gz"
            #set compressed = "GZ"
        #else if $input_format.paired.collection.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
            #set read2 = "input_r.fastq.bz2"
            #set compressed = "BZ2"
        #else:
            #set read2 = "input_r.fastq"
        #end if
        ln -f -s '${input_format.paired.collection.reverse}' ${read2} &&
    #end if
#else:
    #if $input_format.paired.reads.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #else if $input_format.paired.reads.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${input_format.paired.reads}' ${read1} &&
#end if
]]>
</token>
</macros>
</xml>
</macros>
1 change: 1 addition & 0 deletions tools/hisat2/test-data/cached_locally/hisat2_indexes.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
phiX phiX PhiX ${__HERE__}/phiX
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.2.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.3.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.4.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.5.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.6.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.7.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.8.ht2
Binary file not shown.
79 changes: 79 additions & 0 deletions tools/hisat2/test-data/cached_locally/phiX.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
>phiX174
GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA

6 changes: 6 additions & 0 deletions tools/hisat2/test-data/hisat_output.summary
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
HISAT2 summary stats:
Total reads: 10
Aligned 0 time: 0 (0.00%)
Aligned 1 time: 10 (100.00%)
Aligned >1 times: 0 (0.00%)
Overall alignment rate: 100.00%
Binary file modified tools/hisat2/test-data/hisat_output_1.bam
Binary file not shown.
Binary file modified tools/hisat2/test-data/hisat_output_2.bam
Binary file not shown.
Binary file modified tools/hisat2/test-data/hisat_output_3.bam
Binary file not shown.
Binary file added tools/hisat2/test-data/hisat_output_4.bam
Binary file not shown.
7 changes: 7 additions & 0 deletions tools/hisat2/tool_data_table_conf.xml.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<tables>
    <!-- Test data table "hisat2_indexes": rows are read from the .loc file
         under test-data/cached_locally; lines starting with '#' are comments. -->
    <table name="hisat2_indexes"
           comment_char="#"
           allow_duplicate_entries="False">
        <columns>value, dbkey, name, path</columns>
        <file path="${__HERE__}/test-data/cached_locally/hisat2_indexes.loc"/>
    </table>
</tables>