Skip to content

Commit

Permalink
Merge pull request #1470 from mblue9/fix-hisat2
Browse files Browse the repository at this point in the history
Fix HISAT2 strandedness parameter
  • Loading branch information
bgruening authored Sep 26, 2017
2 parents 422f699 + d16f91c commit 118b80f
Show file tree
Hide file tree
Showing 18 changed files with 951 additions and 560 deletions.
1,258 changes: 837 additions & 421 deletions tools/hisat2/hisat2.xml

Large diffs are not rendered by default.

160 changes: 21 additions & 139 deletions tools/hisat2/hisat2_macros.xml
Original file line number Diff line number Diff line change
@@ -1,148 +1,30 @@
<?xml version="1.0"?>
<macros>
<xml name="single_paired_selector">
<param name="paired_selector" type="select" label="Single end or paired reads?">
<option value="paired">Paired reads</option>
<option value="single">Unpaired reads</option>
<xml name="paired_end_options">

<!-- Select parameter for the RNA strandness setting; the "Unstranded" choice carries an empty value. -->
<param argument="--rna-strandness"
       name="rna_strandness"
       type="select"
       label="Specify strand information"
       help="'FR' means a read corresponds to a transcript. 'RF' means a read corresponds to the reverse complemented counterpart of a transcript. With this option being used, every read alignment will have an XS attribute tag: '+' means a read belongs to a transcript on '+' strand of genome. '-' means a read belongs to a transcript on '-' strand of genome.">
    <option value="">Unstranded</option>
    <option value="FR">Forward (FR)</option>
    <option value="RF">Reverse (RF)</option>
</param>
</xml>
<!-- Read-input macro: chooses between paired and unpaired reads. The @FTYPE@ placeholder corresponds to the declared "ftype" token. -->
<xml name="paired_input_conditional" tokens="ftype">
    <conditional name="paired">
        <expand macro="single_paired_selector"/>
        <when value="paired">
            <!-- Paired reads arrive either as two separate datasets or as a paired collection. -->
            <conditional name="collection" label="Data structure">
                <param name="collection_selector"
                       type="select"
                       label="Input is structured as"
                       help="If a list of pairs is selected, HISAT2 will run in batch mode over each pair in the list, producing a list of output bam files">
                    <option value="files">Individual files</option>
                    <option value="collection">Pair collection or list of pairs</option>
                </param>
                <when value="collection">
                    <param name="reads" type="data_collection" collection_type="paired" format="@FTYPE@" label="Paired reads"/>
                </when>
                <when value="files">
                    <param name="forward" type="data" format="@FTYPE@" label="Forward reads"/>
                    <param name="reverse" type="data" format="@FTYPE@" label="Reverse reads"/>
                </when>
            </conditional>
            <expand macro="paired_end_conditional"/>
            <expand macro="paired_end_output"/>
        </when>
        <when value="single">
            <param name="reads" type="data" format="@FTYPE@" label="Reads"/>
            <!-- Optional extra outputs for unpaired input. -->
            <param name="unaligned_file"
                   argument="--un-gz"
                   type="boolean"
                   truevalue="true"
                   falsevalue="false"
                   checked="False"
                   label="Write unaligned reads (in fastq format) to separate file(s)"/>
            <param name="aligned_file"
                   argument="--al-gz"
                   type="boolean"
                   truevalue="true"
                   falsevalue="false"
                   checked="False"
                   label="Write aligned reads (in fastq format) to separate file(s)"/>
        </when>
    </conditional>
</xml>
<xml name="paired_end_conditional">
<conditional name="paired_end_options">
<param name="paired_end_options_selector" type="select" label="Paired-end options">

<conditional name="paired_options">
<!-- Switch between default paired-end settings and the advanced parameter set. -->
<param name="paired_options_selector"
       type="select"
       label="Paired-end options"
       help="See &quot;Alignment Options&quot; section of Help below for information">
    <option value="defaults">Use default values</option>
    <option value="advanced">Specify paired-end parameters</option>
</param>
<when value="defaults" />
<when value="defaults"/>
<when value="advanced">
<!-- Advanced paired-end switches. Each boolean contributes its truevalue flag when ticked and an empty string otherwise. -->
<!-- Labels fixed: no_discordant used to repeat the no_mixed label, and skip_reverse said "reference strand" although its help describes the reverse-complement strand. -->
<param name="no_mixed"
       type="boolean"
       truevalue="--no-mixed"
       falsevalue=""
       label="Disable alignments of individual mates"
       help="By default, when hisat cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates. This option disables that behavior" />
<param name="no_discordant"
       type="boolean"
       truevalue="--no-discordant"
       falsevalue=""
       label="Disable discordant alignments"
       help="By default, hisat looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior" />
<param name="skip_reverse"
       type="boolean"
       truevalue="--norc"
       falsevalue=""
       label="Skip reverse strand of reference"
       help="If --norc is specified, hisat will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes hisat to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand" />
<!-- Radio selector for the expected mate orientation; the fr choice is preselected. -->
<param argument="--fr/--rf/--ff"
       name="fr_rf_ff"
       type="select"
       display="radio"
       label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand"
       help="E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
    <option value="--fr" selected="True">--fr</option>
    <option value="--rf">--rf</option>
    <option value="--ff">--ff</option>
</param>
<!-- Ticking either box passes the corresponding flag, which turns the described default behavior OFF. -->
<!-- Labels reworded: the previous "Disable no-... behavior" wording stated the opposite of what the checkbox does. -->
<param argument="--no-mixed"
       name="no_mixed"
       type="boolean"
       truevalue="--no-mixed"
       falsevalue=""
       checked="False"
       label="Disable alignments of individual mates"
       help="By default, when `hisat2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default: False"/>
<param argument="--no-discordant"
       name="no_discordant"
       type="boolean"
       truevalue="--no-discordant"
       falsevalue=""
       checked="False"
       label="Disable discordant alignments"
       help="--no-discordant; By default, `hisat2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default: False"/>
</when>
</conditional>
</xml>
<!-- Optional extra outputs for paired input: toggles for writing unaligned and aligned reads to separate files. -->
<xml name="paired_end_output">
    <param name="unaligned_file"
           argument="--un-conc-gz"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="False"
           label="Write unaligned reads (in fastq format) to separate file(s)"/>
    <param name="aligned_file"
           argument="--al-conc-gz"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="False"
           label="Write aligned reads (in fastq format) to separate file(s)"/>
</xml>
<!-- Command-line fragment for paired input. When the unaligned/aligned toggles equal "true" it adds the un-conc / al-conc flag whose suffix follows $compressed (GZ, BZ2, otherwise plain); when the advanced paired-end selector is chosen it appends the no_mixed and no_discordant values. -->
<!-- $compressed is not set in this token, so it has to be defined earlier in the command template. -->
<token name="@paired_end_options@">
#if str( $input_format.paired.unaligned_file ) == "true":
#if $compressed == "GZ":
--un-conc-gz '${output_unaligned_reads_l}'
#else if $compressed == "BZ2":
--un-conc-bz2 '${output_unaligned_reads_l}'
#else:
--un-conc '${output_unaligned_reads_l}'
#end if
#end if
#if str( $input_format.paired.aligned_file ) == "true":
#if $compressed == "GZ":
--al-conc-gz '${output_aligned_reads_l}'
#else if $compressed == "BZ2":
--al-conc-bz2 '${output_aligned_reads_l}'
#else:
--al-conc '${output_aligned_reads_l}'
#end if
#end if
#if str($input_format.paired.paired_end_options.paired_end_options_selector) == 'advanced':
${input_format.paired.paired_end_options.no_mixed}
${input_format.paired.paired_end_options.no_discordant}
#end if
</token>
<!-- Adds the RNA strandness flag only when the advanced spliced options are selected and the chosen value is not blank. -->
<token name="@strandedness_parameters@">
#if str($spliced_options.spliced_options_selector) == "advanced" and str($spliced_options.rna_strandness).strip() != '':
--rna-strandness $spliced_options.rna_strandness
#end if
</token>
<!-- Setup fragment: symlinks the input reads to fixed local names (input_f / input_r) whose extension reflects the dataset type, and records the detected compression in $compressed ("GZ", "BZ2", or the initial "False"). -->
<token name="@FASTQGZ_SETUP@">
<![CDATA[
#set compressed="False"
#if str($input_format.paired.paired_selector) == 'paired':
#if str($input_format.paired.collection.collection_selector) == 'collection':
## Paired collection: forward and reverse members of the collection element.
#if $input_format.paired.collection.reads.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
#set read1 = "input_f.fastq.gz"
#set compressed = "GZ"
#else if $input_format.paired.collection.reads.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
#set read1 = "input_f.fastq.bz2"
#set compressed = "BZ2"
#else:
#set read1 = "input_f.fastq"
#end if
ln -f -s '${input_format.paired.collection.reads.forward}' ${read1} &&

#if $input_format.paired.collection.reads.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
#set read2 = "input_r.fastq.gz"
#set compressed = "GZ"
#else if $input_format.paired.collection.reads.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
#set read2 = "input_r.fastq.bz2"
#set compressed = "BZ2"
#else:
#set read2 = "input_r.fastq"
#end if
ln -f -s '${input_format.paired.collection.reads.reverse}' ${read2} &&
#else:
## Individual files: separate forward and reverse datasets.
#if $input_format.paired.collection.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
#set read1 = "input_f.fastq.gz"
#set compressed = "GZ"
#else if $input_format.paired.collection.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
#set read1 = "input_f.fastq.bz2"
#set compressed = "BZ2"
#else:
#set read1 = "input_f.fastq"
#end if
ln -f -s '${input_format.paired.collection.forward}' ${read1} &&
#if $input_format.paired.collection.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
#set read2 = "input_r.fastq.gz"
#set compressed = "GZ"
#else if $input_format.paired.collection.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
#set read2 = "input_r.fastq.bz2"
#set compressed = "BZ2"
#else:
#set read2 = "input_r.fastq"
#end if
ln -f -s '${input_format.paired.collection.reverse}' ${read2} &&
#end if
#else:
## Unpaired reads: a single dataset linked as read1.
#if $input_format.paired.reads.is_of_type("fastq.gz", "fastqsanger.gz"):
#set read1 = "input_f.fastq.gz"
#set compressed = "GZ"
#else if $input_format.paired.reads.is_of_type("fastq.bz2", "fastqsanger.bz2"):
#set read1 = "input_f.fastq.bz2"
#set compressed = "BZ2"
#else:
#set read1 = "input_f.fastq"
#end if
## -f added to match the paired branches, so an already-existing link does not abort the command.
ln -f -s '${input_format.paired.reads}' ${read1} &&
#end if
]]>
</token>
</macros>
</xml>
</macros>
1 change: 1 addition & 0 deletions tools/hisat2/test-data/cached_locally/hisat2_indexes.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
phiX phiX PhiX ${__HERE__}/phiX
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.2.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.3.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.4.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.5.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.6.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.7.ht2
Binary file not shown.
Binary file added tools/hisat2/test-data/cached_locally/phiX.8.ht2
Binary file not shown.
79 changes: 79 additions & 0 deletions tools/hisat2/test-data/cached_locally/phiX.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
>phiX174
GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA

6 changes: 6 additions & 0 deletions tools/hisat2/test-data/hisat_output.summary
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
HISAT2 summary stats:
Total reads: 10
Aligned 0 time: 0 (0.00%)
Aligned 1 time: 10 (100.00%)
Aligned >1 times: 0 (0.00%)
Overall alignment rate: 100.00%
Binary file modified tools/hisat2/test-data/hisat_output_1.bam
Binary file not shown.
Binary file modified tools/hisat2/test-data/hisat_output_2.bam
Binary file not shown.
Binary file modified tools/hisat2/test-data/hisat_output_3.bam
Binary file not shown.
Binary file added tools/hisat2/test-data/hisat_output_4.bam
Binary file not shown.
7 changes: 7 additions & 0 deletions tools/hisat2/tool_data_table_conf.xml.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<tables>
    <!-- Test data table: the hisat2_indexes entries are read from the .loc file under test-data/cached_locally -->
    <table name="hisat2_indexes"
           comment_char="#"
           allow_duplicate_entries="False">
        <columns>value, dbkey, name, path</columns>
        <file path="${__HERE__}/test-data/cached_locally/hisat2_indexes.loc"/>
    </table>
</tables>

0 comments on commit 118b80f

Please sign in to comment.