-
Notifications
You must be signed in to change notification settings - Fork 441
/
hisat2.xml
1367 lines (1178 loc) · 82.5 KB
/
hisat2.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<tool id="hisat2" name="HISAT2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
<!-- One-line summary of the tool -->
<description>A fast and sensitive alignment program</description>
<!-- Shared definitions are imported from hisat2_macros.xml -->
<macros>
    <import>hisat2_macros.xml</import>
</macros>
<!-- Cross-reference to the bio.tools registry entry -->
<xrefs>
    <xref type="bio.tools">hisat2</xref>
</xrefs>
<!-- Packages needed by the command: the aligner itself, samtools for the
     SAM to sorted-BAM conversion, seqtk for splitting interleaved reads -->
<requirements>
    <requirement type="package" version="@TOOL_VERSION@">hisat2</requirement>
    <requirement type="package" version="1.12">samtools</requirement>
    <requirement type="package" version="1.3">seqtk</requirement>
</requirements>
<!-- Failure detection: two known error messages on either stream, plus any
     exit code of 1 or above -->
<stdio>
    <regex level="fatal" match="hisat2-align exited with value 1" source="both"/>
    <regex level="fatal" match="hisat2: not found" source="both"/>
    <exit_code range="1:"/>
</stdio>
<!-- Command used to report the installed HISAT2 version -->
<version_command>hisat2 --version</version_command>
<command><![CDATA[
## Command template (Cheetah + shell). Stages, in order:
##   1. build or locate the HISAT2 index
##   2. optionally extract known splice sites from a GTF
##   3. link/split the read files under names whose extension tells HISAT2 the type
##   4. run hisat2 with the selected options
##   5. pipe the SAM output through samtools into a sorted BAM
##   6. move paired aligned/unaligned read files onto the Galaxy outputs
## pipefail makes a failure in any stage of the final pipeline fail the job.
set -o pipefail;

## Prepare HISAT2 index
#if $reference_genome.source == "history":
    ln -s '$reference_genome.history_item' genome.fa &&
    hisat2-build -p \${GALAXY_SLOTS:-1} genome.fa genome &&
    #set index_path = 'genome'
#else:
    #set index_path = $reference_genome.index.fields.path
#end if

## If using known splice sites
#if str($adv.spliced_options.spliced_options_selector) == "advanced" and str($adv.spliced_options.known_splice_gtf) != 'None':
    ln -s '${adv.spliced_options.known_splice_gtf}' splice_sites.gtf &&
    hisat2_extract_splice_sites.py splice_sites.gtf > splice_sites.txt &&
#end if

## Link in the input files, so HISAT2 can tell their type
## compressed:      "False", "GZ" or "BZ2"; selects the flavour of the --un*/--al* options below
## reads_are_fastq: set to False when the (first) input is FASTA, which adds -f below
#set compressed = "False"
#set reads_are_fastq = True
#if str($library.type) == 'paired':
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&
    #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read2 = "input_r.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read2 = "input_r.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_2.is_of_type('fasta'):
        #set read2 = "input_r.fasta"
    #else:
        #set read2 = "input_r.fastq"
    #end if
    ln -f -s '${library.input_2}' ${read2} &&
#elif str($library.type) == 'paired_collection':
    #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.forward.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ## -f keeps the linking consistent with the other library types
    ln -f -s '${library.input_1.forward}' ${read1} &&
    #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read2 = "input_r.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read2 = "input_r.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.reverse.is_of_type("fasta"):
        #set read2 = "input_r.fasta"
    #else:
        #set read2 = "input_r.fastq"
    #end if
    ln -f -s '${library.input_1.reverse}' ${read2} &&
#elif str( $library.type ) == "paired_interleaved":
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set compressed = "GZ"
    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set compressed = "BZ2"
    #elif $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
    #end if
    ## The interleaved file is split into two local files by seqtk
    ## (-1 keeps the first mate of each pair, -2 the second)
    #set read1 = "input_f.fastq" if reads_are_fastq else "input_f.fasta"
    #set read2 = "input_r.fastq" if reads_are_fastq else "input_r.fasta"
    #if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        ## bzip2 input is decompressed with bzcat and streamed into seqtk
        bzcat '${library.input_1}' | seqtk seq -1 /dev/stdin > $read1 &&
        bzcat '${library.input_1}' | seqtk seq -2 /dev/stdin > $read2 &&
    #else:
        seqtk seq -1 '${library.input_1}' > $read1 &&
        seqtk seq -2 '${library.input_1}' > $read2 &&
    #end if
#else:
    ## Single-end library
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&
#end if

## Run HISAT2
hisat2
## number threads
-p \${GALAXY_SLOTS:-1}
## ref genome index path
-x '${index_path}'
## input reads are fasta?
#if not $reads_are_fastq:
    -f
#end if
## Input reads
#if str( $library.type ) == "single":
    -U '${read1}'
    #if str($adv.output_options.output_options_selector) == "advanced":
        #if str( $adv.output_options.unaligned_file ) == "true":
            #if $compressed == "GZ":
                --un-gz '$output_unaligned_reads_l'
            #elif $compressed == "BZ2":
                --un-bz2 '$output_unaligned_reads_l'
            #else:
                --un '$output_unaligned_reads_l'
            #end if
        #end if
        #if str( $adv.output_options.aligned_file ) == "true":
            #if $compressed == "GZ":
                --al-gz '$output_aligned_reads_l'
            #elif $compressed == "BZ2":
                --al-bz2 '$output_aligned_reads_l'
            #else:
                --al '$output_aligned_reads_l'
            #end if
        #end if
    #end if
#else:
    ## Interleaved reads were split above into local files with fixed names,
    ## which are passed unquoted; the other paired layouts pass the symlinks quoted
    #if str( $library.type ) == "paired_interleaved":
        -1 ${read1}
        -2 ${read2}
    #else:
        -1 '${read1}'
        -2 '${read2}'
    #end if
    #if str($adv.output_options.output_options_selector) == "advanced":
        #if str( $adv.output_options.unaligned_file ) == "true":
            #if $compressed == "GZ":
                --un-conc-gz '${output_unaligned_reads_l}'
            #elif $compressed == "BZ2":
                --un-conc-bz2 '${output_unaligned_reads_l}'
            #else:
                --un-conc '${output_unaligned_reads_l}'
            #end if
        #end if
        #if str( $adv.output_options.aligned_file ) == "true":
            #if $compressed == "GZ":
                --al-conc-gz '${output_aligned_reads_l}'
            #elif $compressed == "BZ2":
                --al-conc-bz2 '${output_aligned_reads_l}'
            #else:
                --al-conc '${output_aligned_reads_l}'
            #end if
        #end if
    #end if
    #if str($library.paired_options.paired_options_selector) == "advanced":
        ${library.paired_options.fr_rf_ff}
        ${library.paired_options.no_mixed}
        ${library.paired_options.no_discordant}
    #end if
#end if
## Specify strandedness of reads
#if str($library.rna_strandness):
    --rna-strandness ${library.rna_strandness}
#end if
## Input options
#if str($adv.input_options.input_options_selector) == "advanced":
    #if int( $adv.input_options.skip ) > 0:
        --skip ${adv.input_options.skip}
    #end if
    #if int( $adv.input_options.qupto ) > 0:
        --qupto ${adv.input_options.qupto}
    #end if
    --trim5 '${adv.input_options.trim5}'
    --trim3 '${adv.input_options.trim3}'
    ${adv.input_options.qv_encoding}
    ${adv.input_options.solexa_quals}
    ${adv.input_options.int_quals}
#end if
## Alignment options
#if str($adv.alignment_options.alignment_options_selector) == "advanced":
    --n-ceil ${adv.alignment_options.function_type},${adv.alignment_options.constant_term},${adv.alignment_options.coefficient}
    ${adv.alignment_options.ignore_quals}
    ${adv.alignment_options.skip_forward}
    ${adv.alignment_options.skip_reverse}
#end if
## Scoring options
#if str($adv.scoring_options.scoring_options_selector) == "advanced":
    --mp ${adv.scoring_options.max_mismatch},${adv.scoring_options.min_mismatch}
    ${adv.scoring_options.no_softclip}
    --np ${adv.scoring_options.ambiguous_penalty}
    --rdg ${adv.scoring_options.read_open_penalty},${adv.scoring_options.read_extend_penalty}
    --rfg ${adv.scoring_options.ref_open_penalty},${adv.scoring_options.ref_extend_penalty}
    --sp ${adv.scoring_options.soft_clip_penalty_max},${adv.scoring_options.soft_clip_penalty_min}
    --score-min ${adv.scoring_options.function_type},${adv.scoring_options.constant_term},${adv.scoring_options.coefficient}
#end if
## Spliced alignment options
#if str($adv.spliced_options.spliced_options_selector) == "advanced":
    --pen-cansplice ${adv.spliced_options.canonical_penalty}
    --pen-noncansplice ${adv.spliced_options.noncanonical_penalty}
    --pen-canintronlen ${adv.spliced_options.function_type},${adv.spliced_options.constant_term},${adv.spliced_options.coefficient}
    --pen-noncanintronlen ${adv.spliced_options.nc_function_type},${adv.spliced_options.nc_constant_term},${adv.spliced_options.nc_coefficient}
    ## splice_sites.txt is the file produced by hisat2_extract_splice_sites.py above
    #if str($adv.spliced_options.known_splice_gtf) != 'None':
        --known-splicesite-infile splice_sites.txt
    #end if
    ${adv.spliced_options.no_spliced_alignment_options.no_spliced_alignment}
    ## Insert-size bounds are only passed when spliced alignment is disabled
    #if str($adv.spliced_options.no_spliced_alignment_options.no_spliced_alignment) == '--no-spliced-alignment':
        -I ${adv.spliced_options.no_spliced_alignment_options.minins}
        -X ${adv.spliced_options.no_spliced_alignment_options.maxins}
    #end if
    --min-intronlen ${adv.spliced_options.min_intron}
    --max-intronlen ${adv.spliced_options.max_intron}
    ${adv.spliced_options.tma}
    #if str($adv.spliced_options.novel_splicesite_outfile) == "true":
        --novel-splicesite-outfile '$novel_splicesite_output'
    #end if
    #if str($adv.spliced_options.notmplen):
        ${adv.spliced_options.notmplen}
    #end if
#end if
## Reporting options
#if str($adv.reporting_options.reporting_options_selector) == "advanced":
    #if str($adv.reporting_options.max_primary) != '':
        -k ${adv.reporting_options.max_primary}
    #end if
#end if
## SAM options
#if str($adv.sam_options.sam_options_selector) == "advanced":
    #if $adv.sam_options.no_unal:
        --no-unal
    #end if
    #if str($adv.sam_options.read_groups.rg_labels) == "Yes":
        --rg-id '$adv.sam_options.read_groups.rg_id'
        ## One --rg per configured entry; an empty list simply emits nothing,
        ## so no separate emptiness guard is needed
        #for $rg_entry in $adv.sam_options.read_groups.read_groups:
            --rg '$rg_entry.rg'
        #end for
    #end if
    $adv.sam_options.chr_text
    #if $adv.sam_options.omit_sec_seq:
        --omit-sec-seq
    #end if
#end if
## Other options
#if str( $adv.other_options.other_options_selector ) == "advanced":
    ${adv.other_options.non_deterministic}
    --seed '${adv.other_options.seed}'
#end if
## Output Summary
#if str($sum.new_summary) == "true":
    --new-summary
#end if
#if str($sum.summary_file) == "true":
    --summary-file summary.txt
#end if
## Convert SAM output to sorted BAM
## using the two pipe stages has the following effect
## - hisat2 and sort run in parallel, during this time sort produces
##   presorted temporary files but does not produce output (hence
##   view does not run)
## - once hisat is finished sort will start to merge the temporary
##   files (which should be fast also on a single thread) gives the
##   sorted output to view which only compresses the files (now
##   using full parallelism again)
| samtools sort --no-PG -l 0 -T "\${TMPDIR:-.}" -O bam | samtools view --no-PG -O bam -@ \${GALAXY_SLOTS:-1} -o '${output_alignments}'
## Rename any output fastq files
## When both a left and a right output exist, the files named <root>.1<ext> and
## <root>.2<ext> (derived from the left output path) are moved onto the two outputs
#if $output_unaligned_reads_l and $output_unaligned_reads_r:
    #from os.path import splitext
    #set _unaligned_root, _unaligned_ext = splitext( str( $output_unaligned_reads_l ) )
    && mv '${ _unaligned_root }.1${_unaligned_ext}' '$output_unaligned_reads_l'
    && mv '${ _unaligned_root }.2${_unaligned_ext}' '$output_unaligned_reads_r'
#end if
#if $output_aligned_reads_l and $output_aligned_reads_r:
    #from os.path import splitext
    #set _aligned_root, _aligned_ext = splitext( str( $output_aligned_reads_l ) )
    && mv '${ _aligned_root }.1${_aligned_ext}' '$output_aligned_reads_l'
    && mv '${ _aligned_root }.2${_aligned_ext}' '$output_aligned_reads_r'
#end if
]]></command>
<!-- Define inputs -->
<inputs>
<!-- Reference genome: pick a built-in HISAT2 index or supply a FASTA dataset from the history -->
<conditional name="reference_genome">
    <param name="source"
           type="select"
           label="Source for the reference genome"
           help="Built-in references were created using default options">
        <option value="indexed" selected="true">Use a built-in genome</option>
        <option value="history">Use a genome from history</option>
    </param>
    <!-- Built-in index: choices come from the hisat2_indexes data table, sorted on column 2 -->
    <when value="indexed">
        <param name="index"
               type="select"
               label="Select a reference genome"
               help="If your genome of interest is not listed, contact the Galaxy team">
            <options from_data_table="hisat2_indexes">
                <filter type="sort_by" column="2"/>
                <validator type="no_options" message="No genomes are available for the selected input dataset"/>
            </options>
        </param>
    </when>
    <!-- History genome: a FASTA dataset chosen by the user -->
    <when value="history">
        <param name="history_item" type="data" format="fasta" label="Select the reference genome"/>
    </when>
</conditional>
<!-- Reads: the library layout selects which dataset inputs and strandness choices are offered -->
<conditional name="library">
    <param name="type" type="select" label="Is this a single or paired library">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
        <option value="paired_collection">Paired-end Dataset Collection</option>
        <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
    </param>
    <!-- Single-end: one dataset; strandness is either F or R -->
    <when value="single">
        <!-- Literal double quotes inside an attribute value must be written as &quot; to keep the tag well-formed -->
        <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
        <param name="rna_strandness" argument="--rna-strandness" type="select" label="Specify strand information"
               help="'F' means a read corresponds to a transcript. 'R' means a read corresponds to the reverse complemented counterpart of a transcript. With this option being used, every read alignment will have an XS attribute tag: '+' means a read belongs to a transcript on '+' strand of genome. '-' means a read belongs to a transcript on '-' strand of genome.">
            <option value="">Unstranded</option>
            <option value="F">Forward (F)</option>
            <option value="R">Reverse (R)</option>
        </param>
    </when>
    <!-- Paired-end: two separate datasets; remaining options come from the paired_end_options macro (defined outside this section) -->
    <when value="paired">
        <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file #1" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
        <param name="input_2" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file #2" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
        <expand macro="paired_end_options" />
    </when>
    <!-- Paired-end: a single dataset collection of type "paired" -->
    <when value="paired_collection">
        <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
        <expand macro="paired_end_options" />
    </when>
    <!-- Paired-end: both mates interleaved in one plain dataset -->
    <when value="paired_interleaved">
        <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="Interleaved FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/>
        <expand macro="paired_end_options" />
    </when>
</conditional>
<!-- Summary options: two independent switches for the alignment summary (format and extra file output) -->
<section name="sum" title="Summary Options" expanded="False">
    <param name="new_summary"
           argument="--new-summary"
           type="boolean"
           checked="false"
           label="Output alignment summary in a more machine-friendly style."
           help="Select this option for compatibility with MultiQC"/>
    <param name="summary_file"
           argument="--summary-file"
           type="boolean"
           checked="false"
           label="Print alignment summary to a file."
           help="Output alignment summary to a file in addition to stderr."/>
</section>
<!-- Advanced Options -->
<section name="adv" title="Advanced Options" expanded="False">
<!-- Input options: read skipping and limiting, end trimming, and quality-score encoding -->
<conditional name="input_options">
    <param name="input_options_selector" type="select" label="Input options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify input options</option>
    </param>
    <when value="defaults"/>
    <when value="advanced">
        <!-- How many reads to skip and how many to align -->
        <param name="skip"
               argument="-s"
               type="integer"
               min="0"
               value="0"
               label="Skip the first N reads or pairs in the input"
               help="default: 0"/>
        <param name="qupto"
               argument="-u"
               type="integer"
               min="0"
               value="0"
               label="Align the first N reads or read pairs from the input (after the first N reads or pairs have been skipped), then stop"
               help="default: no limit"/>
        <!-- Bases removed from either end of each read before alignment -->
        <param name="trim5"
               argument="-5"
               type="integer"
               min="0"
               value="0"
               label="Trim 5' end"
               help="Trim N bases from 5' (left) end of each read before alignment, default: 0"/>
        <param name="trim3"
               argument="-3"
               type="integer"
               min="0"
               value="0"
               label="Trim 3' end"
               help="Trim N bases from 3' (right) end of each read before alignment, default: 0"/>
        <!-- Quality encoding: the option value is the literal command-line flag -->
        <param name="qv_encoding"
               type="select"
               display="radio"
               label="Select quality score encoding"
               help="See help below for more details">
            <option value="--phred33" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option>
            <option value="--phred64">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option>
        </param>
        <param name="solexa_quals"
               argument="--solexa-quals"
               type="boolean"
               truevalue="--solexa-quals"
               falsevalue=""
               checked="false"
               label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)"
               help="--solexa-quals; default: False"/>
        <param name="int_quals"
               argument="--int-quals"
               type="boolean"
               truevalue="--int-quals"
               falsevalue=""
               checked="false"
               label="Are quality values provided as space separated integers?"
               help="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified [default: False]"/>
    </when>
</conditional>
<!-- Alignment options: ceiling on ambiguous characters, quality handling, and strand restriction -->
<conditional name="alignment_options">
    <param name="alignment_options_selector" type="select" label="Alignment options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify alignment options</option>
    </param>
    <when value="defaults" />
    <when value="advanced">
        <!-- Function selector (from the nc_function macro) followed by the two terms it uses -->
        <expand macro="nc_function" name="function_type" argument="--n-ceil" label="Function governing the maximum number of ambiguous characters (usually Ns and/or .s) allowed in a read as a function of read length" help="Reads exceeding this ceiling are filtered out" lselected="true"/>
        <param name="constant_term" type="float" value="0" label="Constant term (B)" help="Constant term for the above function" />
        <param name="coefficient" type="float" value="0.15" label="Coefficient (A)" help="Coefficient for the above function" />
        <param argument="--ignore-quals" name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" label="Ignore quality values" help="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value. I.e. input is treated as though all quality values are high. This is also the default behavior when the input doesn't specify quality values" />
        <!-- Strand restriction: each switch removes one reference strand from the search -->
        <param argument="--nofw" name="skip_forward" type="boolean" truevalue="--nofw" falsevalue="" label="Skip forward strand of reference" help="If --nofw is specified, HISAT2 will not attempt to align unpaired reads to the forward (Watson) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes HISAT2 to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand" />
        <param argument="--norc" name="skip_reverse" type="boolean" truevalue="--norc" falsevalue="" label="Skip reverse strand of reference" help="If --norc is specified, HISAT2 will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --norc causes HISAT2 to explore only those paired-end configurations corresponding to fragments from the forward (Watson) strand" />
    </when>
</conditional>
<!-- Scoring options: minimum-score function plus mismatch, soft-clip, ambiguity and gap penalties -->
<conditional name="scoring_options">
    <param name="scoring_options_selector" type="select" label="Scoring options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify scoring options</option>
    </param>
    <when value="defaults" />
    <when value="advanced">
        <!-- The function selector comes first so that "the above function" in the next two help texts is accurate,
             matching the layout of the alignment and spliced-alignment option groups. Literal double quotes in the
             label are entity-escaped to keep the tag well-formed. -->
        <expand macro="nc_function" name="function_type" argument="--score-min" label="Function governing the minimum alignment score needed for an alignment to be considered &quot;valid&quot; (i.e. good enough to report)" help="This is a function of read length" lselected="true"/>
        <param name="constant_term" type="float" value="0" label="Constant term (B)" help="Constant term for the above function" />
        <param name="coefficient" type="float" value="-0.2" label="Coefficient (A)" help="Coefficient for the above function" />
        <!-- Mismatch penalty bounds (MX and MN) -->
        <param argument="--mp" name="max_mismatch" type="integer" value="6" min="0" label="Maximum mismatch penalty" help="Sets the maximum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
        <param argument="--mp" name="min_mismatch" type="integer" value="2" min="0" label="Minimum mismatch penalty" help="Sets the minimum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
        <!-- Soft-clipping penalty bounds (MX and MN) -->
        <param argument="--sp" name="soft_clip_penalty_max" type="integer" value="2" min="0" label="Maximum soft-clipping penalty" help="Sets the maximum (MX) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
        <param argument="--sp" name="soft_clip_penalty_min" type="integer" value="1" min="0" label="Minimum soft-clipping penalty" help="Sets the minimum (MN) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
        <!-- NOTE(review): the flag is carried by falsevalue and the checkbox declares no checked attribute, so an
             untouched form appears to emit the no-softclip flag; confirm that this default is intended. -->
        <param argument="--no-softclip" name="no_softclip" type="boolean" truevalue="" falsevalue="--no-softclip" label="Allow soft-clipping" />
        <param argument="--np" name="ambiguous_penalty" type="integer" value="1" min="0" label="Ambiguous read penalty" help="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as N" />
        <!-- Gap penalties: open and extend terms for read gaps, then for reference gaps -->
        <param argument="--rdg" name="read_open_penalty" type="integer" value="5" min="0" label="Read gap open penalty" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
        <param argument="--rdg" name="read_extend_penalty" type="integer" value="3" min="0" label="Read gap extend penalty" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
        <param argument="--rfg" name="ref_open_penalty" type="integer" value="5" min="0" label="Reference gap open penalty" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
        <param argument="--rfg" name="ref_extend_penalty" type="integer" value="3" min="0" label="Reference gap extend penalty" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
    </when>
</conditional>
<!-- Spliced alignment options: splice-site penalties, intron length limits and transcript-aware reporting -->
<conditional name="spliced_options">
    <param name="spliced_options_selector" type="select" label="Spliced alignment options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify spliced alignment options</option>
    </param>
    <when value="defaults"/>
    <when value="advanced">
        <!-- Flat penalties per splice site -->
        <param name="canonical_penalty"
               argument="--pen-cansplice"
               type="integer"
               value="0"
               min="0"
               label="Penalty for canonical splice sites"/>
        <param name="noncanonical_penalty"
               argument="--pen-noncansplice"
               type="integer"
               value="12"
               min="0"
               label="Penalty for non-canonical splice sites"/>
        <!-- Intron-length penalty function for canonical splice sites, followed by its two terms -->
        <expand macro="nc_function"
                name="function_type"
                argument="--pen-canintronlen"
                label="Penalty function for long introns with canonical splice sites"
                help="Alignments with shorter introns are preferred to those with longer ones"
                gselected="true"/>
        <param name="constant_term" type="float" value="-8" label="Constant term (B)" help="Constant term for the above function"/>
        <param name="coefficient" type="float" value="1" label="Coefficient (A)" help="Coefficient for the above function"/>
        <!-- Same layout for non-canonical splice sites -->
        <expand macro="nc_function"
                name="nc_function_type"
                argument="--pen-noncanintronlen"
                label="Penalty function for long introns with non-canonical splice sites"
                help="Alignments with shorter introns are preferred to those with longer ones"
                gselected="true"/>
        <param name="nc_constant_term" type="float" value="-8" label="Constant term (B)" help="Constant term for the above function"/>
        <param name="nc_coefficient" type="float" value="1" label="Coefficient (A)" help="Coefficient for the above function"/>
        <!-- Intron length bounds -->
        <param name="min_intron" type="integer" value="20" min="0" label="Minimum intron length"/>
        <param name="max_intron" type="integer" value="500000" min="0" label="Maximum intron length"/>
        <!-- Fragment length limits are only offered once spliced alignment is switched off -->
        <conditional name="no_spliced_alignment_options">
            <param name="no_spliced_alignment"
                   argument="--no-spliced-alignment"
                   type="select"
                   label="Disable spliced alignment">
                <option value="">False</option>
                <option value="--no-spliced-alignment">True</option>
            </param>
            <when value="--no-spliced-alignment">
                <param name="minins"
                       argument="-I"
                       type="integer"
                       value="0"
                       min="0"
                       label="Minimum fragment length for valid paired-end alignments"
                       help="E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates. The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient"/>
                <param name="maxins"
                       argument="-X"
                       type="integer"
                       value="500"
                       min="0"
                       label="Maximum fragment length for valid paired-end alignments"
                       help="E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates. The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient"/>
            </when>
            <when value=""/>
        </conditional>
        <!-- Optional annotation and transcript-assembly reporting mode; each option value is the literal flag -->
        <param name="known_splice_gtf" type="data" format="gtf" optional="true" label="GTF file with known splice sites"/>
        <param name="tma" type="select" display="radio" label="Transcriptome assembly reporting">
            <option value="">Use default reporting</option>
            <option value="--tmo">Report only those alignments within known transcripts</option>
            <option value="--dta">Report alignments tailored for transcript assemblers including StringTie</option>
            <option value="--dta-cufflinks">Report alignments tailored specifically for Cufflinks</option>
        </param>
        <param name="notmplen"
               argument="--no-templatelen-adjustment"
               type="boolean"
               truevalue="--no-templatelen-adjustment"
               falsevalue=""
               label="Disable automatic template length adjustment for RNA-seq reads"
               help="Default: false"/>
        <!-- Enables the separate novel splice site output dataset -->
        <param name="novel_splicesite_outfile"
               type="boolean"
               checked="false"
               label="reports a list of novel splice sites"
               help="Default: false"/>
    </when>
</conditional>
<!-- Reporting options: optional cap on the number of primary alignments reported per read -->
<conditional name="reporting_options">
    <param name="reporting_options_selector" type="select" label="Reporting options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify reporting options</option>
    </param>
    <when value="defaults"/>
    <when value="advanced">
        <!-- Optional integer: it may be left empty -->
        <param name="max_primary"
               argument="-k"
               type="integer"
               min="0"
               optional="true"
               label="Primary alignments"
               help="Search for at most K distinct, primary alignments for each read. Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. The search terminates when it can't find more distinct valid alignments, or when it finds K, whichever happens first. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. For reads that have more than K distinct, valid alignments, HISAT2 does not guarantee that the K alignments reported are the best possible in terms of alignment score. HISAT2 is not designed with large values for -k in mind, so when aligning reads to long repetitive genomes, a large K can be very, very slow. Default: 5 (HFM) or 10 (HGFM)"/>
    </when>
</conditional>
<!-- Output options: switches that request separate datasets for unaligned and aligned reads -->
<conditional name="output_options">
    <param name="output_options_selector" type="select" label="Output options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify output options</option>
    </param>
    <when value="defaults"/>
    <when value="advanced">
        <!-- Both booleans yield the strings "true" / "false"; the output filters test these parameters -->
        <param name="unaligned_file"
               argument="--un/--un-conc"
               type="boolean"
               truevalue="true"
               falsevalue="false"
               checked="false"
               label="Write unaligned reads (in fastq format) to separate file(s)"
               help="This triggers --un parameter for single reads and --un-conc for paired reads"/>
        <param name="aligned_file"
               argument="--al/--al-conc"
               type="boolean"
               truevalue="true"
               falsevalue="false"
               checked="false"
               label="Write aligned reads (in fastq format) to separate file(s)"
               help="This triggers --al parameter for single reads and --al-conc for paired reads"/>
    </when>
</conditional>
<!-- SAM options: unaligned-record suppression, read group header fields, reference renaming, secondary SEQ/QUAL -->
<conditional name="sam_options">
    <param name="sam_options_selector" type="select" label="SAM options">
        <option value="defaults">Use default values</option>
        <!-- Label names this group; it previously repeated the wording of the Output options selector -->
        <option value="advanced">Specify SAM options</option>
    </param>
    <when value="defaults" />
    <when value="advanced">
        <param name="no_unal" argument="--no-unal" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Suppress SAM records for reads that failed to align." help="Default: false"/>
        <!-- Read groups: an ID plus any number of extra TAG:VAL fields -->
        <conditional name="read_groups">
            <param name="rg_labels" type="select" label="Edit Read Group IDs">
                <option value="No">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No"/>
            <when value="Yes">
                <param name="rg_id" argument="--rg-id" type="text" value="read_group" label="Read Group ID" help="Set the read group ID to the value set here. This causes the SAM @RG header line to be printed, with this input value as the value associated with the ID: tag. It also causes the RG:Z: extra field to be attached to each SAM output record, with value set to the value set here." />
                <repeat name="read_groups" title="Read Groups text">
                    <param name="rg" argument="--rg" type="text" optional="true" label="Read Group text" help="Add specified text (usually of the form TAG:VAL, e.g. SM:Pool1) as a field on the @RG header line. Note: in order for the @RG line to appear, Read Group ID must also be specified. This is because the ID tag is required by the SAM Spec. Specify --rg multiple times to set multiple fields. See the SAM Spec for details about what fields are legal." />
                </repeat>
            </when>
        </conditional>
        <!-- The option value is the literal flag inserted into the command line (empty for the default) -->
        <param name="chr_text" type="select" label="Add/remove 'chr' from reference names in alignment files">
            <option value="" selected="true">Use default values</option>
            <option value="--remove-chrname">Remove ‘chr’ from reference names in alignment (e.g., chr18 to 18)</option>
            <option value="--add-chrname">Add ‘chr’ to reference names in alignment (e.g., 18 to chr18)</option>
        </param>
        <param name="omit_sec_seq" argument="--omit-sec-seq" type="boolean" truevalue="true" falsevalue="false" checked="false" label="When printing secondary alignments, HISAT2 by default will write out the SEQ and QUAL strings. Specifying this option causes HISAT2 to print an asterisk in those fields instead." help="Default: false"/>
    </when>
</conditional>
<!-- Other options: control of the pseudo-random number generator -->
<conditional name="other_options">
    <param name="other_options_selector" type="select" label="Other options">
        <option value="defaults">Use default values</option>
        <option value="advanced">Specify other options</option>
    </param>
    <when value="defaults"/>
    <when value="advanced">
        <param name="seed"
               argument="--seed"
               type="integer"
               value="0"
               min="0"
               label="Use this number as the seed for pseudo-random number generator"
               help="Default=0"/>
        <param name="non_deterministic"
               argument="--non-deterministic"
               type="boolean"
               truevalue="--non-deterministic"
               falsevalue=""
               label="Re-initialize the pseudo-random generator for each read using the current time"
               help="see Help below for explanation of this option; default: False"/>
    </when>
</conditional>
</section>
</inputs>
<!-- Define outputs -->
<outputs>
<!-- Main result: the alignments as a BAM dataset (always produced, no filter) -->
<data name="output_alignments"
      format="bam"
      label="${tool.name} on ${on_string}: aligned reads (BAM)">
    <actions>
        <!-- dbkey handling comes from the dbKeyActions macro, defined outside this section -->
        <expand macro="dbKeyActions"/>
    </actions>
</data>
<!-- Unaligned fastq (L): created when the advanced output options request unaligned reads -->
<data name="output_unaligned_reads_l" format="fastqsanger" label="${tool.name} on ${on_string}: unaligned reads (L)">
    <filter>adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['unaligned_file'] is True</filter>
    <actions>
        <!-- Take the datatype from the input reads instead of the static format above -->
        <conditional name="library.type">
            <when value="single">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <when value="paired">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <!-- input_1 is a paired collection here: use the extension of its forward element -->
            <when value="paired_collection">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                </action>
            </when>
            <!-- input_1 is a plain dataset for interleaved reads (no forward element), so read its own ext -->
            <when value="paired_interleaved">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
        </conditional>
        <expand macro="dbKeyActions" />
    </actions>
</data>
<!-- Aligned fastq (L): created when the advanced output options request aligned reads -->
<data name="output_aligned_reads_l" format="fastqsanger" label="${tool.name} on ${on_string}: aligned reads (L)">
    <filter>adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['aligned_file'] is True</filter>
    <actions>
        <!-- Take the datatype from the input reads instead of the static format above -->
        <conditional name="library.type">
            <when value="single">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <when value="paired">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <!-- input_1 is a paired collection here: use the extension of its forward element -->
            <when value="paired_collection">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                </action>
            </when>
            <!-- input_1 is a plain dataset for interleaved reads (no forward element), so read its own ext -->
            <when value="paired_interleaved">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
        </conditional>
        <expand macro="dbKeyActions" />
    </actions>
</data>
<!-- Unaligned fastq (R): second-mate file; the filter limits it to paired and paired_collection libraries -->
<data name="output_unaligned_reads_r" format="fastqsanger" label="${tool.name} on ${on_string}: unaligned reads (R)">
    <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection') and (adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['unaligned_file'] is True) </filter>
    <actions>
        <!-- Take the datatype from the input reads. The single and paired_interleaved branches are excluded by
             the filter above and are kept only so this block mirrors the (L) output. -->
        <conditional name="library.type">
            <when value="single">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <when value="paired">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <!-- input_1 is a paired collection here: use the extension of its forward element -->
            <when value="paired_collection">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                </action>
            </when>
            <!-- input_1 is a plain dataset for interleaved reads (no forward element), so read its own ext -->
            <when value="paired_interleaved">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
        </conditional>
        <expand macro="dbKeyActions" />
    </actions>
</data>
<!-- Aligned fastq (R): second-mate file; the filter limits it to paired and paired_collection libraries -->
<data name="output_aligned_reads_r" format="fastqsanger" label="${tool.name} on ${on_string}: aligned reads (R)">
    <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection') and (adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['aligned_file'] is True) </filter>
    <actions>
        <!-- Take the datatype from the input reads. The single and paired_interleaved branches are excluded by
             the filter above and are kept only so this block mirrors the (L) output. -->
        <conditional name="library.type">
            <when value="single">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <when value="paired">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
            <!-- input_1 is a paired collection here: use the extension of its forward element -->
            <when value="paired_collection">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                </action>
            </when>
            <!-- input_1 is a plain dataset for interleaved reads (no forward element), so read its own ext -->
            <when value="paired_interleaved">
                <action type="format">
                    <option type="from_param" name="library.input_1" param_attribute="ext" />
                </action>
            </when>
        </conditional>
        <expand macro="dbKeyActions" />
    </actions>
</data>
<!-- Alignment summary: collected from summary.txt in the job working directory when the summary-file switch is on -->
<data name="summary_file"
      format="txt"
      from_work_dir="summary.txt"
      label="${tool.name} on ${on_string}: Mapping summary">
    <filter>sum['summary_file'] is True</filter>
    <actions>
        <expand macro="dbKeyActions"/>
    </actions>
</data>
<!-- Novel splice sites: tabular dataset, present only when requested in the advanced spliced alignment options -->
<data name="novel_splicesite_output"
      format="tabular"
      label="${tool.name} on ${on_string}: Novel Splice Sites">
    <filter>adv['spliced_options']['spliced_options_selector'] == 'advanced' and adv['spliced_options']['novel_splicesite_outfile'] is True</filter>
    <actions>
        <expand macro="dbKeyActions"/>
    </actions>
</data>
</outputs>
<!-- Define tests -->
<tests>
<!-- Paired-end reads against a history reference: the BAM output must match and be coordinate-sorted -->
<test expect_num_outputs="1">
    <!-- Reference -->
    <param name="source" value="history"/>
    <param name="history_item" value="phiX.fa" ftype="fasta"/>
    <!-- Reads -->
    <param name="type" value="paired"/>
    <param name="input_1" value="hisat_input_1_forward.fastq" ftype="fastqsanger"/>
    <param name="input_2" value="hisat_input_1_reverse.fastq" ftype="fastqsanger"/>
    <!-- Options -->
    <param name="adv|reporting_options|reporting_options_selector" value="advanced"/>
    <param name="novel_splicesite_outfile" value="false"/>
    <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Scoring coefficient override on paired-end reads; compared against the same BAM as the default run -->
<!-- NOTE(review): scoring_options_selector is not set to "advanced" here; confirm the coefficient is actually applied -->
<test expect_num_outputs="1">
    <!-- Reference -->
    <param name="source" value="history"/>
    <param name="history_item" value="phiX.fa" ftype="fasta"/>
    <!-- Reads -->
    <param name="type" value="paired"/>
    <param name="input_1" value="hisat_input_1_forward.fastq" ftype="fastqsanger"/>
    <param name="input_2" value="hisat_input_1_reverse.fastq" ftype="fastqsanger"/>
    <!-- Options -->
    <param name="adv|scoring_options|coefficient" value="-0.3"/>
    <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Built-in (indexed) reference: the input reads carry the phiX dbkey -->
<test expect_num_outputs="1">
    <!-- Reference -->
    <param name="source" value="indexed"/>
    <!-- Reads -->
    <param name="type" value="paired"/>
    <param name="input_1" value="hisat_input_1_forward.fastq" ftype="fastqsanger" dbkey="phiX"/>
    <param name="input_2" value="hisat_input_1_reverse.fastq" ftype="fastqsanger" dbkey="phiX"/>
    <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Trimming: 15 bases removed from each end of every read before alignment -->
<test expect_num_outputs="1">
    <!-- Reference -->
    <param name="source" value="history"/>
    <param name="history_item" value="phiX.fa" ftype="fasta"/>
    <!-- Reads -->
    <param name="type" value="paired"/>
    <param name="input_1" value="hisat_input_2_forward.fastq" ftype="fastqsanger"/>
    <param name="input_2" value="hisat_input_2_reverse.fastq" ftype="fastqsanger"/>
    <!-- Options -->
    <param name="input_options_selector" value="advanced"/>
    <param name="trim5" value="15"/>
    <param name="trim3" value="15"/>
    <output name="output_alignments" file="hisat_output_2.bam" ftype="bam" lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Paired-end options: trimming combined with the no_mixed and no_discordant switches -->
<test expect_num_outputs="1">
    <!-- Reference -->
    <param name="source" value="history"/>
    <param name="history_item" value="phiX.fa" ftype="fasta"/>
    <!-- Reads -->
    <param name="type" value="paired"/>
    <param name="input_1" value="hisat_input_2_forward.fastq" ftype="fastqsanger"/>
    <param name="input_2" value="hisat_input_2_reverse.fastq" ftype="fastqsanger"/>
    <!-- Input options -->
    <param name="input_options_selector" value="advanced"/>
    <param name="trim5" value="15"/>
    <param name="trim3" value="15"/>
    <!-- Paired options -->
    <param name="paired_options_selector" value="advanced"/>
    <param name="no_mixed" value="True"/>
    <param name="no_discordant" value="True"/>
    <output name="output_alignments" file="hisat_output_3.bam" ftype="bam" lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Single-end FASTA input with unaligned reads written out; the unaligned output is compared to the input file -->
<test expect_num_outputs="2">
    <param name="type" value="single" />
    <param name="source" value="history" />
    <!-- history_item is supplied once; the repeated entry for the same parameter was removed -->
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="output_options_selector" value="advanced" />
    <param name="unaligned_file" value="true" />
    <param name="input_1" ftype="fasta" value="test_unaligned_reads.fasta" />
    <output name="output_unaligned_reads_l" file="test_unaligned_reads.fasta" sort="true"/>
</test>
<!-- Paired FASTA input with both read-output switches on: five outputs expected, the two unaligned files are compared -->
<test expect_num_outputs="5">
    <!-- Reference -->
    <param name="source" value="history"/>
    <param name="history_item" value="phiX.fa" ftype="fasta"/>
    <!-- Reads: the same FASTA file is used for both mates -->
    <param name="type" value="paired"/>
    <param name="input_1" value="test_unaligned_reads.fasta" ftype="fasta"/>
    <param name="input_2" value="test_unaligned_reads.fasta" ftype="fasta"/>
    <!-- Output options -->
    <param name="output_options_selector" value="advanced"/>
    <param name="unaligned_file" value="true"/>
    <param name="aligned_file" value="true"/>
    <output name="output_unaligned_reads_l" file="test_unaligned_reads.fasta" sort="true"/>
    <output name="output_unaligned_reads_r" file="test_unaligned_reads.fasta" sort="true"/>
</test>
<!-- Gzip-compressed reads (fastqsanger.gz): same settings and expected BAM as the paired-options test -->
<test expect_num_outputs="1">
    <!-- Reference -->
    <param name="source" value="history"/>
    <param name="history_item" value="phiX.fa" ftype="fasta"/>
    <!-- Reads -->
    <param name="type" value="paired"/>
    <param name="input_1" value="hisat_input_2_forward.fastq.gz" ftype="fastqsanger.gz"/>
    <param name="input_2" value="hisat_input_2_reverse.fastq.gz" ftype="fastqsanger.gz"/>
    <!-- Input options -->
    <param name="input_options_selector" value="advanced"/>
    <param name="trim5" value="15"/>
    <param name="trim3" value="15"/>
    <!-- Paired options -->
    <param name="paired_options_selector" value="advanced"/>
    <param name="no_mixed" value="True"/>
    <param name="no_discordant" value="True"/>
    <output name="output_alignments" file="hisat_output_3.bam" ftype="bam" lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Paired-end input supplied as bzip2-compressed FASTQ (fastqsanger.bz2),
     with trim3/trim5 set to 15 and no_mixed/no_discordant enabled.
     The alignments are compared against hisat_output_3.bam. -->
<test expect_num_outputs="1">
    <param name="type" value="paired" />
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_2_forward.fastq.bz2" />
    <param name="input_2" ftype="fastqsanger.bz2" value="hisat_input_2_reverse.fastq.bz2" />
    <param name="input_options_selector" value="advanced" />
    <param name="trim5" value="15" />
    <param name="trim3" value="15" />
    <param name="paired_options_selector" value="advanced" />
    <param name="no_mixed" value="True" />
    <param name="no_discordant" value="True" />
    <output name="output_alignments" ftype="bam" file="hisat_output_3.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Paired-end run with rna_strandness set to FR; the alignments are
     compared against hisat_output_4.bam. -->
<test expect_num_outputs="1">
    <param name="type" value="paired" />
    <param name="rna_strandness" value="FR" />
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="input_1" ftype="fastqsanger" value="hisat_input_1_forward.fastq" />
    <param name="input_2" ftype="fastqsanger" value="hisat_input_1_reverse.fastq" />
    <output name="output_alignments" ftype="bam" file="hisat_output_4.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Single-end run with the summary file requested (summary_file and
     new_summary both enabled). Two outputs are expected; only the summary
     is compared against test data (hisat_output.summary). -->
<test expect_num_outputs="2">
    <param name="type" value="single" />
    <param name="rna_strandness" value="R" />
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="input_1" ftype="fastqsanger" value="hisat_input_1_forward.fastq" />
    <param name="summary_file" value="true" />
    <param name="new_summary" value="true" />
    <output name="summary_file" ftype="txt" file="hisat_output.summary" />
</test>
<!-- Interleaved paired-end FASTQ (type paired_interleaved) supplied as a
     single uncompressed file; compared against hisat_output_1.bam. -->
<test expect_num_outputs="1">
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="type" value="paired_interleaved" />
    <param name="input_1" ftype="fastqsanger" value="hisat_input_1_interleaved.fastq" />
    <output name="output_alignments" ftype="bam" file="hisat_output_1.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Interleaved paired-end FASTQ supplied bzip2-compressed
     (fastqsanger.bz2); compared against hisat_output_1.bam. -->
<test expect_num_outputs="1">
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="type" value="paired_interleaved" />
    <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_1_interleaved.fastq.bz2" />
    <output name="output_alignments" ftype="bam" file="hisat_output_1.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Interleaved paired-end FASTQ supplied gzip-compressed
     (fastqsanger.gz); compared against hisat_output_1.bam. -->
<test expect_num_outputs="1">
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="type" value="paired_interleaved" />
    <param name="input_1" ftype="fastqsanger.gz" value="hisat_input_1_interleaved.fastq.gz" />
    <output name="output_alignments" ftype="bam" file="hisat_output_1.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Interleaved paired-end reads supplied as FASTA instead of FASTQ;
     compared against a separate expected file, hisat_output_1_noqual.bam. -->
<test expect_num_outputs="1">
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="type" value="paired_interleaved" />
    <param name="input_1" ftype="fasta" value="hisat_input_1_interleaved.fasta" />
    <output name="output_alignments" ftype="bam" file="hisat_output_1_noqual.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
<!-- Single-end run with the novel splice site output enabled through the
     advanced spliced-alignment options. Both the alignments and the
     splice site table are compared against test data.
     expect_num_outputs is stated explicitly, like every other test in this
     section, so that an unexpected extra or missing output fails the test. -->
<test expect_num_outputs="2">
    <param name="type" value="single" />
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="input_1" ftype="fastqsanger" value="hisat_input_1_split_forward.fastq" />
    <param name="rna_strandness" value="R" />
    <param name="adv|spliced_options|spliced_options_selector" value="advanced"/>
    <param name="adv|spliced_options|novel_splicesite_outfile" value="true" />
    <output name="output_alignments" file="hisat_output_spliced_1.bam" ftype="bam" lines_diff="2" sort="true" >
        <metadata name="sort_order" value="coordinate"/>
    </output>
    <output name="novel_splicesite_output" file="novel_splicesite_out.tab" ftype="tabular" />
</test>
<!-- Exercises the advanced SAM options on interleaved bzip2-compressed input:
     no_unal, two read-group entries (BC:test1 then CN:test2), chr_text set to
     "add-chrname" (passed with its leading dashes) and omit_sec_seq.
     The alignments are compared against hisat_output_5.bam. -->
<test expect_num_outputs="1">
    <param name="source" value="history" />
    <param name="history_item" ftype="fasta" value="phiX.fa" />
    <param name="type" value="paired_interleaved" />
    <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_1_interleaved.fastq.bz2" />
    <param name="adv|sam_options|sam_options_selector" value="advanced"/>
    <param name="adv|sam_options|no_unal" value="true"/>
    <param name="adv|sam_options|chr_text" value="--add-chrname"/>
    <param name="adv|sam_options|omit_sec_seq" value="True"/>
    <param name="adv|sam_options|read_groups|rg_labels" value="Yes"/>
    <!-- The order of the repeat instances is kept: BC first, CN second. -->
    <repeat name="adv|sam_options|read_groups|read_groups">
        <param name="rg" value="BC:test1"/>
    </repeat>
    <repeat name="adv|sam_options|read_groups|read_groups">
        <param name="rg" value="CN:test2"/>
    </repeat>
    <output name="output_alignments" ftype="bam" file="hisat_output_5.bam"
            lines_diff="2" sort="true">
        <metadata name="sort_order" value="coordinate"/>
    </output>
</test>
</tests>
<help><![CDATA[
Introduction
============
What is HISAT?
--------------
`HISAT <http://ccb.jhu.edu/software/hisat>`__ is a fast and sensitive
spliced alignment program. As part of HISAT, we have developed a new
indexing scheme based on the Burrows-Wheeler transform
(`BWT <http://en.wikipedia.org/wiki/Burrows-Wheeler_transform>`__) and
the `FM index <http://en.wikipedia.org/wiki/FM-index>`__, called
hierarchical indexing, that employs two types of indexes: (1) one global
FM index representing the whole genome, and (2) many separate local FM
indexes for small regions collectively covering the genome. Our
hierarchical index for the human genome (about 3 billion bp) includes
~48,000 local FM indexes, each representing a genomic region of
~64,000bp. As the basis for non-gapped alignment, the FM index is
extremely fast with a low memory footprint, as demonstrated by
`Bowtie <http://bowtie-bio.sf.net>`__. In addition, HISAT provides
several alignment strategies specifically designed for mapping different
types of RNA-seq reads. All these together, HISAT enables extremely fast