Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version Update of HapCUT2 and Restructuring of XML file #5975

Merged
merged 3 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 38 additions & 86 deletions tools/hapcut2/hapcut2.xml
Original file line number Diff line number Diff line change
@@ -1,40 +1,16 @@
<tool id="hapcut2" name="Hapcut2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
<tool id="hapcut2" name="Hapcut2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
<description>haplotype assembly for diploid organisms</description>

<macros>
<token name="@TOOL_VERSION@">1.3.3</token>
<token name="@TOOL_VERSION@">1.3.4</token>
<token name="@VERSION_SUFFIX@">1</token>
<xml name="reference_genome_input">
<conditional name="reference_genome">
<param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options.">
<option value="indexed" selected="true">Use a built-in genome</option>
<option value="history">Use a genome from history</option>
</param>
<when value="indexed">
<param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team.">
<options from_data_table="fasta_indexes">
<filter type="sort_by" column="2" />
<validator type="no_options" message="No genomes are available for the selected input dataset" />
</options>
</param>
</when>
<when value="history">
<param name="fasta" type="data" format="fasta,fasta.gz"
label="Reference genome fasta file"
help="The reference genome is required for long-read optimization."
/>
</when>
</conditional>
</xml>
<import>macros.xml</import>
</macros>

<xrefs>
<xref type="bio.tools">hapcut2</xref>
</xrefs>
<requirements>
<requirement type="package" version="@TOOL_VERSION@">hapcut2</requirement>
</requirements>

<command detect_errors="exit_code"><![CDATA[

## Prep inputs
Expand Down Expand Up @@ -78,27 +54,11 @@ ln -s '$input_bam' input.bam
#if $optimization.choice == 'hic':
--HiC 1
#end if

]]></command>

<inputs>
<param
name="input_bam"
argument="--bam"
type="data"
format="bam"
label="Input BAM file"
help="Coordinate-sorted BAM file"
/>
<param
name="input_vcf"
argument="--VCF"
type="data"
format="vcf"
label="Input VCF file"
help="Variant file with genotypes for a single individual"
/>

<param name="input_bam" argument="--bam" type="data" format="bam" label="Input BAM file" help="Coordinate-sorted BAM file"/>
<param name="input_vcf" argument="--VCF" type="data" format="vcf" label="Input VCF file" help="Variant file with genotypes for a single individual"/>
<conditional name="optimization">
<!-- TODO: include 10X (requires extra processing step) -->
<param name="choice" type="select" display="radio" label="Optimization">
Expand All @@ -107,49 +67,31 @@ ln -s '$input_bam' input.bam
<option value="ont">Oxford Nanopore</option>
<option value="hic">Hi-C</option>
</param>

<when value="default"></when>
<when value="pacbio">
<expand macro="reference_genome_input" />
<expand macro="reference_genome_input"/>
</when>

<when value="ont">
<expand macro="reference_genome_input" />
<expand macro="reference_genome_input"/>
</when>
<when value="hic"></when>
</conditional>

<param name="output_phased" type="boolean" label="Output phased VCF file?"
checked="true"
help="Output variant calls on the haplotype assembly"
/>
<param name="output_fragments" type="boolean" label="Output fragments file?"
help="Output fragments collected by extractHAIRS"
/>
<param name="output_phased" type="boolean" label="Output phased VCF file?" checked="true" help="Output variant calls on the haplotype assembly"/>
<param name="output_fragments" type="boolean" label="Output fragments file?" help="Output fragments collected by extractHAIRS"/>

<section name="advanced" title="Advanced parameters">
<param argument="--maxIS" type="integer" label="Maximum insert size"
optional="true" value="1000" min="0"
help="Maximum insert size for a paired-end read to be considered as a single fragment for phasing"
/>

<param argument="--minIS" type="integer" label="Minimum insert size"
optional="true" value="0" min="0"
help="Minimum insert size for a paired-end read to be considered as a single fragment for phasing"
/>
<param argument="--maxIS" type="integer" label="Maximum insert size" optional="true" value="1000" min="0" help="Maximum insert size for a paired-end read to be considered as a single fragment for phasing"/>
<param argument="--minIS" type="integer" label="Minimum insert size" optional="true" value="0" min="0" help="Minimum insert size for a paired-end read to be considered as a single fragment for phasing"/>
</section>
</inputs>

<outputs>
<data name="haplotype" format="txt" from_work_dir="haplotype.out"
label="${tool.name} on ${on_string}: Haplotype block"
/>
<data name="haplotype_phased" format="vcf" from_work_dir="haplotype.out.phased.VCF"
label="${tool.name} on ${on_string}: Phased haplotype VCF"
>
<data name="haplotype" format="txt" from_work_dir="haplotype.out" label="${tool.name} on ${on_string}: Haplotype block"/>
<data name="haplotype_phased" format="vcf" from_work_dir="haplotype.out.phased.VCF" label="${tool.name} on ${on_string}: Phased haplotype VCF">
<filter>output_phased</filter>
</data>
<data name="frags" format="txt" from_work_dir="frags.dat"
label="${tool.name} on ${on_string}: Fragments"
>
<data name="frags" format="txt" from_work_dir="frags.dat" label="${tool.name} on ${on_string}: Fragments">
<filter>output_fragments</filter>
</data>
</outputs>
Expand All @@ -161,7 +103,9 @@ ln -s '$input_bam' input.bam
<param name="input_vcf" ftype="vcf" value="input.vcf"/>
<param name="output_fragments" value="0"/>
<param name="output_phased" value="0"/>
<param name="optimization" value="default"/>
<conditional name="optimization">
<param name="choice" value="default"/>
</conditional>
<output name="haplotype" ftype="txt" file="output_haplotype.out"/>
</test>

Expand All @@ -171,7 +115,9 @@ ln -s '$input_bam' input.bam
<param name="input_vcf" ftype="vcf" value="input.vcf"/>
<param name="output_fragments" value="1"/>
<param name="output_phased" value="1"/>
<param name="optimization" value="default"/>
<conditional name="optimization">
<param name="choice" value="default"/>
</conditional>
<output name="frags" ftype="txt" file="output_frag.dat"/>
<output name="haplotype" ftype="txt" file="output_haplotype.out"/>
<output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
Expand All @@ -183,7 +129,9 @@ ln -s '$input_bam' input.bam
<param name="input_vcf" ftype="vcf" value="input.vcf"/>
<param name="output_fragments" value="1"/>
<param name="output_phased" value="1"/>
<param name="optimization" value="default"/>
<conditional name="optimization">
<param name="choice" value="default"/>
</conditional>
<output name="frags" ftype="txt" file="output_frag.dat"/>
<output name="haplotype" ftype="txt" file="output_haplotype.out"/>
<output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
Expand All @@ -195,8 +143,12 @@ ln -s '$input_bam' input.bam
<param name="input_vcf" ftype="vcf" value="input.vcf"/>
<param name="output_fragments" value="1"/>
<param name="output_phased" value="1"/>
<param name="optimization" value="pacbio"/>
<param name="reference_genome" value="history"/>
<conditional name="optimization">
<param name="choice" value="pacbio"/>
</conditional>
<conditional name="reference_genome">
<param name="source" value="history"/>
</conditional>
<param name="fasta" ftype="fasta" value="ref.fasta"/>
<output name="frags" ftype="txt" file="output_frag.dat"/>
<output name="haplotype" ftype="txt" file="output_haplotype.out"/>
Expand All @@ -212,8 +164,12 @@ ln -s '$input_bam' input.bam
<output name="frags" ftype="txt" file="output_frag.dat"/>
<output name="haplotype" ftype="txt" file="output_haplotype.out"/>
<output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
<param name="optimization" value="ont"/>
<param name="reference_genome" value="history"/>
<!-- Select the Oxford Nanopore branch. The selector param of the
     "optimization" conditional is named "choice" (see <inputs>), so the
     test must set "choice", not "optimization". -->
<conditional name="optimization">
<param name="choice" value="ont"/>
</conditional>
<conditional name="reference_genome">
<param name="source" value="history"/>
</conditional>
<param name="fasta" ftype="fasta" value="ref.fasta"/>
</test>
</tests>
Expand Down Expand Up @@ -259,13 +215,9 @@ Input data should reference a single diploid individual mapped to a reference ge

See `HapCUT2 on GitHub <https://github.com/vibansal/HapCUT2>`_ for more detailed information.


.. class:: infomark

HapCUT2 was wrapped by the Galaxy Australia team.

]]></help>
<citations>
<citation type="doi">10.1101/gr.213462.116</citation>
</citations>
<expand macro="creator"/>
</tool>
30 changes: 30 additions & 0 deletions tools/hapcut2/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!-- Shared macro definitions for the HapCUT2 tool wrapper; imported via
     <import>macros.xml</import> in hapcut2.xml. -->
<macros>
    <!-- Reference-genome selector: the user picks either an entry from the
         "fasta_indexes" data table or a FASTA dataset from the history.
         Expanded under the "pacbio" and "ont" optimization branches. -->
    <xml name="reference_genome_input">
        <conditional name="reference_genome">
            <param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options.">
                <option value="indexed" selected="true">Use a built-in genome</option>
                <option value="history">Use a genome from history</option>
            </param>
            <when value="indexed">
                <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team.">
                    <options from_data_table="fasta_indexes">
                        <!-- Sort the listed genomes by column 2 of the data table. -->
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No genomes are available for the selected input dataset" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="fasta" type="data" format="fasta,fasta.gz"
                       label="Reference genome fasta file"
                       help="The reference genome is required for long-read optimization."
                />
            </when>
        </conditional>
    </xml>

    <!-- Creator metadata, expanded at the end of the tool definition. -->
    <xml name="creator">
        <creator>
            <organization name="Galaxy Australia" url="https://site.usegalaxy.org.au"/>
        </creator>
    </xml>
</macros>