Skip to content

Commit

Permalink
Merge pull request galaxyproject#6163 from rlibouba/add_evidencemodeler
Browse files Browse the repository at this point in the history
add evidencemodeler
  • Loading branch information
bgruening authored Sep 15, 2024
2 parents 3d8e0f1 + 873f4d3 commit 982fe89
Show file tree
Hide file tree
Showing 10 changed files with 4,542 additions and 0 deletions.
9 changes: 9 additions & 0 deletions tools/evidencemodeler/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Tool Shed metadata for the EVidenceModeler Galaxy wrapper.
name: evidencemodeler
owner: iuc
categories: [Genome annotation]
description: EVidenceModeler (EVM) combines ab initio genetic predictions with protein and transcript alignments in weighted consensus genetic structures.
homepage_url: https://github.com/EVidenceModeler/EVidenceModeler?tab=readme-ov-file
# Block scalar: the continuation lines must be indented to belong to this key.
long_description: |
  EVM provides a flexible and intuitive framework for combining various types of evidence in a single
  automated system for annotating genetic structures.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/evidencemodeler
132 changes: 132 additions & 0 deletions tools/evidencemodeler/evidencemodeler.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
<tool id="evidencemodeler" name="EVidenceModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Galaxy wrapper around the EVidenceModeler command-line driver.
         Version tokens, requirements and citations are imported from macros.xml. -->
    <description>combines ab initio gene predictions, protein and transcript alignments into gene structures</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">EvidenceModeler</xref>
    </xrefs>
    <expand macro="requirements"/>

    <command detect_errors="exit_code"><![CDATA[
        ## Stage the inputs under fixed names in the job working directory.
        ln -s '$input_genome' ./input_genome.fasta &&
        ln -s '$input_predictions' ./input_predictions.gff &&
        ln -s '$input_weights' ./input_weights.txt &&
        ## Optional datasets are only linked (and passed below) when supplied,
        ## so no dangling 'None' symlink is created.
        #if $input_proteins:
            ln -s '$input_proteins' ./input_proteins.gff &&
        #end if
        #if $input_transcript:
            ln -s '$input_transcript' ./input_transcript.gff &&
        #end if
        EVidenceModeler
            --sample_id galaxy
            --genome './input_genome.fasta'
            --gene_predictions './input_predictions.gff'
            --weights './input_weights.txt'
            --segmentSize $segmentsize
            --overlapSize $overlapsize
            #if $input_proteins:
                --protein_alignments './input_proteins.gff'
            #end if
            #if $input_transcript:
                --transcript_alignments './input_transcript.gff'
            #end if
            #if $opt.input_repeat:
                --repeats '$opt.input_repeat'
            #end if
            #if $opt.input_terminalexon:
                --terminalExons '$opt.input_terminalexon'
            #end if
            ## The select is optional: omit the flag when nothing is chosen
            ## instead of passing the literal string 'None'.
            #if $opt.stop_codon:
                --stop_codons '$opt.stop_codon'
            #end if
            --min_intron_length $opt.min_intron_length
            --search_long_introns $opt.search_long_introns
            --re_search_intergenic $opt.re_search_intergenic
            --terminal_intergenic_re_search $opt.terminal_intergenic_re_search
    ]]></command>

    <inputs>
        <param name="input_genome" type="data" format="fasta" label="Genome input"/>
        <param name="input_predictions" type="data" format="gff3" label="Gene predictions input"/>
        <!-- The weights file is a plain text table, not GFF3. -->
        <param name="input_weights" type="data" format="tabular,txt" label="Weights for evidence types file" help="See documentation for formatting: 'Weights' section"/>
        <param name="input_proteins" type="data" optional="true" format="gff3" label="Protein alignments input" help="Optional but recommended"/>
        <param name="input_transcript" type="data" optional="true" format="gff3" label="Transcript alignments input" help="Optional but recommended"/>
        <param argument="--segmentSize" name="segmentsize" value="100000" type="integer" label="Length of a single sequence" help="This value must be less than 1 MB" />
        <param argument="--overlapSize" name="overlapsize" value="10000" type="integer" label="Length of sequence overlap between segmented sequences" help="The length must be at least equivalent to one or two expected gene lengths" />
        <section name="opt" title="Advanced option" expanded="false">
            <param name="input_repeat" type="data" optional="true" format="gff3" label="Masked genome repeats"/>
            <param name="input_terminalexon" type="data" optional="true" format="gff3" label="Additional file of terminal exons to be taken into account" help="From long-orfs PASA"/>
            <param name="stop_codon" argument="--stop_codons" type="select" multiple="true" optional="true" label="List of stop codon" help="For Tetrahymena, set TGA">
                <option value="TAA,TGA,TAG" selected="true">TAA,TGA,TAG</option>
                <option value="TAA">TAA</option>
                <option value="TGA">TGA</option>
                <option value="TAG">TAG</option>
            </param>
            <param argument="--min_intron_length" type="integer" value="20" label="Minimum length for an intron" help="Default 20 bp" />
            <param argument="--search_long_introns" type="select" label="Reexamine long introns" help="Can find nested genes, but also can result in false positives">
                <option value="0" selected="true">Off</option>
                <option value="1">On</option>
            </param>
            <param argument="--re_search_intergenic" type="select" label="Reexamines intergenic regions of minimum length">
                <option value="0" selected="true">Off</option>
                <option value="1">On</option>
            </param>
            <!-- NOTE(review): label reworded so it no longer duplicates the previous
                 parameter's label; confirm the wording against the EVM usage text. -->
            <param argument="--terminal_intergenic_re_search" type="select" label="Reexamines genomic regions outside of the span of all predicted genes">
                <option value="0" selected="true">Off</option>
                <option value="1">On</option>
            </param>
        </section>
    </inputs>

    <outputs>
        <!-- EVM writes GFF3, so the dataset is typed as gff3 to match its label and content. -->
        <data name="evm_gff" format="gff3" label="${tool.name} on ${on_string}: GFF3" from_work_dir="galaxy.EVM.gff3"/>
        <data name="evm_pep" format="fasta" label="${tool.name} on ${on_string}: PEP" from_work_dir="galaxy.EVM.pep"/>
    </outputs>

    <tests>
        <!-- Full run with genome, predictions, weights, protein and transcript evidence. -->
        <test expect_num_outputs="2">
            <param name="input_genome" value="genome.fasta"/>
            <param name="input_predictions" value="gene_predictions.gff3"/>
            <param name="input_weights" value="weights.txt"/>
            <param name="input_proteins" value="protein_alignments.gff3"/>
            <param name="input_transcript" value="transcript_alignments.gff3"/>
            <param name="segmentsize" value="100000"/>
            <param name="overlapsize" value="10000"/>
            <!-- "opt" is declared as a section in <inputs>, so it is addressed as one here. -->
            <section name="opt">
                <param name="min_intron_length" value="20"/>
                <param name="search_long_introns" value="0"/>
                <param name="re_search_intergenic" value="0"/>
                <param name="terminal_intergenic_re_search" value="0"/>
            </section>
            <output name="evm_pep" ftype="fasta">
                <assert_contents>
                    <has_text text="evm.model.Contig1.3 evm.TU.Contig1.3 EVM prediction Contig1.3 Contig1:7611-9749(-)"/>
                    <has_text text="evm.model.Contig1.10 evm.TU.Contig1.10 EVM prediction Contig1.10 Contig1:57371-59941(+)"/>
                    <has_n_lines n="108" delta="0"/>
                    <has_n_columns n="1" delta="0"/>
                </assert_contents>
            </output>
            <output name="evm_gff" ftype="gff3">
                <assert_contents>
                    <has_text text="ID=evm.TU.Contig1.1;Name=EVM%20prediction%20Contig1.1"/>
                    <has_text text="ID=evm.TU.Contig1.4;Name=EVM%20prediction%20Contig1.4"/>
                    <has_n_lines n="191" delta="0"/>
                    <has_n_columns n="9" delta="0"/>
                    <!-- the sep=";" is used to count the gff properties -->
                    <has_n_columns n="2" delta="0" sep=";"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
EvidenceModeler_: EVidenceModeler (aka EVM) is a software package that combines ab initio
gene predictions and protein and transcript alignments into weighted consensus gene structures.
EVM provides a flexible and intuitive framework for combining various types of evidence into a
single automated gene structure annotation system.

.. _EvidenceModeler: https://github.com/EVidenceModeler/EVidenceModeler.github.io
    ]]></help>
    <expand macro="citation"></expand>
</tool>
18 changes: 18 additions & 0 deletions tools/evidencemodeler/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<macros>
    <!-- Version tokens: upstream EVidenceModeler release and the Galaxy wrapper revision suffix. -->
    <token name="@TOOL_VERSION@">2.1.0</token>
    <token name="@VERSION_SUFFIX@">0</token>

    <!-- Package dependency, pinned to the upstream version token above. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">evidencemodeler</requirement>
        </requirements>
    </xml>

    <!-- Publications cited by the tool, expanded via the "citation" macro. -->
    <xml name="citation">
        <citations>
            <citation type="doi">10.1186/gb-2008-9-1-r7</citation>
            <citation type="doi">10.1080/21501203.2011.606851</citation>
        </citations>
    </xml>
</macros>
Loading

0 comments on commit 982fe89

Please sign in to comment.