forked from galaxyproject/tools-iuc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add metawrap (from tools-au) (galaxyproject#5936)
* working binning, requires singularity hack * working output collection -> test * try smaller test data * working test. add params * add shed and help text * rename shed file * catch contig and stat outputs * start wrapper * rebase deleted macros * add test output * fix DB location, add parameter test * checkm database in package. don't need to set * move Snakefile * investigate symlink * use discover_datasets for recursive search * true * tidy whitespace * fix repo link * try smaller subset * try single test * only run a single test to reduce time * rename test input * make sure PR.yaml is using the right biocontainer * make sure PR.yaml is using the right biocontainer * Revert "make sure PR.yaml is using the right biocontainer" This reverts commit 3b2eef3f3931ca29657b121cfc24f43e70b06f2d. * Revert "make sure PR.yaml is using the right biocontainer" This reverts commit 68242df48e1983166ba3594e0f6ab701c6cc6aca. * try to request less RAM * Mem parameter is ignored by component pplacer * Revert RAM * tidy up snakefile * metadata for toolshed * exclude extras * port metawrap * lint * update quoting * try quick to reduce RAM usage * conditional quick * tidy up ram test * use hidden param to run tests * Update tools/metawrapmg/.shed.yml Co-authored-by: Björn Grüning <[email protected]> * test main output * Update tools/metawrapmg/.shed.yml Co-authored-by: Bérénice Batut <[email protected]> * remove fasta.gz handling * remove visible from collection output --------- Co-authored-by: Björn Grüning <[email protected]> Co-authored-by: Bérénice Batut <[email protected]>
- Loading branch information
1 parent
af418b2
commit 26fe7d2
Showing
8 changed files
with
269 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
--- | ||
# Tool Shed repository metadata for the metaWRAP binning wrapper (tools/metawrapmg). | ||
categories: | ||
- Metagenomics | ||
description: A flexible pipeline for genome-resolved metagenomic data analysis | ||
homepage_url: https://github.com/bxlab/metaWRAP | ||
long_description: | | ||
A convenient wrapper around three metagenomic binning software: MaxBin2, | ||
metaBAT2, and CONCOCT. Bin refinement utilizes a hybrid approach to take | ||
in two or three bin sets that were obtained with different software and | ||
produces a consolidated, improved bin set. | ||
name: metawrapmg_binning | ||
owner: galaxy-australia | ||
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/metawrapmg | ||
type: unrestricted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<macros> | ||
<!-- Shared tokens: packaged metaWRAP version, wrapper revision suffix, and Galaxy profile. | ||
They are combined in the tool's version and profile attributes. --> | ||
<token name="@TOOL_VERSION@">1.3.0</token> | ||
<token name="@VERSION_SUFFIX@">1</token> | ||
<token name="@PROFILE@">22.05</token> | ||
<!-- Software dependency, pinned to the same version as @TOOL_VERSION@. --> | ||
<xml name="requirements"> | ||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">metawrap-mg</requirement> | ||
</requirements> | ||
</xml> | ||
<xml name="citations"> | ||
<citations> | ||
<!-- A citation of type "doi" takes the bare DOI, not a resolver URL. --> | ||
<citation type="doi">10.1186/s40168-018-0541-1</citation> | ||
</citations> | ||
</xml> | ||
<!-- Cross-reference to the bio.tools registry entry. --> | ||
<xml name="xrefs"> | ||
<xrefs> | ||
<xref type="bio.tools">metawrap</xref> | ||
</xrefs> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
<tool id="metawrapmg_binning" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | ||
<description>metagenome binning pipeline</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="xrefs"/> | ||
<expand macro="requirements"/> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
## set memory usage: when Galaxy exports GALAXY_MEMORY_MB, derive whole | ||
## gigabytes from it; otherwise the 16 default further down is used. | ||
if [ -n "\${GALAXY_MEMORY_MB}" ] ; then | ||
export GALAXY_MEMORY_GB="\$((GALAXY_MEMORY_MB / 1024))" ; | ||
fi ; | ||
################## | ||
## SET UP FILES ## | ||
################## | ||
## only plain FASTA and FASTQ | ||
ln -s '$metagenome' metagenome.fasta | ||
&& | ||
## Metawrap checks for files named _1.fastq and _2.fastq. | ||
ln -s '$input_1' reads_1.fastq | ||
&& | ||
ln -s '$input_2' reads_2.fastq | ||
&& | ||
##################### | ||
## INITIAL BINNING ## | ||
##################### | ||
metawrap binning | ||
--metabat2 --maxbin2 --concoct | ||
-a metagenome.fasta | ||
-m "\${GALAXY_MEMORY_GB:-16}" | ||
-o INITIAL_BINNING | ||
-t "\${GALAXY_SLOTS:-4}" | ||
reads_1.fastq | ||
reads_2.fastq | ||
&& | ||
## Check which binning programs produced bins. Each non-empty bin | ||
## directory is assigned the next free switch (-A, then -B, then -C). | ||
## Switch and directory are stored as separate array elements so they | ||
## reach bin_refinement as individual arguments rather than one quoted word. | ||
bin_dirs=(INITIAL_BINNING/concoct_bins INITIAL_BINNING/maxbin2_bins INITIAL_BINNING/metabat2_bins) && | ||
switches=('-A' '-B' '-C') && | ||
i=0 && | ||
bin_args=() && | ||
for dir in "\${bin_dirs[@]}" ; do | ||
if [ "\$(find "\$dir" -mindepth 1 -maxdepth 1 -exec echo found \;)" ]; then | ||
bin_args+=("\${switches[\$i]}" "\$dir") ; | ||
## Plain assignment rather than an arithmetic command: a post-increment | ||
## from 0 evaluates to 0 and returns a non-zero status, which would become | ||
## the status of the loop (and break the chain) if it ran last. | ||
i=\$((i + 1)) ; | ||
fi | ||
done && | ||
#################### | ||
## BIN REFINEMENT ## | ||
#################### | ||
## The checkm database is in the conda package, see | ||
## https://github.com/bioconda/bioconda-recipes/pull/38299. | ||
metawrap bin_refinement | ||
-t "\${GALAXY_SLOTS:-4}" | ||
-m "\${GALAXY_MEMORY_GB:-16}" | ||
## hidden_quick is empty by default; emit it only when set so that no | ||
## empty argument is passed on the command line. | ||
#if str($hidden_quick).strip() | ||
'$hidden_quick' | ||
#end if | ||
-c '${binning.c}' | ||
-x '${binning.x}' | ||
-o BIN_REFINEMENT | ||
## Only run bin_refinement on bins with contigs | ||
"\${bin_args[@]}" | ||
]]></command> | ||
<inputs> | ||
<!-- assembly and the paired reads; the command links them to metagenome.fasta, | ||
reads_1.fastq and reads_2.fastq before running metawrap --> | ||
<param name="metagenome" format="fasta" type="data" label="Metagenome" help="Metagenome co-assembly for binning"/> | ||
<param name="input_1" format="fastqsanger" type="data" label="Read 1" help="Original reads that were used for the assembly: read 1."/> | ||
<param name="input_2" format="fastqsanger" type="data" label="Read 2" help="Original reads that were used for the assembly: read 2."/> | ||
<!-- thresholds handed to metawrap bin_refinement as its c and x options --> | ||
<section name="binning" title="Binning parameters" expanded="false"> | ||
<param argument="-c" type="integer" value="70" min="50" max="100" label="Percent completion" help="Minimum % completion of bins"/> | ||
<param argument="-x" type="integer" value="10" min="0" max="100" label="Percent contamination" help="Maximum % contamination of bins that is acceptable"/> | ||
</section> | ||
<!-- the pplacer component requires 40 GB per thread. Skip pplacer for | ||
testing by setting this to the "quick" long option, written with its two | ||
leading dashes as in the test case. The value is placed on the | ||
bin_refinement command line as given; it is empty for normal runs. --> | ||
<param name="hidden_quick" type="hidden" value=""/> | ||
</inputs> | ||
<outputs> | ||
<!-- All outputs are list collections discovered from the BIN_REFINEMENT directory | ||
after the job finishes. The named group "designation" in each pattern supplies | ||
the element identifier; angle brackets are entity-escaped because a raw | ||
less-than sign is not allowed inside an XML attribute value. --> | ||
<!-- contigs binned into fasta files --> | ||
<collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins"> | ||
<discover_datasets pattern="metawrap_\d+_\d+_bins/(?P&lt;designation&gt;.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true"/> | ||
</collection> | ||
<!-- summary figures --> | ||
<collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures"> | ||
<discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures"/> | ||
</collection> | ||
<!-- statistics on binning --> | ||
<collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files"> | ||
<discover_datasets pattern="(?P&lt;designation&gt;.+)\.stats" format="tabular" directory="BIN_REFINEMENT"/> | ||
</collection> | ||
<!-- which contig went into which bin --> | ||
<collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments"> | ||
<discover_datasets pattern="(?P&lt;designation&gt;.+)\.contigs" format="tabular" directory="BIN_REFINEMENT"/> | ||
</collection> | ||
</outputs> | ||
<tests> | ||
<!-- 01: basic function --> | ||
<test> | ||
<param name="metagenome" value="subset.fasta.gz"/> | ||
<param name="input_1" value="mapped_reads.r1.fastq.gz"/> | ||
<param name="input_2" value="mapped_reads.r2.fastq.gz"/> | ||
<!-- non-default completion/contamination thresholds; the expected stats and | ||
contigs elements below carry the same numbers in their names (presumably | ||
derived from these values by metawrap) --> | ||
<param name="c" value="60"/> | ||
<param name="x" value="15"/> | ||
<!-- skip pplacer to keep memory use low (see the note on hidden_quick in inputs) --> | ||
<param name="hidden_quick" value="--quick"/> | ||
<!-- refined bin FASTA: only checked for the presence of an expected contig name --> | ||
<output_collection name="metawrap_bins" type="list"> | ||
<element name="bin.1" ftype="fasta"> | ||
<assert_contents> | ||
<has_text text="NODE_2_length_"/> | ||
</assert_contents> | ||
</element> | ||
</output_collection> | ||
<!-- stats and contig assignment tables are compared against stored files --> | ||
<output_collection name="metawrap_stats" type="list"> | ||
<element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/> | ||
</output_collection> | ||
<output_collection name="metawrap_contigs" type="list"> | ||
<element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/> | ||
</output_collection> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
MetaWRAP | ||
-------- | ||
MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that | ||
accomplishes the core tasks of metagenomic analysis. Additionally, | ||
metaWRAP takes bin extraction and analysis to the next level. metaWRAP | ||
is meant to be a fast and simple approach before you delve deeper into | ||
parameterization of your analysis. MetaWRAP can be applied to a variety | ||
of environments, including gut, water, and soil microbiomes (see | ||
metaWRAP paper for benchmarks). | ||
MetaWRAP binning module | ||
~~~~~~~~~~~~~~~~~~~~~~~ | ||
The metaWRAP::Binning module is meant to be a convenient wrapper around | ||
three metagenomic binning tools: MaxBin2, metaBAT2, and CONCOCT. | ||
First the metagenomic assembly is indexed with bwa-index, and then | ||
paired end reads from any number of samples are aligned to it. The | ||
alignments are sorted and compressed with samtools, and library insert | ||
size statistics are also gathered at the same time (insert size average | ||
and standard deviation). metaBAT2’s jgi_summarize_bam_contig_depths | ||
function is used to generate a contig abundance table, and it is then | ||
converted into the correct format for each of the three binners to take | ||
as input. After MaxBin2, metaBAT2, and CONCOCT finish binning the | ||
contigs with default settings, the final bins folders are created with | ||
formatted bin fasta files. CheckM’s lineage_wf function is used to | ||
predict essential genes and estimate the completion and contamination of | ||
each bin. | ||
MetaWRAP bin refinement | ||
~~~~~~~~~~~~~~~~~~~~~~~ | ||
The metaWRAP::Bin_refinement module utilizes a hybrid approach to take | ||
in two or three bin sets that were obtained with different software and | ||
produces a consolidated, improved bin set. First, binning_refiner is | ||
used to create hybridized bins from every possible combination of sets. | ||
If there were three bin sets: A, B, and C, then the following hybrid | ||
sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM | ||
is then run to evaluate the completion and contamination of the bins in | ||
each of the 7 bin sets (3 originals, 4 hybridized). The bin sets are | ||
then iteratively compared to each other, and each pair is consolidated | ||
into an improved bin set. To do this, the same bin is identified within | ||
the two bin sets based on a minimum of 80% overlap in genome length, and | ||
the better bin is determined based on which bin has the higher score. | ||
The scoring function is S=Completion-5*Contamination. After all bin sets | ||
are incorporated into the consolidated bin collection, a de-replication | ||
function removes any duplicate contigs. If a contig is present in more | ||
than one bin, it is removed from all but the best bin (based on scoring | ||
function). CheckM is then run on the final bin set and a final report | ||
file is generated showing the completion, contamination, and other | ||
statistics generated by CheckM for each bin. Completion and | ||
contamination rank plots are also generated to evaluate the success of | ||
the Bin_refinement module, and compare its output to the quality of the | ||
original bins. | ||
-------------- | ||
MetaWRAP’s home page is | ||
`bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__. | ||
This tool was wrapped by the Galaxy Australia team. | ||
]]></help> | ||
<expand macro="citations"/> | ||
</tool> |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
NODE_2_length_158684_cov_2.789534 bin.1 | ||
NODE_3_length_138621_cov_2.416422 bin.1 | ||
NODE_6_length_106569_cov_3.096156 bin.1 | ||
NODE_7_length_99368_cov_2.860562 bin.1 | ||
NODE_8_length_95669_cov_2.506714 bin.1 | ||
NODE_10_length_88523_cov_2.243252 bin.1 | ||
NODE_11_length_86536_cov_2.926990 bin.1 | ||
NODE_13_length_73331_cov_2.369780 bin.1 | ||
NODE_14_length_72311_cov_2.340345 bin.1 | ||
NODE_15_length_72135_cov_2.745671 bin.1 | ||
NODE_16_length_71859_cov_2.918389 bin.1 | ||
NODE_17_length_70006_cov_2.553159 bin.1 | ||
NODE_24_length_58826_cov_2.290024 bin.1 | ||
NODE_26_length_57188_cov_2.464320 bin.1 | ||
NODE_27_length_54578_cov_2.838857 bin.1 | ||
NODE_30_length_51316_cov_2.828934 bin.1 | ||
NODE_44_length_41143_cov_2.951908 bin.1 | ||
NODE_47_length_40493_cov_2.795440 bin.1 | ||
NODE_49_length_39976_cov_3.111871 bin.1 | ||
NODE_58_length_35924_cov_2.623965 bin.1 | ||
NODE_72_length_33102_cov_2.542954 bin.1 | ||
NODE_89_length_30260_cov_2.967621 bin.1 | ||
NODE_102_length_28495_cov_2.496167 bin.1 | ||
NODE_118_length_26032_cov_2.640605 bin.1 | ||
NODE_119_length_26028_cov_2.951065 bin.1 | ||
NODE_153_length_22539_cov_2.899173 bin.1 | ||
NODE_167_length_21736_cov_2.597805 bin.1 | ||
NODE_229_length_18213_cov_2.462496 bin.1 | ||
NODE_260_length_17127_cov_3.016343 bin.1 | ||
NODE_277_length_16414_cov_2.366465 bin.1 | ||
NODE_370_length_13686_cov_3.065733 bin.1 | ||
NODE_381_length_13339_cov_3.032972 bin.1 | ||
NODE_485_length_11839_cov_2.628564 bin.1 | ||
NODE_502_length_11654_cov_2.455643 bin.1 | ||
NODE_616_length_10584_cov_2.555798 bin.1 | ||
NODE_725_length_9651_cov_2.904023 bin.1 | ||
NODE_1206_length_7144_cov_2.231768 bin.1 | ||
NODE_1409_length_6558_cov_2.842996 bin.1 | ||
NODE_1437_length_6494_cov_3.114769 bin.1 | ||
NODE_1488_length_6399_cov_3.331494 bin.1 | ||
NODE_2109_length_5159_cov_3.299177 bin.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
bin completeness contamination GC lineage N50 size binner | ||
bin.1 93.73 0.335 0.406 Clostridiales 70006 1855509 binsAB |