Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chewBBACA update #6000

Merged
merged 6 commits into from
May 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 47 additions & 3 deletions tools/chewbbaca/AlleleCall.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
#import re
mkdir 'input' &&
mkdir 'schema' &&
#for $file in $input_file
ln -sf '$file' 'input/${file.element_identifier}' &&
#set escaped_element_identifier = re.sub('[^\w\-]', '_', str($file.element_identifier))
ln -sf '$file' 'input/${escaped_element_identifier}.${file.ext}' &&
#end for
unzip '$input_schema' -d 'schema' &&
chewBBACA.py AlleleCall
Expand Down Expand Up @@ -105,7 +107,7 @@
</outputs>
<tests>
<test expect_num_outputs="4">
<param name="input_file" value="GCA_000007265.1_ASM726v1_genomic.fna"/>
<param name="input_file" value="GCA_000007265.1_ASM726v1_genomic"/>
<param name="input_schema" value="GCA_000007265.1_ASM726v1_schema_seed.zip"/>
<param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
<output_collection name="allelecall_results" type="list">
Expand Down Expand Up @@ -140,7 +142,49 @@
</output_collection>
<output name="unclassified_fasta">
<assert_contents>
<has_text_matching expression="GCA_000007265-protein15"/>
<has_text_matching expression="GCA_000007265_1_ASM726v1_genomic-protein15"/>
<has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
</assert_contents>
</output>
<output name="missing_fasta">
<!-- Content assertions for the "missing_fasta" output of this test. -->
<!-- NOTE(review): has_text_matching interprets "expression" as a regular expression, so every unescaped "|" below is alternation. As written the assertion succeeds as soon as any single alternative (for example a bare "1") occurs anywhere in the file. If the whole pipe-delimited header is meant to be matched literally, escape the pipes as "\|" or switch to has_text; confirm the expected header text against the tool output first. -->
<assert_contents>
<has_text_matching expression="1|GCA_000007265|GCA-000007265-protein16&amp;NIPHEM|GCA_000007265-protein16&amp;EXC"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="4">
<param name="input_file" value="GCA_000007265.1_ASM726v1_genomic.fna"/>
<param name="input_schema" value="GCA_000007265.1_ASM726v1_schema_seed.zip"/>
<param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
<output_collection name="allelecall_results" type="list">
<element name="paralogous_loci" ftype="tabular">
<assert_contents>
<has_text_matching expression="Genome.*Loci.*CDS"/>
</assert_contents>
</element>
<element name="results_alleles" ftype="tabular">
<assert_contents>
<has_text_matching expression="1.*1.*NIPHEM.*1.*1"/>
<has_text_matching expression="GCA_000007265.*1"/>
</assert_contents>
</element>
<element name="results_alleles_hashed" ftype="tabular">
<assert_contents>
<has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/>
<has_text_matching expression="GCA_000007265.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="allelcall_log" type="list">
<element name="logging_info" ftype="txt">
<assert_contents>
<has_text_matching expression="Used a BSR of: 0.6"/>
</assert_contents>
</element>
</output_collection>
<output name="unclassified_fasta">
<assert_contents>
<has_text_matching expression="GCA_000007265_1_ASM726v1_genomic_fna-protein83"/>
<has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
</assert_contents>
</output>
Expand Down
20 changes: 6 additions & 14 deletions tools/chewbbaca/ExtractCgMLST.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,24 @@
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
mkdir 'input' &&
#for $file in $input_file
ln -sf '$file' 'input/${file.element_identifier}.tsv' &&
#end for
chewBBACA.py ExtractCgMLST
--t $threshold
#if str($genes2remove) != 'false'
--r 'input/paralogous_counts.tsv'
#if $genes2remove:
--r '$genes2remove'
#end if
#if $genomes2remove:
--g '$genomes2remove'
#end if
-i 'input/results_alleles.tsv' -o 'output'
-i '$input_file' -o 'output'
]]></command>
<inputs>
<param name="input_file" type="data_collection" collection_type="list" label="AlleleCall Results" format="tabular"/>
<param argument="--input-file" type="data" label="Allelic Profiles" format="tabular"/>
<section name="advanced" title="Advanced options">
<param argument="--genomes2remove" type="data" format="txt" label="Genomes/rows to remove from the matrix" optional="true" help="One genome identifier per line"/>
<param argument="--threshold" type="text" value="0.95 0.99 1" label="threshold" help="Genes that constitute the core genome must be in a proportion of genomes that is at least equal to this value. Users can provide multiple values as a space-separated list.">
<validator type="regex">[ .0-9]+</validator>
</param>
<param name="genes2remove" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Remove paralogous counts?" help="the list of genes listed in the &quot;paralogous_counts.tsv&quot; file created by the AlleleCall process. --genes2remove"/>
<param argument="--genes2remove" type="data" format="tabular" label="List of genes to exclude from analysis" optional="true" help="the list of genes listed in the &quot;paralogous_counts.tsv&quot; file created by the AlleleCall process."/>
</section>
</inputs>
<outputs>
Expand All @@ -36,11 +32,7 @@
</outputs>
<tests>
<test>
<param name="input_file">
<collection type="list">
<element name="results_alleles" value="results_alleles.tsv" ftype="tabular"/>
</collection>
</param>
<param name="input_file" value="results_alleles.tsv"/>
<output_collection name="output_collection" type="list">
<element name="missing_loci_stats">
<assert_contents>
Expand Down
2 changes: 1 addition & 1 deletion tools/chewbbaca/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@CHEW_VERSION@">3.3.3</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE@">22.05</token>
<xml name="requirements">
<requirements>
Expand Down
Loading