Skip to content

Commit

Permalink
chewBBACA update (#6000)
Browse files Browse the repository at this point in the history
* AlleleCall now recognizes .fa files

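* ExtractCgMLST inputs are now selected manually: the allelic profiles file (and the optional genes-to-remove file) are chosen as individual datasets instead of being taken from the AlleleCall results collection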

* updated VERSION_SUFFIX

* sanitizing file name, help ;)

* corrected test-data

* updated all test-data files
  • Loading branch information
nilchia authored May 11, 2024
1 parent 1056d5c commit 2564b0c
Show file tree
Hide file tree
Showing 10 changed files with 1,673 additions and 137 deletions.
50 changes: 47 additions & 3 deletions tools/chewbbaca/AlleleCall.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
#import re
mkdir 'input' &&
mkdir 'schema' &&
#for $file in $input_file
ln -sf '$file' 'input/${file.element_identifier}' &&
#set escaped_element_identifier = re.sub('[^\w\-]', '_', str($file.element_identifier))
ln -sf '$file' 'input/${escaped_element_identifier}.${file.ext}' &&
#end for
unzip '$input_schema' -d 'schema' &&
chewBBACA.py AlleleCall
Expand Down Expand Up @@ -105,7 +107,7 @@
</outputs>
<tests>
<test expect_num_outputs="4">
<param name="input_file" value="GCA_000007265.1_ASM726v1_genomic.fna"/>
<param name="input_file" value="GCA_000007265.1_ASM726v1_genomic"/>
<param name="input_schema" value="GCA_000007265.1_ASM726v1_schema_seed.zip"/>
<param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
<output_collection name="allelecall_results" type="list">
Expand Down Expand Up @@ -140,7 +142,49 @@
</output_collection>
<output name="unclassified_fasta">
<assert_contents>
<has_text_matching expression="GCA_000007265-protein15"/>
<has_text_matching expression="GCA_000007265_1_ASM726v1_genomic-protein15"/>
<has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
</assert_contents>
</output>
<output name="missing_fasta">
<assert_contents>
<has_text_matching expression="1|GCA_000007265|GCA-000007265-protein16&amp;NIPHEM|GCA_000007265-protein16&amp;EXC"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="4">
<param name="input_file" value="GCA_000007265.1_ASM726v1_genomic.fna"/>
<param name="input_schema" value="GCA_000007265.1_ASM726v1_schema_seed.zip"/>
<param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
<output_collection name="allelecall_results" type="list">
<element name="paralogous_loci" ftype="tabular">
<assert_contents>
<has_text_matching expression="Genome.*Loci.*CDS"/>
</assert_contents>
</element>
<element name="results_alleles" ftype="tabular">
<assert_contents>
<has_text_matching expression="1.*1.*NIPHEM.*1.*1"/>
<has_text_matching expression="GCA_000007265.*1"/>
</assert_contents>
</element>
<element name="results_alleles_hashed" ftype="tabular">
<assert_contents>
<has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/>
<has_text_matching expression="GCA_000007265.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="allelcall_log" type="list">
<element name="logging_info" ftype="txt">
<assert_contents>
<has_text_matching expression="Used a BSR of: 0.6"/>
</assert_contents>
</element>
</output_collection>
<output name="unclassified_fasta">
<assert_contents>
<has_text_matching expression="GCA_000007265_1_ASM726v1_genomic_fna-protein83"/>
<has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
</assert_contents>
</output>
Expand Down
20 changes: 6 additions & 14 deletions tools/chewbbaca/ExtractCgMLST.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,24 @@
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
mkdir 'input' &&
#for $file in $input_file
ln -sf '$file' 'input/${file.element_identifier}.tsv' &&
#end for
chewBBACA.py ExtractCgMLST
--t $threshold
#if str($genes2remove) != 'false'
--r 'input/paralogous_counts.tsv'
#if $genes2remove:
--r '$genes2remove'
#end if
#if $genomes2remove:
--g '$genomes2remove'
#end if
-i 'input/results_alleles.tsv' -o 'output'
-i '$input_file' -o 'output'
]]></command>
<inputs>
<param name="input_file" type="data_collection" collection_type="list" label="AlleleCall Results" format="tabular"/>
<param argument="--input-file" type="data" label="Allelic Profiles" format="tabular"/>
<section name="advanced" title="Advanced options">
<param argument="--genomes2remove" type="data" format="txt" label="Genomes/rows to remove from the matrix" optional="true" help="One genome identifier per line"/>
<param argument="--threshold" type="text" value="0.95 0.99 1" label="threshold" help="Genes that constitute the core genome must be in a proportion of genomes that is at least equal to this value. Users can provide multiple values as a space-separated list.">
<validator type="regex">[ .0-9]+</validator>
</param>
<param name="genes2remove" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Remove paralogous counts?" help="the list of genes listed in the &quot;paralogous_counts.tsv&quot; file created by the AlleleCall process. --genes2remove"/>
<param argument="--genes2remove" type="data" format="tabular" label="List of genes to exclude from analysis" optional="true" help="the list of genes listed in the &quot;paralogous_counts.tsv&quot; file created by the AlleleCall process."/>
</section>
</inputs>
<outputs>
Expand All @@ -36,11 +32,7 @@
</outputs>
<tests>
<test>
<param name="input_file">
<collection type="list">
<element name="results_alleles" value="results_alleles.tsv" ftype="tabular"/>
</collection>
</param>
<param name="input_file" value="results_alleles.tsv"/>
<output_collection name="output_collection" type="list">
<element name="missing_loci_stats">
<assert_contents>
Expand Down
2 changes: 1 addition & 1 deletion tools/chewbbaca/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@CHEW_VERSION@">3.3.3</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE@">22.05</token>
<xml name="requirements">
<requirements>
Expand Down
Loading

0 comments on commit 2564b0c

Please sign in to comment.