-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #52 from EBI-Metagenomics/new-module--taxonkit/reformat
New module taxonkit/reformat
- Loading branch information
Showing 10 changed files with 298 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
--- | ||
# Conda environment for the taxonkit/reformat module. | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
# Pinned to taxonkit 0.17.0, the same release as the container images used by the TAXONKIT_REFORMAT process. | ||
- "bioconda::taxonkit=0.17.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Nextflow process wrapping `taxonkit reformat`. | ||
// Inputs:  a (meta, tsv) tuple and `taxdb`, which is handed to taxonkit via --data-dir. | ||
// Outputs: (meta, *.tsv) on channel `reformat_tsv`, and versions.yml on channel `versions`. | ||
// Extra command-line options are injected through `task.ext.args`; the output file | ||
// name is `${task.ext.prefix}.tsv`, defaulting to `${meta.id}.tsv`. | ||
process TAXONKIT_REFORMAT { | ||
tag "$meta.id" | ||
label 'process_single' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/taxonkit:0.17.0--h9ee0642_1': | ||
'biocontainers/taxonkit:0.17.0--h9ee0642_1' }" | ||
|
||
input: | ||
tuple val(meta), path(tsv) | ||
path taxdb | ||
|
||
output: | ||
tuple val(meta), path("*.tsv"), emit: reformat_tsv | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
// The input is staged in the same directory that --out-file writes to, so an | ||
// identical name would make taxonkit write over its own input. Fail fast instead. | ||
if ("$tsv" == "${prefix}.tsv") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" | ||
|
||
""" | ||
taxonkit \\ | ||
reformat \\ | ||
$args \\ | ||
--threads $task.cpus \\ | ||
--data-dir $taxdb \\ | ||
--out-file ${prefix}.tsv \\ | ||
$tsv | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
taxonkit: \$( taxonkit version | sed 's/.* v//' ) | ||
END_VERSIONS | ||
""" | ||
|
||
// Stub run: creates an empty output file and versions.yml without running `taxonkit reformat`. | ||
stub: | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
|
||
""" | ||
touch ${prefix}.tsv | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
taxonkit: \$( taxonkit version | sed 's/.* v//' ) | ||
END_VERSIONS | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
name: "taxonkit_reformat" | ||
description: Reformat lineage in canonical ranks | ||
keywords: | ||
- taxonomy | ||
- taxids | ||
- ncbi | ||
- lineage | ||
tools: | ||
- "taxonkit": | ||
description: "A Cross-platform and Efficient NCBI Taxonomy Toolkit" | ||
homepage: "https://bioinf.shenwei.me/taxonkit/" | ||
documentation: "https://bioinf.shenwei.me/taxonkit/usage/#reformat" | ||
tool_dev_url: "https://github.com/shenwei356/taxonkit" | ||
doi: "10.1016/j.jgg.2021.03.006" | ||
licence: ["MIT"] | ||
identifier: biotools:taxonkit | ||
|
||
input: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1', single_end:false ]` | ||
- tsv: | ||
type: file | ||
description: Input TSV file | ||
pattern: "*.{tsv,tsv.gz}" | ||
# taxdb is passed to `--data-dir`, so it is a directory rather than a single file. | ||
- - taxdb: | ||
type: directory | ||
description: Directory with the taxonomy database files (e.g. names.dmp, nodes.dmp) unpacked from ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz | ||
output: | ||
- reformat_tsv: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1', single_end:false ]` | ||
- "*.tsv": | ||
type: file | ||
description: Reformatted output TSV file | ||
pattern: "*.tsv" | ||
- versions: | ||
- versions.yml: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@arajkovic" | ||
maintainers: | ||
- "@arajkovic" |
3 changes: 3 additions & 0 deletions
3
modules/ebi-metagenomics/taxonkit/reformat/tests/fixtures/nucl_gb.accession2taxid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
accession accession.version taxid gi | ||
MT192765 MT192765.1 2697049 1821109001 | ||
NZ_LS483480 NZ_LS483480.1 727 1409087034 |
26 changes: 26 additions & 0 deletions
26
modules/ebi-metagenomics/taxonkit/reformat/tests/fixtures/taxdump/names.dmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
2697049 | 2019-nCoV | | equivalent name | | ||
2697049 | COVID-19 virus | | equivalent name | | ||
2697049 | HCoV-19 | | equivalent name | | ||
2697049 | Human coronavirus 2019 | | equivalent name | | ||
2697049 | SARS-2 | | equivalent name | | ||
2697049 | SARS2 | | equivalent name | | ||
2697049 | SARS-CoV-2 | | acronym | | ||
2697049 | SARS-CoV2 | | equivalent name | | ||
2697049 | Severe acute respiratory syndrome coronavirus 2 | | scientific name | | ||
727 | ATCC 33391 | ATCC 33391 <type strain> | type material | | ||
727 | "Bacterium influenzae" Lehmann and Neumann 1896 | | authority | | ||
727 | Bacterium influenzae | | synonym | | ||
727 | CCUG 23945 | CCUG 23945 <type strain> | type material | | ||
727 | CIP 102514 | CIP 102514 <type strain> | type material | | ||
727 | "Coccobacillus pfeifferi" Neveu-Lemaire 1921 | | authority | | ||
727 | Coccobacillus pfeifferi | | synonym | | ||
727 | DSM 4690 | DSM 4690 <type strain> | type material | | ||
727 | Haemophilus influenzae (Lehmann and Neumann 1896) Winslow et al. 1917 | | authority | | ||
727 | Haemophilus influenzae | | scientific name | | ||
727 | "Haemophilus meningitidis" (Martins) Hauduroy et al. 1937 | | authority | | ||
727 | Haemophilus meningitidis | | synonym | | ||
727 | "Influenza-bacillus" Pfeiffer 1892 | | authority | | ||
727 | Influenza-bacillus | | synonym | | ||
727 | "Mycobacterium influenzae" (Lehmann and Neumann 1896) Chester 1901 | | authority | | ||
727 | Mycobacterium influenzae | | synonym | | ||
727 | NCTC 8143 | NCTC 8143 <type strain> | type material | |
22 changes: 22 additions & 0 deletions
22
modules/ebi-metagenomics/taxonkit/reformat/tests/fixtures/taxdump/nodes.dmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | | ||
10239 | 1 | superkingdom | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | | ||
2559587 | 10239 | clade | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | | ||
2732396 | 2559587 | kingdom | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | | ||
2732408 | 2732396 | phylum | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | | ||
2732506 | 2732408 | class | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | | ||
76804 | 2732506 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
2499399 | 76804 | suborder | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
11118 | 2499399 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
2501931 | 11118 | subfamily | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
694002 | 2501931 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
2509511 | 694002 | subgenus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
694009 | 2509511 | species | SA | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant; specified | | ||
2697049 | 694009 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | | ||
131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | | ||
2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | | ||
1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | | ||
1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
135625 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
712 | 135625 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
724 | 712 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant | | ||
727 | 724 | species | HI | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | code compliant; specified | |
61 changes: 61 additions & 0 deletions
61
modules/ebi-metagenomics/taxonkit/reformat/tests/main.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// nf-test suite for the TAXONKIT_REFORMAT process defined in ../main.nf. | ||
// Both tests feed the same fixture inputs and compare process.out against the stored snapshot. | ||
nextflow_process { | ||
|
||
name "Test Process TAXONKIT_REFORMAT" | ||
script "../main.nf" | ||
process "TAXONKIT_REFORMAT" | ||
|
||
tag "modules" | ||
tag "modules_ebimetagenomics" | ||
tag "taxonkit" | ||
tag "taxonkit/reformat" | ||
|
||
// Supplies ext.args for the process under test. | ||
config "./nextflow.config" | ||
|
||
// Real run: input[0] is the (meta, tsv) tuple, input[1] is the taxdump fixture directory. | ||
test("tsv with header") { | ||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test', single_end:false ], // meta map | ||
file("$moduleTestDir/fixtures/nucl_gb.accession2taxid", checkIfExists: true) | ||
] | ||
input[1] = file("$moduleTestDir/fixtures/taxdump", checkIfExists: true) | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out).match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
// Same inputs as above, but executed with -stub so the process's stub block runs instead of the script. | ||
test("tsv with header - stub") { | ||
|
||
options "-stub" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test', single_end:false ], // meta map | ||
file("$moduleTestDir/fixtures/nucl_gb.accession2taxid", checkIfExists: true) | ||
] | ||
input[1] = file("$moduleTestDir/fixtures/taxdump", checkIfExists: true) | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out).match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
} |
72 changes: 72 additions & 0 deletions
72
modules/ebi-metagenomics/taxonkit/reformat/tests/main.nf.test.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
{ | ||
"tsv with header": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.tsv:md5,331e1290ca8ae7e3962b43e28d865bae" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525" | ||
], | ||
"reformat_tsv": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.tsv:md5,331e1290ca8ae7e3962b43e28d865bae" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.3" | ||
}, | ||
"timestamp": "2024-10-08T16:51:54.404921" | ||
}, | ||
"tsv with header - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525" | ||
], | ||
"reformat_tsv": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.3" | ||
}, | ||
"timestamp": "2024-10-08T16:52:03.328778" | ||
} | ||
} |
3 changes: 3 additions & 0 deletions
3
modules/ebi-metagenomics/taxonkit/reformat/tests/nextflow.config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Test-only configuration: extra options forwarded to `taxonkit reformat` through task.ext.args. | ||
// `--taxid-field 3` lines up with the fixture's third column ("taxid" in its header row). | ||
// NOTE(review): -t, -r and -R are presumably the lineage-taxid and missing-rank/taxid | ||
// replacement options described in the taxonkit reformat usage docs - confirm there. | ||
process { | ||
ext.args = '-t -r Unassigned -R -1 --taxid-field 3' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Tag-to-path mapping for the taxonkit/reformat module | ||
# (presumably used to select tests when files under these paths change - confirm). | ||
taxonkit/reformat: | ||
- "modules/ebi-metagenomics/taxonkit/reformat/**" |