Skip to content

Commit

Permalink
Merge pull request #52 from EBI-Metagenomics/new-module--taxonkit/ref…
Browse files Browse the repository at this point in the history
…ormat

New module taxonkit/reformat
  • Loading branch information
mberacochea authored Oct 18, 2024
2 parents 2730184 + dba2496 commit 6da8e72
Show file tree
Hide file tree
Showing 10 changed files with 298 additions and 0 deletions.
6 changes: 6 additions & 0 deletions modules/ebi-metagenomics/taxonkit/reformat/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Conda environment used by the taxonkit/reformat module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # taxonkit is pinned to an exact version and taken from the bioconda channel.
  - 'bioconda::taxonkit=0.17.0'
52 changes: 52 additions & 0 deletions modules/ebi-metagenomics/taxonkit/reformat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Runs `taxonkit reformat` on a tab-separated input file, using a local taxonomy
// database directory, and writes the result to `<prefix>.tsv`.
//
// Inputs:
//   meta  - Groovy map with sample information; `meta.id` is the default output prefix
//   tsv   - input file handed to taxonkit as its positional argument
//   taxdb - path passed to taxonkit via `--data-dir`
//
// Outputs:
//   reformat_tsv - tuple of meta and the produced `*.tsv`
//   versions     - versions.yml recording the taxonkit version
//
// Extra command-line options are injected through `task.ext.args`; the output
// prefix can be overridden with `task.ext.prefix`.
process TAXONKIT_REFORMAT {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/taxonkit:0.17.0--h9ee0642_1':
        'biocontainers/taxonkit:0.17.0--h9ee0642_1' }"

    input:
    tuple val(meta), path(tsv)
    path taxdb

    output:
    tuple val(meta), path("*.tsv"), emit: reformat_tsv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The output is always written to `${prefix}.tsv`. If the staged input carries the
    // same name, taxonkit would read from and write to the same path, so fail early.
    if ("$tsv" == "${prefix}.tsv") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    taxonkit \\
        reformat \\
        $args \\
        --threads $task.cpus \\
        --data-dir $taxdb \\
        --out-file ${prefix}.tsv \\
        $tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        taxonkit: \$( taxonkit version | sed 's/.* v//' )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Same name-collision guard as the real script so stub runs fail the same way.
    if ("$tsv" == "${prefix}.tsv") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    touch ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        taxonkit: \$( taxonkit version | sed 's/.* v//' )
    END_VERSIONS
    """
}
51 changes: 51 additions & 0 deletions modules/ebi-metagenomics/taxonkit/reformat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for TAXONKIT_REFORMAT: describes the tool plus the input and
# output channels declared in main.nf.
name: "taxonkit_reformat"
description: Reformat lineage in canonical ranks
keywords:
  - taxonomy
  - taxids
  - ncbi
  - lineage
tools:
  - "taxonkit":
      description: "A Cross-platform and Efficient NCBI Taxonomy Toolkit"
      homepage: "https://bioinf.shenwei.me/taxonkit/"
      documentation: "https://bioinf.shenwei.me/taxonkit/usage/#reformat"
      tool_dev_url: "https://github.com/shenwei356/taxonkit"
      doi: "10.1016/j.jgg.2021.03.006"
      licence: ["MIT"]
      identifier: biotools:taxonkit

input:
  # First input channel: tuple val(meta), path(tsv)
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - tsv:
        type: file
        description: Input TSV file
        pattern: "*.{tsv,tsv.gz}"
  # Second input channel: path taxdb, passed to taxonkit as --data-dir
  - - taxdb:
        type: directory
        description: >-
          Directory containing the taxonomy database unpacked from
          ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
output:
  - reformat_tsv:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.tsv":
          type: file
          description: Reformatted output TSV file
          pattern: "*.tsv"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@arajkovic"
maintainers:
  - "@arajkovic"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
accession accession.version taxid gi
MT192765 MT192765.1 2697049 1821109001
NZ_LS483480 NZ_LS483480.1 727 1409087034
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
2697049 | 2019-nCoV | | equivalent name |
2697049 | COVID-19 virus | | equivalent name |
2697049 | HCoV-19 | | equivalent name |
2697049 | Human coronavirus 2019 | | equivalent name |
2697049 | SARS-2 | | equivalent name |
2697049 | SARS2 | | equivalent name |
2697049 | SARS-CoV-2 | | acronym |
2697049 | SARS-CoV2 | | equivalent name |
2697049 | Severe acute respiratory syndrome coronavirus 2 | | scientific name |
727 | ATCC 33391 | ATCC 33391 <type strain> | type material |
727 | "Bacterium influenzae" Lehmann and Neumann 1896 | | authority |
727 | Bacterium influenzae | | synonym |
727 | CCUG 23945 | CCUG 23945 <type strain> | type material |
727 | CIP 102514 | CIP 102514 <type strain> | type material |
727 | "Coccobacillus pfeifferi" Neveu-Lemaire 1921 | | authority |
727 | Coccobacillus pfeifferi | | synonym |
727 | DSM 4690 | DSM 4690 <type strain> | type material |
727 | Haemophilus influenzae (Lehmann and Neumann 1896) Winslow et al. 1917 | | authority |
727 | Haemophilus influenzae | | scientific name |
727 | "Haemophilus meningitidis" (Martins) Hauduroy et al. 1937 | | authority |
727 | Haemophilus meningitidis | | synonym |
727 | "Influenza-bacillus" Pfeiffer 1892 | | authority |
727 | Influenza-bacillus | | synonym |
727 | "Mycobacterium influenzae" (Lehmann and Neumann 1896) Chester 1901 | | authority |
727 | Mycobacterium influenzae | | synonym |
727 | NCTC 8143 | NCTC 8143 <type strain> | type material |
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
10239 | 1 | superkingdom | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
2559587 | 10239 | clade | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
2732396 | 2559587 | kingdom | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
2732408 | 2732396 | phylum | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
2732506 | 2732408 | class | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
76804 | 2732506 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
2499399 | 76804 | suborder | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
11118 | 2499399 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
2501931 | 11118 | subfamily | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
694002 | 2501931 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
2509511 | 694002 | subgenus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
694009 | 2509511 | species | SA | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant; specified |
2697049 | 694009 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant |
135625 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant |
712 | 135625 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant |
724 | 712 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | code compliant |
727 | 724 | species | HI | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | code compliant; specified |
61 changes: 61 additions & 0 deletions modules/ebi-metagenomics/taxonkit/reformat/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// nf-test suite for the TAXONKIT_REFORMAT process defined in ../main.nf.
// Both tests feed the same fixtures and compare every process output with the
// stored snapshot; the second one runs the process with the -stub option.
nextflow_process {

name "Test Process TAXONKIT_REFORMAT"
script "../main.nf"
process "TAXONKIT_REFORMAT"

tag "modules"
tag "modules_ebimetagenomics"
tag "taxonkit"
tag "taxonkit/reformat"

// Test-specific configuration; it sets ext.args for the process under test.
config "./nextflow.config"

// Real run: input[0] is the (meta, tsv) tuple, input[1] is the taxdump fixture
// used as the taxonomy database.
test("tsv with header") {
when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file("$moduleTestDir/fixtures/nucl_gb.accession2taxid", checkIfExists: true)
]
input[1] = file("$moduleTestDir/fixtures/taxdump", checkIfExists: true)
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Stub run: same inputs as above, executed with the -stub option.
test("tsv with header - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file("$moduleTestDir/fixtures/nucl_gb.accession2taxid", checkIfExists: true)
]
input[1] = file("$moduleTestDir/fixtures/taxdump", checkIfExists: true)
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
72 changes: 72 additions & 0 deletions modules/ebi-metagenomics/taxonkit/reformat/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"tsv with header": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test.tsv:md5,331e1290ca8ae7e3962b43e28d865bae"
]
],
"1": [
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525"
],
"reformat_tsv": [
[
{
"id": "test",
"single_end": false
},
"test.tsv:md5,331e1290ca8ae7e3962b43e28d865bae"
]
],
"versions": [
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.3"
},
"timestamp": "2024-10-08T16:51:54.404921"
},
"tsv with header - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525"
],
"reformat_tsv": [
[
{
"id": "test",
"single_end": false
},
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,25e183506a3cbe3ddc98e6036b4a0525"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.3"
},
"timestamp": "2024-10-08T16:52:03.328778"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Configuration loaded by the nf-test suite: sets the extra command-line
// arguments (task.ext.args) that main.nf appends to `taxonkit reformat`.
// No process selector is used, so the value applies to every process in scope.
process {
// --taxid-field 3: the nucl_gb.accession2taxid fixture has the taxid in its third column.
// NOTE(review): -t, -r Unassigned and -R -1 presumably enable lineage-taxid output and
// set the placeholders for missing ranks / missing taxids — confirm against the
// taxonkit reformat usage documentation.
ext.args = '-t -r Unassigned -R -1 --taxid-field 3'
}
2 changes: 2 additions & 0 deletions modules/ebi-metagenomics/taxonkit/reformat/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the `taxonkit/reformat` test tag with every file of this module.
# NOTE(review): presumably consumed by CI to select which tests to run — confirm.
taxonkit/reformat:
  - 'modules/ebi-metagenomics/taxonkit/reformat/**'

0 comments on commit 6da8e72

Please sign in to comment.