Skip to content

Commit

Permalink
Refactor checkm2/databasedownload using aria2 (nf-core#6654)
Browse files (browse the repository at this point in the history)
* Use aria2 for CHECKM2_DATABASEDOWNLOAD, update snaps

* Fix linting

* Add bioconda to environment.yml

* Add zenodo_id as input to select which version to download

* Add input param to the test

* Add process input to the database download setup step of the predict test

* Fix databasedownload meta

* Improve field description

---------

Co-authored-by: James A. Fellows Yates <[email protected]>
  • Loading branch information
2 people authored and Helle Rus Povlsen committed Oct 2, 2024
1 parent 57ee9f6 commit 3a7af72
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 26 deletions.
2 changes: 1 addition & 1 deletion modules/nf-core/checkm2/databasedownload/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::checkm2=1.0.2
- conda-forge::aria2=1.36.0
31 changes: 18 additions & 13 deletions modules/nf-core/checkm2/databasedownload/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ import groovy.json.JsonSlurper
process CHECKM2_DATABASEDOWNLOAD {
label 'process_single'

conda "bioconda::checkm2=1.0.2"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/checkm2:1.0.2--pyh7cba7a3_0':
'biocontainers/checkm2:1.0.2--pyh7cba7a3_0' }"
'https://depot.galaxyproject.org/singularity/aria2:1.36.0':
'biocontainers/aria2:1.36.0' }"

input:
val(db_zenodo_id)

output:
tuple val(meta), path("checkm2_db_v${db_version}.dmnd"), emit: database
Expand All @@ -16,25 +19,27 @@ process CHECKM2_DATABASEDOWNLOAD {
task.ext.when == null || task.ext.when

script:
zenodo_id = 5571251
def jsonSlurper = new JsonSlurper()
db_version = jsonSlurper.parseText(file("https://zenodo.org/api/records/${zenodo_id}").text).metadata.version
meta = [id: 'checkm2_db', version: db_version]
def args = task.ext.args ?: ''
zenodo_id = db_zenodo_id ?: 5571251 // Default to latest version if no ID provided
api_data = (new JsonSlurper()).parseText(file("https://zenodo.org/api/records/${zenodo_id}").text)
db_version = api_data.metadata.version
checksum = api_data.files[0].checksum.replaceFirst(/^md5:/, "md5=")
meta = [id: 'checkm2_db', version: db_version]
"""
# Automatic download is broken when using singularity/apptainer (https://github.com/chklovski/CheckM2/issues/73)
# So we download the database manually
wget https://zenodo.org/records/${zenodo_id}/files/checkm2_database.tar.gz
# So it's necessary to download the database manually
aria2c \
${args} \
--checksum ${checksum} \
https://zenodo.org/records/${zenodo_id}/files/checkm2_database.tar.gz
tar -xzf checkm2_database.tar.gz
db_path=\$(find -name *.dmnd)
MD5=\$(grep -o '\\.dmnd": "[^"]*"' CONTENTS.json | cut -d '"' -f 3)
md5sum -c <<< "\$MD5 \$db_path"
mv \$db_path checkm2_db_v${db_version}.dmnd
cat <<-END_VERSIONS > versions.yml
"${task.process}":
checkm2: \$(checkm2 --version)
aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ')
END_VERSIONS
"""

Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/checkm2/databasedownload/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ tools:
licence: ["GPL v3"]
identifier: ""

input:
- - db_zenodo_id:
type: integer
description: Zenodo ID of the CheckM2 database to download

output:
- database:
- meta:
Expand Down
8 changes: 8 additions & 0 deletions modules/nf-core/checkm2/databasedownload/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ nextflow_process {

test("Test CheckM2 Database Download") {

when {
process {
"""
input[0] = []
"""
}
}

then {
assertAll(
{ assert process.success },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
"Test CheckM2 Database Download": {
"content": [
[
"versions.yml:md5,a0f7b47476ffc62ce27870f0503e6c04"
"versions.yml:md5,6201d5ac7aca6e32b98daf4f8656aa2a"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-12T06:29:33.222099893"
"timestamp": "2024-09-16T22:23:54.183040031"
}
}
2 changes: 1 addition & 1 deletion modules/nf-core/checkm2/predict/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process CHECKM2_PREDICT {
tag "${meta.id}"
label 'process_medium'

conda "bioconda::checkm2=1.0.2"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/checkm2:1.0.2--pyh7cba7a3_0':
'biocontainers/checkm2:1.0.2--pyh7cba7a3_0' }"
Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/checkm2/predict/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ nextflow_process {
setup {
run("CHECKM2_DATABASEDOWNLOAD") {
script "../../databasedownload/main.nf"
process {
"""
input[0] = []
"""
}
}
}

Expand Down
6 changes: 1 addition & 5 deletions modules/nf-core/checkm2/predict/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@
"versions.yml:md5,088ec2d8a46efd530c11019328064bff"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-12T06:34:56.335651565"
"timestamp": "2024-09-16T22:43:50.787486798"
}
}

0 comments on commit 3a7af72

Please sign in to comment.