From 6bd552f608e89ee9ddb3d0ce539704cc59cd4c43 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 15 Nov 2022 14:44:31 +0000 Subject: [PATCH 1/4] Added busco_clean parameter and code --- modules/local/busco.nf | 11 +++++++++++ nextflow.config | 1 + 2 files changed, 12 insertions(+) diff --git a/modules/local/busco.nf b/modules/local/busco.nf index 31a4889c..91b57dc4 100644 --- a/modules/local/busco.nf +++ b/modules/local/busco.nf @@ -32,6 +32,10 @@ process BUSCO { if (params.busco_reference) lineage_dataset_provided = "Y" + def busco_clean = "N" + if (params.busco_clean) + busco_clean = "Y" + def p = "--auto-lineage" if (params.busco_reference){ p = "--lineage_dataset dataset/${db}" @@ -177,6 +181,13 @@ process BUSCO { mv BUSCO/logs/prodigal_out.log "${bin}_prodigal.gff" fi + # delete temporary BUSCO files only if params.busco_clean is enabled (flag is "Y") + if [ ${busco_clean} = "Y" ]; then + find . -depth -type d -name "augustus_config" -execdir rm -rf "{}" \\; + find . -depth -type d -name "auto_lineage" -execdir rm -rf "{}" \\; + find . -depth -type d -name "run_*" -execdir rm -rf "{}" + + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version 2>&1 | sed 's/Python //g') diff --git a/nextflow.config b/nextflow.config index 11854378..f22a62d2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -97,6 +97,7 @@ params { busco_download_path = null busco_auto_lineage_prok = false save_busco_reference = false + busco_clean = false // Reproducibility options megahit_fix_cpu_1 = false From 61e63aa0ace72d3c5a3bc53c3817ee9b4f1e6928 Mon Sep 17 00:00:00 2001 From: Jim Downie Date: Tue, 15 Nov 2022 14:51:01 +0000 Subject: [PATCH 2/4] Added busco_clean to schema. 
--- nextflow_schema.json | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2af80722..8cc3573b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -171,7 +174,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -279,7 +289,10 @@ "type": "string", "default": "fastp", "description": "Specify which adapter clipping tool to use. 
Options: 'fastp', 'adapterremoval'", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "reads_minlength": { "type": "integer", @@ -622,7 +635,16 @@ "default": "raw_bins_only", "description": "Specify which binning output is sent for downstream annotation, taxonomic classification, bin quality control etc.", "help_text": "`raw_bins_only`: only bins (and unbinned contigs) from the binners.\n`refined_bins_only`: only bins (and unbinned contigs) from the bin refinement step .\n`both`: bins and unbinned contigs from both the binning and bin refinement steps.", - "enum": ["raw_bins_only", "refined_bins_only", "both"] + "enum": [ + "raw_bins_only", + "refined_bins_only", + "both" + ] + }, + "busco_clean": { + "type": "boolean", + "description": "Enable clean-up of temporary files created during BUSCO runs.", + "help_text": "By default, BUSCO creates a large number of intermediate files every run. This may cause problems on some clusters which have file number limits in place, particularly with large numbers of bins. Enabling this option cleans these files, reducing the total file count of the work directory." 
} } }, From 795986bf7ac7ead7ca9d1735b17686010e43044e Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 15 Nov 2022 15:25:37 +0000 Subject: [PATCH 3/4] [automated] Fix linting with Prettier --- nextflow_schema.json | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8cc3573b..9ce6a710 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -174,14 +171,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -289,10 +279,7 @@ "type": "string", "default": "fastp", "description": "Specify which adapter clipping tool to use. 
Options: 'fastp', 'adapterremoval'", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "reads_minlength": { "type": "integer", @@ -635,11 +622,7 @@ "default": "raw_bins_only", "description": "Specify which binning output is sent for downstream annotation, taxonomic classification, bin quality control etc.", "help_text": "`raw_bins_only`: only bins (and unbinned contigs) from the binners.\n`refined_bins_only`: only bins (and unbinned contigs) from the bin refinement step .\n`both`: bins and unbinned contigs from both the binning and bin refinement steps.", - "enum": [ - "raw_bins_only", - "refined_bins_only", - "both" - ] + "enum": ["raw_bins_only", "refined_bins_only", "both"] }, "busco_clean": { "type": "boolean", From db08008b309087270b6f3b69a4cc19c5b675d35b Mon Sep 17 00:00:00 2001 From: Jim Downie Date: Tue, 15 Nov 2022 15:39:39 +0000 Subject: [PATCH 4/4] Added to tests and changelog. --- CHANGELOG.md | 6 ++++++ conf/test.config | 1 + 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86d63d74..2827ac70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## dev + +### `Added` + +- [#353](https://github.com/nf-core/mag/pull/353) - Added the busco_clean parameter to optionally clean each BUSCO directory after a successful run + ## v2.2.1 - 2022/08/25 ### `Added` diff --git a/conf/test.config b/conf/test.config index 5df32bdb..922ca6ff 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,5 +27,6 @@ params { min_length_unbinned_contigs = 1 max_unbinned_contigs = 2 busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz" + busco_clean = true gtdb = false }