diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..e01bb42 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,35 @@ +id: "ebi-metagenomics/emg-viral-pipeline-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "ebi-metagenomics/emg-viral-pipeline Methods Description" +section_href: "https://github.com/EBI-Metagenomics/emg-viral-pipeline" +plot_type: "html" +data: | +

+  <h4>Methods</h4>
+  <p>Data was processed using ebi-metagenomics/emg-viral-pipeline v${workflow.manifest.version} (${doi_text}) of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (GrĂĽning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
+  <h4>References</h4>
+  <ul>
+    <!-- reference list entries elided -->
+  </ul>
+  <h5>Notes:</h5>
+  <ul>
+    <!-- note entries elided -->
+  </ul>
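Note on the recurring change in the configs/modules.config and module diffs below: the per-sample identifier moves from a bare ${name} value to the nf-core-style meta map, so tags and publishDir paths are built from ${meta.id}. The following is a minimal, hypothetical sketch of that convention, not code from this pipeline; the process name, the params.output default, and the sample file path are invented for illustration.

params.output = params.output ?: 'results'

process EXAMPLE_PROCESS {
    // tag and publishDir are derived from the meta map, mirroring the
    // "${params.output}/${meta.id}/..." pattern used in configs/modules.config
    tag "${meta.id}"
    publishDir "${params.output}/${meta.id}/example", mode: 'copy'

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("${meta.id}.stats.txt")

    script:
    """
    grep -c '>' ${fasta} > ${meta.id}.stats.txt || true
    """
}

workflow {
    // meta is an ordinary Groovy map; 'id' is the key every module relies on,
    // and 'sampleA.fasta' is a placeholder path, not a file shipped with this PR
    samples = Channel.of(
        [ [ id: 'sampleA', single_end: false ], file('sampleA.fasta') ]
    )
    EXAMPLE_PROCESS(samples)
}

Carrying the whole map rather than a plain id string also lets downstream processes read other per-sample fields (for example meta.single_end, which the new nf-core fastp module uses) without extra channel bookkeeping.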
diff --git a/assets/mgnify_logo.png b/assets/mgnify_logo.png new file mode 100644 index 0000000..fe6112b Binary files /dev/null and b/assets/mgnify_logo.png differ diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 0000000..986fed1 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,61 @@ +report_comment: > + + This report has been generated by the ebi-metagenomics/emg-viral-pipeline pipeline. + +report_section_order: + "ebi-metagenomics/emg-viral-pipeline-methods-description": + order: -1000 + software_versions: + order: -1001 + "ebi-metagenomics/emg-viral-pipeline-summary": + order: -1002 + +export_plots: true + +data_format: "yaml" + +run_modules: + - fastqc + - fastp + +## Module order +module_order: + - fastqc + - fastp + +## File name cleaning +extra_fn_clean_exts: + - "_fastp" + +## Prettification +custom_logo: "mgnify_logo.png" +custom_logo_url: https://github.com/ebi-metagenomics/emg-viral-pipeline/ +custom_logo_title: "ebi-metagenomics/emg-viral-pipeline" + +## General Stats customisation +table_columns_visible: + "fastp": + pct_duplication: False + after_filtering_q30_rate: False + after_filtering_q30_bases: False + filtering_result_passed_filter_reads: 3300 + after_filtering_gc_content: False + pct_surviving: True + pct_adapter: True + +table_columns_placement: + "fastp": + pct_duplication: 3000 + after_filtering_q30_rate: 3100 + after_filtering_q30_bases: 3200 + filtering_result_passed_filter_reads: 3300 + after_filtering_gc_content: 3400 + pct_surviving: 3500 + pct_adapter: 3600 + +custom_table_header_config: + general_stats_table: + "Total length": + hidden: True + N50: + hidden: True diff --git a/configs/modules.config b/configs/modules.config index d0c3d8c..d435c71 100644 --- a/configs/modules.config +++ b/configs/modules.config @@ -12,7 +12,7 @@ process { withName: ANNOTATION { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/annotation/", + path: "${params.output}/${meta.id}/${params.finaldir}/annotation/", mode: params.publish_dir_mode, failOnError: false, pattern: "*_annotation.tsv" @@ -23,13 +23,13 @@ process { withName: ASSIGN { publishDir = [ [ - path: "${params.output}/${name}/${params.taxdir}", + path: "${params.output}/${meta.id}/${params.taxdir}", mode: params.publish_dir_mode, failOnError: false, pattern: "*_taxonomy.tsv" ], [ - path: "${params.output}/${name}/${params.finaldir}/taxonomy", + path: "${params.output}/${meta.id}/${params.finaldir}/taxonomy", mode: params.publish_dir_mode, failOnError: false, pattern: "*_taxonomy.tsv" @@ -40,7 +40,7 @@ process { withName: BALLOON { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/balloon/", + path: "${params.output}/${meta.id}/${params.finaldir}/balloon/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.{pdf,svg}" @@ -89,13 +89,13 @@ process { withName: CHECKV { publishDir = [ [ - path: "${params.output}/${name}/${params.checkvdir}/", + path: "${params.output}/${meta.id}/${params.checkvdir}/", mode: params.publish_dir_mode, failOnError: false, pattern: "${confidence_set_name}" ], [ - path: "${params.output}/${name}/${params.checkvdir}/", + path: "${params.output}/${meta.id}/${params.checkvdir}/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.tsv" @@ -106,7 +106,7 @@ process { withName: GENERATE_CHROMOMAP_TABLE { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/chromomap/", + path: "${params.output}/${meta.id}/${params.finaldir}/chromomap/", mode: 
params.publish_dir_mode, failOnError: false, pattern: "${id}.filtered-*.contigs.txt" @@ -117,13 +117,13 @@ process { withName: GENERATE_KRONA_TABLE { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}", + path: "${params.output}/${meta.id}/${params.plotdir}", mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.tsv" ], [ - path: "${params.output}/${name}/${params.finaldir}/krona/", + path: "${params.output}/${meta.id}/${params.finaldir}/krona/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.tsv" @@ -134,13 +134,13 @@ process { withName: GENERATE_SANKEY_TABLE { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}", + path: "${params.output}/${meta.id}/${params.plotdir}", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}.sankey.*" ], [ - path: "${params.output}/${name}/${params.finaldir}/sankey/", + path: "${params.output}/${meta.id}/${params.finaldir}/sankey/", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}.sankey.filtered-${params.sankey}.json" @@ -151,7 +151,7 @@ process { withName: CHROMOMAP { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/chromomap/", + path: "${params.output}/${meta.id}/${params.finaldir}/chromomap/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.html" @@ -162,10 +162,10 @@ process { withName: FILTER_READS { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.filtered.fastq" + pattern: "${meta.id}.filtered.fastq" ] ] } @@ -173,7 +173,7 @@ process { withName: HMM_POSTPROCESSING { publishDir = [ [ - path: "${params.output}/${name}/${params.hmmerdir}/", + path: "${params.output}/${meta.id}/${params.hmmerdir}/", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}_modified.tsv" @@ -184,7 +184,7 @@ process { withName: HMMSCAN { publishDir = [ [ - path: "${params.output}/${name}/${params.hmmerdir}/${params.db}", + path: "${params.output}/${meta.id}/${params.hmmerdir}/${params.db}", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}_${params.db}_hmmscan.tbl" @@ -195,10 +195,10 @@ process { withName: KAIJU { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.out" + pattern: "${meta.id}.out" ] ] } @@ -206,13 +206,13 @@ process { withName: KRONA { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}/krona/", + path: "${params.output}/${meta.id}/${params.plotdir}/krona/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.html" ], [ - path: "${params.output}/${name}/${params.finaldir}/krona/", + path: "${params.output}/${meta.id}/${params.finaldir}/krona/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.html" @@ -223,10 +223,10 @@ process { withName: LENGTH_FILTERING { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}*filt*.fasta" + pattern: "${meta.id}*filt*.fasta" ] ] } @@ -251,10 +251,10 @@ process { withName: MULTIQC { publishDir = [ [ - path: "${params.output}/${name}/${params.assemblydir}", + path: "${params.output}/${meta.id}/${params.assemblydir}", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_multiqc_report.html" + pattern: "${meta.id}_multiqc_report.html" 
] ] } @@ -262,22 +262,22 @@ process { withName: PARSE { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.fna" ], [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, pattern: "virsorter_metadata.tsv" ], [ - path: "${params.output}/${name}/${params.finaldir}/", + path: "${params.output}/${meta.id}/${params.finaldir}/", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_virus_predictions.log" + pattern: "${meta.id}_virus_predictions.log" ] ] } @@ -285,7 +285,7 @@ process { withName: PHANOTATE { publishDir = [ [ - path: "${params.output}/${name}/${params.phanotatedir}", + path: "${params.output}/${meta.id}/${params.phanotatedir}", mode: params.publish_dir_mode, failOnError: false, pattern: "*.faa" @@ -299,13 +299,13 @@ process { } publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}/", + path: "${params.output}/${meta.id}/${params.plotdir}/", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}_mapping_results" ], [ - path: "${params.output}/${name}/${params.finaldir}/annotation/", + path: "${params.output}/${meta.id}/${params.finaldir}/annotation/", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}_prot_ann_table_filtered.tsv" @@ -316,10 +316,10 @@ process { withName: PPRMETA { publishDir = [ [ - path: "${params.output}/${name}/${params.virusdir}/pprmeta", + path: "${params.output}/${meta.id}/${params.virusdir}/pprmeta", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_pprmeta.csv" + pattern: "${meta.id}_pprmeta.csv" ] ] } @@ -350,7 +350,7 @@ process { } publishDir = [ [ - path: "${params.output}/${name}/ratio_evalue_tables", + path: "${params.output}/${meta.id}/ratio_evalue_tables", mode: params.publish_dir_mode, failOnError: false, pattern: "${set_name}_modified_informative.tsv" @@ -361,10 +361,10 @@ process { withName: RENAME { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_renamed.fasta" + pattern: "${meta.id}_renamed.fasta" ] ] } @@ -372,13 +372,13 @@ process { withName: RESTORE { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}/${meta.id}/", mode: params.publish_dir_mode, failOnError: false, pattern: "*_original.fasta" ], [ - path: "${params.output}/${name}/${params.finaldir}/contigs/", + path: "${params.output}/${meta.id}/${params.finaldir}/contigs/", mode: params.publish_dir_mode, failOnError: false, pattern: "*_original.fasta" @@ -389,13 +389,13 @@ process { withName: SANKEY { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}", + path: "${params.output}/${meta.id}/${params.plotdir}", mode: params.publish_dir_mode, failOnError: false, pattern: "*.sankey.html" ], [ - path: "${params.output}/${name}/${params.finaldir}/sankey/", + path: "${params.output}/${meta.id}/${params.finaldir}/sankey/", mode: params.publish_dir_mode, failOnError: false, pattern: "*.sankey.html" @@ -406,10 +406,10 @@ process { withName: SPADES { publishDir = [ [ - path: "${params.output}/${name}/${params.assemblydir}", + path: "${params.output}/${meta.id}/${params.assemblydir}", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.fasta" + pattern: "${meta.id}.fasta" ] ] } @@ -420,10 +420,10 @@ process { } publishDir = [ [ - path: 
"${params.output}/${name}/${params.virusdir}/virfinder", + path: "${params.output}/${meta.id}/${params.virusdir}/virfinder", mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.txt" + pattern: "${meta.id}.txt" ] ] } @@ -431,7 +431,7 @@ process { withName: VIRSORTER { publishDir = [ [ - path: "${params.output}/${name}/${params.virusdir}/", + path: "${params.output}/${meta.id}/${params.virusdir}/", mode: params.publish_dir_mode, failOnError: false ] @@ -442,7 +442,7 @@ process { errorStrategy 'ignore' publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/gff", + path: "${params.output}/${meta.id}/${params.finaldir}/gff", mode: params.publish_dir_mode, failOnError: false, pattern: "*.gff" diff --git a/modules/local/annotation/main.nf b/modules/local/annotation/main.nf index 7aedbc2..4744998 100644 --- a/modules/local/annotation/main.nf +++ b/modules/local/annotation/main.nf @@ -15,16 +15,16 @@ process ANNOTATION { help="Name of processing .fna file to write correct output name") */ - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.1' input: - tuple val(name), val(set_name), file(tab), file(faa) + tuple val(meta), val(set_name), file(tab), file(faa) output: - tuple val(name), val(set_name), file("*_annotation.tsv") + tuple val(meta), val(set_name), file("*_annotation.tsv"), emit: annotations script: """ diff --git a/modules/local/assign/main.nf b/modules/local/assign/main.nf index 4fda3a2..6db0e14 100644 --- a/modules/local/assign/main.nf +++ b/modules/local/assign/main.nf @@ -4,18 +4,18 @@ process ASSIGN { provides the taxonomic lineage of each viral contig, based on the corresponding ViPhOG annotations''' */ - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.1' input: - tuple val(name), val(set_name), file(tab) + tuple val(meta), val(set_name), file(tab) file(db) file(factor) output: - tuple val(name), val(set_name), file("*_taxonomy.tsv") + tuple val(meta), val(set_name), file("*_taxonomy.tsv") script: """ diff --git a/modules/local/balloon/main.nf b/modules/local/balloon/main.nf index 06c8ee6..9184a03 100644 --- a/modules/local/balloon/main.nf +++ b/modules/local/balloon/main.nf @@ -1,11 +1,11 @@ process BALLOON { - tag "${name}" + tag "${meta.id}" label 'process_medium' container 'nanozoo/r_balloon:3.1.1--64f0f7d' input: - tuple val(name), val(set_name), file(tbl) + tuple val(meta), val(set_name), file(tbl) output: path ("*.{pdf,svg}") optional true @@ -22,16 +22,16 @@ process BALLOON { fi # genus - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv" # subfamily - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" # family - 
grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" # order - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" if [ -s \$NAME"_summary.tsv" ]; then balloon.R "\${NAME}_summary.tsv" "\${NAME}_balloon.svg" 10 8 diff --git a/modules/local/blast/main.nf b/modules/local/blast/main.nf index 73b162c..ffaefca 100644 --- a/modules/local/blast/main.nf +++ b/modules/local/blast/main.nf @@ -1,15 +1,15 @@ process BLAST { label 'process_high' - tag "${assembly_name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/blast:2.9.0' input: - tuple val(assembly_name), val(confidence_set_name), file(fasta) + tuple val(meta), val(confidence_set_name), file(fasta) file(db) output: - tuple val(assembly_name), val(confidence_set_name), file("${confidence_set_name}.blast"), file("${confidence_set_name}.filtered.blast") + tuple val(meta), val(confidence_set_name), file("${confidence_set_name}.blast"), file("${confidence_set_name}.filtered.blast") script: if (task.attempt.toString() == '1') diff --git a/modules/local/blast_filter/main.nf b/modules/local/blast_filter/main.nf index 623aa4d..543b2c6 100644 --- a/modules/local/blast_filter/main.nf +++ b/modules/local/blast_filter/main.nf @@ -1,14 +1,14 @@ process BLAST_FILTER { label 'process_low' - tag "${assembly_name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(assembly_name), val(confidence_set_name), file(blast), file(blast_filtered) + tuple val(meta), val(confidence_set_name), file(blast), file(blast_filtered) file(db) output: - tuple val(assembly_name), val(confidence_set_name), file("*.meta") + tuple val(meta), val(confidence_set_name), file("*.meta") script: if (task.attempt.toString() == '1') diff --git a/modules/local/checkv/main.nf b/modules/local/checkv/main.nf index 11bcea9..050b25f 100644 --- a/modules/local/checkv/main.nf +++ b/modules/local/checkv/main.nf @@ -1,27 +1,22 @@ process CHECKV { label 'process_medium' - tag "${name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/checkv:0.8.1__1' input: - tuple val(name), val(confidence_set_name), file(fasta), file(contigs) + tuple val(meta), val(confidence_set_name), file(fasta), file(contigs) file(database) output: - tuple val(name), val(confidence_set_name), file("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/") + tuple val(meta), val(confidence_set_name), file("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/") script: - if (confidence_set_name == 'prophages') { - """ - checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name} - cp ${confidence_set_name}/quality_summary.tsv 
${confidence_set_name}_quality_summary.tsv - """ - } else { + """ - checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name} + checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name} cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv """ - } + stub: """ mkdir negative_result_${confidence_set_name}.tsv diff --git a/modules/local/chromomap/main.nf b/modules/local/chromomap/main.nf index 5d7af19..25ef58f 100644 --- a/modules/local/chromomap/main.nf +++ b/modules/local/chromomap/main.nf @@ -1,17 +1,17 @@ process GENERATE_CHROMOMAP_TABLE { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/bioruby:2.0.1' input: - tuple val(name), val(set_name), file(assembly), file(annotation_table) + tuple val(meta), val(set_name), file(assembly), file(annotation_table) output: - tuple val(name), val(set_name), file("${id}.filtered-*.contigs.txt"), file("${id}.filtered-*.anno.txt") + tuple val(meta), val(set_name), file("${id}.filtered-*.contigs.txt"), file("${id}.filtered-*.anno.txt") script: id = set_name - if (set_name == "all") { id = name } + if (set_name == "all") { id = meta.id } """ # combine if [[ ${set_name} == "all" ]]; then @@ -30,17 +30,18 @@ process GENERATE_CHROMOMAP_TABLE { process CHROMOMAP { label 'process_medium' + tag "${meta.id}" container 'quay.io/microbiome-informatics/r_chromomap:0.3' input: - tuple val(name), val(set_name), file(contigs), file(annotations) + tuple val(meta), val(set_name), file(contigs), file(annotations) output: - tuple val(name), val(set_name), file("*.html") optional true + tuple val(meta), val(set_name), file("*.html") optional true script: id = set_name - if (set_name == "all") { id = name } + if (set_name == "all") { id = meta.id } """ #!/usr/bin/env Rscript diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf deleted file mode 100644 index c85d5d4..0000000 --- a/modules/local/fastp/main.nf +++ /dev/null @@ -1,21 +0,0 @@ -process FASTP { - -/* Comments: - -m, --merge - for paired-end input, merge each pair of reads into a single read if they are overlapped. - The merged reads will be written to the file given by --merged_out, the unmerged reads will be - written to the files specified by --out1 and --out2. The merging mode is disabled by default. 
-*/ - tag "${name}" - label 'process_medium' - container 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0' - - input: - tuple val(name), file(reads) - output: - tuple val(name), file("${name}*.fastp.fastq.gz") - script: - """ - fastp -i ${reads[0]} -I ${reads[1]} --thread ${task.cpus} -o ${name}.R1.fastp.fastq.gz -O ${name}.R2.fastp.fastq.gz - """ -} \ No newline at end of file diff --git a/modules/local/fastqc/main.nf b/modules/local/fastqc/main.nf deleted file mode 100644 index 199adb3..0000000 --- a/modules/local/fastqc/main.nf +++ /dev/null @@ -1,15 +0,0 @@ -process FASTQC { - tag "${name}" - label 'process_low' - container 'quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1' - - input: - tuple val(name), file(reads) - output: - tuple val(name), file("fastqc/${name}*fastqc*") - script: - """ - mkdir fastqc - fastqc -t ${task.cpus} -o fastqc *.fastq.gz - """ -} \ No newline at end of file diff --git a/modules/local/filter_reads/main.nf b/modules/local/filter_reads/main.nf index 5454154..aa0b6a5 100644 --- a/modules/local/filter_reads/main.nf +++ b/modules/local/filter_reads/main.nf @@ -1,19 +1,19 @@ process FILTER_READS { - tag "${name}" + tag "${meta.id}" label 'process_low' input: - tuple val(name), file(kaiju_filtered), file(fastq) + tuple val(meta), file(kaiju_filtered), file(fastq) output: - tuple val(name), file("${name}.filtered.fastq") - tuple val(name), file("${name}.filtered.fasta") + tuple val(meta), file("${meta.id}.filtered.fastq") + tuple val(meta), file("${meta.id}.filtered.fasta") script: """ - sed '/^@/!d;s//>/;N' ${fastq} > ${name}.fasta - faSomeRecords ${name}.fasta ${kaiju_filtered} ${name}.filtered.fasta - faToFastq ${name}.filtered.fasta ${name}.filtered.fastq - rm -f ${name}.fasta + sed '/^@/!d;s//>/;N' ${fastq} > ${meta.id}.fasta + faSomeRecords ${meta.id}.fasta ${kaiju_filtered} ${meta.id}.filtered.fasta + faToFastq ${meta.id}.filtered.fasta ${meta.id}.filtered.fastq + rm -f ${meta.id}.fasta """ } diff --git a/modules/local/hmm_postprocessing/main.nf b/modules/local/hmm_postprocessing/main.nf index af99cff..8de32c5 100644 --- a/modules/local/hmm_postprocessing/main.nf +++ b/modules/local/hmm_postprocessing/main.nf @@ -3,16 +3,16 @@ process HMM_POSTPROCESSING { input: File_hmmer_ViPhOG.tbl output: File_hmmer_ViPhOG_modified.tbl */ - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), val(set_name), file(hmmer_tbl), file(faa) + tuple val(meta), val(set_name), file(hmmer_tbl), file(faa) output: - tuple val(name), val(set_name), file("${set_name}_modified.tsv"), file(faa) + tuple val(meta), val(set_name), file("${set_name}_modified.tsv"), file(faa) script: """ diff --git a/modules/local/hmmscan/main.nf b/modules/local/hmmscan/main.nf index 82cf7b7..81d2a40 100644 --- a/modules/local/hmmscan/main.nf +++ b/modules/local/hmmscan/main.nf @@ -1,15 +1,15 @@ process HMMSCAN { - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_high' container 'quay.io/microbiome-informatics/hmmer:3.1b2' input: - tuple val(name), val(set_name), file(faa) + tuple val(meta), val(set_name), file(faa) file(db) output: - tuple val(name), val(set_name), file("${set_name}_${params.db}_hmmscan.tbl"), file(faa) + tuple val(meta), val(set_name), file("${set_name}_${params.db}_hmmscan.tbl"), file(faa) script: """ diff --git a/modules/local/kaiju/main.nf b/modules/local/kaiju/main.nf index c461b2f..c2480d1 100644 --- a/modules/local/kaiju/main.nf +++ b/modules/local/kaiju/main.nf @@ -6,28 +6,28 @@ 
process KAIJU { */ label 'process_medium' - tag "${name}" + tag "${meta.id}" container 'quay.io/biocontainers/kaiju:1.7.2--hdbcaa40_0' input: - tuple val(name), file(fastq) + tuple val(meta), file(fastq) file(database) output: - tuple val(name), file("${name}.out") - tuple val(name), file("${name}.out.krona") + tuple val(meta), file("${meta.id}.out") + tuple val(meta), file("${meta.id}.out.krona") shell: if (params.illumina) { ''' - kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{name}.out - kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona + kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{meta.id}.out + kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona ''' } if (params.fasta) { ''' - kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{name}.out - kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona + kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{meta.id}.out + kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona ''' } } diff --git a/modules/local/krona/main.nf b/modules/local/krona/main.nf index deb8c04..185c30e 100644 --- a/modules/local/krona/main.nf +++ b/modules/local/krona/main.nf @@ -1,21 +1,21 @@ process GENERATE_KRONA_TABLE { label 'process_low' - tag "${name}" + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), val(set_name), file(tbl) + tuple val(meta), val(set_name), file(tbl) output: - tuple val(name), val(set_name), file("*.krona.tsv") + tuple val(meta), val(set_name), file("*.krona.tsv") script: """ if [[ "${set_name}" == "all" ]]; then - grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${name}.tmp - grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${name}.tmp - cp ${name}.tmp ${name}.tsv - generate_counts_table.py -f ${name}.tsv -o ${name}.krona.tsv + grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${meta.id}.tmp + grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${meta.id}.tmp + cp ${meta.id}.tmp ${meta.id}.tsv + generate_counts_table.py -f ${meta.id}.tsv -o ${meta.id}.krona.tsv else generate_counts_table.py -f ${tbl} -o ${set_name}.krona.tsv fi @@ -24,18 +24,18 @@ process GENERATE_KRONA_TABLE { process KRONA { label 'process_low' - + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/krona:2.7.1' input: - tuple val(name), val(set_name), file(krona_file) + tuple val(meta), val(set_name), file(krona_file) output: file("*.krona.html") script: """ if [[ ${set_name} == "all" ]]; then - ktImportText -o ${name}.krona.html ${krona_file} + ktImportText -o ${meta.id}.krona.html ${krona_file} else ktImportText -o ${set_name}.krona.html ${krona_file} fi diff --git a/modules/local/length_filtering/main.nf b/modules/local/length_filtering/main.nf index b7fc090..c891aca 100644 --- a/modules/local/length_filtering/main.nf +++ b/modules/local/length_filtering/main.nf @@ -1,18 +1,18 @@ process LENGTH_FILTERING { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/biocontainers/biopython:1.75' input: - tuple 
val(name), file(fasta), file(map) + tuple val(meta), file(fasta), file(map) output: - tuple val(name), file("${name}*filt*.fasta"), env(CONTIGS) + tuple val(meta), file("${meta.id}*filt*.fasta"), env(CONTIGS) script: """ filter_contigs_len.py -f ${fasta} -l ${params.length} -o ./ - CONTIGS=\$(grep ">" ${name}*filt*.fasta | wc -l) + CONTIGS=\$(grep ">" ${meta.id}*filt*.fasta | wc -l) """ } diff --git a/modules/local/mashmap/main.nf b/modules/local/mashmap/main.nf index 5312397..fad49a6 100644 --- a/modules/local/mashmap/main.nf +++ b/modules/local/mashmap/main.nf @@ -1,10 +1,10 @@ process MASHMAP { label 'process_medium' - tag "${assembly_name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/mashmap:2.0' input: - tuple val(assembly_name), val(confidence_set_name), file(fasta) + tuple val(meta), val(confidence_set_name), file(fasta) file(reference) output: diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf deleted file mode 100644 index da54e9b..0000000 --- a/modules/local/multiqc/main.nf +++ /dev/null @@ -1,15 +0,0 @@ -process MULTIQC { - label 'process_low' - tag "${name}" - container 'quay.io/biocontainers/multiqc:1.9--py_1' - - input: - tuple val(name), file(fastqc) - output: - tuple val(name), file("${name}_multiqc_report.html") - - script: - """ - multiqc -i ${name} . - """ -} diff --git a/modules/local/parse/main.nf b/modules/local/parse/main.nf index 41949a7..17f99ad 100644 --- a/modules/local/parse/main.nf +++ b/modules/local/parse/main.nf @@ -1,21 +1,21 @@ process PARSE { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), file(fasta), val(contig_number), file(virfinder), file(virsorter), file(pprmeta) + tuple val(meta), file(fasta), val(contig_number), file(virfinder), file(virsorter), file(pprmeta) when: contig_number.toInteger() > 0 output: - tuple val(name), file("*.fna"), file('virsorter_metadata.tsv'), file("${name}_virus_predictions.log"), optional: true + tuple val(meta), file("*.fna"), file('virsorter_metadata.tsv'), file("${meta.id}_virus_predictions.log"), optional: true script: """ touch virsorter_metadata.tsv - parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${name}_virus_predictions.log + parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${meta.id}_virus_predictions.log """ } diff --git a/modules/local/phanotate/main.nf b/modules/local/phanotate/main.nf index 5f72fe2..c1dd578 100644 --- a/modules/local/phanotate/main.nf +++ b/modules/local/phanotate/main.nf @@ -1,13 +1,13 @@ process PHANOTATE { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/biocontainers/phanotate:1.5.0--h30d9df9_2' input: - tuple val(name), file(fasta) + tuple val(meta), file(fasta) output: - tuple val(name), stdout, file("*.faa") + tuple val(meta), stdout, file("*.faa") script: """ diff --git a/modules/local/plot_contig_map/main.nf b/modules/local/plot_contig_map/main.nf index bc55bf2..a7060b9 100644 --- a/modules/local/plot_contig_map/main.nf +++ b/modules/local/plot_contig_map/main.nf @@ -1,14 +1,14 @@ process PLOT_CONTIG_MAP { - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' container 'quay.io/microbiome-informatics/virify-plot-contig-map:1' input: - tuple val(name), val(set_name), file(tab) + tuple val(meta), val(set_name), file(tab) output: - tuple val(name), 
val(set_name), file("${set_name}_mapping_results"), file("${set_name}_prot_ann_table_filtered.tsv") + tuple val(meta), val(set_name), file("${set_name}_mapping_results"), file("${set_name}_prot_ann_table_filtered.tsv") script: """ diff --git a/modules/local/pprmeta/main.nf b/modules/local/pprmeta/main.nf index e1f6288..79d7e85 100644 --- a/modules/local/pprmeta/main.nf +++ b/modules/local/pprmeta/main.nf @@ -1,22 +1,22 @@ process PPRMETA { label 'process_medium' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/pprmeta:1.1' input: - tuple val(name), file(fasta), val(contig_number) + tuple val(meta), file(fasta), val(contig_number) path(pprmeta_git) when: contig_number.toInteger() > 0 output: - tuple val(name), file("${name}_pprmeta.csv") + tuple val(meta), file("${meta.id}_pprmeta.csv") script: """ [ -d "pprmeta" ] && cp pprmeta/* . - ./PPR_Meta ${fasta} ${name}_pprmeta.csv + ./PPR_Meta ${fasta} ${meta.id}_pprmeta.csv """ } diff --git a/modules/local/prodigal/main.nf b/modules/local/prodigal/main.nf index 5b429f0..aee9afd 100644 --- a/modules/local/prodigal/main.nf +++ b/modules/local/prodigal/main.nf @@ -1,13 +1,13 @@ process PRODIGAL { label 'process_high' - tag "${name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/biocontainers/prodigal:2.6.3--hec16e2b_4' input: - tuple val(assembly_name), val(confidence_set_name), file(fasta) + tuple val(meta), val(confidence_set_name), file(fasta) output: - tuple val(assembly_name), val(confidence_set_name), file("*.faa") + tuple val(meta), val(confidence_set_name), file("*.faa") script: """ diff --git a/modules/local/ratio_evalue/main.nf b/modules/local/ratio_evalue/main.nf index 28ff720..65e7481 100644 --- a/modules/local/ratio_evalue/main.nf +++ b/modules/local/ratio_evalue/main.nf @@ -7,17 +7,17 @@ process RATIO_EVALUE { out PRJNA530103_small_modified_informative.tsv */ - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.1' input: - tuple val(name), val(set_name), file(modified_table), file(faa) + tuple val(meta), val(set_name), file(modified_table), file(faa) file(model_metadata) output: - tuple val(name), val(set_name), file("${set_name}_modified_informative.tsv"), file(faa), optional: true + tuple val(meta), val(set_name), file("${set_name}_modified_informative.tsv"), file(faa), optional: true script: """ diff --git a/modules/local/rename/main.nf b/modules/local/rename/main.nf index 8a54128..440115c 100644 --- a/modules/local/rename/main.nf +++ b/modules/local/rename/main.nf @@ -4,14 +4,14 @@ process RENAME { */ label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), file(fasta) + tuple val(meta), file(fasta) output: - tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv") + tuple val(meta), file("${meta.id}_renamed.fasta"), file("${meta.id}_map.tsv") script: """ @@ -20,7 +20,7 @@ process RENAME { else cp ${fasta} tmp.fasta fi - rename_fasta.py -i tmp.fasta -m ${name}_map.tsv -o ${name}_renamed.fasta rename + rename_fasta.py -i tmp.fasta -m ${meta.id}_map.tsv -o ${meta.id}_renamed.fasta rename """ } diff --git a/modules/local/restore/main.nf b/modules/local/restore/main.nf index 4327181..de98f2c 100644 --- a/modules/local/restore/main.nf +++ b/modules/local/restore/main.nf @@ -2,16 +2,16 @@ process RESTORE { /* usage: rename_fasta.py [-h] -i INPUT [-m MAP] -o OUTPUT {rename,restore} ... 
*/ - tag "${name}" + tag "${meta.id}" label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), file(fasta), file(map) + tuple val(meta), file(fasta), file(map) output: - tuple val(name), env(BN), file("*_original.fasta") + tuple val(meta), env(BN), file("*_original.fasta") script: """ diff --git a/modules/local/sankey/main.nf b/modules/local/sankey/main.nf index b1f2252..4240561 100644 --- a/modules/local/sankey/main.nf +++ b/modules/local/sankey/main.nf @@ -1,13 +1,13 @@ process GENERATE_SANKEY_TABLE { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/bioruby:2.0.1' input: - tuple val(name), val(set_name), file(krona_table) + tuple val(meta), val(set_name), file(krona_table) output: - tuple val(name), val(set_name), file("${set_name}.sankey.filtered-${params.sankey}.json"), file("${set_name}.sankey.tsv") + tuple val(meta), val(set_name), file("${set_name}.sankey.filtered-${params.sankey}.json"), file("${set_name}.sankey.tsv") script: """ @@ -23,18 +23,18 @@ process GENERATE_SANKEY_TABLE { process SANKEY { label 'process_medium' - + tag "${meta.id}" container 'quay.io/microbiome-informatics/sankeyd3:0.12.3' input: - tuple val(name), val(set_name), file(json), file(tsv) + tuple val(meta), val(set_name), file(json), file(tsv) output: - tuple val(name), val(set_name), file("*.sankey.html") + tuple val(meta), val(set_name), file("*.sankey.html") script: id = set_name - if (set_name == "all") { id = name } + if (set_name == "all") { id = meta.id } """ #!/usr/bin/env Rscript diff --git a/modules/local/spades/main.nf b/modules/local/spades/main.nf deleted file mode 100644 index cdf4483..0000000 --- a/modules/local/spades/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -process SPADES { - - label 'process_medium' - tag "${name}" - container 'quay.io/biocontainers/spades:3.15.5--h95f258a_1' - - input: - tuple val(name), file(reads) - output: - tuple val(name), file("${name}.fasta") - - script: - """ - spades.py --meta --only-assembler -1 !{reads[0]} -2 !{reads[1]} -t !{task.cpus} -o assembly - mv assembly/contigs.fasta !{name}.fasta - """ -} \ No newline at end of file diff --git a/modules/local/virfinder/main.nf b/modules/local/virfinder/main.nf index f4d8f96..0bbd574 100644 --- a/modules/local/virfinder/main.nf +++ b/modules/local/virfinder/main.nf @@ -1,22 +1,22 @@ process VIRFINDER { - tag "${name}" + tag "${meta.id}" label 'process_high' container 'quay.io/microbiome-informatics/virfinder:1.1__eb8032e' input: - tuple val(name), file(fasta), val(contig_number) + tuple val(meta), file(fasta), val(contig_number) path model when: contig_number.toInteger() > 0 output: - tuple val(name), file("${name}.txt") + tuple val(meta), file("${meta.id}.txt") script: """ run_virfinder.Rscript ${model} ${fasta} . 
- awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${name}*.tsv > ${name}.txt + awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${meta.id}*.tsv > ${meta.id}.txt """ } diff --git a/modules/local/virsorter/main.nf b/modules/local/virsorter/main.nf index 3d5a2bd..0947efb 100644 --- a/modules/local/virsorter/main.nf +++ b/modules/local/virsorter/main.nf @@ -1,17 +1,17 @@ process VIRSORTER { label 'process_high' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/virsorter:1.0.6_edfeb8c5e72' input: - tuple val(name), file(fasta), val(contig_number) + tuple val(meta), file(fasta), val(contig_number) path(database) when: contig_number.toInteger() > 0 output: - tuple val(name), file("*") + tuple val(meta), file("*") script: if (params.virome) diff --git a/modules/local/write_gff/main.nf b/modules/local/write_gff/main.nf index 198b221..de1ab1f 100644 --- a/modules/local/write_gff/main.nf +++ b/modules/local/write_gff/main.nf @@ -1,17 +1,14 @@ process WRITE_GFF { - tag "${name}" + tag "${meta.id}" label 'process_medium' container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), path(fasta) - path(viphos_annotations) - path(taxonomies) - path(quality_summaries) + tuple val(meta), path(fasta), path(viphos_annotations), path(taxonomies), path(quality_summaries) output: - path("${name}_virify.gff") + path("${meta.id}_virify.gff") script: """ @@ -19,9 +16,9 @@ process WRITE_GFF { -v ${viphos_annotations.join(' ')} \ -c ${quality_summaries.join(' ')} \ -t ${taxonomies.join(' ')} \ - -s ${name} \ + -s ${meta.id} \ -a ${fasta} - gt gff3validator ${name}_virify.gff + gt gff3validator ${meta.id}_virify.gff """ } diff --git a/modules/local/fastp/fastp.yaml b/modules/nf-core/checkv/endtoend/environment.yml similarity index 64% rename from modules/local/fastp/fastp.yaml rename to modules/nf-core/checkv/endtoend/environment.yml index b4df7d8..8646fff 100644 --- a/modules/local/fastp/fastp.yaml +++ b/modules/nf-core/checkv/endtoend/environment.yml @@ -1,6 +1,5 @@ -name: fastp channels: - - bioconda - conda-forge + - bioconda dependencies: - - fastp=0.20.0 + - bioconda::checkv=1.0.1 \ No newline at end of file diff --git a/modules/nf-core/checkv/endtoend/main.nf b/modules/nf-core/checkv/endtoend/main.nf new file mode 100644 index 0000000..635c9fa --- /dev/null +++ b/modules/nf-core/checkv/endtoend/main.nf @@ -0,0 +1,63 @@ +process CHECKV_ENDTOEND { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/checkv:1.0.1--pyhdfd78af_0': + 'biocontainers/checkv:1.0.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path db + + output: + tuple val(meta), path ("${prefix}/quality_summary.tsv") , emit: quality_summary + tuple val(meta), path ("${prefix}/completeness.tsv") , emit: completeness + tuple val(meta), path ("${prefix}/contamination.tsv") , emit: contamination + tuple val(meta), path ("${prefix}/complete_genomes.tsv"), emit: complete_genomes + tuple val(meta), path ("${prefix}/proviruses.fna") , emit: proviruses + tuple val(meta), path ("${prefix}/viruses.fna") , emit: viruses + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + checkv \\ + end_to_end \\ + $args \\ + -t $task.cpus \\ + -d $db \\ + $fasta \\ + $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch ${prefix}/quality_summary.tsv + touch ${prefix}/completeness.tsv + touch ${prefix}/contamination.tsv + touch ${prefix}/complete_genomes.tsv + touch ${prefix}/proviruses.fna + touch ${prefix}/viruses.fna + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p') + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/checkv/endtoend/meta.yml b/modules/nf-core/checkv/endtoend/meta.yml new file mode 100644 index 0000000..c74d091 --- /dev/null +++ b/modules/nf-core/checkv/endtoend/meta.yml @@ -0,0 +1,107 @@ +name: "checkv_endtoend" +description: Assess the quality of metagenome-assembled viral genomes. +keywords: + - checkv + - checkm + - mag + - metagenome + - quality + - isolates + - virus + - completeness + - contamination +tools: + - "checkv": + description: Assess the quality of metagenome-assembled viral genomes. + homepage: https://bitbucket.org/berkeleylab/checkv/src/master/ + documentation: https://bitbucket.org/berkeleylab/checkv/src/master/ + tool_dev_url: https://bitbucket.org/berkeleylab/checkv/src/master/ + doi: "10.1038/s41587-020-00774-7" + licence: ["BSD License"] + identifier: biotools:checkv +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: fasta file + pattern: "*.{fasta,fna,fa}" + - - db: + type: directory + description: Directory pointing to checkV database +output: + - quality_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/quality_summary.tsv: + type: file + description: CheckV's main output containing integrated results from the three + main modules (contamination, completeness, complete genomes) with overall + quality of contigs + pattern: "${prefix}/quality_summary.tsv" + - completeness: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/completeness.tsv: + type: file + description: CheckV's detailed overview table on estimating completeness + pattern: "${prefix}/completeness.tsv" + - contamination: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample', bin:'1' ] + - ${prefix}/contamination.tsv: + type: file + description: CheckV's detailed overview table on estimating contamination + pattern: "${prefix}/contamination.tsv" + - complete_genomes: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/complete_genomes.tsv: + type: file + description: CheckV's detailed overview table on the identified putative complete + genomes + pattern: "${prefix}/complete_genomes.tsv" + - proviruses: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/proviruses.fna: + type: file + description: CheckV's extracted proviruses contigs + pattern: "${prefix}/proviruses.fna" + - viruses: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/viruses.fna: + type: file + description: CheckV's extracted virus contigs + pattern: "${prefix}/viruses.fna" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" \ No newline at end of file diff --git a/modules/local/spades/spades.yaml b/modules/nf-core/fastp/environment.yml similarity index 64% rename from modules/local/spades/spades.yaml rename to modules/nf-core/fastp/environment.yml index b6db0fd..de9463b 100644 --- a/modules/local/spades/spades.yaml +++ b/modules/nf-core/fastp/environment.yml @@ -1,6 +1,5 @@ -name: spades channels: - - bioconda - conda-forge + - bioconda dependencies: - - spades=3.14 + - bioconda::fastp=0.23.4 \ No newline at end of file diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 0000000..08200cd --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,125 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val discard_trimmed_pass + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? 
"--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 0000000..bece97e --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,113 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. 
This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. + | This can be used to use fastp for the output report only. + - - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastq log file + pattern: "*.log" + - reads_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" \ No newline at end of file diff --git a/modules/local/multiqc/multiqc.yaml b/modules/nf-core/fastqc/environment.yml similarity index 63% rename from modules/local/multiqc/multiqc.yaml rename to modules/nf-core/fastqc/environment.yml index 3bc368b..8b76b92 100644 --- a/modules/local/multiqc/multiqc.yaml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,6 +1,5 @@ -name: multiqc channels: - - bioconda - conda-forge + - bioconda dependencies: - - multiqc=1.8 + - bioconda::fastqc=0.12.1 \ No newline at end of file diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 0000000..feee5f0 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,64 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 0000000..40c8711 --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,66 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] + identifier: biotools:fastqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" \ No newline at end of file diff --git a/modules/local/fastqc/fastqc.yaml b/modules/nf-core/multiqc/environment.yml similarity index 63% rename from modules/local/fastqc/fastqc.yaml rename to modules/nf-core/multiqc/environment.yml index eef652b..e1d226f 100644 --- a/modules/local/fastqc/fastqc.yaml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,6 +1,5 @@ -name: fastqc channels: - - bioconda - conda-forge + - bioconda dependencies: - - fastqc=0.11.8 + - bioconda::multiqc=1.25.1 \ No newline at end of file diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 0000000..a91446d --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,63 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path(replace_names) + path(sample_names) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $prefix \\ + $extra_config \\ + $logo \\ + $replace \\ + $samples \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 0000000..2621b2a --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,78 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc +input: + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" +output: + - report: + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" \ No newline at end of file diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml new file mode 100644 index 0000000..b9455d6 --- /dev/null +++ b/modules/nf-core/prodigal/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::prodigal=2.6.3 + - conda-forge::pigz=2.6 \ No newline at end of file diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf new file mode 100644 index 0000000..916f97e --- /dev/null +++ b/modules/nf-core/prodigal/main.nf @@ -0,0 +1,64 @@ +process PRODIGAL { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' : + 'biocontainers/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' }" + + input: + tuple val(meta), path(genome) + val(output_format) + + output: + tuple val(meta), path("${prefix}.${output_format}.gz"), emit: gene_annotations + tuple val(meta), path("${prefix}.fna.gz"), emit: nucleotide_fasta + tuple val(meta), path("${prefix}.faa.gz"), emit: amino_acid_fasta + tuple val(meta), path("${prefix}_all.txt.gz"), emit: all_gene_annotations + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + pigz -cdf ${genome} | prodigal \\ + $args \\ + -f $output_format \\ + -d "${prefix}.fna" \\ + -o "${prefix}.${output_format}" \\ + -a "${prefix}.faa" \\ + -s "${prefix}_all.txt" + + pigz -nm ${prefix}.fna + pigz -nm ${prefix}.${output_format} + pigz -nm ${prefix}.faa + pigz -nm ${prefix}_all.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fna.gz + touch ${prefix}.${output_format}.gz + touch ${prefix}.faa.gz + touch ${prefix}_all.txt.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + +} \ No newline at end of file diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml new file mode 100644 index 0000000..d59ff5c --- /dev/null +++ b/modules/nf-core/prodigal/meta.yml @@ -0,0 +1,79 @@ +name: prodigal +description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a + microbial (bacterial and archaeal) gene finding program +keywords: + - prokaryotes + - gene finding + - 
microbial +tools: + - prodigal: + description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) + is a microbial (bacterial and archaeal) gene finding program + homepage: https://github.com/hyattpd/Prodigal + documentation: https://github.com/hyattpd/prodigal/wiki + tool_dev_url: https://github.com/hyattpd/Prodigal + doi: "10.1186/1471-2105-11-119" + licence: ["GPL v3"] + identifier: biotools:prodigal +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - genome: + type: file + description: fasta/fasta.gz file + - - output_format: + type: string + description: Output format ("gbk"/"gff"/"sqn"/"sco") +output: + - gene_annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${output_format}.gz: + type: file + description: gene annotations in output_format given as input + pattern: "*.{output_format}" + - nucleotide_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.fna.gz: + type: file + description: nucleotide sequences file + pattern: "*.{fna}" + - amino_acid_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.faa.gz: + type: file + description: protein translations file + pattern: "*.{faa}" + - all_gene_annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}_all.txt.gz: + type: file + description: complete starts file + pattern: "*.{_all.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@grst" +maintainers: + - "@grst" \ No newline at end of file diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml new file mode 100644 index 0000000..569eb73 --- /dev/null +++ b/modules/nf-core/spades/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::spades=4.0.0 \ No newline at end of file diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf new file mode 100644 index 0000000..46f11c2 --- /dev/null +++ b/modules/nf-core/spades/main.nf @@ -0,0 +1,102 @@ +process SPADES { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/spades:4.0.0--h5fb382e_1' : + 'biocontainers/spades:4.0.0--h5fb382e_1' }" + + input: + tuple val(meta), path(illumina), path(pacbio), path(nanopore) + path yml + path hmm + + output: + tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds + tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs + tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts + tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters + tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa + tuple val(meta), path('*.warnings.log') , optional:true, emit: warnings + tuple val(meta), path('*.spades.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + spades.py \\ + $args \\ + --threads $task.cpus \\ + --memory $maxmem \\ + $custom_hmms \\ + $reads \\ + -o ./ + mv spades.log ${prefix}.spades.log + + if [ -f scaffolds.fasta ]; then + mv scaffolds.fasta ${prefix}.scaffolds.fa + gzip -n ${prefix}.scaffolds.fa + fi + if [ -f contigs.fasta ]; then + mv contigs.fasta ${prefix}.contigs.fa + gzip -n ${prefix}.contigs.fa + fi + if [ -f transcripts.fasta ]; then + mv transcripts.fasta ${prefix}.transcripts.fa + gzip -n ${prefix}.transcripts.fa + fi + if [ -f assembly_graph_with_scaffolds.gfa ]; then + mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa + gzip -n ${prefix}.assembly.gfa + fi + + if [ -f gene_clusters.fasta ]; then + mv gene_clusters.fasta ${prefix}.gene_clusters.fa + gzip -n ${prefix}.gene_clusters.fa + fi + + if [ -f warnings.log ]; then + mv warnings.log ${prefix}.warnings.log + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? 
"--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + echo "" | gzip > ${prefix}.scaffolds.fa.gz + echo "" | gzip > ${prefix}.contigs.fa.gz + echo "" | gzip > ${prefix}.transcripts.fa.gz + echo "" | gzip > ${prefix}.gene_clusters.fa.gz + echo "" | gzip > ${prefix}.assembly.gfa.gz + touch ${prefix}.spades.log + touch ${prefix}.warnings.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml new file mode 100644 index 0000000..65d260d --- /dev/null +++ b/modules/nf-core/spades/meta.yml @@ -0,0 +1,151 @@ +name: spades +description: Assembles a small genome (bacterial, fungal, viral) +keywords: + - genome + - assembly + - genome assembler + - small genome + - de novo assembler +tools: + - spades: + description: SPAdes (St. Petersburg genome assembler) is intended for both standard + isolates and single-cell MDA bacteria assemblies. + homepage: http://cab.spbu.ru/files/release3.15.0/manual.html + documentation: http://cab.spbu.ru/files/release3.15.0/manual.html + tool_dev_url: https://github.com/ablab/spades + doi: 10.1089/cmb.2012.0021 + licence: ["GPL v2"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - illumina: + type: file + description: | + List of input FastQ (Illumina or PacBio CCS reads) files + of size 1 and 2 for single-end and paired-end data, + respectively. This input data type is required. + - pacbio: + type: file + description: | + List of input PacBio CLR FastQ files of size 1. + - nanopore: + type: file + description: | + List of input FastQ files of size 1, originating from Oxford Nanopore technology. + - - yml: + type: file + description: | + Path to yml file containing read information. + The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML. + File entries in this yml must contain only the file name and no paths. + pattern: "*.{yml,yaml}" + - - hmm: + type: file + description: File or directory with amino acid HMMs for Spades HMM-guided mode. +output: + - scaffolds: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.scaffolds.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.contigs.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - transcripts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.transcripts.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - gene_clusters: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.gene_clusters.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - gfa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.gfa.gz" + - "*.assembly.gfa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.gfa.gz" + - warnings: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.warnings.log": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.spades.log" + - "*.spades.log": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.spades.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" +maintainers: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 34fa99b..92a8257 100755 --- a/nextflow.config +++ b/nextflow.config @@ -70,6 +70,12 @@ params { singularity_cachedir = 'singularity' publish_dir_mode = 'copy' + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + multiqc_methods_description = null } profiles { diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 57810b2..678d351 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -93,25 +93,36 @@ workflow ANNOTATE { } CHECKV( - predicted_contigs.combine( contigs.map { name, fasta -> fasta }), + predicted_contigs.join(contigs.map { meta, fasta -> fasta }), checkv_db ) - viphos_annotations = ANNOTATION.out.map { _, __, annotations -> annotations }.collect() - taxonomy_annotations = ASSIGN.out.map { _, __, taxonomy -> taxonomy }.collect() - checkv_results = CHECKV.out.map { _, __, quality_summary, ___ -> quality_summary }.collect() + viphos_annotations = ANNOTATION.out.annotations.groupTuple().map{ + meta, values -> { + def annotations = values.collect{it[1]}; + return [meta, annotations] } + } + taxonomy_annotations = ASSIGN.out.groupTuple().map{ + meta, values -> { + def taxonomy = values.collect{it[1]}; + return [meta, taxonomy] } + } + checkv_results = CHECKV.out.groupTuple().map{ + meta, values -> { + def quality_summary = values.collect{it[1]}; + return [meta, quality_summary] } + } WRITE_GFF( - contigs.first(), - viphos_annotations, - taxonomy_annotations, - checkv_results + contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results) ) - - predicted_contigs_filtered = predicted_contigs.map { id, set_name, fasta -> [set_name, id, fasta] } - plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { id, set_name, dir, table -> [set_name, table] } + + chromomap_ch = Channel.empty() + predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] } + plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] } chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { 
set_name, assembly_name, fasta, tsv -> [assembly_name, set_name, fasta, tsv]} - + chromomap_ch.view() + emit: assign_output = ASSIGN.out chromomap = chromomap_ch diff --git a/subworkflows/local/assemble_illumina.nf b/subworkflows/local/assemble_illumina.nf index 7fe36be..d45aa6c 100644 --- a/subworkflows/local/assemble_illumina.nf +++ b/subworkflows/local/assemble_illumina.nf @@ -2,24 +2,39 @@ Optional assembly step, not fully implemented and tested. */ -include { FASTP } from '../../modules/local/fastp' -include { FASTQC } from '../../modules/local/fastqc' -include { MULTIQC } from '../../modules/local/multiqc' -include { SPADES } from '../../modules/local/spades' +include { FASTP } from '../../modules/nf-core/fastp' +include { FASTQC as FASTQC_BEFORE } from '../../modules/nf-core/fastqc' +include { FASTQC as FASTQC_AFTER } from '../../modules/nf-core/fastqc' +include { SPADES } from '../../modules/nf-core/spades' workflow ASSEMBLE_ILLUMINA { take: reads main: + // QC before filtering + FASTQC_BEFORE(reads) + // trimming - FASTP(reads) + FASTP( + reads, + [], + false, + false, + false + ) + + // QC after filtering + FASTQC_AFTER(FASTP.out.reads) - // read QC - MULTIQC(FASTQC(FSATP.out)) - // assembly - SPADES(FASTP.out) + SPADES(FASTP.out.reads) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_BEFORE.out.zip.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( FASTP.out.json.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_AFTER.out.zip.collect{it[1]}.ifEmpty([]) ) emit: - assembly = SPADES.out + assembly = SPADES.out.contigs + ch_multiqc_files = ch_multiqc_files } \ No newline at end of file diff --git a/subworkflows/local/detect.nf b/subworkflows/local/detect.nf index bc4a37e..ea9fd64 100644 --- a/subworkflows/local/detect.nf +++ b/subworkflows/local/detect.nf @@ -18,14 +18,12 @@ workflow DETECT { main: - renamed_ch = assembly_renamed_length_filtered.map {name, renamed_fasta, map, _, __ -> { - tuple(name, renamed_fasta, map) - } + renamed_ch = assembly_renamed_length_filtered.map { + meta, renamed_fasta, map, _, __ -> tuple(meta, renamed_fasta, map) } - length_filtered_ch = assembly_renamed_length_filtered.map { name, _, __, filtered_fasta, contig_number -> { - tuple(name, filtered_fasta, contig_number) - } + length_filtered_ch = assembly_renamed_length_filtered.map { + meta, _, __, filtered_fasta, contig_number -> tuple(meta, filtered_fasta, contig_number) } // virus detection --> VirSorter, VirFinder and PPR-Meta @@ -37,5 +35,5 @@ workflow DETECT { PARSE( length_filtered_ch.join( VIRFINDER.out ).join( VIRSORTER.out ).join( PPRMETA.out ) ) emit: - detect_output = PARSE.out.join(renamed_ch).transpose().map{ name, fasta, vs_meta, log, renamed_fasta, map -> tuple (name, fasta, map) } + detect_output = PARSE.out.join(renamed_ch).transpose().map{ meta, fasta, vs_meta, log, renamed_fasta, map -> tuple (meta, fasta, map) } } \ No newline at end of file diff --git a/subworkflows/local/preprocess.nf b/subworkflows/local/preprocess.nf index 86cb231..efff437 100644 --- a/subworkflows/local/preprocess.nf +++ b/subworkflows/local/preprocess.nf @@ -12,12 +12,12 @@ workflow PREPROCESS { main: - RENAME(assembly) + RENAME(assembly) // out: (meta, renamed.fasta, map) // filter contigs by length - LENGTH_FILTERING(RENAME.out) + LENGTH_FILTERING(RENAME.out) // out: (meta, filt_fasta, env) emit: - // tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv"), file("${name}*filt*.fasta"), env(CONTIGS) + // 
tuple val(meta), file("${meta.id}_renamed.fasta"), file("${meta.id}_map.tsv"), file("${meta.id}*filt*.fasta"), env(CONTIGS)
     preprocessed_data = RENAME.out.join(LENGTH_FILTERING.out, by: 0)
 }
\ No newline at end of file
diff --git a/workflows/virify.nf b/workflows/virify.nf
index a892dc0..21a7bb0 100755
--- a/workflows/virify.nf
+++ b/workflows/virify.nf
@@ -4,19 +4,27 @@
 * INPUT CHANNELS
 **************************/
 
-input_ch = Channel.empty()
-mashmap_ref_ch = Channel.empty()
-factor_file = Channel.empty()
+input_ch                              = Channel.empty()
+mashmap_ref_ch                        = Channel.empty()
+factor_file                           = Channel.empty()
+ch_multiqc_config                     = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config              = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo                       = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.fromPath("$projectDir/assets/mgnify_logo.png")
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+
 include { samplesheetToList } from 'plugin/nf-schema'
 
 if ( params.samplesheet ) {
     groupReads = { id, assembly, fq1, fq2 ->
         if (fq1 == []) {
-            return tuple(id, assembly)
+            return tuple(["id": id],
+                assembly
+            )
         }
         else {
             if (params.assemble) {
-                return tuple(id, [fq1, fq2])
+                return tuple(["id": id],
+                    [fq1, fq2])
             }
             else {
                 exit 1, "input missing, use [--assemble] flag with raw reads"
@@ -29,7 +37,7 @@ if ( params.samplesheet ) {
 // one sample of assembly
 if (params.fasta) {
     input_ch = Channel.fromPath( params.fasta, checkIfExists: true)
-                    .map { file -> tuple(file.simpleName, file) }
+                    .map { file -> tuple(["id": file.simpleName], file) }
 }
 
 // mashmap input
@@ -41,6 +49,11 @@ if (params.mashmap) {
 if (params.factor) {
     factor_file = file( params.factor, checkIfExists: true)
 }
+/**************************
+* SUB WORKFLOWS
+**************************/
+
+include { MULTIQC } from '../modules/nf-core/multiqc'
 
 /**************************
 * SUB WORKFLOWS
 **************************/
@@ -83,6 +96,7 @@ workflow VIRIFY {
     }
 
     // ----------- rename fasta + length filtering
+    // out: (meta, renamed_fasta, map, filtered_fasta, env)
    PREPROCESS( assembly_ch )
 
    // ----------- if --onlyannotate - skip DETECT step
@@ -126,5 +140,17 @@ workflow VIRIFY {
         ANNOTATE.out.assign_output,
         ANNOTATE.out.chromomap
     )
+
+    if (params.assemble) {
+        ch_multiqc_files = ASSEMBLE_ILLUMINA.out.ch_multiqc_files
+        MULTIQC(
+            ch_multiqc_files.collect(),
+            ch_multiqc_config.toList(),
+            ch_multiqc_custom_config.toList(),
+            ch_multiqc_logo.toList(),
+            [],
+            []
+        )
+    }
 
 }
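
A note on the memory arithmetic in the FastQC module added above: FastQC multiplies its --memory value by the requested --threads, so the module divides task.memory by task.cpus and clamps the result to the 100-10000 MB range FastQC accepts. A minimal Groovy sketch of that clamp; the function and variable names are illustrative and not part of the module:

    // Illustrative clamp mirroring the module's fastqc_memory calculation.
    def fastqcMemoryMb(int totalMemoryMb, int cpus) {
        def perThread = totalMemoryMb.intdiv(cpus)        // FastQC allocates --memory per thread
        return Math.min(10000, Math.max(100, perThread))  // FastQC only accepts 100-10000 MB
    }

    assert fastqcMemoryMb(36864, 6) == 6144    // 36 GB spread over 6 CPUs
    assert fastqcMemoryMb(512, 8)   == 100     // floor
    assert fastqcMemoryMb(64000, 2) == 10000   // ceiling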
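
For the SPAdes module, the read arguments are assembled from whichever optional inputs are present: a --dataset YAML takes precedence over the per-technology channels, and for Illumina data meta.single_end selects -s versus -1/-2. A condensed Groovy sketch of that selection; the helper and its arguments are illustrative, not part of the module:

    // Illustrative helper condensing the module's read-argument logic.
    def spadesReadArgs(Map meta, List illumina, String pacbio, String nanopore, String yml) {
        def illuminaReads = illumina ? (meta.single_end ? "-s ${illumina[0]}" : "-1 ${illumina[0]} -2 ${illumina[1]}") : ''
        def pacbioReads   = pacbio   ? "--pacbio ${pacbio}"     : ''
        def nanoporeReads = nanopore ? "--nanopore ${nanopore}" : ''
        // A dataset YAML overrides the individual read flags entirely.
        return yml ? "--dataset ${yml}" : "${illuminaReads} ${pacbioReads} ${nanoporeReads}".trim()
    }

    assert spadesReadArgs([single_end: false], ['R1.fq.gz', 'R2.fq.gz'], null, null, null) == '-1 R1.fq.gz -2 R2.fq.gz'
    assert spadesReadArgs([single_end: true],  ['R1.fq.gz'], null, null, 'reads.yml')      == '--dataset reads.yml'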
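
The annotate.nf refactor above moves from flat collect() channels to per-sample grouping so that WRITE_GFF receives one joined tuple per assembly. A minimal channel sketch of the pattern, assuming upstream channels that emit (meta, file) tuples; the channel names here are illustrative:

    // Group each (meta, file) stream into (meta, [files]), then join everything on meta.
    annotations_by_sample = annotations_ch.groupTuple()   // (meta, [annotation_tsv, ...])
    taxonomy_by_sample    = taxonomy_ch.groupTuple()      // (meta, [taxonomy_tsv, ...])
    checkv_by_sample      = checkv_ch.groupTuple()        // (meta, [quality_summary, ...])

    gff_input = contigs_ch
        .join(annotations_by_sample)
        .join(taxonomy_by_sample)
        .join(checkv_by_sample)                           // (meta, fasta, [ann], [tax], [checkv])

Joining on the meta key keeps samples independent, which the old collect()-based channels could not guarantee when several assemblies ran in one pipeline invocation.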
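
The samplesheet handling now wraps each sample id in an nf-core style meta map (["id": id]), which is what the ${meta.id} references used throughout the modules rely on. A condensed sketch of the groupReads mapping, assuming the four-column (id, assembly, fq1, fq2) row layout used above:

    // Condensed version of the groupReads closure: key every sample by a meta map.
    def groupReads = { id, assembly, fq1, fq2 ->
        def meta = [id: id]
        if (fq1 == []) {
            return tuple(meta, assembly)          // assembly provided directly
        }
        if (params.assemble) {
            return tuple(meta, [fq1, fq2])        // raw reads, assembled by ASSEMBLE_ILLUMINA first
        }
        exit 1, "input missing, use [--assemble] flag with raw reads"
    }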
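
On the MultiQC wiring that closes workflows/virify.nf: the nf-core MULTIQC module added in this diff declares six inputs, and the last two (replace_names, sample_names) are optional renaming files that callers not using them satisfy with empty lists, following the convention used across nf-core pipelines. A commented sketch of a call matching the declared arity, reusing the channels set up at the top of the workflow:

    // Sketch: all six MULTIQC inputs; [] stands in for the unused optional files.
    MULTIQC(
        ch_multiqc_files.collect(),          // FastQC/fastp reports gathered from the assembly subworkflow
        ch_multiqc_config.toList(),          // assets/multiqc_config.yml
        ch_multiqc_custom_config.toList(),   // optional user-supplied --multiqc_config
        ch_multiqc_logo.toList(),            // defaults to assets/mgnify_logo.png
        [],                                  // replace_names: not used here
        []                                   // sample_names: not used here
    )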