diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
new file mode 100644
index 0000000..e01bb42
--- /dev/null
+++ b/assets/methods_description_template.yml
@@ -0,0 +1,35 @@
+id: "ebi-metagenomics/emg-viral-pipeline-methods-description"
+description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
+section_name: "ebi-metagenomics/emg-viral-pipeline Methods Description"
+section_href: "https://github.com/EBI-Metagenomics/emg-viral-pipeline"
+plot_type: "html"
+data: |
+
Methods
+ Data was processed using ebi-metagenomics/emg-viral-pipeline v${workflow.manifest.version} (${doi_text}; Krakau et al., 2022) of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.
+ The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:
+ ${workflow.commandLine}
+ ${tool_citations}
+ References
+
+ -
+ Informative Regions In Viral Genomes
+ Viruses (2021)
+ doi: 10.3390/v13061164
+ Moreno-Gallego, Jaime Leonardo, and Alejandro Reyes
+
+ -
+ VIRify: an integrated detection, annotation and taxonomic classification pipeline using virus-specific protein profile hidden Markov models
+ bioRxiv
+ doi: 10.1101/2022.08.22.504484
+ Rangel-Pineros, Guillermo, et al.
+
+ ${tool_bibliography}
+
+
+
Notes:
+
+ ${nodoi_text}
+ - The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
+ - You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
+
+
diff --git a/assets/mgnify_logo.png b/assets/mgnify_logo.png
new file mode 100644
index 0000000..fe6112b
Binary files /dev/null and b/assets/mgnify_logo.png differ
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
new file mode 100644
index 0000000..986fed1
--- /dev/null
+++ b/assets/multiqc_config.yml
@@ -0,0 +1,61 @@
+report_comment: >
+
+ This report has been generated by the ebi-metagenomics/emg-viral-pipeline pipeline.
+
+report_section_order:  # one entry per report section id; each id owns its own nested "order" value
+  "ebi-metagenomics/emg-viral-pipeline-methods-description":
+    order: -1000
+  software_versions:
+    order: -1001
+  "ebi-metagenomics/emg-viral-pipeline-summary":
+    order: -1002
+
+export_plots: true
+
+data_format: "yaml"
+
+run_modules:
+ - fastqc
+ - fastp
+
+## Module order
+module_order:
+ - fastqc
+ - fastp
+
+## File name cleaning
+extra_fn_clean_exts:
+ - "_fastp"
+
+## Prettification
+custom_logo: "mgnify_logo.png"
+custom_logo_url: https://github.com/ebi-metagenomics/emg-viral-pipeline/
+custom_logo_title: "ebi-metagenomics/emg-viral-pipeline"
+
+## General Stats customisation
+table_columns_visible:  # per-module map of General Stats column id -> visibility flag (boolean)
+  "fastp":
+    pct_duplication: false
+    after_filtering_q30_rate: false
+    after_filtering_q30_bases: false
+    filtering_result_passed_filter_reads: true  # was 3300, a placement index pasted here; a real boolean keeps the column shown
+    after_filtering_gc_content: false
+    pct_surviving: true
+    pct_adapter: true
+
+table_columns_placement:  # per-module map of General Stats column id -> numeric placement index
+  "fastp":
+    pct_duplication: 3000
+    after_filtering_q30_rate: 3100
+    after_filtering_q30_bases: 3200
+    filtering_result_passed_filter_reads: 3300
+    after_filtering_gc_content: 3400
+    pct_surviving: 3500
+    pct_adapter: 3600
+
+custom_table_header_config:  # table id -> column title -> header overrides
+  general_stats_table:
+    "Total length":
+      hidden: true
+    N50:
+      hidden: true
diff --git a/configs/modules.config b/configs/modules.config
index d0c3d8c..d435c71 100644
--- a/configs/modules.config
+++ b/configs/modules.config
@@ -12,7 +12,7 @@ process {
withName: ANNOTATION {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/annotation/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/annotation/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_annotation.tsv"
@@ -23,13 +23,13 @@ process {
withName: ASSIGN {
publishDir = [
[
- path: "${params.output}/${name}/${params.taxdir}",
+ path: "${params.output}/${meta.id}/${params.taxdir}",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_taxonomy.tsv"
],
[
- path: "${params.output}/${name}/${params.finaldir}/taxonomy",
+ path: "${params.output}/${meta.id}/${params.finaldir}/taxonomy",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_taxonomy.tsv"
@@ -40,7 +40,7 @@ process {
withName: BALLOON {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/balloon/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/balloon/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.{pdf,svg}"
@@ -89,13 +89,13 @@ process {
withName: CHECKV {
publishDir = [
[
- path: "${params.output}/${name}/${params.checkvdir}/",
+ path: "${params.output}/${meta.id}/${params.checkvdir}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${confidence_set_name}"
],
[
- path: "${params.output}/${name}/${params.checkvdir}/",
+ path: "${params.output}/${meta.id}/${params.checkvdir}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.tsv"
@@ -106,7 +106,7 @@ process {
withName: GENERATE_CHROMOMAP_TABLE {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/chromomap/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/chromomap/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${id}.filtered-*.contigs.txt"
@@ -117,13 +117,13 @@ process {
withName: GENERATE_KRONA_TABLE {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}",
+ path: "${params.output}/${meta.id}/${params.plotdir}",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.tsv"
],
[
- path: "${params.output}/${name}/${params.finaldir}/krona/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/krona/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.tsv"
@@ -134,13 +134,13 @@ process {
withName: GENERATE_SANKEY_TABLE {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}",
+ path: "${params.output}/${meta.id}/${params.plotdir}",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}.sankey.*"
],
[
- path: "${params.output}/${name}/${params.finaldir}/sankey/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/sankey/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}.sankey.filtered-${params.sankey}.json"
@@ -151,7 +151,7 @@ process {
withName: CHROMOMAP {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/chromomap/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/chromomap/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.html"
@@ -162,10 +162,10 @@ process {
withName: FILTER_READS {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.filtered.fastq"
+ pattern: "${meta.id}.filtered.fastq"
]
]
}
@@ -173,7 +173,7 @@ process {
withName: HMM_POSTPROCESSING {
publishDir = [
[
- path: "${params.output}/${name}/${params.hmmerdir}/",
+ path: "${params.output}/${meta.id}/${params.hmmerdir}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}_modified.tsv"
@@ -184,7 +184,7 @@ process {
withName: HMMSCAN {
publishDir = [
[
- path: "${params.output}/${name}/${params.hmmerdir}/${params.db}",
+ path: "${params.output}/${meta.id}/${params.hmmerdir}/${params.db}",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}_${params.db}_hmmscan.tbl"
@@ -195,10 +195,10 @@ process {
withName: KAIJU {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.out"
+ pattern: "${meta.id}.out"
]
]
}
@@ -206,13 +206,13 @@ process {
withName: KRONA {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}/krona/",
+ path: "${params.output}/${meta.id}/${params.plotdir}/krona/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.html"
],
[
- path: "${params.output}/${name}/${params.finaldir}/krona/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/krona/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.html"
@@ -223,10 +223,10 @@ process {
withName: LENGTH_FILTERING {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}*filt*.fasta"
+ pattern: "${meta.id}*filt*.fasta"
]
]
}
@@ -251,10 +251,10 @@ process {
withName: MULTIQC {
publishDir = [
[
- path: "${params.output}/${name}/${params.assemblydir}",
+ path: "${params.output}/${meta.id}/${params.assemblydir}",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_multiqc_report.html"
+ pattern: "${meta.id}_multiqc_report.html"
]
]
}
@@ -262,22 +262,22 @@ process {
withName: PARSE {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.fna"
],
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "virsorter_metadata.tsv"
],
[
- path: "${params.output}/${name}/${params.finaldir}/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_virus_predictions.log"
+ pattern: "${meta.id}_virus_predictions.log"
]
]
}
@@ -285,7 +285,7 @@ process {
withName: PHANOTATE {
publishDir = [
[
- path: "${params.output}/${name}/${params.phanotatedir}",
+ path: "${params.output}/${meta.id}/${params.phanotatedir}",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.faa"
@@ -299,13 +299,13 @@ process {
}
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}/",
+ path: "${params.output}/${meta.id}/${params.plotdir}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}_mapping_results"
],
[
- path: "${params.output}/${name}/${params.finaldir}/annotation/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/annotation/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}_prot_ann_table_filtered.tsv"
@@ -316,10 +316,10 @@ process {
withName: PPRMETA {
publishDir = [
[
- path: "${params.output}/${name}/${params.virusdir}/pprmeta",
+ path: "${params.output}/${meta.id}/${params.virusdir}/pprmeta",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_pprmeta.csv"
+ pattern: "${meta.id}_pprmeta.csv"
]
]
}
@@ -350,7 +350,7 @@ process {
}
publishDir = [
[
- path: "${params.output}/${name}/ratio_evalue_tables",
+ path: "${params.output}/${meta.id}/ratio_evalue_tables",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "${set_name}_modified_informative.tsv"
@@ -361,10 +361,10 @@ process {
withName: RENAME {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_renamed.fasta"
+ pattern: "${meta.id}_renamed.fasta"
]
]
}
@@ -372,13 +372,13 @@ process {
withName: RESTORE {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}/${meta.id}/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_original.fasta"
],
[
- path: "${params.output}/${name}/${params.finaldir}/contigs/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/contigs/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_original.fasta"
@@ -389,13 +389,13 @@ process {
withName: SANKEY {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}",
+ path: "${params.output}/${meta.id}/${params.plotdir}",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.sankey.html"
],
[
- path: "${params.output}/${name}/${params.finaldir}/sankey/",
+ path: "${params.output}/${meta.id}/${params.finaldir}/sankey/",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.sankey.html"
@@ -406,10 +406,10 @@ process {
withName: SPADES {
publishDir = [
[
- path: "${params.output}/${name}/${params.assemblydir}",
+ path: "${params.output}/${meta.id}/${params.assemblydir}",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.fasta"
+ pattern: "${meta.id}.fasta"
]
]
}
@@ -420,10 +420,10 @@ process {
}
publishDir = [
[
- path: "${params.output}/${name}/${params.virusdir}/virfinder",
+ path: "${params.output}/${meta.id}/${params.virusdir}/virfinder",
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.txt"
+ pattern: "${meta.id}.txt"
]
]
}
@@ -431,7 +431,7 @@ process {
withName: VIRSORTER {
publishDir = [
[
- path: "${params.output}/${name}/${params.virusdir}/",
+ path: "${params.output}/${meta.id}/${params.virusdir}/",
mode: params.publish_dir_mode,
failOnError: false
]
@@ -442,7 +442,7 @@ process {
errorStrategy 'ignore'
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/gff",
+ path: "${params.output}/${meta.id}/${params.finaldir}/gff",
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.gff"
diff --git a/modules/local/annotation/main.nf b/modules/local/annotation/main.nf
index 7aedbc2..4744998 100644
--- a/modules/local/annotation/main.nf
+++ b/modules/local/annotation/main.nf
@@ -15,16 +15,16 @@ process ANNOTATION {
help="Name of processing .fna file to write correct output name")
*/
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.1'
input:
- tuple val(name), val(set_name), file(tab), file(faa)
+ tuple val(meta), val(set_name), file(tab), file(faa)
output:
- tuple val(name), val(set_name), file("*_annotation.tsv")
+ tuple val(meta), val(set_name), file("*_annotation.tsv"), emit: annotations
script:
"""
diff --git a/modules/local/assign/main.nf b/modules/local/assign/main.nf
index 4fda3a2..6db0e14 100644
--- a/modules/local/assign/main.nf
+++ b/modules/local/assign/main.nf
@@ -4,18 +4,18 @@ process ASSIGN {
provides the taxonomic lineage of each viral contig, based on the corresponding ViPhOG annotations'''
*/
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.1'
input:
- tuple val(name), val(set_name), file(tab)
+ tuple val(meta), val(set_name), file(tab)
file(db)
file(factor)
output:
- tuple val(name), val(set_name), file("*_taxonomy.tsv")
+ tuple val(meta), val(set_name), file("*_taxonomy.tsv")
script:
"""
diff --git a/modules/local/balloon/main.nf b/modules/local/balloon/main.nf
index 06c8ee6..9184a03 100644
--- a/modules/local/balloon/main.nf
+++ b/modules/local/balloon/main.nf
@@ -1,11 +1,11 @@
process BALLOON {
- tag "${name}"
+ tag "${meta.id}"
label 'process_medium'
container 'nanozoo/r_balloon:3.1.1--64f0f7d'
input:
- tuple val(name), val(set_name), file(tbl)
+ tuple val(meta), val(set_name), file(tbl)
output:
path ("*.{pdf,svg}") optional true
@@ -22,16 +22,16 @@ process BALLOON {
fi
# genus
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv"
# subfamily
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
# family
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
# order
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
if [ -s \$NAME"_summary.tsv" ]; then
balloon.R "\${NAME}_summary.tsv" "\${NAME}_balloon.svg" 10 8
diff --git a/modules/local/blast/main.nf b/modules/local/blast/main.nf
index 73b162c..ffaefca 100644
--- a/modules/local/blast/main.nf
+++ b/modules/local/blast/main.nf
@@ -1,15 +1,15 @@
process BLAST {
label 'process_high'
- tag "${assembly_name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/blast:2.9.0'
input:
- tuple val(assembly_name), val(confidence_set_name), file(fasta)
+ tuple val(meta), val(confidence_set_name), file(fasta)
file(db)
output:
- tuple val(assembly_name), val(confidence_set_name), file("${confidence_set_name}.blast"), file("${confidence_set_name}.filtered.blast")
+ tuple val(meta), val(confidence_set_name), file("${confidence_set_name}.blast"), file("${confidence_set_name}.filtered.blast")
script:
if (task.attempt.toString() == '1')
diff --git a/modules/local/blast_filter/main.nf b/modules/local/blast_filter/main.nf
index 623aa4d..543b2c6 100644
--- a/modules/local/blast_filter/main.nf
+++ b/modules/local/blast_filter/main.nf
@@ -1,14 +1,14 @@
process BLAST_FILTER {
label 'process_low'
- tag "${assembly_name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(assembly_name), val(confidence_set_name), file(blast), file(blast_filtered)
+ tuple val(meta), val(confidence_set_name), file(blast), file(blast_filtered)
file(db)
output:
- tuple val(assembly_name), val(confidence_set_name), file("*.meta")
+ tuple val(meta), val(confidence_set_name), file("*.meta")
script:
if (task.attempt.toString() == '1')
diff --git a/modules/local/checkv/main.nf b/modules/local/checkv/main.nf
index 11bcea9..050b25f 100644
--- a/modules/local/checkv/main.nf
+++ b/modules/local/checkv/main.nf
@@ -1,27 +1,22 @@
process CHECKV {
label 'process_medium'
- tag "${name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/checkv:0.8.1__1'
input:
- tuple val(name), val(confidence_set_name), file(fasta), file(contigs)
+ tuple val(meta), val(confidence_set_name), file(fasta), file(contigs)
file(database)
output:
- tuple val(name), val(confidence_set_name), file("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/")
+ tuple val(meta), val(confidence_set_name), file("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/")
script:
- if (confidence_set_name == 'prophages') {
- """
- checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name}
- cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv
- """
- } else {
+
"""
- checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name}
+ checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name}
cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv
"""
- }
+
stub:
"""
mkdir negative_result_${confidence_set_name}.tsv
diff --git a/modules/local/chromomap/main.nf b/modules/local/chromomap/main.nf
index 5d7af19..25ef58f 100644
--- a/modules/local/chromomap/main.nf
+++ b/modules/local/chromomap/main.nf
@@ -1,17 +1,17 @@
process GENERATE_CHROMOMAP_TABLE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/bioruby:2.0.1'
input:
- tuple val(name), val(set_name), file(assembly), file(annotation_table)
+ tuple val(meta), val(set_name), file(assembly), file(annotation_table)
output:
- tuple val(name), val(set_name), file("${id}.filtered-*.contigs.txt"), file("${id}.filtered-*.anno.txt")
+ tuple val(meta), val(set_name), file("${id}.filtered-*.contigs.txt"), file("${id}.filtered-*.anno.txt")
script:
id = set_name
- if (set_name == "all") { id = name }
+ if (set_name == "all") { id = meta.id }
"""
# combine
if [[ ${set_name} == "all" ]]; then
@@ -30,17 +30,18 @@ process GENERATE_CHROMOMAP_TABLE {
process CHROMOMAP {
label 'process_medium'
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/r_chromomap:0.3'
input:
- tuple val(name), val(set_name), file(contigs), file(annotations)
+ tuple val(meta), val(set_name), file(contigs), file(annotations)
output:
- tuple val(name), val(set_name), file("*.html") optional true
+ tuple val(meta), val(set_name), file("*.html") optional true
script:
id = set_name
- if (set_name == "all") { id = name }
+ if (set_name == "all") { id = meta.id }
"""
#!/usr/bin/env Rscript
diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf
deleted file mode 100644
index c85d5d4..0000000
--- a/modules/local/fastp/main.nf
+++ /dev/null
@@ -1,21 +0,0 @@
-process FASTP {
-
-/* Comments:
- -m, --merge
- for paired-end input, merge each pair of reads into a single read if they are overlapped.
- The merged reads will be written to the file given by --merged_out, the unmerged reads will be
- written to the files specified by --out1 and --out2. The merging mode is disabled by default.
-*/
- tag "${name}"
- label 'process_medium'
- container 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0'
-
- input:
- tuple val(name), file(reads)
- output:
- tuple val(name), file("${name}*.fastp.fastq.gz")
- script:
- """
- fastp -i ${reads[0]} -I ${reads[1]} --thread ${task.cpus} -o ${name}.R1.fastp.fastq.gz -O ${name}.R2.fastp.fastq.gz
- """
-}
\ No newline at end of file
diff --git a/modules/local/fastqc/main.nf b/modules/local/fastqc/main.nf
deleted file mode 100644
index 199adb3..0000000
--- a/modules/local/fastqc/main.nf
+++ /dev/null
@@ -1,15 +0,0 @@
-process FASTQC {
- tag "${name}"
- label 'process_low'
- container 'quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1'
-
- input:
- tuple val(name), file(reads)
- output:
- tuple val(name), file("fastqc/${name}*fastqc*")
- script:
- """
- mkdir fastqc
- fastqc -t ${task.cpus} -o fastqc *.fastq.gz
- """
-}
\ No newline at end of file
diff --git a/modules/local/filter_reads/main.nf b/modules/local/filter_reads/main.nf
index 5454154..aa0b6a5 100644
--- a/modules/local/filter_reads/main.nf
+++ b/modules/local/filter_reads/main.nf
@@ -1,19 +1,19 @@
process FILTER_READS {
- tag "${name}"
+ tag "${meta.id}"
label 'process_low'
input:
- tuple val(name), file(kaiju_filtered), file(fastq)
+ tuple val(meta), file(kaiju_filtered), file(fastq)
output:
- tuple val(name), file("${name}.filtered.fastq")
- tuple val(name), file("${name}.filtered.fasta")
+ tuple val(meta), file("${meta.id}.filtered.fastq")
+ tuple val(meta), file("${meta.id}.filtered.fasta")
script:
"""
- sed '/^@/!d;s//>/;N' ${fastq} > ${name}.fasta
- faSomeRecords ${name}.fasta ${kaiju_filtered} ${name}.filtered.fasta
- faToFastq ${name}.filtered.fasta ${name}.filtered.fastq
- rm -f ${name}.fasta
+ sed '/^@/!d;s//>/;N' ${fastq} > ${meta.id}.fasta
+ faSomeRecords ${meta.id}.fasta ${kaiju_filtered} ${meta.id}.filtered.fasta
+ faToFastq ${meta.id}.filtered.fasta ${meta.id}.filtered.fastq
+ rm -f ${meta.id}.fasta
"""
}
diff --git a/modules/local/hmm_postprocessing/main.nf b/modules/local/hmm_postprocessing/main.nf
index af99cff..8de32c5 100644
--- a/modules/local/hmm_postprocessing/main.nf
+++ b/modules/local/hmm_postprocessing/main.nf
@@ -3,16 +3,16 @@ process HMM_POSTPROCESSING {
input: File_hmmer_ViPhOG.tbl
output: File_hmmer_ViPhOG_modified.tbl
*/
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), val(set_name), file(hmmer_tbl), file(faa)
+ tuple val(meta), val(set_name), file(hmmer_tbl), file(faa)
output:
- tuple val(name), val(set_name), file("${set_name}_modified.tsv"), file(faa)
+ tuple val(meta), val(set_name), file("${set_name}_modified.tsv"), file(faa)
script:
"""
diff --git a/modules/local/hmmscan/main.nf b/modules/local/hmmscan/main.nf
index 82cf7b7..81d2a40 100644
--- a/modules/local/hmmscan/main.nf
+++ b/modules/local/hmmscan/main.nf
@@ -1,15 +1,15 @@
process HMMSCAN {
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_high'
container 'quay.io/microbiome-informatics/hmmer:3.1b2'
input:
- tuple val(name), val(set_name), file(faa)
+ tuple val(meta), val(set_name), file(faa)
file(db)
output:
- tuple val(name), val(set_name), file("${set_name}_${params.db}_hmmscan.tbl"), file(faa)
+ tuple val(meta), val(set_name), file("${set_name}_${params.db}_hmmscan.tbl"), file(faa)
script:
"""
diff --git a/modules/local/kaiju/main.nf b/modules/local/kaiju/main.nf
index c461b2f..c2480d1 100644
--- a/modules/local/kaiju/main.nf
+++ b/modules/local/kaiju/main.nf
@@ -6,28 +6,28 @@ process KAIJU {
*/
label 'process_medium'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/biocontainers/kaiju:1.7.2--hdbcaa40_0'
input:
- tuple val(name), file(fastq)
+ tuple val(meta), file(fastq)
file(database)
output:
- tuple val(name), file("${name}.out")
- tuple val(name), file("${name}.out.krona")
+ tuple val(meta), file("${meta.id}.out")
+ tuple val(meta), file("${meta.id}.out.krona")
shell:
if (params.illumina) {
'''
- kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{name}.out
- kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona
+ kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{meta.id}.out
+ kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona
'''
}
if (params.fasta) {
'''
- kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{name}.out
- kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona
+ kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{meta.id}.out
+ kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona
'''
}
}
diff --git a/modules/local/krona/main.nf b/modules/local/krona/main.nf
index deb8c04..185c30e 100644
--- a/modules/local/krona/main.nf
+++ b/modules/local/krona/main.nf
@@ -1,21 +1,21 @@
process GENERATE_KRONA_TABLE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), val(set_name), file(tbl)
+ tuple val(meta), val(set_name), file(tbl)
output:
- tuple val(name), val(set_name), file("*.krona.tsv")
+ tuple val(meta), val(set_name), file("*.krona.tsv")
script:
"""
if [[ "${set_name}" == "all" ]]; then
- grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${name}.tmp
- grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${name}.tmp
- cp ${name}.tmp ${name}.tsv
- generate_counts_table.py -f ${name}.tsv -o ${name}.krona.tsv
+ grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${meta.id}.tmp
+ grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${meta.id}.tmp
+ cp ${meta.id}.tmp ${meta.id}.tsv
+ generate_counts_table.py -f ${meta.id}.tsv -o ${meta.id}.krona.tsv
else
generate_counts_table.py -f ${tbl} -o ${set_name}.krona.tsv
fi
@@ -24,18 +24,18 @@ process GENERATE_KRONA_TABLE {
process KRONA {
label 'process_low'
-
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/krona:2.7.1'
input:
- tuple val(name), val(set_name), file(krona_file)
+ tuple val(meta), val(set_name), file(krona_file)
output:
file("*.krona.html")
script:
"""
if [[ ${set_name} == "all" ]]; then
- ktImportText -o ${name}.krona.html ${krona_file}
+ ktImportText -o ${meta.id}.krona.html ${krona_file}
else
ktImportText -o ${set_name}.krona.html ${krona_file}
fi
diff --git a/modules/local/length_filtering/main.nf b/modules/local/length_filtering/main.nf
index b7fc090..c891aca 100644
--- a/modules/local/length_filtering/main.nf
+++ b/modules/local/length_filtering/main.nf
@@ -1,18 +1,18 @@
process LENGTH_FILTERING {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/biocontainers/biopython:1.75'
input:
- tuple val(name), file(fasta), file(map)
+ tuple val(meta), file(fasta), file(map)
output:
- tuple val(name), file("${name}*filt*.fasta"), env(CONTIGS)
+ tuple val(meta), file("${meta.id}*filt*.fasta"), env(CONTIGS)
script:
"""
filter_contigs_len.py -f ${fasta} -l ${params.length} -o ./
- CONTIGS=\$(grep ">" ${name}*filt*.fasta | wc -l)
+ CONTIGS=\$(grep ">" ${meta.id}*filt*.fasta | wc -l)
"""
}
diff --git a/modules/local/mashmap/main.nf b/modules/local/mashmap/main.nf
index 5312397..fad49a6 100644
--- a/modules/local/mashmap/main.nf
+++ b/modules/local/mashmap/main.nf
@@ -1,10 +1,10 @@
process MASHMAP {
label 'process_medium'
- tag "${assembly_name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/mashmap:2.0'
input:
- tuple val(assembly_name), val(confidence_set_name), file(fasta)
+ tuple val(meta), val(confidence_set_name), file(fasta)
file(reference)
output:
diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf
deleted file mode 100644
index da54e9b..0000000
--- a/modules/local/multiqc/main.nf
+++ /dev/null
@@ -1,15 +0,0 @@
-process MULTIQC {
- label 'process_low'
- tag "${name}"
- container 'quay.io/biocontainers/multiqc:1.9--py_1'
-
- input:
- tuple val(name), file(fastqc)
- output:
- tuple val(name), file("${name}_multiqc_report.html")
-
- script:
- """
- multiqc -i ${name} .
- """
-}
diff --git a/modules/local/parse/main.nf b/modules/local/parse/main.nf
index 41949a7..17f99ad 100644
--- a/modules/local/parse/main.nf
+++ b/modules/local/parse/main.nf
@@ -1,21 +1,21 @@
process PARSE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), file(fasta), val(contig_number), file(virfinder), file(virsorter), file(pprmeta)
+ tuple val(meta), file(fasta), val(contig_number), file(virfinder), file(virsorter), file(pprmeta)
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("*.fna"), file('virsorter_metadata.tsv'), file("${name}_virus_predictions.log"), optional: true
+ tuple val(meta), file("*.fna"), file('virsorter_metadata.tsv'), file("${meta.id}_virus_predictions.log"), optional: true
script:
"""
touch virsorter_metadata.tsv
- parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${name}_virus_predictions.log
+ parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${meta.id}_virus_predictions.log
"""
}
diff --git a/modules/local/phanotate/main.nf b/modules/local/phanotate/main.nf
index 5f72fe2..c1dd578 100644
--- a/modules/local/phanotate/main.nf
+++ b/modules/local/phanotate/main.nf
@@ -1,13 +1,13 @@
process PHANOTATE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/biocontainers/phanotate:1.5.0--h30d9df9_2'
input:
- tuple val(name), file(fasta)
+ tuple val(meta), file(fasta)
output:
- tuple val(name), stdout, file("*.faa")
+ tuple val(meta), stdout, file("*.faa")
script:
"""
diff --git a/modules/local/plot_contig_map/main.nf b/modules/local/plot_contig_map/main.nf
index bc55bf2..a7060b9 100644
--- a/modules/local/plot_contig_map/main.nf
+++ b/modules/local/plot_contig_map/main.nf
@@ -1,14 +1,14 @@
process PLOT_CONTIG_MAP {
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-plot-contig-map:1'
input:
- tuple val(name), val(set_name), file(tab)
+ tuple val(meta), val(set_name), file(tab)
output:
- tuple val(name), val(set_name), file("${set_name}_mapping_results"), file("${set_name}_prot_ann_table_filtered.tsv")
+ tuple val(meta), val(set_name), file("${set_name}_mapping_results"), file("${set_name}_prot_ann_table_filtered.tsv")
script:
"""
diff --git a/modules/local/pprmeta/main.nf b/modules/local/pprmeta/main.nf
index e1f6288..79d7e85 100644
--- a/modules/local/pprmeta/main.nf
+++ b/modules/local/pprmeta/main.nf
@@ -1,22 +1,22 @@
process PPRMETA {
label 'process_medium'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/pprmeta:1.1'
input:
- tuple val(name), file(fasta), val(contig_number)
+ tuple val(meta), file(fasta), val(contig_number)
path(pprmeta_git)
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("${name}_pprmeta.csv")
+ tuple val(meta), file("${meta.id}_pprmeta.csv")
script:
"""
[ -d "pprmeta" ] && cp pprmeta/* .
- ./PPR_Meta ${fasta} ${name}_pprmeta.csv
+ ./PPR_Meta ${fasta} ${meta.id}_pprmeta.csv
"""
}
diff --git a/modules/local/prodigal/main.nf b/modules/local/prodigal/main.nf
index 5b429f0..aee9afd 100644
--- a/modules/local/prodigal/main.nf
+++ b/modules/local/prodigal/main.nf
@@ -1,13 +1,13 @@
process PRODIGAL {
label 'process_high'
- tag "${name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/biocontainers/prodigal:2.6.3--hec16e2b_4'
input:
- tuple val(assembly_name), val(confidence_set_name), file(fasta)
+ tuple val(meta), val(confidence_set_name), file(fasta)
output:
- tuple val(assembly_name), val(confidence_set_name), file("*.faa")
+ tuple val(meta), val(confidence_set_name), file("*.faa")
script:
"""
diff --git a/modules/local/ratio_evalue/main.nf b/modules/local/ratio_evalue/main.nf
index 28ff720..65e7481 100644
--- a/modules/local/ratio_evalue/main.nf
+++ b/modules/local/ratio_evalue/main.nf
@@ -7,17 +7,17 @@ process RATIO_EVALUE {
out PRJNA530103_small_modified_informative.tsv
*/
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.1'
input:
- tuple val(name), val(set_name), file(modified_table), file(faa)
+ tuple val(meta), val(set_name), file(modified_table), file(faa)
file(model_metadata)
output:
- tuple val(name), val(set_name), file("${set_name}_modified_informative.tsv"), file(faa), optional: true
+ tuple val(meta), val(set_name), file("${set_name}_modified_informative.tsv"), file(faa), optional: true
script:
"""
diff --git a/modules/local/rename/main.nf b/modules/local/rename/main.nf
index 8a54128..440115c 100644
--- a/modules/local/rename/main.nf
+++ b/modules/local/rename/main.nf
@@ -4,14 +4,14 @@ process RENAME {
*/
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), file(fasta)
+ tuple val(meta), file(fasta)
output:
- tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv")
+ tuple val(meta), file("${meta.id}_renamed.fasta"), file("${meta.id}_map.tsv")
script:
"""
@@ -20,7 +20,7 @@ process RENAME {
else
cp ${fasta} tmp.fasta
fi
- rename_fasta.py -i tmp.fasta -m ${name}_map.tsv -o ${name}_renamed.fasta rename
+ rename_fasta.py -i tmp.fasta -m ${meta.id}_map.tsv -o ${meta.id}_renamed.fasta rename
"""
}
diff --git a/modules/local/restore/main.nf b/modules/local/restore/main.nf
index 4327181..de98f2c 100644
--- a/modules/local/restore/main.nf
+++ b/modules/local/restore/main.nf
@@ -2,16 +2,16 @@ process RESTORE {
/*
usage: rename_fasta.py [-h] -i INPUT [-m MAP] -o OUTPUT {rename,restore} ...
*/
- tag "${name}"
+ tag "${meta.id}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), file(fasta), file(map)
+ tuple val(meta), file(fasta), file(map)
output:
- tuple val(name), env(BN), file("*_original.fasta")
+ tuple val(meta), env(BN), file("*_original.fasta")
script:
"""
diff --git a/modules/local/sankey/main.nf b/modules/local/sankey/main.nf
index b1f2252..4240561 100644
--- a/modules/local/sankey/main.nf
+++ b/modules/local/sankey/main.nf
@@ -1,13 +1,13 @@
process GENERATE_SANKEY_TABLE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/bioruby:2.0.1'
input:
- tuple val(name), val(set_name), file(krona_table)
+ tuple val(meta), val(set_name), file(krona_table)
output:
- tuple val(name), val(set_name), file("${set_name}.sankey.filtered-${params.sankey}.json"), file("${set_name}.sankey.tsv")
+ tuple val(meta), val(set_name), file("${set_name}.sankey.filtered-${params.sankey}.json"), file("${set_name}.sankey.tsv")
script:
"""
@@ -23,18 +23,18 @@ process GENERATE_SANKEY_TABLE {
process SANKEY {
label 'process_medium'
-
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/sankeyd3:0.12.3'
input:
- tuple val(name), val(set_name), file(json), file(tsv)
+ tuple val(meta), val(set_name), file(json), file(tsv)
output:
- tuple val(name), val(set_name), file("*.sankey.html")
+ tuple val(meta), val(set_name), file("*.sankey.html")
script:
id = set_name
- if (set_name == "all") { id = name }
+ if (set_name == "all") { id = meta.id }
"""
#!/usr/bin/env Rscript
diff --git a/modules/local/spades/main.nf b/modules/local/spades/main.nf
deleted file mode 100644
index cdf4483..0000000
--- a/modules/local/spades/main.nf
+++ /dev/null
@@ -1,17 +0,0 @@
-process SPADES {
-
- label 'process_medium'
- tag "${name}"
- container 'quay.io/biocontainers/spades:3.15.5--h95f258a_1'
-
- input:
- tuple val(name), file(reads)
- output:
- tuple val(name), file("${name}.fasta")
-
- script:
- """
- spades.py --meta --only-assembler -1 !{reads[0]} -2 !{reads[1]} -t !{task.cpus} -o assembly
- mv assembly/contigs.fasta !{name}.fasta
- """
-}
\ No newline at end of file
diff --git a/modules/local/virfinder/main.nf b/modules/local/virfinder/main.nf
index f4d8f96..0bbd574 100644
--- a/modules/local/virfinder/main.nf
+++ b/modules/local/virfinder/main.nf
@@ -1,22 +1,22 @@
process VIRFINDER {
- tag "${name}"
+ tag "${meta.id}"
label 'process_high'
container 'quay.io/microbiome-informatics/virfinder:1.1__eb8032e'
input:
- tuple val(name), file(fasta), val(contig_number)
+ tuple val(meta), file(fasta), val(contig_number)
path model
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("${name}.txt")
+ tuple val(meta), file("${meta.id}.txt")
script:
"""
run_virfinder.Rscript ${model} ${fasta} .
- awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${name}*.tsv > ${name}.txt
+ awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${meta.id}*.tsv > ${meta.id}.txt
"""
}
diff --git a/modules/local/virsorter/main.nf b/modules/local/virsorter/main.nf
index 3d5a2bd..0947efb 100644
--- a/modules/local/virsorter/main.nf
+++ b/modules/local/virsorter/main.nf
@@ -1,17 +1,17 @@
process VIRSORTER {
label 'process_high'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/virsorter:1.0.6_edfeb8c5e72'
input:
- tuple val(name), file(fasta), val(contig_number)
+ tuple val(meta), file(fasta), val(contig_number)
path(database)
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("*")
+ tuple val(meta), file("*")
script:
if (params.virome)
diff --git a/modules/local/write_gff/main.nf b/modules/local/write_gff/main.nf
index 198b221..de1ab1f 100644
--- a/modules/local/write_gff/main.nf
+++ b/modules/local/write_gff/main.nf
@@ -1,17 +1,14 @@
process WRITE_GFF {
- tag "${name}"
+ tag "${meta.id}"
label 'process_medium'
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), path(fasta)
- path(viphos_annotations)
- path(taxonomies)
- path(quality_summaries)
+ tuple val(meta), path(fasta), path(viphos_annotations), path(taxonomies), path(quality_summaries)
output:
- path("${name}_virify.gff")
+ path("${meta.id}_virify.gff")
script:
"""
@@ -19,9 +16,9 @@ process WRITE_GFF {
-v ${viphos_annotations.join(' ')} \
-c ${quality_summaries.join(' ')} \
-t ${taxonomies.join(' ')} \
- -s ${name} \
+ -s ${meta.id} \
-a ${fasta}
- gt gff3validator ${name}_virify.gff
+ gt gff3validator ${meta.id}_virify.gff
"""
}
diff --git a/modules/local/fastp/fastp.yaml b/modules/nf-core/checkv/endtoend/environment.yml
similarity index 64%
rename from modules/local/fastp/fastp.yaml
rename to modules/nf-core/checkv/endtoend/environment.yml
index b4df7d8..8646fff 100644
--- a/modules/local/fastp/fastp.yaml
+++ b/modules/nf-core/checkv/endtoend/environment.yml
@@ -1,6 +1,5 @@
-name: fastp
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - fastp=0.20.0
+ - bioconda::checkv=1.0.1
\ No newline at end of file
diff --git a/modules/nf-core/checkv/endtoend/main.nf b/modules/nf-core/checkv/endtoend/main.nf
new file mode 100644
index 0000000..635c9fa
--- /dev/null
+++ b/modules/nf-core/checkv/endtoend/main.nf
@@ -0,0 +1,63 @@
+process CHECKV_ENDTOEND {
+    tag "$meta.id"
+    label 'process_medium'
+    // Runs `checkv end_to_end` on one FASTA against `db`; every declared report is read from the ${prefix}/ directory.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/checkv:1.0.1--pyhdfd78af_0':
+        'biocontainers/checkv:1.0.1--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    path db
+
+    output:
+    tuple val(meta), path ("${prefix}/quality_summary.tsv") , emit: quality_summary
+    tuple val(meta), path ("${prefix}/completeness.tsv")    , emit: completeness
+    tuple val(meta), path ("${prefix}/contamination.tsv")   , emit: contamination
+    tuple val(meta), path ("${prefix}/complete_genomes.tsv"), emit: complete_genomes
+    tuple val(meta), path ("${prefix}/proviruses.fna")      , emit: proviruses
+    tuple val(meta), path ("${prefix}/viruses.fna")         , emit: viruses
+    path "versions.yml"                                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}" // no `def`: the output: paths above interpolate ${prefix}
+
+    """
+    checkv \\
+        end_to_end \\
+        $args \\
+        -t $task.cpus \\
+        -d $db \\
+        $fasta \\
+        $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p')
+    END_VERSIONS
+    """
+
+    stub:
+    // The stub only creates empty placeholders, so ext.args is not read here; prefix again has no `def`.
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    mkdir -p ${prefix}
+    touch ${prefix}/quality_summary.tsv
+    touch ${prefix}/completeness.tsv
+    touch ${prefix}/contamination.tsv
+    touch ${prefix}/complete_genomes.tsv
+    touch ${prefix}/proviruses.fna
+    touch ${prefix}/viruses.fna
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/checkv/endtoend/meta.yml b/modules/nf-core/checkv/endtoend/meta.yml
new file mode 100644
index 0000000..c74d091
--- /dev/null
+++ b/modules/nf-core/checkv/endtoend/meta.yml
@@ -0,0 +1,107 @@
+name: "checkv_endtoend"
+description: Assess the quality of metagenome-assembled viral genomes.
+keywords:
+ - checkv
+ - checkm
+ - mag
+ - metagenome
+ - quality
+ - isolates
+ - virus
+ - completeness
+ - contamination
+tools:
+ - "checkv":
+ description: Assess the quality of metagenome-assembled viral genomes.
+ homepage: https://bitbucket.org/berkeleylab/checkv/src/master/
+ documentation: https://bitbucket.org/berkeleylab/checkv/src/master/
+ tool_dev_url: https://bitbucket.org/berkeleylab/checkv/src/master/
+ doi: "10.1038/s41587-020-00774-7"
+ licence: ["BSD License"]
+ identifier: biotools:checkv
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: fasta file
+ pattern: "*.{fasta,fna,fa}"
+ - - db:
+ type: directory
+ description: Directory pointing to checkV database
+output:
+ - quality_summary:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/quality_summary.tsv:
+ type: file
+ description: CheckV's main output containing integrated results from the three
+ main modules (contamination, completeness, complete genomes) with overall
+ quality of contigs
+ pattern: "${prefix}/quality_summary.tsv"
+ - completeness:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/completeness.tsv:
+ type: file
+ description: CheckV's detailed overview table on estimating completeness
+ pattern: "${prefix}/completeness.tsv"
+ - contamination:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/contamination.tsv:
+ type: file
+ description: CheckV's detailed overview table on estimating contamination
+ pattern: "${prefix}/contamination.tsv"
+ - complete_genomes:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/complete_genomes.tsv:
+ type: file
+ description: CheckV's detailed overview table on the identified putative complete
+ genomes
+ pattern: "${prefix}/complete_genomes.tsv"
+ - proviruses:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/proviruses.fna:
+ type: file
+ description: CheckV's extracted proviruses contigs
+ pattern: "${prefix}/proviruses.fna"
+ - viruses:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/viruses.fna:
+ type: file
+ description: CheckV's extracted virus contigs
+ pattern: "${prefix}/viruses.fna"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@Joon-Klaps"
+maintainers:
+ - "@Joon-Klaps"
\ No newline at end of file
diff --git a/modules/local/spades/spades.yaml b/modules/nf-core/fastp/environment.yml
similarity index 64%
rename from modules/local/spades/spades.yaml
rename to modules/nf-core/fastp/environment.yml
index b6db0fd..de9463b 100644
--- a/modules/local/spades/spades.yaml
+++ b/modules/nf-core/fastp/environment.yml
@@ -1,6 +1,5 @@
-name: spades
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - spades=3.14
+ - bioconda::fastp=0.23.4
\ No newline at end of file
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
new file mode 100644
index 0000000..08200cd
--- /dev/null
+++ b/modules/nf-core/fastp/main.nf
@@ -0,0 +1,125 @@
+process FASTP {
+    tag "$meta.id"
+    label 'process_medium'
+    // Trims reads with fastp in interleaved, single-end or paired-end mode and emits the JSON/HTML reports and log.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
+        'biocontainers/fastp:0.23.4--h5f740d0_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path  adapter_fasta
+    val   discard_trimmed_pass
+    val   save_trimmed_fail
+    val   save_merged
+
+    output:
+    tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+    tuple val(meta), path('*.json')           , emit: json
+    tuple val(meta), path('*.html')           , emit: html
+    tuple val(meta), path('*.log')            , emit: log
+    tuple val(meta), path('*.fail.fastq.gz')  , optional:true, emit: reads_fail
+    tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
+    def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+    // Must be '' (not the Elvis result `true`) when passing reads are discarded, otherwise "true" lands on the command line
+    def out_fq1 = discard_trimmed_pass ? '' : ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" )
+    def out_fq2 = discard_trimmed_pass ? '' : "--out2 ${prefix}_2.fastp.fastq.gz"
+    // Inputs are soft-linked to ${prefix}* for consistent naming in MultiQC; interleaved input runs as single-end with --interleaved_in in ext.args
+    if ( task.ext.args?.contains('--interleaved_in') ) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --stdout \\
+            --in1 ${prefix}.fastq.gz \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $args \\
+            2> >(tee ${prefix}.fastp.log >&2) \\
+        | gzip -c > ${prefix}.fastp.fastq.gz
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else if (meta.single_end) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --in1 ${prefix}.fastq.gz \\
+            $out_fq1 \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $args \\
+            2> >(tee ${prefix}.fastp.log >&2)
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else {
+        def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+        fastp \\
+            --in1 ${prefix}_1.fastq.gz \\
+            --in2 ${prefix}_2.fastq.gz \\
+            $out_fq1 \\
+            $out_fq2 \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $merge_fastq \\
+            --thread $task.cpus \\
+            --detect_adapter_for_pe \\
+            $args \\
+            2> >(tee ${prefix}.fastp.log >&2)
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+    def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz"
+    def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : ""
+    def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz"
+    """
+    $touch_reads
+    $touch_fail_fastq
+    $touch_merged
+    touch "${prefix}.fastp.json"
+    touch "${prefix}.fastp.html"
+    touch "${prefix}.fastp.log"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
new file mode 100644
index 0000000..bece97e
--- /dev/null
+++ b/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,113 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+ - trimming
+ - quality control
+ - fastq
+tools:
+ - fastp:
+ description: |
+ A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+ documentation: https://github.com/OpenGene/fastp
+ doi: 10.1093/bioinformatics/bty560
+ licence: ["MIT"]
+ identifier: biotools:fastp
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively. If you wish to run interleaved paired-end data, supply as single-end data
+ but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ - - adapter_fasta:
+ type: file
+ description: File in FASTA format containing possible adapters to remove.
+ pattern: "*.{fasta,fna,fas,fa}"
+ - - discard_trimmed_pass:
+ type: boolean
+        description: Specify true to not write any reads that pass trimming thresholds.
+          This can be used to run fastp for the output report only.
+ - - save_trimmed_fail:
+ type: boolean
+ description: Specify true to save files that failed to pass trimming thresholds
+ ending in `*.fail.fastq.gz`
+ - - save_merged:
+ type: boolean
+ description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+output:
+ - reads:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fastp.fastq.gz":
+ type: file
+ description: The trimmed/modified/unmerged fastq reads
+ pattern: "*fastp.fastq.gz"
+ - json:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.json":
+ type: file
+ description: Results in JSON format
+ pattern: "*.json"
+ - html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.html":
+ type: file
+ description: Results in HTML format
+ pattern: "*.html"
+ - log:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.log":
+ type: file
+          description: fastp log file
+ pattern: "*.log"
+ - reads_fail:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fail.fastq.gz":
+ type: file
+          description: Reads that failed the preprocessing
+ pattern: "*fail.fastq.gz"
+ - reads_merged:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.merged.fastq.gz":
+ type: file
+ description: Reads that were successfully merged
+ pattern: "*.{merged.fastq.gz}"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
\ No newline at end of file
diff --git a/modules/local/multiqc/multiqc.yaml b/modules/nf-core/fastqc/environment.yml
similarity index 63%
rename from modules/local/multiqc/multiqc.yaml
rename to modules/nf-core/fastqc/environment.yml
index 3bc368b..8b76b92 100644
--- a/modules/local/multiqc/multiqc.yaml
+++ b/modules/nf-core/fastqc/environment.yml
@@ -1,6 +1,5 @@
-name: multiqc
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - multiqc=1.8
+ - bioconda::fastqc=0.12.1
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
new file mode 100644
index 0000000..feee5f0
--- /dev/null
+++ b/modules/nf-core/fastqc/main.nf
@@ -0,0 +1,64 @@
+process FASTQC {
+    tag "$meta.id"
+    label 'process_medium'
+    // Runs FastQC on the reads after soft-linking them to ${prefix}-based names; emits the HTML reports and zip archives.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.html"), emit: html
+    tuple val(meta), path("*.zip") , emit: zip
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Make list of old name and new name pairs to use for renaming in the bash while loop
+    def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+    def rename_to = old_new_pairs*.join(' ').join(' ')
+    def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+
+    // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) times the amount of RAM defined (--memory)
+    // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222
+    // Integer-dividing task.memory by task.cpus sticks to the RAM requested in the label and keeps --memory a whole number of MB
+    def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB').intdiv(task.cpus)
+    // FastQC memory value allowed range (100 - 10000)
+    def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+
+    """
+    printf "%s %s\\n" $rename_to | while read old_name new_name; do
+        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+    done
+
+    fastqc \\
+        $args \\
+        --threads $task.cpus \\
+        --memory $fastqc_memory \\
+        $renamed_files
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.html
+    touch ${prefix}.zip
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
new file mode 100644
index 0000000..40c8711
--- /dev/null
+++ b/modules/nf-core/fastqc/meta.yml
@@ -0,0 +1,66 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+ - quality control
+ - qc
+ - adapters
+ - fastq
+tools:
+ - fastqc:
+ description: |
+ FastQC gives general quality metrics about your reads.
+ It provides information about the quality score distribution
+ across your reads, the per base sequence content (%A/C/G/T).
+ You get information about adapter contamination and other
+ overrepresented sequences.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+ licence: ["GPL-2.0-only"]
+ identifier: biotools:fastqc
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.html":
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - zip:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.zip":
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
\ No newline at end of file
diff --git a/modules/local/fastqc/fastqc.yaml b/modules/nf-core/multiqc/environment.yml
similarity index 63%
rename from modules/local/fastqc/fastqc.yaml
rename to modules/nf-core/multiqc/environment.yml
index eef652b..e1d226f 100644
--- a/modules/local/fastqc/fastqc.yaml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -1,6 +1,5 @@
-name: fastqc
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - fastqc=0.11.8
+ - bioconda::multiqc=1.25.1
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
new file mode 100644
index 0000000..a91446d
--- /dev/null
+++ b/modules/nf-core/multiqc/main.nf
@@ -0,0 +1,63 @@
+process MULTIQC {
+    label 'process_single'
+    // Aggregates all staged QC files in the work directory into one MultiQC report; every input after the first is optional.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }"
+
+    input:
+    path  multiqc_files, stageAs: "?/*" // staged into per-file sub-directories, presumably to avoid name clashes
+    path(multiqc_config)
+    path(extra_multiqc_config)
+    path(multiqc_logo)
+    path(replace_names)
+    path(sample_names)
+
+    output:
+    path "*multiqc_report.html", emit: report
+    path "*_data"              , emit: data
+    path "*_plots"             , optional:true, emit: plots
+    path "versions.yml"        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' // holds the whole --filename option, not a bare prefix
+    def config = multiqc_config ? "--config $multiqc_config" : ''
+    def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
+    def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : ''
+    def replace = replace_names ? "--replace-names ${replace_names}" : ''
+    def samples = sample_names ? "--sample-names ${sample_names}" : ''
+    """
+    multiqc \\
+        --force \\
+        $args \\
+        $config \\
+        $prefix \\
+        $extra_config \\
+        $logo \\
+        $replace \\
+        $samples \\
+        .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir multiqc_data
+    mkdir multiqc_plots
+    touch multiqc_report.html
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
new file mode 100644
index 0000000..2621b2a
--- /dev/null
+++ b/modules/nf-core/multiqc/meta.yml
@@ -0,0 +1,78 @@
+name: multiqc
+description: Aggregate results from bioinformatics analyses across many samples into
+ a single report
+keywords:
+ - QC
+ - bioinformatics tools
+ - Beautiful stand-alone HTML report
+tools:
+ - multiqc:
+ description: |
+ MultiQC searches a given directory for analysis logs and compiles a HTML report.
+ It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
+ homepage: https://multiqc.info/
+ documentation: https://multiqc.info/docs/
+ licence: ["GPL-3.0-or-later"]
+ identifier: biotools:multiqc
+input:
+ - - multiqc_files:
+ type: file
+ description: |
+ List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+ - - multiqc_config:
+ type: file
+ description: Optional config yml for MultiQC
+ pattern: "*.{yml,yaml}"
+ - - extra_multiqc_config:
+ type: file
+ description: Second optional config yml for MultiQC. Will override common sections
+ in multiqc_config.
+ pattern: "*.{yml,yaml}"
+ - - multiqc_logo:
+ type: file
+ description: Optional logo file for MultiQC
+ pattern: "*.{png}"
+ - - replace_names:
+ type: file
+ description: |
+ Optional two-column sample renaming file. First column a set of
+ patterns, second column a set of corresponding replacements. Passed via
+ MultiQC's `--replace-names` option.
+ pattern: "*.{tsv}"
+ - - sample_names:
+ type: file
+ description: |
+          Optional TSV file with headers, passed to the MultiQC `--sample-names`
+          argument.
+ pattern: "*.{tsv}"
+output:
+ - report:
+ - "*multiqc_report.html":
+ type: file
+ description: MultiQC report file
+ pattern: "multiqc_report.html"
+ - data:
+ - "*_data":
+ type: directory
+ description: MultiQC data dir
+ pattern: "multiqc_data"
+ - plots:
+ - "*_plots":
+ type: file
+ description: Plots created by MultiQC
+          pattern: "*_plots"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
+ - "@jfy133"
+maintainers:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
+ - "@jfy133"
\ No newline at end of file
diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml
new file mode 100644
index 0000000..b9455d6
--- /dev/null
+++ b/modules/nf-core/prodigal/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::prodigal=2.6.3  # gene caller invoked by the PRODIGAL process
+  - conda-forge::pigz=2.6  # decompresses the input and gzips the outputs in main.nf
\ No newline at end of file
diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf
new file mode 100644
index 0000000..916f97e
--- /dev/null
+++ b/modules/nf-core/prodigal/main.nf
@@ -0,0 +1,64 @@
+// Runs Prodigal on a (possibly gzipped) genome FASTA and gzips every result file with pigz.
+process PRODIGAL {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' :
+        'biocontainers/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' }"
+
+    input:
+    tuple val(meta), path(genome)
+    val(output_format)
+
+    output:
+    tuple val(meta), path("${prefix}.${output_format}.gz"), emit: gene_annotations
+    tuple val(meta), path("${prefix}.fna.gz"), emit: nucleotide_fasta
+    tuple val(meta), path("${prefix}.faa.gz"), emit: amino_acid_fasta
+    tuple val(meta), path("${prefix}_all.txt.gz"), emit: all_gene_annotations
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}" // no `def`: the output: declarations interpolate `prefix`
+    """
+    pigz -cdf ${genome} | prodigal \\
+        $args \\
+        -f $output_format \\
+        -d "${prefix}.fna" \\
+        -o "${prefix}.${output_format}" \\
+        -a "${prefix}.faa" \\
+        -s "${prefix}_all.txt"
+
+    pigz -nm "${prefix}.fna"
+    pigz -nm "${prefix}.${output_format}"
+    pigz -nm "${prefix}.faa"
+    pigz -nm "${prefix}_all.txt"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p')
+        pigz: \$(pigz -V 2>&1 | sed 's/pigz //g')
+    END_VERSIONS
+    """
+
+    stub:
+    // Placeholders are real (empty) gzip streams so downstream steps can still decompress them.
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > "${prefix}.fna.gz"
+    echo "" | gzip > "${prefix}.${output_format}.gz"
+    echo "" | gzip > "${prefix}.faa.gz"
+    echo "" | gzip > "${prefix}_all.txt.gz"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p')
+        pigz: \$(pigz -V 2>&1 | sed 's/pigz //g')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml
new file mode 100644
index 0000000..d59ff5c
--- /dev/null
+++ b/modules/nf-core/prodigal/meta.yml
@@ -0,0 +1,79 @@
+name: prodigal
+description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a
+ microbial (bacterial and archaeal) gene finding program
+keywords:
+ - prokaryotes
+ - gene finding
+ - microbial
+tools:
+ - prodigal:
+ description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm)
+ is a microbial (bacterial and archaeal) gene finding program
+ homepage: https://github.com/hyattpd/Prodigal
+ documentation: https://github.com/hyattpd/prodigal/wiki
+ tool_dev_url: https://github.com/hyattpd/Prodigal
+ doi: "10.1186/1471-2105-11-119"
+ licence: ["GPL v3"]
+ identifier: biotools:prodigal
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - genome:
+ type: file
+ description: fasta/fasta.gz file
+ - - output_format:
+ type: string
+ description: Output format ("gbk"/"gff"/"sqn"/"sco")
+output:
+  - gene_annotations:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.${output_format}.gz:
+          type: file
+          description: gzipped gene annotations in the output_format given as input
+          pattern: "*.{gbk,gff,sqn,sco}.gz"
+  - nucleotide_fasta:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.fna.gz:
+          type: file
+          description: gzipped nucleotide sequences file
+          pattern: "*.fna.gz"
+  - amino_acid_fasta:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}.faa.gz:
+          type: file
+          description: gzipped protein translations file
+          pattern: "*.faa.gz"
+  - all_gene_annotations:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}_all.txt.gz:
+          type: file
+          description: gzipped complete starts file
+          pattern: "*_all.txt.gz"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+ - "@grst"
+maintainers:
+ - "@grst"
\ No newline at end of file
diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml
new file mode 100644
index 0000000..569eb73
--- /dev/null
+++ b/modules/nf-core/spades/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::spades=4.0.0  # same SPAdes release as the container tag in main.nf
\ No newline at end of file
diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf
new file mode 100644
index 0000000..46f11c2
--- /dev/null
+++ b/modules/nf-core/spades/main.nf
@@ -0,0 +1,102 @@
+// Assembles reads with SPAdes and gzips whichever FASTA/GFA outputs it produced.
+// Reads come either from a `--dataset` YAML (`yml`) or from the Illumina
+// (-s / -1 -2), `--pacbio` and `--nanopore` inputs. Every assembly output is
+// optional; only the renamed spades.log is always emitted.
+process SPADES {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/spades:4.0.0--h5fb382e_1' :
+        'biocontainers/spades:4.0.0--h5fb382e_1' }"
+
+    input:
+    tuple val(meta), path(illumina), path(pacbio), path(nanopore)
+    path yml
+    path hmm
+
+    output:
+    tuple val(meta), path('*.scaffolds.fa.gz')    , optional:true, emit: scaffolds
+    tuple val(meta), path('*.contigs.fa.gz')      , optional:true, emit: contigs
+    tuple val(meta), path('*.transcripts.fa.gz')  , optional:true, emit: transcripts
+    tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
+    tuple val(meta), path('*.assembly.gfa.gz')    , optional:true, emit: gfa
+    tuple val(meta), path('*.warnings.log')       , optional:true, emit: warnings
+    tuple val(meta), path('*.spades.log')         , emit: log
+    path  "versions.yml"                          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // task.memory is null when no memory directive is set; omit --memory instead of failing on toGiga().
+    def memory_arg = task.memory ? "--memory ${task.memory.toGiga()}" : ''
+    def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
+    def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
+    def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
+    def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
+    // A dataset YAML, when supplied, replaces the per-technology read arguments.
+    def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads"
+    """
+    spades.py \\
+        $args \\
+        --threads $task.cpus \\
+        $memory_arg \\
+        $custom_hmms \\
+        $reads \\
+        -o ./
+    mv spades.log ${prefix}.spades.log
+
+    if [ -f scaffolds.fasta ]; then
+        mv scaffolds.fasta ${prefix}.scaffolds.fa
+        gzip -n ${prefix}.scaffolds.fa
+    fi
+    if [ -f contigs.fasta ]; then
+        mv contigs.fasta ${prefix}.contigs.fa
+        gzip -n ${prefix}.contigs.fa
+    fi
+    if [ -f transcripts.fasta ]; then
+        mv transcripts.fasta ${prefix}.transcripts.fa
+        gzip -n ${prefix}.transcripts.fa
+    fi
+    if [ -f assembly_graph_with_scaffolds.gfa ]; then
+        mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
+        gzip -n ${prefix}.assembly.gfa
+    fi
+
+    if [ -f gene_clusters.fasta ]; then
+        mv gene_clusters.fasta ${prefix}.gene_clusters.fa
+        gzip -n ${prefix}.gene_clusters.fa
+    fi
+
+    if [ -f warnings.log ]; then
+        mv warnings.log ${prefix}.warnings.log
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p')
+    END_VERSIONS
+    """
+
+    stub:
+    // Only the prefix is needed to name the placeholder outputs.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${prefix}.scaffolds.fa.gz
+    echo "" | gzip > ${prefix}.contigs.fa.gz
+    echo "" | gzip > ${prefix}.transcripts.fa.gz
+    echo "" | gzip > ${prefix}.gene_clusters.fa.gz
+    echo "" | gzip > ${prefix}.assembly.gfa.gz
+    touch ${prefix}.spades.log
+    touch ${prefix}.warnings.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml
new file mode 100644
index 0000000..65d260d
--- /dev/null
+++ b/modules/nf-core/spades/meta.yml
@@ -0,0 +1,151 @@
+name: spades
+description: Assembles a small genome (bacterial, fungal, viral)
+keywords:
+ - genome
+ - assembly
+ - genome assembler
+ - small genome
+ - de novo assembler
+tools:
+ - spades:
+ description: SPAdes (St. Petersburg genome assembler) is intended for both standard
+ isolates and single-cell MDA bacteria assemblies.
+ homepage: http://cab.spbu.ru/files/release3.15.0/manual.html
+ documentation: http://cab.spbu.ru/files/release3.15.0/manual.html
+ tool_dev_url: https://github.com/ablab/spades
+ doi: 10.1089/cmb.2012.0021
+ licence: ["GPL v2"]
+ identifier: ""
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - illumina:
+ type: file
+ description: |
+ List of input FastQ (Illumina or PacBio CCS reads) files
+ of size 1 and 2 for single-end and paired-end data,
+ respectively. This input data type is required.
+ - pacbio:
+ type: file
+ description: |
+ List of input PacBio CLR FastQ files of size 1.
+ - nanopore:
+ type: file
+ description: |
+ List of input FastQ files of size 1, originating from Oxford Nanopore technology.
+ - - yml:
+ type: file
+ description: |
+ Path to yml file containing read information.
+ The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML.
+ File entries in this yml must contain only the file name and no paths.
+ pattern: "*.{yml,yaml}"
+ - - hmm:
+ type: file
+ description: File or directory with amino acid HMMs for Spades HMM-guided mode.
+output:
+  - scaffolds:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.scaffolds.fa.gz":
+          type: file
+          description: |
+            Gzipped FASTA of assembled scaffolds (SPAdes scaffolds.fasta,
+            renamed to <prefix>.scaffolds.fa). Optional, only emitted
+            when SPAdes writes that file.
+          pattern: "*.scaffolds.fa.gz"
+  - contigs:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.contigs.fa.gz":
+          type: file
+          description: |
+            Gzipped FASTA of assembled contigs (SPAdes contigs.fasta,
+            renamed to <prefix>.contigs.fa). Optional, only emitted
+            when SPAdes writes that file.
+          pattern: "*.contigs.fa.gz"
+  - transcripts:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.transcripts.fa.gz":
+          type: file
+          description: |
+            Gzipped FASTA of assembled transcripts (SPAdes transcripts.fasta,
+            renamed to <prefix>.transcripts.fa). Optional, only emitted
+            when SPAdes writes that file.
+          pattern: "*.transcripts.fa.gz"
+  - gene_clusters:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.gene_clusters.fa.gz":
+          type: file
+          description: |
+            Gzipped FASTA of gene clusters (SPAdes gene_clusters.fasta,
+            renamed to <prefix>.gene_clusters.fa). Optional, only emitted
+            when SPAdes writes that file.
+          pattern: "*.gene_clusters.fa.gz"
+  - gfa:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.assembly.gfa.gz":
+          type: file
+          description: |
+            Gzipped GFA assembly graph (SPAdes
+            assembly_graph_with_scaffolds.gfa, renamed to
+            <prefix>.assembly.gfa). Optional.
+          pattern: "*.assembly.gfa.gz"
+  - warnings:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.warnings.log":
+          type: file
+          description: |
+            SPAdes warnings.log renamed to <prefix>.warnings.log (optional)
+          pattern: "*.warnings.log"
+  - log:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.spades.log":
+          type: file
+          description: |
+            SPAdes run log (spades.log renamed to <prefix>.spades.log).
+            This is the only non-optional output besides
+            versions.yml.
+          pattern: "*.spades.log"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+ - "@JoseEspinosa"
+ - "@drpatelh"
+ - "@d4straub"
+maintainers:
+ - "@JoseEspinosa"
+ - "@drpatelh"
+ - "@d4straub"
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 34fa99b..92a8257 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -70,6 +70,12 @@ params {
singularity_cachedir = 'singularity'
publish_dir_mode = 'copy'
+
+ // MultiQC options
+ multiqc_config = null
+ multiqc_title = null
+ multiqc_logo = null
+ multiqc_methods_description = null
}
profiles {
diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf
index 57810b2..678d351 100644
--- a/subworkflows/local/annotate.nf
+++ b/subworkflows/local/annotate.nf
@@ -93,25 +93,36 @@ workflow ANNOTATE {
}
CHECKV(
- predicted_contigs.combine( contigs.map { name, fasta -> fasta }),
+        predicted_contigs.combine(contigs, by: 0), // match on the shared meta key; joining a meta-less fasta channel never pairs anything
checkv_db
)
- viphos_annotations = ANNOTATION.out.map { _, __, annotations -> annotations }.collect()
- taxonomy_annotations = ASSIGN.out.map { _, __, taxonomy -> taxonomy }.collect()
- checkv_results = CHECKV.out.map { _, __, quality_summary, ___ -> quality_summary }.collect()
+ viphos_annotations = ANNOTATION.out.annotations.groupTuple().map{
+ meta, values -> {
+ def annotations = values.collect{it[1]};
+ return [meta, annotations] }
+ }
+ taxonomy_annotations = ASSIGN.out.groupTuple().map{
+ meta, values -> {
+ def taxonomy = values.collect{it[1]};
+ return [meta, taxonomy] }
+ }
+ checkv_results = CHECKV.out.groupTuple().map{
+ meta, values -> {
+ def quality_summary = values.collect{it[1]};
+ return [meta, quality_summary] }
+ }
WRITE_GFF(
- contigs.first(),
- viphos_annotations,
- taxonomy_annotations,
- checkv_results
+ contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results)
)
-
- predicted_contigs_filtered = predicted_contigs.map { id, set_name, fasta -> [set_name, id, fasta] }
- plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { id, set_name, dir, table -> [set_name, table] }
+
+ chromomap_ch = Channel.empty()
+ predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] }
+ plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] }
chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { set_name, assembly_name, fasta, tsv -> [assembly_name, set_name, fasta, tsv]}
-
+    // chromomap_ch emits [meta, set_name, fasta, tsv]; the debug .view() was dropped to keep stdout clean.
+
emit:
assign_output = ASSIGN.out
chromomap = chromomap_ch
diff --git a/subworkflows/local/assemble_illumina.nf b/subworkflows/local/assemble_illumina.nf
index 7fe36be..d45aa6c 100644
--- a/subworkflows/local/assemble_illumina.nf
+++ b/subworkflows/local/assemble_illumina.nf
@@ -2,24 +2,39 @@
Optional assembly step, not fully implemented and tested.
*/
-include { FASTP } from '../../modules/local/fastp'
-include { FASTQC } from '../../modules/local/fastqc'
-include { MULTIQC } from '../../modules/local/multiqc'
-include { SPADES } from '../../modules/local/spades'
+include { FASTP } from '../../modules/nf-core/fastp'
+include { FASTQC as FASTQC_BEFORE } from '../../modules/nf-core/fastqc'
+include { FASTQC as FASTQC_AFTER } from '../../modules/nf-core/fastqc'
+include { SPADES } from '../../modules/nf-core/spades'
workflow ASSEMBLE_ILLUMINA {
take: reads
main:
+ // QC before filtering
+ FASTQC_BEFORE(reads)
+
// trimming
- FASTP(reads)
+ FASTP(
+ reads,
+ [],
+ false,
+ false,
+ false
+ )
+
+ // QC after filtering
+ FASTQC_AFTER(FASTP.out.reads)
- // read QC
- MULTIQC(FASTQC(FSATP.out))
-
// assembly
- SPADES(FASTP.out)
+    // SPADES declares (meta, illumina, pacbio, nanopore), yml and hmm inputs; pad the unused ones with [].
+    SPADES(FASTP.out.reads.map { meta, fastq -> [meta, fastq, [], []] }, [], [])
+ ch_multiqc_files = Channel.empty()
+ ch_multiqc_files = ch_multiqc_files.mix( FASTQC_BEFORE.out.zip.collect{it[1]}.ifEmpty([]) )
+ ch_multiqc_files = ch_multiqc_files.mix( FASTP.out.json.collect{it[1]}.ifEmpty([]) )
+ ch_multiqc_files = ch_multiqc_files.mix( FASTQC_AFTER.out.zip.collect{it[1]}.ifEmpty([]) )
emit:
- assembly = SPADES.out
+ assembly = SPADES.out.contigs
+ ch_multiqc_files = ch_multiqc_files
}
\ No newline at end of file
diff --git a/subworkflows/local/detect.nf b/subworkflows/local/detect.nf
index bc4a37e..ea9fd64 100644
--- a/subworkflows/local/detect.nf
+++ b/subworkflows/local/detect.nf
@@ -18,14 +18,12 @@ workflow DETECT {
main:
- renamed_ch = assembly_renamed_length_filtered.map {name, renamed_fasta, map, _, __ -> {
- tuple(name, renamed_fasta, map)
- }
+ renamed_ch = assembly_renamed_length_filtered.map {
+ meta, renamed_fasta, map, _, __ -> tuple(meta, renamed_fasta, map)
}
- length_filtered_ch = assembly_renamed_length_filtered.map { name, _, __, filtered_fasta, contig_number -> {
- tuple(name, filtered_fasta, contig_number)
- }
+ length_filtered_ch = assembly_renamed_length_filtered.map {
+ meta, _, __, filtered_fasta, contig_number -> tuple(meta, filtered_fasta, contig_number)
}
// virus detection --> VirSorter, VirFinder and PPR-Meta
@@ -37,5 +35,5 @@ workflow DETECT {
PARSE( length_filtered_ch.join( VIRFINDER.out ).join( VIRSORTER.out ).join( PPRMETA.out ) )
emit:
- detect_output = PARSE.out.join(renamed_ch).transpose().map{ name, fasta, vs_meta, log, renamed_fasta, map -> tuple (name, fasta, map) }
+ detect_output = PARSE.out.join(renamed_ch).transpose().map{ meta, fasta, vs_meta, log, renamed_fasta, map -> tuple (meta, fasta, map) }
}
\ No newline at end of file
diff --git a/subworkflows/local/preprocess.nf b/subworkflows/local/preprocess.nf
index 86cb231..efff437 100644
--- a/subworkflows/local/preprocess.nf
+++ b/subworkflows/local/preprocess.nf
@@ -12,12 +12,12 @@ workflow PREPROCESS {
main:
- RENAME(assembly)
+ RENAME(assembly) // out: (meta, renamed.fasta, map)
// filter contigs by length
- LENGTH_FILTERING(RENAME.out)
+ LENGTH_FILTERING(RENAME.out) // out: (meta, filt_fasta, env)
emit:
- // tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv"), file("${name}*filt*.fasta"), env(CONTIGS)
+ // tuple val(meta), file("${meta.id}_renamed.fasta"), file("${meta.id}_map.tsv"), file("${meta.id}*filt*.fasta"), env(CONTIGS)
preprocessed_data = RENAME.out.join(LENGTH_FILTERING.out, by: 0)
}
\ No newline at end of file
diff --git a/workflows/virify.nf b/workflows/virify.nf
index a892dc0..21a7bb0 100755
--- a/workflows/virify.nf
+++ b/workflows/virify.nf
@@ -4,19 +4,27 @@
* INPUT CHANNELS
**************************/
-input_ch = Channel.empty()
-mashmap_ref_ch = Channel.empty()
-factor_file = Channel.empty()
+input_ch = Channel.empty()
+mashmap_ref_ch = Channel.empty()
+factor_file = Channel.empty()
+// MultiQC inputs: bundled config, optional user config, logo (user-supplied or bundled) and methods-description template.
+ch_multiqc_config                     = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config              = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo                       = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.fromPath("$projectDir/assets/mgnify_logo.png", checkIfExists: true)
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
include { samplesheetToList } from 'plugin/nf-schema'
if ( params.samplesheet ) {
groupReads = { id, assembly, fq1, fq2 ->
if (fq1 == []) {
- return tuple(id, assembly)
+ return tuple(["id": id],
+ assembly
+ )
} else {
if (params.assemble) {
- return tuple(id, [fq1, fq2])
+ return tuple(["id": id],
+ [fq1, fq2])
}
else {
exit 1, "input missing, use [--assemble] flag with raw reads"
@@ -29,7 +37,7 @@ if ( params.samplesheet ) {
// one sample of assembly
if (params.fasta) {
input_ch = Channel.fromPath( params.fasta, checkIfExists: true)
- .map { file -> tuple(file.simpleName, file) }
+ .map { file -> tuple(["id": file.simpleName], file) }
}
// mashmap input
@@ -41,6 +49,11 @@ if (params.mashmap) {
if (params.factor) {
factor_file = file( params.factor, checkIfExists: true)
}
+/**************************
+* MODULES
+**************************/
+
+include { MULTIQC } from '../modules/nf-core/multiqc'
/**************************
* SUB WORKFLOWS
@@ -83,6 +96,7 @@ workflow VIRIFY {
}
// ----------- rename fasta + length filtering
+ // out: (meta, renamed_fasta, map, filtered_fasta, env)
PREPROCESS( assembly_ch )
// ----------- if --onlyannotate - skip DETECT step
@@ -126,5 +140,15 @@ workflow VIRIFY {
ANNOTATE.out.assign_output,
ANNOTATE.out.chromomap
)
+
+    if (params.assemble) {
+        MULTIQC(
+            ASSEMBLE_ILLUMINA.out.ch_multiqc_files.collect(),
+            ch_multiqc_config.toList(),
+            ch_multiqc_custom_config.toList(),
+            ch_multiqc_logo.toList(),
+            [], []  // replace_names, sample_names: declared by the module, unused by this pipeline
+        )
+    }
}