diff --git a/bin/check_samplesheet_qc.py b/bin/check_samplesheet_qc.py
new file mode 100755
index 00000000..db43a210
--- /dev/null
+++ b/bin/check_samplesheet_qc.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+
+# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
+
+import os
+import sys
+import errno
+import argparse
+import gzip
+import re
+
+
+def parse_args(args=None):
+    Description = "Reformat cdcgov/phoenix samplesheet file and check its contents."
+    Epilog = "Example usage: python check_samplesheet_qc.py <FILE_IN> <FILE_OUT>"
+
+    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
+    parser.add_argument("FILE_IN", help="Input samplesheet file.")
+    parser.add_argument("FILE_OUT", help="Output file.")
+    return parser.parse_args(args)
+
+
+def make_dir(path):
+    if len(path) > 0:
+        try:
+            os.makedirs(path)
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise exception
+
+
+def print_error(error, context="Line", context_str=""):
+    error_str = "ERROR: Please check samplesheet -> {}".format(error)
+    if context != "" and context_str != "":
+        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
+            error, context.strip(), context_str.strip()
+        )
+    print(error_str)
+    sys.exit(1)
+
+
+def check_samplesheet(file_in, file_out):
+    """
+    This function checks that the samplesheet follows the following structure:
+
+    sample,fastq_1,fastq_2,fastp_pass_json,fastp_failed_json,spades,mlst,quast,amrfinderplus
+    SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,passed_fastp.json,failed.fastp.json,spades.fasta,mlst.tsv,quast.tsv,amrfinderplus.tsv
+
+    sample             A sample name for the input
+    fastq_1            R1 of reads run through Fastp
+    fastq_2            R2 of reads run through Fastp
+    fastp_pass_json    JSON output from initial Fastp run
+    fastp_failed_json  JSON output from rerun of Fastp on failed reads
+    spades             Assembly created by SPAdes
+    mlst               TSV output from the mlst tool
+    quast              TSV report generated by QUAST
+    amrfinderplus      TSV report generated by AMRFinder+
+    """
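+    # For a single-end sample, leave fastq_2 empty; the row is otherwise identical, e.g.:
+    # SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,,passed_fastp.json,failed.fastp.json,spades.fasta,mlst.tsv,quast.tsv,amrfinderplus.tsv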
+
+    sample_mapping_dict = {}
+    with open(file_in, "r") as fin:
+
+        ## Check header
+        MIN_COLS = 2
+        HEADER = ['sample', 'fastq_1', 'fastq_2', 'fastp_pass_json', 'fastp_failed_json', 'spades', 'mlst', 'quast', 'amrfinderplus']
+
+        header = [x.strip('"') for x in fin.readline().strip().split(",")]
+        if header[: len(HEADER)] != HEADER:
+            print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
+            sys.exit(1)
+
+        ## Check sample entries
+        for line in fin:
+            lspl = [x.strip().strip('"') for x in line.strip().split(",")]
+
+            ## Check valid number of columns per row
+            if len(lspl) < len(HEADER):
+                print_error(
+                    "Invalid number of columns (minimum = {})!".format(len(HEADER)),
+                    "Line",
+                    line,
+                )
+            num_cols = len([x for x in lspl if x])
+            if num_cols < MIN_COLS:
+                print_error(
+                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
+                    "Line",
+                    line,
+                )
+
+            ## Check sample name entries
+            sample, fastq_1, fastq_2, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus = lspl[: len(HEADER)]
+            sample = sample.replace(" ", "_")
+            if not sample:
+                print_error("Sample entry has not been specified!", "Line", line)
+
+            ## Check FastQ file extension
+            for fastq in [fastq_1, fastq_2]:
+                if fastq:
+                    if fastq.find(" ") != -1:
+                        print_error("FastQ file contains spaces!", "Line", line)
+                    if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"):
+                        # If the file is not gzipped then gzip it.
+                        fastq_gz = fastq + ".gz"
+                        with open(fastq, "rb") as f_in:
+                            with gzip.open(fastq_gz, 'wb') as f_out:
+                                f_out.writelines(f_in)
+                        print(
+                            "WARNING: FastQ file '{}' does not have extension '.fastq.gz' or '.fq.gz'! Gzipping file.".format(fastq)
+                        )
+
+            ## Auto-detect paired-end/single-end
+            sample_info = []  ## [single_end, fastq_1, fastq_2, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus]
+            if sample and fastq_1 and fastq_2:  ## Paired-end short reads
+                sample_info = ["0", fastq_1, fastq_2, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus]
+            elif sample and fastq_1 and not fastq_2:  ## Single-end short reads
+                sample_info = ["1", fastq_1, fastq_2, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus]
+            else:
+                print_error("Invalid combination of columns provided!", "Line", line)
+
+            ## Create sample mapping dictionary = { sample: [ single_end, fastq_1, fastq_2, ... ] }
+            if sample not in sample_mapping_dict:
+                sample_mapping_dict[sample] = [sample_info]
+            else:
+                if sample_info in sample_mapping_dict[sample]:
+                    print_error("Samplesheet contains duplicate rows!", "Line", line)
+                else:
+                    sample_mapping_dict[sample].append(sample_info)
+
+    ## Write validated samplesheet with appropriate columns
+    if len(sample_mapping_dict) > 0:
+        out_dir = os.path.dirname(file_out)
+        make_dir(out_dir)
+        with open(file_out, "w") as fout:
+            fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "fastp_pass_json", "fastp_failed_json", "spades", "mlst", "quast", "amrfinderplus"]) + "\n")
+            for sample in sorted(sample_mapping_dict.keys()):
+
+                ## Check that multiple runs of the same sample are of the same datatype
+                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
+                    print_error("Multiple runs of a sample must be of the same datatype!", "Sample", sample)
+
+#                for idx, val in enumerate(sample_mapping_dict[sample]):
+#                    fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n")
+                for idx, val in enumerate(sample_mapping_dict[sample]):
+                    if not val[1].endswith(".gz"):  # point the forward read at the gzipped file
+                        val[1] = re.sub(r"\.fastq$", ".fastq.gz", val[1])
+                        val[1] = re.sub(r"\.fq$", ".fq.gz", val[1])
+                    if val[2] and not val[2].endswith(".gz"):  # point the reverse read at the gzipped file
+                        val[2] = re.sub(r"\.fastq$", ".fastq.gz", val[2])
+                        val[2] = re.sub(r"\.fq$", ".fq.gz", val[2])
+                    fout.write(",".join([sample] + val) + "\n")
+    else:
+        print_error("No entries to process!", "Samplesheet", file_in)
+
+
+def main(args=None):
+    args = parse_args(args)
+    check_samplesheet(args.FILE_IN, args.FILE_OUT)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/main.nf b/main.nf
index a2684ac3..20cc8099 100644
--- a/main.nf
+++ b/main.nf
@@ -33,6 +33,7 @@ WorkflowMain.initialise(workflow, params, log)
 */
 
 include { PHOENIX_EXTERNAL } from './workflows/phoenix'
+include { PHOENIX_QC_EXTERNAL } from './workflows/phoenix_qc'
 include { PHOENIX_EXQC } from './workflows/cdc_phoenix'
 include { SRA_PHOENIX } from './workflows/sra_phoenix'
 include { SCAFFOLD_EXTERNAL } from './workflows/scaffolds'
@@ -51,6 +52,14 @@ workflow PHOENIX {
         gamma_ar = PHOENIX_EXTERNAL.out.gamma_ar
 }
 
+//
+// WORKFLOW: Run QC steps from main cdcgov/phoenix analysis pipeline
+//
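+// Typical invocation (illustrative paths): nextflow run main.nf -entry PHOENIX_QC --input samplesheet.csv --kraken2db <db_dir>
+// (-entry selects this named workflow; --input and --kraken2db are validated in workflows/phoenix_qc.nf)
+//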
+workflow PHOENIX_QC {
+    main:
+        PHOENIX_QC_EXTERNAL ()
+}
+
 //
 // WORKFLOW: Run internal version of cdcgov/phoenix analysis pipeline that includes BUSCO, SRST2 and KRAKEN_ASMBLED
 //
diff --git a/modules/local/samplesheet_qc_check.nf b/modules/local/samplesheet_qc_check.nf
new file mode 100644
index 00000000..b2cd35e2
--- /dev/null
+++ b/modules/local/samplesheet_qc_check.nf
@@ -0,0 +1,28 @@
+process SAMPLESHEET_QC_CHECK {
+    tag "$samplesheet"
+    label 'process_low'
+    container 'quay.io/jvhagey/phoenix:base_v1.0.0'
+
+    /*container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
+        'quay.io/biocontainers/python:3.8.3' }"*/
+
+    input:
+    path samplesheet
+
+    output:
+    path '*.valid.csv' , emit: csv
+    path "versions.yml", emit: versions
+
+    script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/
+    """
+    check_samplesheet_qc.py \\
+        $samplesheet \\
+        samplesheet.valid.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/input_qc_check.nf b/subworkflows/local/input_qc_check.nf
new file mode 100644
index 00000000..51e92dd8
--- /dev/null
+++ b/subworkflows/local/input_qc_check.nf
@@ -0,0 +1,110 @@
+//
+// Check input samplesheet and get read channels
+//
+
+include { SAMPLESHEET_QC_CHECK } from '../../modules/local/samplesheet_qc_check'
+
+workflow INPUT_QC_CHECK {
+    take:
+    samplesheet // file: /path/to/samplesheet.csv
+
+    main:
+    SAMPLESHEET_QC_CHECK ( samplesheet )
+        .csv
+        .splitCsv ( header:true, sep:',' )
+        .map { create_qc_channels(it) }
+        .set { ch_samples }
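+    // ch_samples: [ meta, [ reads ], fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ]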
+
+    // Create channels to match upstream processes
+    // FASTP_TRIMD.out.reads -> tuple val(meta), path('*.trim.fastq.gz'), optional:true, emit: reads
+    ch_samples.map{ meta, reads, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ->
+        [ [id:meta.id, single_end:true], reads ]
+    }.set { ch_reads }
+
+    // GATHERING_READ_QC_STATS: tuple val(meta), path(fastp_trimd_json), path(fastp_singles_json)
+    ch_samples.map{ meta, reads, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ->
+        [ [id:meta.id, single_end:true], fastp_pass_json, fastp_failed_json ]
+    }.set { ch_fastp_json }
+
+    // SPADES_WF.out.spades_ch -> SPADES.out.scaffolds.map{ meta, scaffolds -> [ [id:meta.id, single_end:true], scaffolds ] }
+    ch_samples.map{ meta, reads, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ->
+        [ [id:meta.id, single_end:true], spades ]
+    }.set { ch_spades }
+
+    // MLST.out.tsv -> tuple val(meta), path("*.tsv"), emit: tsv
+    ch_samples.map{ meta, reads, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ->
+        [ [id:meta.id, single_end:true], mlst ]
+    }.set { ch_mlst }
+
+    // QUAST.out.report_tsv -> tuple val(meta), path("*.tsv"), emit: tsv
+    ch_samples.map{ meta, reads, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ->
+        [ [id:meta.id, single_end:true], quast ]
+    }.set { ch_quast }
+
+    // AMRFINDERPLUS_RUN.out.report -> tuple val(meta), path("${meta.id}_all_genes.tsv"), emit: report
+    ch_samples.map{ meta, reads, fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ->
+        [ [id:meta.id, single_end:true], amrfinderplus ]
+    }.set { ch_amrfinderplus }
+
+    emit:
+    reads             = ch_reads
+    fastp_json        = ch_fastp_json
+    spades            = ch_spades
+    mlst              = ch_mlst
+    quast             = ch_quast
+    amrfinderplus     = ch_amrfinderplus
+    valid_samplesheet = SAMPLESHEET_QC_CHECK.out.csv
+    versions          = SAMPLESHEET_QC_CHECK.out.versions // channel: [ versions.yml ]
+}
+
+// Function to get list of [ meta, [ fastq_1, fastq_2 ], fastp_pass_json, fastp_failed_json, spades, mlst, quast, amrfinderplus ]
+def create_qc_channels(LinkedHashMap row) {
+    def meta = [:]
+    meta.id         = row.sample
+    meta.single_end = row.single_end.toBoolean()
+
+    def array = []
+    if (!file(row.fastq_1).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
+    }
+
+    if (!meta.single_end) {
+        if (!file(row.fastq_2).exists()) {
+            exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
+        }
+    }
+
+    // Check remaining files
+    if (!file(row.fastp_pass_json).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> Fastp passed reads JSON file does not exist!\n${row.fastp_pass_json}"
+    }
+
+    if (!file(row.fastp_failed_json).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> Fastp failed reads JSON file does not exist!\n${row.fastp_failed_json}"
+    }
+
+    if (!file(row.spades).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> SPAdes assembly file does not exist!\n${row.spades}"
+    }
+
+    if (!file(row.mlst).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> MLST TSV report file does not exist!\n${row.mlst}"
+    }
+
+    if (!file(row.quast).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> QUAST TSV report file does not exist!\n${row.quast}"
+    }
+
+    if (!file(row.amrfinderplus).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> AMRFinder+ report file does not exist!\n${row.amrfinderplus}"
+    }
+
+    if (meta.single_end) {
+        array = [ meta, [ file(row.fastq_1) ], file(row.fastp_pass_json), file(row.fastp_failed_json), file(row.spades), file(row.mlst), file(row.quast), file(row.amrfinderplus) ]
+    } else {
+        array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ], file(row.fastp_pass_json), file(row.fastp_failed_json), file(row.spades), file(row.mlst), file(row.quast), file(row.amrfinderplus) ]
+    }
+
+    return array
+}
diff --git a/workflows/phoenix_qc.nf b/workflows/phoenix_qc.nf
new file mode 100644
index 00000000..68a4373e
--- /dev/null
+++ b/workflows/phoenix_qc.nf
@@ -0,0 +1,305 @@
+/*
+========================================================================================
+    VALIDATE INPUTS
+========================================================================================
+*/
+
+def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+
+// Validate input parameters
+WorkflowPhoenix.initialise(params, log)
+
+// Check input path parameters to see if they exist
+def checkPathParamList = [ params.input, params.multiqc_config, params.kraken2db ] // removed params.fasta to stop an issue connecting to AWS; iGenomes is not used
+for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
+
+// Check mandatory parameters
+
+// input on command line
+if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet/list not specified!' }
+if (params.kraken2db == null) { exit 1, 'Input path to kraken2db not specified!' }
+
+/*
+========================================================================================
+    SETUP
+========================================================================================
+*/
+
+// Info required for completion email and summary
+def multiqc_report = []
+
+/*
+========================================================================================
+    CONFIG FILES
+========================================================================================
+*/
+
+ch_multiqc_config        = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true)
+ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
+
+/*
+========================================================================================
+    IMPORT LOCAL MODULES
+========================================================================================
+*/
+
+include { ASSET_CHECK              } from '../modules/local/asset_check'
+include { RENAME_FASTA_HEADERS     } from '../modules/local/rename_fasta_headers'
+include { GAMMA_S as GAMMA_PF      } from '../modules/local/gammas'
+include { GAMMA as GAMMA_AR        } from '../modules/local/gamma'
+include { GAMMA as GAMMA_HV        } from '../modules/local/gamma'
+include { BBMAP_REFORMAT           } from '../modules/local/contig_less500'
+include { MASH_DIST                } from '../modules/local/mash_distance'
+include { FASTANI                  } from '../modules/local/fastani'
+include { DETERMINE_TOP_TAXA       } from '../modules/local/determine_top_taxa'
+include { FORMAT_ANI               } from '../modules/local/format_ANI_best_hit'
+include { GATHERING_READ_QC_STATS  } from '../modules/local/fastp_minimizer'
+include { DETERMINE_TAXA_ID        } from '../modules/local/tax_classifier'
+include { GET_TAXA_FOR_AMRFINDER   } from '../modules/local/get_taxa_for_amrfinder'
+include { CALCULATE_ASSEMBLY_RATIO } from '../modules/local/assembly_ratio'
+include { CREATE_SUMMARY_LINE      } from '../modules/local/phoenix_summary_line'
+include { GATHER_SUMMARY_LINES     } from '../modules/local/phoenix_summary'
+include { CHECK_MLST               } from '../modules/local/check_mlst'
+
+/*
+========================================================================================
+    IMPORT LOCAL SUBWORKFLOWS
+========================================================================================
+*/
+
+include { INPUT_QC_CHECK                 } from '../subworkflows/local/input_qc_check'
+include { GENERATE_PIPELINE_STATS_WF     } from '../subworkflows/local/generate_pipeline_stats'
+include { KRAKEN2_WF as KRAKEN2_TRIMD    } from '../subworkflows/local/kraken2krona'
+include { KRAKEN2_WF as KRAKEN2_ASMBLD   } from '../subworkflows/local/kraken2krona'
+include { KRAKEN2_WF as KRAKEN2_WTASMBLD } from '../subworkflows/local/kraken2krona'
+
+/*
+========================================================================================
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+========================================================================================
+*/
+
+//
+// MODULE: Installed directly from nf-core/modules
+//
+
+include { FASTQC as FASTQCTRIMD       } from '../modules/nf-core/modules/fastqc/main'
+include { MULTIQC                     } from '../modules/nf-core/modules/multiqc/main'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
+
+/*
+========================================================================================
+    RUN MAIN WORKFLOW
+========================================================================================
+*/
+
+workflow PHOENIX_QC_EXTERNAL {
+    main:
+        ch_versions = Channel.empty() // Used to collect the software versions
+        // Allow outdir to be relative
+        outdir_path = Channel.fromPath(params.outdir, relative: true)
+
+        // SUBWORKFLOW: Read in samplesheet/list, validate and stage input files
+        INPUT_QC_CHECK (
+            ch_input
+        )
+        ch_versions = ch_versions.mix(INPUT_QC_CHECK.out.versions)
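+
+        // The validated samplesheet supplies precomputed Fastp, SPAdes, MLST, QUAST and AMRFinder+
+        // outputs, standing in for the processes that generate them in the full PHOENIX workflow.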
+
+        // Unzip any zipped databases
+        ASSET_CHECK (
+            params.zipped_sketch
+        )
+
+        // Script gathers data from jsons for pipeline stats file
+        GATHERING_READ_QC_STATS (
+            INPUT_QC_CHECK.out.fastp_json
+        )
+
+        // Checking for contamination in trimmed reads, creating krona plots and best hit files
+        KRAKEN2_TRIMD (
+            INPUT_QC_CHECK.out.reads, "trimd", GATHERING_READ_QC_STATS.out.fastp_total_qc, []
+        )
+        ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions)
+
+        // Rename scaffold headers
+        RENAME_FASTA_HEADERS (
+            INPUT_QC_CHECK.out.spades
+        )
+        ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions)
+
+        // Removing scaffolds <500bp
+        BBMAP_REFORMAT (
+            RENAME_FASTA_HEADERS.out.renamed_scaffolds
+        )
+        ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions)
+
+        // Running GAMMA to identify hypervirulence genes in scaffolds
+        GAMMA_HV (
+            BBMAP_REFORMAT.out.filtered_scaffolds, params.hvgamdb
+        )
+        ch_versions = ch_versions.mix(GAMMA_HV.out.versions)
+
+        // Running GAMMA to identify AR genes in scaffolds
+        GAMMA_AR (
+            BBMAP_REFORMAT.out.filtered_scaffolds, params.ardb
+        )
+        ch_versions = ch_versions.mix(GAMMA_AR.out.versions)
+
+        GAMMA_PF (
+            BBMAP_REFORMAT.out.filtered_scaffolds, params.gamdbpf
+        )
+        ch_versions = ch_versions.mix(GAMMA_PF.out.versions)
+
+        // Creating krona plots and best hit files for weighted assembly
+        KRAKEN2_WTASMBLD (
+            BBMAP_REFORMAT.out.filtered_scaffolds, "wtasmbld", [], INPUT_QC_CHECK.out.quast
+        )
+        ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions)
+
+        // Running Mash distance to get top 20 matches for fastANI to speed things up
+        MASH_DIST (
+            BBMAP_REFORMAT.out.filtered_scaffolds, ASSET_CHECK.out.mash_sketch
+        )
+        ch_versions = ch_versions.mix(MASH_DIST.out.versions)
+
+        // Combining mash dist with filtered scaffolds based on meta.id
+        top_taxa_ch = MASH_DIST.out.dist.map{ meta, dist -> [[id:meta.id], dist]}\
+            .join(BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, reads -> [[id:meta.id], reads]}, by: [0])
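+        // Note: join(..., by: [0]) pairs entries from different channels on the reduced [id:meta.id]
+        // key, so per-sample tuples line up regardless of any other meta fields.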
INPUT_QC_CHECK.out.reads, "trimd", GATHERING_READ_QC_STATS.out.fastp_total_qc, [] + ) + ch_versions = ch_versions.mix(KRAKEN2_TRIMD.out.versions) + + // Rename scaffold headers + RENAME_FASTA_HEADERS ( + INPUT_QC_CHECK.out.spades + ) + ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions) + + // Removing scaffolds <500bp + BBMAP_REFORMAT ( + RENAME_FASTA_HEADERS.out.renamed_scaffolds + ) + ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions) + + // Running gamma to identify hypervirulence genes in scaffolds + GAMMA_HV ( + BBMAP_REFORMAT.out.filtered_scaffolds, params.hvgamdb + ) + ch_versions = ch_versions.mix(GAMMA_HV.out.versions) + + // Running gamma to identify AR genes in scaffolds + GAMMA_AR ( + BBMAP_REFORMAT.out.filtered_scaffolds, params.ardb + ) + ch_versions = ch_versions.mix(GAMMA_AR.out.versions) + + GAMMA_PF ( + BBMAP_REFORMAT.out.filtered_scaffolds, params.gamdbpf + ) + ch_versions = ch_versions.mix(GAMMA_PF.out.versions) + + // Creating krona plots and best hit files for weighted assembly + KRAKEN2_WTASMBLD ( + BBMAP_REFORMAT.out.filtered_scaffolds,"wtasmbld", [], INPUT_QC_CHECK.out.quast + ) + ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions) + + // Running Mash distance to get top 20 matches for fastANI to speed things up + MASH_DIST ( + BBMAP_REFORMAT.out.filtered_scaffolds, ASSET_CHECK.out.mash_sketch + ) + ch_versions = ch_versions.mix(MASH_DIST.out.versions) + + // Combining mash dist with filtered scaffolds based on meta.id + top_taxa_ch = MASH_DIST.out.dist.map{ meta, dist -> [[id:meta.id], dist]}\ + .join(BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, reads -> [[id:meta.id], reads ]}, by: [0]) + + // Generate file with list of paths of top taxa for fastANI + DETERMINE_TOP_TAXA ( + top_taxa_ch + ) + + // Combining filtered scaffolds with the top taxa list based on meta.id + top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, reads -> [[id:meta.id], reads]}\ + .join(DETERMINE_TOP_TAXA.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\ + .join(DETERMINE_TOP_TAXA.out.reference_files.map{ meta, reference_files -> [[id:meta.id], reference_files ]}, by: [0]) + + // Getting species ID + FASTANI ( + top_taxa_list_ch + ) + ch_versions = ch_versions.mix(FASTANI.out.versions) + + // Reformat ANI headers + FORMAT_ANI ( + FASTANI.out.ani + ) + + // Combining weighted kraken report with the FastANI hit based on meta.id + best_hit_ch = KRAKEN2_WTASMBLD.out.report.map{meta, kraken_weighted_report -> [[id:meta.id], kraken_weighted_report]}\ + .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\ + .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0]) + + // Getting ID from either FastANI or if fails, from Kraken2 + DETERMINE_TAXA_ID ( + best_hit_ch, params.taxa + ) + ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions) + + combined_mlst_ch = INPUT_QC_CHECK.out.mlst.map{meta, tsv -> [[id:meta.id], tsv]}\ + .join(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> [[id:meta.id], taxonomy]}, by: [0]) + + // Combining and adding flare to all MLST outputs + CHECK_MLST ( + combined_mlst_ch + ) + + // Create file that has the organism name to pass to AMRFinder + GET_TAXA_FOR_AMRFINDER ( + DETERMINE_TAXA_ID.out.taxonomy + ) + + // Combining determined taxa with the assembly stats based on meta.id + assembly_ratios_ch = DETERMINE_TAXA_ID.out.taxonomy.map{meta, taxonomy -> [[id:meta.id], taxonomy]}\ 
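+        // The empty lists ([]) above are placeholders for inputs this QC-only entry point does not
+        // generate (for instance, the assembled-contig Kraken2 step is included but never invoked here).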
+
+        // Combining output based on meta.id to create summary by sample -- is this verbose, ugly and annoying? Yes; if anyone has a slicker way to do this, we welcome the input.
+        // (The raw MLST join was replaced by the CHECK_MLST output below.)
+        //.join(INPUT_QC_CHECK.out.mlst.map{ meta, tsv -> [[id:meta.id], tsv]}, by: [0])
+        line_summary_ch = GATHERING_READ_QC_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}\
+            .join(CHECK_MLST.out.checked_MLSTs.map{ meta, checked_MLSTs -> [[id:meta.id], checked_MLSTs]}, by: [0])\
+            .join(GAMMA_HV.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\
+            .join(GAMMA_AR.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\
+            .join(GAMMA_PF.out.gamma.map{ meta, gamma -> [[id:meta.id], gamma]}, by: [0])\
+            .join(INPUT_QC_CHECK.out.quast.map{ meta, report_tsv -> [[id:meta.id], report_tsv]}, by: [0])\
+            .join(CALCULATE_ASSEMBLY_RATIO.out.ratio.map{ meta, ratio -> [[id:meta.id], ratio]}, by: [0])\
+            .join(GENERATE_PIPELINE_STATS_WF.out.pipeline_stats.map{ meta, pipeline_stats -> [[id:meta.id], pipeline_stats]}, by: [0])\
+            .join(DETERMINE_TAXA_ID.out.taxonomy.map{ meta, taxonomy -> [[id:meta.id], taxonomy]}, by: [0])\
+            .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\
+            .join(INPUT_QC_CHECK.out.amrfinderplus.map{ meta, report -> [[id:meta.id], report]}, by: [0])
+
+        // Generate summary per sample that passed SPAdes
+        CREATE_SUMMARY_LINE (
+            line_summary_ch
+        )
+        ch_versions = ch_versions.mix(CREATE_SUMMARY_LINE.out.versions)
+
+        // Combining sample summaries into final report
+        summaries_ch = CREATE_SUMMARY_LINE.out.line_summary.collect()
+        GATHER_SUMMARY_LINES (
+            summaries_ch, outdir_path, false
+        )
+        ch_versions = ch_versions.mix(GATHER_SUMMARY_LINES.out.versions)
+
+        // Collecting the software versions
+        CUSTOM_DUMPSOFTWAREVERSIONS (
+            ch_versions.unique().collectFile(name: 'collated_versions.yml')
+        )
+}
+
+/*
+========================================================================================
+    COMPLETION EMAIL AND SUMMARY
+========================================================================================
+*/
+
+count = 0 // guard so the completion email/summary only fire once
+workflow.onComplete {
+    if (count == 0) {
+        if (params.email || params.email_on_fail) {
+            NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
+        }
+        NfcoreTemplate.summary(workflow, params, log)
+        count++
+    }
+}
+
+/*
+========================================================================================
+    THE END
+========================================================================================
+*/
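
A minimal smoke test for the bundled checker (a sketch, run from the repo root; the file names inside the sheet are illustrative placeholders and are never opened, since they already end in .fastq.gz):

    import os
    import subprocess
    import tempfile

    rows = [
        "sample,fastq_1,fastq_2,fastp_pass_json,fastp_failed_json,spades,mlst,quast,amrfinderplus",
        "SAMPLE_PE,r1.fastq.gz,r2.fastq.gz,passed_fastp.json,failed_fastp.json,spades.fasta,mlst.tsv,quast.tsv,amrfinderplus.tsv",
    ]
    with tempfile.TemporaryDirectory() as tmp:
        sheet = os.path.join(tmp, "samplesheet.csv")
        valid = os.path.join(tmp, "samplesheet.valid.csv")
        with open(sheet, "w") as fh:
            fh.write("\n".join(rows) + "\n")
        # The checker exits non-zero on any validation error, so check=True surfaces failures.
        subprocess.run(["python", "bin/check_samplesheet_qc.py", sheet, valid], check=True)
        # The validated sheet gains a single_end column ("0" for this paired-end row).
        print(open(valid).read())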