From c24f618a4c87c219f644764678bd064843f63579 Mon Sep 17 00:00:00 2001 From: Eric Date: Mon, 11 Mar 2024 13:26:43 -0500 Subject: [PATCH 1/6] Setting up tests without skips. --- tests/main.nf.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index a356a6c1..289df5c3 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -79,11 +79,11 @@ nextflow_pipeline { mash { min_kmer = 1 } skip_bakta = true - skip_staramr = true - skip_mobrecon = true - skip_checkm = true - skip_raw_read_metrics = true - skip_polishing = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false max_memory = "2.GB" max_cpus = 1 @@ -121,7 +121,7 @@ nextflow_pipeline { // parse assembly file def assembly_header = path(assembly_path).linesGzip[0] - assert assembly_header.equals(">NODE_1_length_4949_cov_23.917254") + assert assembly_header.startsWith(">NODE_1_length_4949_cov_23.917254") // _pilon_pilon_pilon gets appended // compare IRIDA Next JSON output def iridanext_json = path("$launchDir/results/iridanext.output.json").json From 32d7e7a227978a97b7e81f622be7d1b176f0b60d Mon Sep 17 00:00:00 2001 From: Eric Date: Mon, 11 Mar 2024 15:38:00 -0500 Subject: [PATCH 2/6] Adding asserts for new data. --- tests/main.nf.test | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 289df5c3..69f7b2dd 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -97,6 +97,11 @@ nextflow_pipeline { // parse output json file def json = path("$launchDir/results/SummaryReport/final_report.json").json + assert json.short.short.RawReadSummary.R1."total_bp".equals(118750) + assert json.short.short.RawReadSummary.R1."total_reads".equals(475) + assert json.short.short.RawReadSummary.R1."read_qual_mean".equals(40.0) + assert json.short.short.RawReadSummary.R1."mean_sequence_length".equals(250.0) + assert json.short.short.FastP.summary.sequencing.equals("paired end (250 cycles + 250 cycles)") assert json.short.short.FastP.summary.before_filtering.total_reads.equals(950) assert json.short.short.FastP.filtering_result.passed_filter_reads.equals(950) @@ -116,6 +121,23 @@ nextflow_pipeline { assert json.short.short.QUAST."0"."GC (%)".equals("52.96") assert json.short.short.QUAST."0"."Avg. coverage depth".equals("47") + assert json.short.short.StarAMR."0"."Isolate ID".equals("short_polished") + assert json.short.short.StarAMR."0"."Data Type".equals("MLST") + assert json.short.short.StarAMR."1"."Isolate ID".equals("short_polished") + assert json.short.short.StarAMR."1"."Data Type".equals("Plasmid") + assert json.short.short.StarAMR."2"."Isolate ID".equals("short_polished") + assert json.short.short.StarAMR."2"."Data Type".equals("Resistance") + assert json.short.short.StarAMR."2"."Predicted Phenotype".equals("Sensitive") + + assert json.short.short.CheckM."0"."# genomes".equals("5656") + assert json.short.short.CheckM."0"."# markers".equals("56") + assert json.short.short.CheckM."0"."# marker sets".equals("24") + assert json.short.short.CheckM."0".Contamination.equals("0.00") + + assert json.short.short.SevenGeneMLSTReport[0].filename.equals("short_polished.fasta.gz") + + assert json.short.short.Abricate."0".RESISTANCE.equals("NoData") // All Abricate results for this are "NoData". + def assembly_path = "$launchDir/results/assembly/length_filtered_contigs/short_filtered.fasta.gz" assert path(assembly_path).exists() From 878e806b9f1f8c4d3b7c71f2437b1ba7c62107c5 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Mar 2024 10:01:00 -0500 Subject: [PATCH 3/6] Setting up test profile. --- conf/test.config | 89 +++++++++++++++++++++++++++++++++++++++++++----- nextflow.config | 2 ++ 2 files changed, 83 insertions(+), 8 deletions(-) diff --git a/conf/test.config b/conf/test.config index c662023a..2eee66f8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,15 +15,88 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_memory = "2.GB" + max_cpus = 1 + max_time = '3.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" - // Genome references - genome = 'R64-1-1' + platform = "illumina" + + mash { + singularity = "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1" + docker = 'quay.io/biocontainers/mash:2.3--he348c14_1' + // going forward labeled with _ext include '.' + mash_ext = ".screen" //TODO refactor out of utility workflow + output_reads_ext = ".reads.screen" + output_taxa_ext = ".taxa.screen" + output_dir = "contamination" + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + sketch_ext = ".msh" + json_ext = ".json" + sketch_kmer_size = 21 // defualt param in mash + min_kmer = 1 + final_sketch_name = "GTDB_sketch" + report_tag = "Mash" + header_p = false + headers = ["identity", "Shared Hashes", "Median Multiplicity", "P-Value", "Query ID", "Query Note"] + } + + r_contaminants { + // container contains minimap2 and samtools + singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0" + docker = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0" + phix_fa = "" + homo_sapiens_fa = "" + pacbio_mg = "" + output_ext = ".cleaned.fastq.gz" + mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + mm2_illumina = "-x sr" // ax outputs sam + mm2_pac = "-x map-pb" + mm2_ont = "-x map-ont" + mm2_output_ext = ".sam" + samtools_output_ext = ".fastq" + samtools_singletons_ext = ".singleton.fq" + samtools_output_suffix = "deconned" + output_ext = "${samtools_output_ext}.gz" + output_dir = "${params.mash.output_dir}/deconned_reads" + } + + fastp { + fastq_ext = ".trimmed.fastq.gz" + singularity = 'https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2' + docker = 'quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2' + html_ext = ".html" + json_ext = ".json" + report_tag = "FastP" + average_quality_e = 25 // The -e option in fastp for if the average quality is less than specified then read/read-pair is discarded + cut_tail_mean_quality = 15 + cut_tail_window_size = 4 // default is 4 + complexity_threshold = 20 // FastP default is 30 not 20 + qualified_quality_phred = 15 // min quality for a read to contain + unqualified_percent_limit = 40 // if the minimum quality of a read is below 10 that read is discarded + polyg_min_len = 10 + polyx_min_len = 10 + illumina_length_min = 35 + illumina_length_max = 400 + single_end_length_min = 1000 + dedup_reads = false + // -c is used to apply overlap analysis and cut out out adapters + args { + illumina = "-Q" + single_end = "--overrepresentation_analysis -Q --length_required ${params.fastp.single_end_length_min}" + } + report_exclude_fields = ["content_curves", "quality_curves", "mean", "kmer_count", "histogram", "overrepresented_sequences"] + } + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false } diff --git a/nextflow.config b/nextflow.config index 042f49e0..94ad1225 100644 --- a/nextflow.config +++ b/nextflow.config @@ -919,6 +919,8 @@ profiles { test_stub {includeConfig 'conf/test_stub.config'} // Remove when merge into main test_samplesheet {includeConfig 'conf/test_sample_sheet.config'} + + test { includeConfig 'conf/test.config' } } plugins { From f5f3fb6af9cb9614af3f4d6c56ceba76853bb8dc Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Mar 2024 10:09:56 -0500 Subject: [PATCH 4/6] Harmonizing mash parameters. --- tests/main.nf.test | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index 69f7b2dd..8c1d5351 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -13,12 +13,16 @@ nextflow_pipeline { platform = "illumina" - mash { mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" } + mash + { + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + min_kmer = 1 + } + r_contaminants { mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" } fastp { args { illumina = "-Q"} } min_reads = 100 - mash { min_kmer = 1 } skip_bakta = true skip_staramr = true @@ -71,12 +75,16 @@ nextflow_pipeline { platform = "illumina" - mash { mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" } + mash + { + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + min_kmer = 1 + } + r_contaminants { mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" } fastp { args { illumina = "-Q"} } min_reads = 100 - mash { min_kmer = 1 } skip_bakta = true skip_staramr = false From 3ad8a5f1024d41be28ac432e52dc8a32e2b8a46a Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Mar 2024 10:29:19 -0500 Subject: [PATCH 5/6] Adding test profile CI action. --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b645bd1e..d521d36b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,4 +54,7 @@ jobs: run: | nf-test test + - name: Nextflow run with test profile + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results From 865fe4dfacdb6a998c581462e47205ff09b3cbd6 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Mar 2024 16:10:30 -0500 Subject: [PATCH 6/6] Removing unneeded statements. --- conf/test.config | 68 +++--------------------------------------------- 1 file changed, 4 insertions(+), 64 deletions(-) diff --git a/conf/test.config b/conf/test.config index 2eee66f8..c2ff5451 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,72 +25,12 @@ params { platform = "illumina" - mash { - singularity = "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1" - docker = 'quay.io/biocontainers/mash:2.3--he348c14_1' - // going forward labeled with _ext include '.' - mash_ext = ".screen" //TODO refactor out of utility workflow - output_reads_ext = ".reads.screen" - output_taxa_ext = ".taxa.screen" - output_dir = "contamination" - mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" - sketch_ext = ".msh" - json_ext = ".json" - sketch_kmer_size = 21 // defualt param in mash - min_kmer = 1 - final_sketch_name = "GTDB_sketch" - report_tag = "Mash" - header_p = false - headers = ["identity", "Shared Hashes", "Median Multiplicity", "P-Value", "Query ID", "Query Note"] - } + mash.mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + mash.min_kmer = 1 - r_contaminants { - // container contains minimap2 and samtools - singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0" - docker = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0" - phix_fa = "" - homo_sapiens_fa = "" - pacbio_mg = "" - output_ext = ".cleaned.fastq.gz" - mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" - mm2_illumina = "-x sr" // ax outputs sam - mm2_pac = "-x map-pb" - mm2_ont = "-x map-ont" - mm2_output_ext = ".sam" - samtools_output_ext = ".fastq" - samtools_singletons_ext = ".singleton.fq" - samtools_output_suffix = "deconned" - output_ext = "${samtools_output_ext}.gz" - output_dir = "${params.mash.output_dir}/deconned_reads" - } - - fastp { - fastq_ext = ".trimmed.fastq.gz" - singularity = 'https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2' - docker = 'quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2' - html_ext = ".html" - json_ext = ".json" - report_tag = "FastP" - average_quality_e = 25 // The -e option in fastp for if the average quality is less than specified then read/read-pair is discarded - cut_tail_mean_quality = 15 - cut_tail_window_size = 4 // default is 4 - complexity_threshold = 20 // FastP default is 30 not 20 - qualified_quality_phred = 15 // min quality for a read to contain - unqualified_percent_limit = 40 // if the minimum quality of a read is below 10 that read is discarded - polyg_min_len = 10 - polyx_min_len = 10 - illumina_length_min = 35 - illumina_length_max = 400 - single_end_length_min = 1000 - dedup_reads = false - // -c is used to apply overlap analysis and cut out out adapters - args { - illumina = "-Q" - single_end = "--overrepresentation_analysis -Q --length_required ${params.fastp.single_end_length_min}" - } - report_exclude_fields = ["content_curves", "quality_curves", "mean", "kmer_count", "histogram", "overrepresented_sequences"] - } + r_contaminants.mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + fastp.args.illumina = "-Q" min_reads = 100 skip_bakta = true