Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Additional Testing #42

Merged
merged 6 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,7 @@ jobs:
run: |
nf-test test

- name: Nextflow run with test profile
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results

89 changes: 81 additions & 8 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,88 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
max_memory = "2.GB"
max_cpus = 1
max_time = '3.h'

// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv"
outdir = "results"

// Genome references
genome = 'R64-1-1'
platform = "illumina"

mash {
apetkau marked this conversation as resolved.
Show resolved Hide resolved
singularity = "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1"
docker = 'quay.io/biocontainers/mash:2.3--he348c14_1'
// going forward, values of settings labeled with _ext include the leading '.'
mash_ext = ".screen" //TODO refactor out of utility workflow
output_reads_ext = ".reads.screen"
output_taxa_ext = ".taxa.screen"
output_dir = "contamination"
mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
sketch_ext = ".msh"
json_ext = ".json"
sketch_kmer_size = 21 // default param in mash
min_kmer = 1
final_sketch_name = "GTDB_sketch"
report_tag = "Mash"
header_p = false
headers = ["identity", "Shared Hashes", "Median Multiplicity", "P-Value", "Query ID", "Query Note"]
}

// Settings for the r_contaminants scope in the test profile: container images,
// reference/index locations, per-platform minimap2 presets, and output naming.
r_contaminants {
// container contains minimap2 and samtools
// NOTE(review): the singularity and docker image tags differ after the ':' —
// confirm both refer to the intended build.
singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0"
docker = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0"
// Individual reference paths are left empty in this test configuration.
phix_fa = ""
homo_sapiens_fa = ""
pacbio_mg = ""
// NOTE(review): output_ext is assigned a second time further down in this scope;
// presumably the later assignment takes effect — confirm and remove one of them.
output_ext = ".cleaned.fastq.gz"
// Small minimap2 index fetched from the repository's test data.
mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
// minimap2 preset arguments, one per sequencing platform.
mm2_illumina = "-x sr" // ax outputs sam
mm2_pac = "-x map-pb"
mm2_ont = "-x map-ont"
mm2_output_ext = ".sam"
samtools_output_ext = ".fastq"
samtools_singletons_ext = ".singleton.fq"
samtools_output_suffix = "deconned"
// Built from samtools_output_ext above, so it must stay after that assignment.
output_ext = "${samtools_output_ext}.gz"
// Nested under the output directory configured in the mash scope.
output_dir = "${params.mash.output_dir}/deconned_reads"
}

// Settings for the fastp scope in the test profile: container images, output
// extensions, report tag, filtering thresholds, and per-mode argument strings.
fastp {
fastq_ext = ".trimmed.fastq.gz"
singularity = 'https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2'
docker = 'quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2'
html_ext = ".html"
json_ext = ".json"
report_tag = "FastP"
average_quality_e = 25 // The -e option in fastp: a read/read-pair is discarded if its average quality is less than the specified value
cut_tail_mean_quality = 15
cut_tail_window_size = 4 // default is 4
complexity_threshold = 20 // FastP default is 30 not 20
qualified_quality_phred = 15 // min quality for a read to contain
// NOTE(review): the comment below mentions a quality of 10 while the value set here is 40 — confirm the intended meaning.
unqualified_percent_limit = 40 // if the minimum quality of a read is below 10 that read is discarded
polyg_min_len = 10
polyx_min_len = 10
illumina_length_min = 35
illumina_length_max = 400
single_end_length_min = 1000
dedup_reads = false
// -c is used to apply overlap analysis and cut out adapters
// Argument strings per mode; single_end interpolates params.fastp.single_end_length_min
// (presumably the value set above in this scope — confirm).
args {
illumina = "-Q"
single_end = "--overrepresentation_analysis -Q --length_required ${params.fastp.single_end_length_min}"
}
// Field names listed for exclusion from the report.
report_exclude_fields = ["content_curves", "quality_curves", "mean", "kmer_count", "histogram", "overrepresented_sequences"]
}

min_reads = 100

skip_bakta = true
skip_staramr = false
skip_mobrecon = false
skip_checkm = false
skip_raw_read_metrics = false
skip_polishing = false
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,8 @@ profiles {
test_stub {includeConfig 'conf/test_stub.config'}
// Remove when merge into main
test_samplesheet {includeConfig 'conf/test_sample_sheet.config'}

test { includeConfig 'conf/test.config' }
}

plugins {
Expand Down
50 changes: 40 additions & 10 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ nextflow_pipeline {

platform = "illumina"

mash { mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" }
mash
{
mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
min_kmer = 1
}

r_contaminants { mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" }

fastp { args { illumina = "-Q"} }
min_reads = 100
mash { min_kmer = 1 }

skip_bakta = true
skip_staramr = true
Expand Down Expand Up @@ -71,19 +75,23 @@ nextflow_pipeline {

platform = "illumina"

mash { mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" }
mash
{
mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
min_kmer = 1
}

r_contaminants { mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" }

fastp { args { illumina = "-Q"} }
min_reads = 100
mash { min_kmer = 1 }

skip_bakta = true
skip_staramr = true
skip_mobrecon = true
skip_checkm = true
skip_raw_read_metrics = true
skip_polishing = true
skip_staramr = false
skip_mobrecon = false
skip_checkm = false
skip_raw_read_metrics = false
skip_polishing = false

max_memory = "2.GB"
max_cpus = 1
Expand All @@ -97,6 +105,11 @@ nextflow_pipeline {
// parse output json file
def json = path("$launchDir/results/SummaryReport/final_report.json").json

assert json.short.short.RawReadSummary.R1."total_bp".equals(118750)
assert json.short.short.RawReadSummary.R1."total_reads".equals(475)
assert json.short.short.RawReadSummary.R1."read_qual_mean".equals(40.0)
assert json.short.short.RawReadSummary.R1."mean_sequence_length".equals(250.0)

assert json.short.short.FastP.summary.sequencing.equals("paired end (250 cycles + 250 cycles)")
assert json.short.short.FastP.summary.before_filtering.total_reads.equals(950)
assert json.short.short.FastP.filtering_result.passed_filter_reads.equals(950)
Expand All @@ -116,12 +129,29 @@ nextflow_pipeline {
assert json.short.short.QUAST."0"."GC (%)".equals("52.96")
assert json.short.short.QUAST."0"."Avg. coverage depth".equals("47")

assert json.short.short.StarAMR."0"."Isolate ID".equals("short_polished")
assert json.short.short.StarAMR."0"."Data Type".equals("MLST")
assert json.short.short.StarAMR."1"."Isolate ID".equals("short_polished")
assert json.short.short.StarAMR."1"."Data Type".equals("Plasmid")
assert json.short.short.StarAMR."2"."Isolate ID".equals("short_polished")
assert json.short.short.StarAMR."2"."Data Type".equals("Resistance")
assert json.short.short.StarAMR."2"."Predicted Phenotype".equals("Sensitive")

assert json.short.short.CheckM."0"."# genomes".equals("5656")
assert json.short.short.CheckM."0"."# markers".equals("56")
assert json.short.short.CheckM."0"."# marker sets".equals("24")
assert json.short.short.CheckM."0".Contamination.equals("0.00")

assert json.short.short.SevenGeneMLSTReport[0].filename.equals("short_polished.fasta.gz")

assert json.short.short.Abricate."0".RESISTANCE.equals("NoData") // All Abricate results for this are "NoData".

def assembly_path = "$launchDir/results/assembly/length_filtered_contigs/short_filtered.fasta.gz"
assert path(assembly_path).exists()

// parse assembly file
def assembly_header = path(assembly_path).linesGzip[0]
assert assembly_header.equals(">NODE_1_length_4949_cov_23.917254")
assert assembly_header.startsWith(">NODE_1_length_4949_cov_23.917254") // _pilon_pilon_pilon gets appended

// compare IRIDA Next JSON output
def iridanext_json = path("$launchDir/results/iridanext.output.json").json
Expand Down
Loading