phac-nml · mattheww95 · May 2, 2024 · May 1, 2024 · May 1, 2024 · May 1, 2024
diff --git a/conf/test.config b/conf/test.config
@@ -25,10 +25,10 @@ params {
 
     platform = "illumina"
 
-    mash.mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
+    mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
     mash.min_kmer = 1
 
-    r_contaminants.mega_mm2_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
+    dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
 
     fastp.args.illumina = "-Q"
     min_reads = 100

diff --git a/nextflow.config b/nextflow.config
@@ -69,10 +69,10 @@ params {
 
 
     // Datasets
-    dehosting_idx = "./databases/PhiPacHum_m2.idx" // mm2 index
-    mash_sketch = "./databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings
-    bakta_db = "./databases/db-light"
-    kraken2_db = "./databases/k2_standard_20220607/"
+    dehosting_idx = null // mm2 index
+    mash_sketch = null // Make sure comments are formatted as taxonomic strings
+    bakta_db = null
+    kraken2_db = null
     staramr_db = null // Recommended usage is to use the default database in the container
 
 

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -76,36 +76,37 @@
             "properties": {
                 "dehosting_idx": {
                     "type": "string",
-                    "default": "./databases/PhiPacHum_m2.idx",
                     "description": "Minimap2 index for dehosting and kitome removal",
                     "pattern": "^\\S+$",
+                    "exists": true,
                     "format": "file-path"
                 },
                 "mash_sketch": {
                     "type": "string",
-                    "default": "./databases/GTDBSketch_20231003.msh",
                     "description": "Mash sketch used for contamination detection and speciation (sketch comments must be a taxonomic string similar to what Kraken2 outputs)",
                     "pattern": "^\\S+$",
+                    "exists": true,
                     "format": "file-path"
                 },
                 "bakta_db": {
                     "type": "string",
                     "description": "Database used for Bakta; this value is optional as Bakta can be skipped",
-                    "default": "./databases/db-light",
                     "pattern": "^\\S+$",
+                    "exists": true,
                     "format": "directory-path"
                 },
                 "kraken2_db": {
                     "type": "string",
-                    "default": "./databases/k2_standard_20220607/",
                     "description": "Kraken2 database",
                     "pattern": "^\\S+$",
+                    "exists": true,
                     "format": "directory-path"
                 },
                 "staramr_db": {
                     "type": "string",
                     "description": "It is recommended to use the StarAMR database in the StarAMR container; however, an external database can be specified",
                     "pattern": "^\\S+$",
+                    "exists": true,
                     "format": "directory-path",
                     "hidden": true
                 }

diff --git a/subworkflows/local/annotate_genomes.nf b/subworkflows/local/annotate_genomes.nf
@@ -28,7 +28,7 @@ workflow ANNOTATE_GENOMES {
     txt = channel.empty()
     abricate_report = channel.empty()
 
-    if(!params.skip_bakta){
+    if(!params.skip_bakta && params.bakta.db){
         db_file = Channel.value("${params.bakta.db}")
         annotated = BAKTA_ANNOTATE(contig_data, db_file,
             [], [], [], [], [], []) // empty channels for optional arguments

diff --git a/subworkflows/local/clean_reads.nf b/subworkflows/local/clean_reads.nf
@@ -29,7 +29,7 @@ workflow QC_READS {
 
     // TODO add in code to check that there are always enough reads left over after decontamination
     // TODO need to make sure that if one read is unmapped the other is not included as well
-    deconned_reads = REMOVE_CONTAMINANTS(reads, file(params.r_contaminants.mega_mm2_idx), Channel.value(platform_comp))
+    deconned_reads = REMOVE_CONTAMINANTS(reads, params.r_contaminants.mega_mm2_idx ? file(params.r_contaminants.mega_mm2_idx) : Channel.empty(), Channel.value(platform_comp))
     versions = versions.mix(REMOVE_CONTAMINANTS.out.versions)
 
 
@@ -139,7 +139,7 @@ workflow QC_READS {
         ch_prepped_reads = filtered_samples // put in un-downsampled reads
     }
 
-    mash_screen_out = MASH_SCREEN(ch_prepped_reads, file(params.mash.mash_sketch))
+    mash_screen_out = MASH_SCREEN(ch_prepped_reads, params.mash.mash_sketch ? file(params.mash.mash_sketch) : Channel.empty())
 
     versions = versions.mix(mash_screen_out.versions)
 

diff --git a/subworkflows/local/determine_species.nf b/subworkflows/local/determine_species.nf
@@ -40,7 +40,7 @@ workflow DETERMINE_SPECIES {
 
     }else {
         log.info "Using mash screen for sample classification"
-        MASH_SCREEN(contigs, file(params.mash.mash_sketch))
+        MASH_SCREEN(contigs, params.mash.mash_sketch ? file(params.mash.mash_sketch) : Channel.empty())
         results = results.mix(MASH_SCREEN.out.mash_data)
 
         parsed = PARSE_MASH(MASH_SCREEN.out.mash_data, Channel.value("top"))

diff --git a/subworkflows/local/split_metagenomic.nf b/subworkflows/local/split_metagenomic.nf
@@ -27,7 +27,7 @@ workflow SPLIT_METAGENOMIC {
     contigs = contigs.map{
         meta, contigs, reads -> tuple(meta, contigs)
     }
-    kraken_out = KRAKEN(contigs, file(params.kraken.db))
+    kraken_out = KRAKEN(contigs, params.kraken.db ? file(params.kraken.db): Channel.empty())
     staged_kraken_data = kraken_out.classified_contigs.join(kraken_out.report).join(kraken_out.kraken_output)
 
     binned_data = BIN_KRAKEN2(staged_kraken_data, Channel.value(params.kraken_bin.taxonomic_level))