From 7508612bc1c1511244d7101d274b016abcd01b58 Mon Sep 17 00:00:00 2001
From: Dani VM <da.valle@isciii.es>
Date: Wed, 20 Sep 2023 13:44:55 +0200
Subject: [PATCH 1/4] fix doc samplesheet file extension

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 48f0d4a8..d4f57207 100644
--- a/README.md
+++ b/README.md
@@ -52,9 +52,9 @@ In all cases, the assembly is assessed using [QUAST](http://bioinf.spbau.ru/quas
 
 First, prepare a samplesheet with your input data that looks as follows:
 
-`samplesheet.tsv`:
+`samplesheet.csv`:
 
-```tsv
+```csv
 ID    R1                            R2                            LongFastQ                Fast5              GenomeSize
 shortreads    ./data/S1_R1.fastq.gz    ./data/S1_R2.fastq.gz    NA                    NA                    NA
 longreads      NA                        NA                        ./data/S1_long_fastq.gz    ./data/FAST5    2.8m
@@ -67,13 +67,13 @@ Each row represents a fastq file (single-end) or a pair of fastq files (paired e
 Default: Short read assembly with Unicycler, `--kraken2db` can be any [compressed database (`.tar.gz`/`.tgz`)](https://benlangmead.github.io/aws-indexes/k2):
 
     ```console
-    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.tsv --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
+    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.csv --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
     ```
 
     Long read assembly with Miniasm:
 
     ```console
-    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.tsv --assembly_type 'long' --assembler 'miniasm' --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
+    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.csv --assembly_type 'long' --assembler 'miniasm' --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
     ```
 
 <!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
@@ -81,7 +81,7 @@ Default: Short read assembly with Unicycler, `--kraken2db` can be any [compresse
 ```bash
 nextflow run nf-core/bacass \
    -profile <docker/singularity/.../institute> \
-   --input samplesheet.tsv \
+   --input samplesheet.csv \
    --outdir <OUTDIR>
 ```
 

From d88c7e17dd925169d3c802dfa0100934843398cf Mon Sep 17 00:00:00 2001
From: Dani VM <da.valle@isciii.es>
Date: Thu, 21 Sep 2023 11:33:47 +0200
Subject: [PATCH 2/4] update access to test datasets

---
 README.md                     | 18 +++++++++---------
 conf/test.config              |  2 +-
 conf/test_dfast.config        |  2 +-
 conf/test_full.config         |  2 +-
 conf/test_hybrid.config       |  2 +-
 conf/test_long.config         |  2 +-
 conf/test_long_miniasm.config |  2 +-
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index d4f57207..46b21c62 100644
--- a/README.md
+++ b/README.md
@@ -52,13 +52,13 @@ In all cases, the assembly is assessed using [QUAST](http://bioinf.spbau.ru/quas
 
 First, prepare a samplesheet with your input data that looks as follows:
 
-`samplesheet.csv`:
+`samplesheet.tsv`:
 
-```csv
-ID    R1                            R2                            LongFastQ                Fast5              GenomeSize
-shortreads    ./data/S1_R1.fastq.gz    ./data/S1_R2.fastq.gz    NA                    NA                    NA
-longreads      NA                        NA                        ./data/S1_long_fastq.gz    ./data/FAST5    2.8m
-shortNlong    ./data/S1_R1.fastq.gz    ./data/S1_R2.fastq.gz    ./data/S1_long_fastq.gz    ./data/FAST5    2.8m
+```tsv
+ID      R1                            R2                            LongFastQ                    Fast5    GenomeSize
+shortreads      ./data/S1_R1.fastq.gz       ./data/S1_R2.fastq.gz       NA                            NA      NA
+longreads       NA                          NA                          ./data/S1_long_fastq.gz      ./data/FAST5  2.8m
+shortNlong      ./data/S1_R1.fastq.gz       ./data/S1_R2.fastq.gz       ./data/S1_long_fastq.gz      ./data/FAST5  2.8m
 
 ```
 
@@ -67,13 +67,13 @@ Each row represents a fastq file (single-end) or a pair of fastq files (paired e
 Default: Short read assembly with Unicycler, `--kraken2db` can be any [compressed database (`.tar.gz`/`.tgz`)](https://benlangmead.github.io/aws-indexes/k2):
 
     ```console
-    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.csv --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
+    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.tsv --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
     ```
 
     Long read assembly with Miniasm:
 
     ```console
-    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.csv --assembly_type 'long' --assembler 'miniasm' --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
+    nextflow run nf-core/bacass -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.tsv --assembly_type 'long' --assembler 'miniasm' --kraken2db "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz"
     ```
 
 <!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
@@ -81,7 +81,7 @@ Default: Short read assembly with Unicycler, `--kraken2db` can be any [compresse
 ```bash
 nextflow run nf-core/bacass \
    -profile <docker/singularity/.../institute> \
-   --input samplesheet.csv \
+   --input samplesheet.tsv \
    --outdir <OUTDIR>
 ```
 
diff --git a/conf/test.config b/conf/test.config
index be18c00e..c827fd2d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -20,7 +20,7 @@ params {
     max_time   = '6.h'
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.tsv'
 
     // some extra args to speed tests up
     unicycler_args="--no_correct --no_pilon"
diff --git a/conf/test_dfast.config b/conf/test_dfast.config
index 2ab10265..b1b02c4b 100644
--- a/conf/test_dfast.config
+++ b/conf/test_dfast.config
@@ -20,7 +20,7 @@ params {
     max_time   = 6.h
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.tsv'
 
     // some extra args to speed tests up
     unicycler_args="--no_correct --no_pilon"
diff --git a/conf/test_full.config b/conf/test_full.config
index e10e6a13..9432d763 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -15,6 +15,6 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function'
 
     // Input data for full size test
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_full.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_full.tsv'
     kraken2db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz'
 }
diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config
index cd93e699..c27563a8 100644
--- a/conf/test_hybrid.config
+++ b/conf/test_hybrid.config
@@ -20,7 +20,7 @@ params {
     max_time   = 6.h
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_hybrid.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_hybrid.tsv'
 
     // some extra args to speed tests up
     assembly_type='hybrid'
diff --git a/conf/test_long.config b/conf/test_long.config
index be225894..e722aae8 100644
--- a/conf/test_long.config
+++ b/conf/test_long.config
@@ -20,7 +20,7 @@ params {
     max_time   = 6.h
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.tsv'
 
     // some extra args to speed tests up
     prokka_args = " --fast"
diff --git a/conf/test_long_miniasm.config b/conf/test_long_miniasm.config
index a68d3124..07af1a2c 100644
--- a/conf/test_long_miniasm.config
+++ b/conf/test_long_miniasm.config
@@ -20,7 +20,7 @@ params {
     max_time   = 6.h
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.tsv'
 
     // some extra args to speed tests up
     prokka_args = " --fast"

From d512b458f769dbc96014778d487bf95723513455 Mon Sep 17 00:00:00 2001
From: Dani VM <da.valle@isciii.es>
Date: Fri, 22 Sep 2023 11:39:21 +0200
Subject: [PATCH 3/4] add nf-validation on samplesheet

---
 assets/schema_input.json          | 72 ++++++++++++++++++++++----
 nextflow_schema.json              | 67 +++++++++++++++++-------
 subworkflows/local/input_check.nf | 86 -------------------------------
 workflows/bacass.nf               | 52 +++++++++++++------
 4 files changed, 146 insertions(+), 131 deletions(-)
 delete mode 100644 subworkflows/local/input_check.nf

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 9146feaa..a34ad666 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -7,22 +7,76 @@
     "items": {
         "type": "object",
         "properties": {
-            "sample": {
+            "ID": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Sample name must be provided and cannot contain spaces"
+                "unique": true,
+                "errorMessage": "Sample name must be provided and cannot contain spaces",
+                "meta": ["id"]
             },
-            "fastq_1": {
-                "type": "string",
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+            "R1": {
+                "errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
+                "anyOf": [
+                    {
+                        "type": ["string", "null"],
+                        "exists": true,
+                        "pattern": "^(\\S+\\.f(ast)?q\\.gz|NA)$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
             },
-            "fastq_2": {
+            "R2": {
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
                 "anyOf": [
+                    {
+                        "type": ["string", "null"],
+                        "exists": true,
+                        "pattern": "^(\\S+\\.f(ast)?q\\.gz|NA)$"
+                    },
                     {
                         "type": "string",
-                        "pattern": "^\\S+\\.f(ast)?q\\.gz$"
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "LongFastQ": {
+                "errorMessage": "FastQ file for long reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
+                "anyOf": [
+                    {
+                        "type": ["string", "null"],
+                        "exists": true,
+                        "pattern": "^(\\S+\\.f(ast)?q\\.gz|NA)$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "Fast5": {
+                "errorMessage": "A valid path to Fast5 files. Example: ./data/FAST5",
+                "anyOf": [
+                    {
+                        "type": ["string", "null"],
+                        "format": "directory-path",
+                        "exists": true,
+                        "pattern": "^(\\/[\\S\\s]*|NA)$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "GenomeSize": {
+                "errorMessage": "A number (including decimals) ending with 'm', representing genome size. No spaces allowed.",
+                "anyOf": [
+                    {
+                        "type": ["string", "null"],
+                        "pattern": "^(\\d+(\\.\\d+)?m|NA)$"
                     },
                     {
                         "type": "string",
@@ -31,6 +85,6 @@
                 ]
             }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["ID"]
     }
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 119f5403..45cf2d48 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -14,13 +14,11 @@
             "properties": {
                 "input": {
                     "type": "string",
-                    "format": "file-path",
-                    "exists": true,
-                    "mimetype": "text/csv",
-                    "pattern": "^\\S+\\.csv$",
-                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
-                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a tab-separated file with 6 columns, and a header row. See [usage docs](https://nf-co.re/bacass/usage#samplesheet-input).\n\nFor example:\n\n`--input 'design_hybrid.csv'`\n\nAn example of properly formatted input files can be found at the [nf-core/test-datasets](https://github.com/nf-core/test-datasets/tree/bacass). \n\nFor example, this is the input used for a hybrid assembly in testing:\nID R1 R2 LongFastQ Fast5 GenomeSize\nERR044595 https://github.com/nf-core/test-datasets/raw/bacass/ERR044595_1M_1.fastq.gz https://github.com/nf-core/test-datasets/raw/bacass/ERR044595_1M_2.fastq.gz https://github.com/nf-core/test-datasets/raw/bacass/nanopore/subset15000.fq.gz NA 2.8m\n\n* `ID`: The identifier to use for handling the dataset e.g. sample name\n* `R1`: The forward reads in case of available short-read data\n* `R2`: The reverse reads in case of available short-read data\n* `LongFastQ`: The long read FastQ file with reads in FASTQ format\n* `Fast5`: The folder containing the basecalled fast5 files\n* `GenomeSize`: The expected genome size of the assembly. Only used by the canu assembler.\n\nMissing values (e.g. Fast5 folder in case of short reads) can be omitted by using a `NA` in the TSV file. The pipeline will handle such cases appropriately then.",
-                    "fa_icon": "fas fa-file-csv"
+                    "mimetype": "text/tsv",
+                    "fa_icon": "fas fa-dna",
+                    "description": "Path to tab-separated sample sheet",
+                    "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)",
+                    "schema": "assets/schema_input.json"
                 },
                 "outdir": {
                     "type": "string",
@@ -52,8 +50,14 @@
                     "type": "boolean",
                     "description": "save all merged reads to the a file ending in `*.merged.fastq.gz`"
                 },
-                "skip_fastqc": { "type": "boolean", "description": "Skip FastQC" },
-                "skip_fastp": { "type": "boolean", "description": "Skip FastP" }
+                "skip_fastqc": {
+                    "type": "boolean",
+                    "description": "Skip FastQC"
+                },
+                "skip_fastp": {
+                    "type": "boolean",
+                    "description": "Skip FastP"
+                }
             }
         },
         "contamination_screening": {
@@ -177,7 +181,10 @@
                     "fa_icon": "fas fa-forward",
                     "description": "Skip polishing the long-read assembly with fast5 input. Will not affect short/hybrid assemblies."
                 },
-                "skip_multiqc": { "type": "boolean", "description": "Skip MultiQC" }
+                "skip_multiqc": {
+                    "type": "boolean",
+                    "description": "Skip MultiQC"
+                }
             }
         },
         "institutional_config_options": {
@@ -387,15 +394,35 @@
         }
     },
     "allOf": [
-        { "$ref": "#/definitions/input_output_options" },
-        { "$ref": "#/definitions/qc_and_trim" },
-        { "$ref": "#/definitions/contamination_screening" },
-        { "$ref": "#/definitions/assembly_parameters" },
-        { "$ref": "#/definitions/assembly_polishing" },
-        { "$ref": "#/definitions/annotation" },
-        { "$ref": "#/definitions/skipping_options" },
-        { "$ref": "#/definitions/institutional_config_options" },
-        { "$ref": "#/definitions/max_job_request_options" },
-        { "$ref": "#/definitions/generic_options" }
+        {
+            "$ref": "#/definitions/input_output_options"
+        },
+        {
+            "$ref": "#/definitions/qc_and_trim"
+        },
+        {
+            "$ref": "#/definitions/contamination_screening"
+        },
+        {
+            "$ref": "#/definitions/assembly_parameters"
+        },
+        {
+            "$ref": "#/definitions/assembly_polishing"
+        },
+        {
+            "$ref": "#/definitions/annotation"
+        },
+        {
+            "$ref": "#/definitions/skipping_options"
+        },
+        {
+            "$ref": "#/definitions/institutional_config_options"
+        },
+        {
+            "$ref": "#/definitions/max_job_request_options"
+        },
+        {
+            "$ref": "#/definitions/generic_options"
+        }
     ]
 }
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
deleted file mode 100644
index eec3f75a..00000000
--- a/subworkflows/local/input_check.nf
+++ /dev/null
@@ -1,86 +0,0 @@
-//
-// Check input samplesheet and get read channels
-//
-
-params.options = [:]
-
-workflow INPUT_CHECK {
-    take:
-    samplesheet // file: /path/to/samplesheet.csv
-
-    main:
-    Channel
-        .fromPath( samplesheet )
-        .ifEmpty {exit 1, "Cannot find path file $samplesheet"}
-        .splitCsv ( header:true, sep:'\t' )
-        .map { create_fastq_channels(it) }
-        .set { reads }
-
-    // reconfigure channels
-    reads
-        .map { meta, reads, long_fastq, fast5 -> [ meta, reads ] }
-        .filter{ meta, reads -> reads != 'NA' }
-        .filter{ meta, reads -> reads[0] != 'NA' && reads[1] != 'NA' }
-        .set { shortreads }
-    reads
-        .map { meta, reads, long_fastq, fast5 -> [ meta, long_fastq ] }
-        .filter{ meta, long_fastq -> long_fastq != 'NA' }
-        .set { longreads }
-    reads
-        .map { meta, reads, long_fastq, fast5 -> [ meta, fast5 ] }
-        .filter{ meta, fast5 -> fast5 != 'NA' }
-        .set { fast5 }
-
-    emit:
-    reads      // channel: [ val(meta), [ reads ], long_fastq, fast5 ]
-    shortreads // channel: [ val(meta), [ reads ] ]
-    longreads  // channel: [ val(meta), long_fastq ]
-    fast5      // channel: [ val(meta), fast5 ]
-}
-
-// Function to get list of [ meta, [ fastq_1, fastq_2 ], long_fastq, fast5 ]
-def create_fastq_channels(LinkedHashMap row) {
-    def meta = [:]
-    meta.id           = row.ID
-    meta.single_end   = false
-    meta.genome_size  = row.GenomeSize == null ? 'NA' : row.GenomeSize
-
-    def array = []
-    // check short reads
-    if ( !(row.R1 == 'NA') ) {
-        if ( !file(row.R1).exists() ) {
-            exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.R1}"
-        }
-        fastq_1 = file(row.R1)
-    } else { fastq_1 = 'NA' }
-    if ( !(row.R2 == 'NA') ) {
-        if ( !file(row.R2).exists() ) {
-            exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.R2}"
-        }
-        fastq_2 = file(row.R2)
-    } else { fastq_2 = 'NA' }
-
-    // check long_fastq
-    if ( !(row.LongFastQ == 'NA') ) {
-        if ( !file(row.LongFastQ).exists() ) {
-            exit 1, "ERROR: Please check input samplesheet -> Long FastQ file does not exist!\n${row.R1}"
-        }
-        long_fastq = file(row.LongFastQ)
-    } else { long_fastq = 'NA' }
-
-    // check long_fastq
-    if ( !(row.Fast5 == 'NA') ) {
-        if ( !file(row.Fast5).exists() ) {
-            exit 1, "ERROR: Please check input samplesheet -> Fast5 file does not exist!\n${row.R1}"
-        }
-        fast5 = file(row.Fast5)
-    } else { fast5 = 'NA' }
-
-    // prepare output // currently does not allow single end data!
-    if ( meta.single_end ) {
-        array = [ meta, fastq_1 , long_fastq, fast5 ]
-    } else {
-        array = [ meta, [ fastq_1, fastq_2 ], long_fastq, fast5 ]
-    }
-    return array
-}
diff --git a/workflows/bacass.nf b/workflows/bacass.nf
index 5b6c5eb0..1ae3cb3e 100644
--- a/workflows/bacass.nf
+++ b/workflows/bacass.nf
@@ -4,7 +4,7 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
 
 def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
 def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
@@ -25,9 +25,6 @@ WorkflowBacass.initialise(params, log)
 def checkPathParamList = [ params.input, params.multiqc_config, params.kraken2db, params.dfast_config ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
 
-// Check mandatory parameters
-if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
-
 // Check krakendb
 if(! params.skip_kraken2){
     if(params.kraken2db){
@@ -67,7 +64,6 @@ include { DFAST                     } from '../modules/local/dfast'
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { INPUT_CHECK               } from '../subworkflows/local/input_check'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -117,18 +113,42 @@ workflow BACASS {
     //
     // SUBWORKFLOW: Read in samplesheet, validate and stage input files
     //
-    INPUT_CHECK (
-        file(params.input)
-    )
-    // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
-    // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
-    // ! There is currently no tooling to help you write a sample sheet schema
+    if (params.input) {
+       def criteria = multiMapCriteria {
+            meta, fastq_1, fastq_2, long_fastq, fast5, genome_size ->
+                shortreads: fastq_1 != 'NA' ? tuple(tuple(meta, [fastq_1, fastq_2])) : null
+                longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null
+                fast5: fast5 != 'NA' ? tuple(meta, fast5) : null
+        }
+
+        // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
+        Channel
+            .fromSamplesheet('input')
+            .multiMap (criteria)
+            .set { ch_input }
+
+        // reconfigure channels
+        ch_input
+            .shortreads
+            .filter{ it != null }
+            .set { ch_shortreads }
+        ch_input
+            .longreads
+            .filter{ it != null }
+            .set { ch_longreads }
+        ch_input
+            .fast5
+            .filter{ it != null }
+            .set { ch_fast5 }
+    } else {
+        exit 1, 'Input samplesheet not specified!'
+    }
 
     //
     // SUBWORKFLOW: Short reads QC and trim adapters
     //
     FASTQ_TRIM_FASTP_FASTQC (
-        INPUT_CHECK.out.shortreads,
+        ch_shortreads,
         [],
         params.save_trimmed_fail,
         params.save_merged,
@@ -141,7 +161,7 @@ workflow BACASS {
     // MODULE: Nanoplot, quality check for nanopore reads and Quality/Length Plots
     //
     NANOPLOT (
-        INPUT_CHECK.out.longreads
+        ch_longreads
     )
     ch_versions = ch_versions.mix(NANOPLOT.out.versions.ifEmpty(null))
 
@@ -151,7 +171,7 @@ workflow BACASS {
     // TODO: Couldn't be tested. No configuration test available (lack of fast5 file or params.skip_pycoqc=false).
     if ( !params.skip_pycoqc ) {
         PYCOQC (
-            INPUT_CHECK.out.fast5.dump(tag: 'fast5')
+            ch_fast5.dump(tag: 'fast5')
         )
         versions = ch_versions.mix(PYCOQC.out.versions.ifEmpty(null))
     }
@@ -161,7 +181,7 @@ workflow BACASS {
     //
     if ( params.assembly_type == 'hybrid' || params.assembly_type == 'long' && !('short' in params.assembly_type) ) {
         PORECHOP_PORECHOP (
-            INPUT_CHECK.out.longreads.dump(tag: 'longreads')
+            ch_longreads.dump(tag: 'longreads')
         )
         ch_versions = ch_versions.mix( PORECHOP_PORECHOP.out.versions.ifEmpty(null) )
     }
@@ -295,7 +315,7 @@ workflow BACASS {
         ch_for_polish    // tuple val(meta), val(reads), file(longreads), file(assembly)
             .join( MINIMAP2_POLISH.out.bam )    // tuple val(meta), file(bam)
             .join( SAMTOOLS_INDEX.out.bai )     // tuple  val(meta), file(bai)
-            .join( INPUT_CHECK.out.fast5 )      // tuple val(meta), file(fast5)
+            .join( ch_fast5 )             // tuple val(meta), file(fast5)
             .set { ch_for_nanopolish }          // tuple val(meta), val(reads), file(longreads), file(assembly), file(bam), file(bai), file(fast5)
 
         // TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.

From 8ba1df5ecc39e49012338710022c433a276b8688 Mon Sep 17 00:00:00 2001
From: Dani VM <da.valle@isciii.es>
Date: Fri, 22 Sep 2023 16:12:03 +0200
Subject: [PATCH 4/4] add reviewer suggestions

---
 nextflow_schema.json |  3 ++-
 workflows/bacass.nf  | 52 ++++++++++++++++++++------------------------
 2 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 45cf2d48..a8ccce98 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -14,10 +14,11 @@
             "properties": {
                 "input": {
                     "type": "string",
+                    "exists": true,
                     "mimetype": "text/tsv",
                     "fa_icon": "fas fa-dna",
                     "description": "Path to tab-separated sample sheet",
-                    "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)",
+                    "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have six tab-separated columns/entries with the following headers: \n- `ID` (required): Unique sample IDs, cannot contain spaces\n- `R1` (optional): Paths to (forward) reads zipped FastQ files\n- `R2` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `LongFastQ` (optional): Paths to long reads zipped FastQ files\n- `Fast5` (optional): Paths to the directory containing FAST5 files\n- `GenomeSize` (optional): A number (including decimals) ending with 'm', representing genome size.\n\nPlease be aware that files will be required based on the chosen assembly type specified with the '--assembly_type' option, which can be set to one of the following values: ['short', 'long', 'hybrid'].",
                     "schema": "assets/schema_input.json"
                 },
                 "outdir": {
diff --git a/workflows/bacass.nf b/workflows/bacass.nf
index 1ae3cb3e..34bdf48f 100644
--- a/workflows/bacass.nf
+++ b/workflows/bacass.nf
@@ -113,36 +113,30 @@ workflow BACASS {
     //
     // SUBWORKFLOW: Read in samplesheet, validate and stage input files
     //
-    if (params.input) {
-       def criteria = multiMapCriteria {
-            meta, fastq_1, fastq_2, long_fastq, fast5, genome_size ->
-                shortreads: fastq_1 != 'NA' ? tuple(tuple(meta, [fastq_1, fastq_2])) : null
-                longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null
-                fast5: fast5 != 'NA' ? tuple(meta, fast5) : null
-        }
-
-        // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
-        Channel
-            .fromSamplesheet('input')
-            .multiMap (criteria)
-            .set { ch_input }
-
-        // reconfigure channels
-        ch_input
-            .shortreads
-            .filter{ it != null }
-            .set { ch_shortreads }
-        ch_input
-            .longreads
-            .filter{ it != null }
-            .set { ch_longreads }
-        ch_input
-            .fast5
-            .filter{ it != null }
-            .set { ch_fast5 }
-    } else {
-        exit 1, 'Input samplesheet not specified!'
+    def criteria = multiMapCriteria {
+        meta, fastq_1, fastq_2, long_fastq, fast5, genome_size ->
+            shortreads: fastq_1 != 'NA' ? tuple(tuple(meta, [fastq_1, fastq_2])) : null
+            longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null
+            fast5: fast5 != 'NA' ? tuple(meta, fast5) : null
     }
+    // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
+    Channel
+        .fromSamplesheet('input')
+        .multiMap (criteria)
+        .set { ch_input }
+    // reconfigure channels
+    ch_input
+        .shortreads
+        .filter{ it != null }
+        .set { ch_shortreads }
+    ch_input
+        .longreads
+        .filter{ it != null }
+        .set { ch_longreads }
+    ch_input
+        .fast5
+        .filter{ it != null }
+        .set { ch_fast5 }
 
     //
     // SUBWORKFLOW: Short reads QC and trim adapters