Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nf-validation on samplesheet #88

Merged
merged 4 commits into from
Sep 23, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ First, prepare a samplesheet with your input data that looks as follows:
`samplesheet.tsv`:

```tsv
ID R1 R2 LongFastQ Fast5 GenomeSize
shortreads ./data/S1_R1.fastq.gz ./data/S1_R2.fastq.gz NA NA NA
longreads NA NA ./data/S1_long_fastq.gz ./data/FAST5 2.8m
shortNlong ./data/S1_R1.fastq.gz ./data/S1_R2.fastq.gz ./data/S1_long_fastq.gz ./data/FAST5 2.8m
ID R1 R2 LongFastQ Fast5 GenomeSize
shortreads ./data/S1_R1.fastq.gz ./data/S1_R2.fastq.gz NA NA NA
longreads NA NA ./data/S1_long_fastq.gz ./data/FAST5 2.8m
shortNlong ./data/S1_R1.fastq.gz ./data/S1_R2.fastq.gz ./data/S1_long_fastq.gz ./data/FAST5 2.8m

```

Expand Down
72 changes: 63 additions & 9 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,76 @@
"items": {
"type": "object",
"properties": {
"sample": {
"ID": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces"
"unique": true,
"errorMessage": "Sample name must be provided and cannot contain spaces",
"meta": ["id"]
},
"fastq_1": {
"type": "string",
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
"R1": {
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"anyOf": [
{
"type": ["string", "null"],
"exists": true,
"pattern": "^(\\S+\\.f(ast)?q\\.gz|NA)$"
},
{
"type": "string",
"maxLength": 0
}
]
},
"fastq_2": {
"R2": {
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"anyOf": [
{
"type": ["string", "null"],
"exists": true,
"pattern": "^(\\S+\\.f(ast)?q\\.gz|NA)$"
},
{
"type": "string",
"pattern": "^\\S+\\.f(ast)?q\\.gz$"
"maxLength": 0
}
]
},
"LongFastQ": {
"errorMessage": "FastQ file for long reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"anyOf": [
{
"type": ["string", "null"],
"exists": true,
"pattern": "^(\\S+\\.f(ast)?q\\.gz|NA)$"
},
{
"type": "string",
"maxLength": 0
}
]
},
"Fast5": {
"errorMessage": "A valid path to Fast5 files. Example: ./data/FAST5",
"anyOf": [
{
"type": ["string", "null"],
"format": "directory-path",
"exists": true,
"pattern": "^(\\/[\\S\\s]*|NA)$"
},
{
"type": "string",
"maxLength": 0
}
]
},
"GenomeSize": {
"errorMessage": "A number (including decimals) ending with 'm', representing genome size. No spaces allowed.",
"anyOf": [
{
"type": ["string", "null"],
"pattern": "(\\b\\d+\\.\\d+m\\b|NA)"
},
{
"type": "string",
Expand All @@ -31,6 +85,6 @@
]
}
},
"required": ["sample", "fastq_1"]
"required": ["ID"]
}
}
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.tsv'
Daniel-VM marked this conversation as resolved.
Show resolved Hide resolved

// some extra args to speed tests up
unicycler_args="--no_correct --no_pilon"
Expand Down
2 changes: 1 addition & 1 deletion conf/test_dfast.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = 6.h

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_short.tsv'

// some extra args to speed tests up
unicycler_args="--no_correct --no_pilon"
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'

// Input data for full size test
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_full.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_full.tsv'
kraken2db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz'
}
2 changes: 1 addition & 1 deletion conf/test_hybrid.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = 6.h

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_hybrid.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_hybrid.tsv'

// some extra args to speed tests up
assembly_type='hybrid'
Expand Down
2 changes: 1 addition & 1 deletion conf/test_long.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = 6.h

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.tsv'

// some extra args to speed tests up
prokka_args = " --fast"
Expand Down
2 changes: 1 addition & 1 deletion conf/test_long_miniasm.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = 6.h

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.tsv'

// some extra args to speed tests up
prokka_args = " --fast"
Expand Down
67 changes: 47 additions & 20 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@
"properties": {
"input": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/csv",
"pattern": "^\\S+\\.csv$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a tab-separated file with 6 columns, and a header row. See [usage docs](https://nf-co.re/bacass/usage#samplesheet-input).\n\nFor example:\n\n`--input 'design_hybrid.csv'`\n\nAn example of properly formatted input files can be found at the [nf-core/test-datasets](https://github.com/nf-core/test-datasets/tree/bacass). \n\nFor example, this is the input used for a hybrid assembly in testing:\nID R1 R2 LongFastQ Fast5 GenomeSize\nERR044595 https://github.com/nf-core/test-datasets/raw/bacass/ERR044595_1M_1.fastq.gz https://github.com/nf-core/test-datasets/raw/bacass/ERR044595_1M_2.fastq.gz https://github.com/nf-core/test-datasets/raw/bacass/nanopore/subset15000.fq.gz NA 2.8m\n\n* `ID`: The identifier to use for handling the dataset e.g. sample name\n* `R1`: The forward reads in case of available short-read data\n* `R2`: The reverse reads in case of available short-read data\n* `LongFastQ`: The long read FastQ file with reads in FASTQ format\n* `Fast5`: The folder containing the basecalled fast5 files\n* `GenomeSize`: The expected genome size of the assembly. Only used by the canu assembler.\n\nMissing values (e.g. Fast5 folder in case of short reads) can be omitted by using a `NA` in the TSV file. The pipeline will handle such cases appropriately then.",
"fa_icon": "fas fa-file-csv"
"mimetype": "text/tsv",
"fa_icon": "fas fa-dna",
"description": "Path to tab-separated sample sheet",
"help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)",
Daniel-VM marked this conversation as resolved.
Show resolved Hide resolved
"schema": "assets/schema_input.json"
},
"outdir": {
"type": "string",
Expand Down Expand Up @@ -52,8 +50,14 @@
"type": "boolean",
"description": "save all merged reads to the a file ending in `*.merged.fastq.gz`"
},
"skip_fastqc": { "type": "boolean", "description": "Skip FastQC" },
"skip_fastp": { "type": "boolean", "description": "Skip FastP" }
"skip_fastqc": {
"type": "boolean",
"description": "Skip FastQC"
},
"skip_fastp": {
"type": "boolean",
"description": "Skip FastP"
}
}
},
"contamination_screening": {
Expand Down Expand Up @@ -177,7 +181,10 @@
"fa_icon": "fas fa-forward",
"description": "Skip polishing the long-read assembly with fast5 input. Will not affect short/hybrid assemblies."
},
"skip_multiqc": { "type": "boolean", "description": "Skip MultiQC" }
"skip_multiqc": {
"type": "boolean",
"description": "Skip MultiQC"
}
}
},
"institutional_config_options": {
Expand Down Expand Up @@ -387,15 +394,35 @@
}
},
"allOf": [
{ "$ref": "#/definitions/input_output_options" },
{ "$ref": "#/definitions/qc_and_trim" },
{ "$ref": "#/definitions/contamination_screening" },
{ "$ref": "#/definitions/assembly_parameters" },
{ "$ref": "#/definitions/assembly_polishing" },
{ "$ref": "#/definitions/annotation" },
{ "$ref": "#/definitions/skipping_options" },
{ "$ref": "#/definitions/institutional_config_options" },
{ "$ref": "#/definitions/max_job_request_options" },
{ "$ref": "#/definitions/generic_options" }
{
"$ref": "#/definitions/input_output_options"
},
{
"$ref": "#/definitions/qc_and_trim"
},
{
"$ref": "#/definitions/contamination_screening"
},
{
"$ref": "#/definitions/assembly_parameters"
},
{
"$ref": "#/definitions/assembly_polishing"
},
{
"$ref": "#/definitions/annotation"
},
{
"$ref": "#/definitions/skipping_options"
},
{
"$ref": "#/definitions/institutional_config_options"
},
{
"$ref": "#/definitions/max_job_request_options"
},
{
"$ref": "#/definitions/generic_options"
}
]
}
86 changes: 0 additions & 86 deletions subworkflows/local/input_check.nf

This file was deleted.

Loading