diff --git a/.nf-core.yml b/.nf-core.yml index b03bcea5..4c14d399 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -23,8 +23,9 @@ lint: - assets/email_template.txt - assets/sendmail_template.txt - .gitignore - schema_params: False - nextflow_config: False + nextflow_config: + - manifest.name + - manifest.homePage multiqc_config: False template: prefix: phac-nml diff --git a/nextflow.config b/nextflow.config index 52dcacef..b8311ce5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,8 @@ params { validate_params = true show_hidden_params = false validationS3PathCheck = true - validationSchemaIgnoreParams = 'seqkit,contigs_too_short,output_idx_name,filtered_reads,coverage_calc_fields,assembly_status,coverage_calc_fields.fixed_cov,coverage_calc_fields.auto_cov,assembly_status.report_tag,medaka.model,medaka,validation-S3Path-check,validationS3PathCheck,abricate,python3,pointfinder_db_tag,staramr,mobsuite_recon,skip_staramr,genomes,flye_read_type,shigeifinder,lissero,sistr,ectyper,bandage,bakta,unicycler,medaka,pilon_iterative,pilon,racon,samtools,minimap2,r_contaminants,mash,kraken,checkm,quast_filter,quast,fastqc,spades,flye,chopper,fastp,seqtk,seqtk_size,kat,coreutils,opt_platforms,QCReportFields,QCReport-fields,QCReport,kraken_bin,shigatyper,spatyper,kleborate,subtyping_report,kraken_species,top_hit_species,mash_meta,mlst,raw_reads,abricate_params,target_depth' + validationShowHiddenParams = false + validationSchemaIgnoreParams = 'abricate,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler' 
validationFailUnrecognisedParams = false // for the qcreport fields // SKIP options @@ -68,10 +69,10 @@ params { // Datasets - dehosting_idx = "${projectDir}/databases/PhiPacHum_m2.idx" // mm2 index - mash_sketch = "${projectDir}/databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings - bakta_db = "${projectDir}/databases/db-light" - kraken2_db = "${projectDir}/databases/k2_standard_20220607/" + dehosting_idx = "./databases/PhiPacHum_m2.idx" // mm2 index + mash_sketch = "./databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings + bakta_db = "./databases/db-light" + kraken2_db = "./databases/k2_standard_20220607/" staramr_db = null // Recommended usage is to use the default database in the container diff --git a/nextflow_schema.json b/nextflow_schema.json index 822699c0..b0492c28 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", - "title": "mikrokondo pipeline parameters", - "description": "mikrokondo schema", + "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/main/nextflow_schema.json", + "title": "phac-nml/mikrokondo pipeline parameters", + "description": "Mikrokondo beta", "type": "object", "definitions": { "input_output_options": { @@ -12,15 +12,17 @@ "description": "Define where the pipeline should find input data and save output data.", "required": [ "input", - "outdir" + "outdir", + "platform" ], "properties": { "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in 
your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", "fa_icon": "fas fa-file-csv" @@ -29,570 +31,352 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "hidden": true, + "pattern": "^\\S+$" + }, + "platform": { + "type": "string", + "default": "illumina", + "enum": [ + "illumina", + "nanopore", + "pacbio", + "hybrid" + ], + "description": "Sequencing platform used" + }, + "long_read_opt": { + "type": "string", + "default": "nanopore", + "description": "Specify which longread platform your data is from (nanopore or pacbio). This option must be specified if performing a hybrid assembly.", + "enum": [ + "nanopore", + "pacbio" + ] + }, + "metagenomic_run": { + "type": "boolean", + "description": "Label all samples as metagenomic (Skip autodetection)" }, "email": { "type": "string", "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true } } }, - "bakta": { - "title": "Bakta", + "databases_and_pre_computed_files": { + "title": "Databases and Pre-Computed Files", "type": "object", - "description": "", "default": "", "properties": { - "bakta.output_dir": { + "dehosting_idx": { "type": "string", - "default": "bakta", - "hidden": true + "default": "./databases/PhiPacHum_m2.idx", + "description": "Minimap2 index for dehosting and kitome removal", + "pattern": "^\\S+$", + "format": "file-path" }, - "bakta.embl_ext": { + "mash_sketch": { "type": "string", - "default": ".embl", - "hidden": true + "default": "./databases/GTDBSketch_20231003.msh", + "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", + "pattern": "^\\S+$", + "format": "file-path" }, - "bakta.faa_ext": { + "bakta_db": { "type": "string", - "default": ".faa", - "hidden": true + "description": "Database used for bakta, this value is optional as bakta can be skipped", + "default": "./databases/db-light", + "pattern": "^\\S+$", + "format": "directory-path" }, - "bakta.ffn_ext": { + "kraken2_db": { "type": "string", - "default": ".ffn", - "hidden": true + "default": "./databases/k2_standard_20220607/", + "description": "Kraken2 database", + "pattern": "^\\S+$", + "format": "directory-path" }, - "bakta.fna_ext": { + "staramr_db": { "type": "string", - "default": ".fna", + "description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified", + "pattern": "^\\S+$", + "format": "directory-path", "hidden": true - }, - "bakta.gbff_ext": { + } + }, + "required": [ + "dehosting_idx", + 
"mash_sketch" + ], + "description": "The location of databases used by mikrokondo" + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { "type": "string", - "default": ".gbff", - "hidden": true + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" }, - "bakta.gff_ext": { + "custom_config_base": { "type": "string", - "default": ".gff3", - "hidden": true - }, - "bakta.threads": { - "type": "integer", - "default": 12, - "hidden": true + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" }, - "bakta.hypotheticals_tsv_ext": { + "config_profile_name": { "type": "string", - "default": ".hypotheticals.tsv", - "hidden": true + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" }, - "bakta.hypotheticals_faa_ext": { + "config_profile_description": { "type": "string", - "default": ".hypotheticals.faa", - "hidden": true + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" }, - "bakta.tsv_ext": { + "config_profile_contact": { "type": "string", - "default": ".tsv", - "hidden": true + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" }, - "bakta.txt_ext": { + "config_profile_url": { "type": "string", - "default": ".txt", - "hidden": true - }, - "bakta.min_contig_length": { + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { "type": "integer", - "default": 200, - "hidden": true + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`", + "minimum": 1 }, - "bakta.db": { + "max_memory": { "type": "string", - "hidden": true + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "2000.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" }, - "bakta.args": { + "max_time": { "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, - "description": "Additional arguments to pass to bakta", - "default": "{ \"\" }" - }, - "ba_min_conting_length": { - "type": "integer", - "default": 200, - "description": "Minimum contig length for processing in Bakta" + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" } - }, - "fa_icon": "fas fa-address-card" + } }, - "fastp": { - "title": "FastP", + "generic_options": { + "title": "Generic options", "type": "object", - "description": "", - "default": "", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "fastp.html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "fastp.average_quality_e": { - "type": "integer", - "default": 25, - "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", - "hidden": true - }, - "fastp.json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "fastp.qualified_quality_phred": { - "type": "integer", - "default": 15, - "description": "Phred score to be considered qualified. See FastP docs for more details.", + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", "hidden": true }, - "fastp.unqualified_percent_limit": { - "type": "integer", - "default": 40, - "description": "Percent of bases in a read to be qualified for the read to be included. See FastP docs for more details.", + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", "hidden": true }, - "fastp.fastq_ext": { + "publish_dir_mode": { "type": "string", - "default": ".trimmed.fastq.gz", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. 
This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, - "fastp.report_tag": { + "email_on_fail": { "type": "string", - "default": "FastP", - "hidden": true - }, - "fastp.illumina_length_min": { - "type": "integer", - "default": 35, - "description": "Minimum length of a read to be included in later analysis for illumina data.", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", "hidden": true }, - "fastp.single_end_length_min": { - "type": "integer", - "default": 1000, - "description": "Minimum leng of a read to be used in later analysis (for Nanopore or Pacbio)", + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", "hidden": true }, - "fastp.dedup_reads": { + "monochrome_logs": { "type": "boolean", - "description": "Option to turn on read de-duplication.", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", "hidden": true }, - "fastp.args.illumina": { + "hook_url": { "type": "string", - "default": "--overrepresentation_analysis --trim_poly_g --poly_g_min_len 10 --trim_poly_x --poly_x_min_len 10 --cut_tail --cut_tail_window_size 4 --cut_tail_mean_quality 15 --low_complexity_filter --complexity_threshold 20 --average_qual 25 --qualified_quality_phred 15 --unqualified_percent_limit 40 --length_limit 400 --length_required 35 --detect_adapter_for_pe", + "description": "Incoming hook URL for messaging 
service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "fastp.args.single_end": { - "type": "string", - "default": "--overrepresentation_analysis -Q --length_required 1000", + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", "hidden": true }, - "fp_average_quality": { - "type": "integer", - "default": 25 - }, - "fp_cut_tail_mean_quality": { - "type": "integer", - "default": 15 - }, - "fp_cut_tail_window_size": { - "type": "integer", - "default": 4 - }, - "fp_complexity_threshold": { - "type": "integer", - "default": 20 + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, - "fp_qualified_phred": { - "type": "integer", - "default": 15 + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." }, - "fp_unqualified_precent_limit": { - "type": "integer", - "default": 40 + "show_hidden_params": { + "type": "boolean", + "hidden": true, + "description": "Show hidden parameters." }, - "fp_polyg_min_len": { - "type": "integer", - "default": 10 + "tracedir": { + "type": "string", + "default": "null/pipeline_info", + "hidden": true, + "description": "Trace directory." 
}, - "fp_polyx_min_len": { - "type": "integer", - "default": 10 + "stage_in_mode": { + "type": "string", + "default": "symlink", + "hidden": true, + "description": "Mode for staging files." }, - "fp_illumina_length_min": { - "type": "integer", - "default": 35 + "slurm_p": { + "type": "boolean", + "hidden": true, + "description": "Enable running with slurm." }, - "fp_illumina_length_max": { - "type": "integer", - "default": 400 + "slurm_profile": { + "type": "string", + "hidden": true, + "description": "Profile used for slurm." }, - "fp_single_end_length_min": { - "type": "integer", - "default": 1000 + "validationS3PathCheck": { + "type": "boolean", + "default": true, + "hidden": true, + "description": "Whether or not to validate S3 paths." }, - "fp_dedup_reads": { - "type": "boolean" + "output_idx_name": { + "type": "string", + "hidden": true, + "description": "Index creation output file name" } - }, - "fa_icon": "fas fa-filter" + } }, - "mash": { - "title": "Mash", + "control_flow_options": { + "title": "Control flow options", "type": "object", - "description": "", + "description": "Options to alter control flow of the pipeline", "default": "", - "fa_icon": "fas fa-align-left", "properties": { - "mash.mash_ext": { - "type": "string", - "default": ".screen", - "hidden": true - }, - "mash.output_reads_ext": { - "type": "string", - "default": ".reads.screen", - "hidden": true - }, - "mash.output_taxa_ext": { - "type": "string", - "default": ".taxa.screen", - "hidden": true - }, - "mash.output_dir": { - "type": "string", - "default": "contamination", - "hidden": true - }, - "mash.mash_sketch": { - "type": "string", - "hidden": true - }, - "mash.sketch_ext": { - "type": "string", - "default": ".msh", - "hidden": true - }, - "mash.sketch_kmer_size": { - "type": "integer", - "default": 21, - "hidden": true - }, - "mash.final_sketch_name": { - "type": "string", - "default": "GTDB_sketch", - "hidden": true - }, - "mash.json_ext": { - "type": "string", - "default": ".json", 
- "hidden": true - }, - "mash.min_kmer": { - "type": "integer", - "default": 10, - "hidden": true - }, - "mash.report_tag": { - "type": "string", - "default": "Mash", - "hidden": true - }, - "mash.header_p": { - "type": "boolean", - "hidden": true - }, - "mash.headers": { - "type": "string", - "default": "['identity', 'Shared Hashes', 'Median Multiplicity', 'P-Value', 'Query ID', 'Query Note']", - "hidden": true - }, - "mh_min_kmer": { - "type": "integer", - "default": 10, - "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" - } - } - }, - "quast": { - "title": "QUAST", - "type": "object", - "description": "", - "default": "", - "properties": { - "quast.suffix": { - "type": "string", - "default": "quast", - "hidden": true - }, - "quast.report_base": { - "type": "string", - "default": "report", - "hidden": true - }, - "quast.report_prefix": { - "type": "string", - "default": "transposed_", - "hidden": true - }, - "quast.report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "quast.report_tag": { - "type": "string", - "default": "QUAST", - "hidden": true - }, - "quast.min_contig_length": { - "type": "integer", - "default": 1000, - "description": "Minimum contig length to be used my Quast.", - "hidden": true - }, - "quast.args": { - "type": "string", - "default": "{ \"--min-contig ${params.quast.min_contig_length} --report-all-metrics\" }", - "hidden": true - }, - "quast.contigs_field": { - "type": "string", - "default": "# contigs", - "hidden": true - }, - "qt_min_contig_length": { - "type": "integer", - "default": 1000 - } - }, - "fa_icon": "fas fa-check-circle" - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the 
pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "target_depth": { - "type": "integer", - "default": 100, - "description": "Target depth to sub-sample reads to." - }, - "platform": { - "type": "string", - "enum": [ - "illumina", - "nanopore", - "pacbio", - "hybrid" - ], - "description": "Sequencing platform used.", - "default": "illumina" - }, - "nanopore_chemistry": { - "type": "string", - "description": "The guppy base calling model. See the docs for a link of valid options." - }, - "run_kraken": { - "type": "boolean", - "description": "Use Kraken2 instead of Mash for sample speciation (Useful if you have Eukaryotic data or Archae)" - }, - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle" + "run_kraken": { + "type": "boolean", + "description": "Use Kraken2 instead of Mash for sample speciation (Useful if you have Eukaryotic data or Archae)" }, "hybrid_unicycler": { "type": "boolean", "description": "Use unicycler for hybrid assembly." }, - "long_read_opt": { - "type": "string", - "default": "nanopore", - "enum": [ - "nanopore", - "pacbio" - ], - "description": "Specify which longread platform your data is from (nanopore or pacbio). This option must be specified if performing a hybrid assembly." - }, - "min_reads": { - "type": "integer", - "default": 1000, - "description": "Minimum number of reads a sample requires to move forward for assembly." - }, - "output_idx_name": { - "type": "string", - "hidden": true - }, - "metagenomic_run": { - "type": "boolean", - "description": "Label all samples as metagenomic (Skip autodetection)" - }, - "flye_read_type": { - "type": "string", - "default": "hq", - "enum": [ - "hq", - "corr", - "raw" - ], - "description": "Read type for flye to use. hq corresponds to hifi for Pacbio data." 
- }, - "stage_in_mode": { - "type": "string", - "default": "symlink", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle" - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], - "hidden": true - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format" - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true - }, - "hook_url": { - "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. 
Currently, MS Teams and Slack are supported.", - "hidden": true - }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "null/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "fa_icon": "fas fa-check-square", - "default": true, - "hidden": true - }, - "show_hidden_params": { + "skip_report": { "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + "description": "Skip summary report generation" }, - "slurm_p": { + "skip_raw_read_metrics": { "type": "boolean", - "description": "Use slurm to execute your pipeline" - }, - "slurm_profile": { - "type": "string", - "description": "Slurm partition" + "description": "Skip generating raw-read metrics. e.g. when data first enters the pipeline" }, - "validationS3PathCheck": { - "type": "boolean", - "default": true, - "description": "Validate s3 sample sheet." - } - }, - "required": [ - "platform" - ] - }, - "skip_options": { - "title": "Skip Options", - "type": "object", - "description": "Options to alter control flow of the pipeline", - "default": "", - "properties": { - "skip_depth_sampling": { + "skip_version_gathering": { "type": "boolean", - "description": "Skip down sampling of data to a target depth. This is not supported for metagenomic samples or hybrid assemblies." + "description": "Skip creating a report of the final versions of tools used in mikrokondo" }, "skip_subtyping": { "type": "boolean", "description": "Do not enter the subtyping workflow, e.g. ECTyper, SISTR etc will not be ran." 
}, - "skip_polishing": { + "skip_bakta": { "type": "boolean", - "description": "Skip polishing of assemblies, useful in case of errors or for metagenomic samples that fail." + "description": "Skip annotation with Bakta" }, - "skip_ont_header_cleaning": { + "skip_abricate": { "type": "boolean", - "description": "Make nanopore headers unique. Only turn this on if you are worried about duplicate id's e.g. from errors in running sequencing.", - "default": true + "description": "Skip running abricate for annotation" }, "skip_checkm": { "type": "boolean", "description": "Skip running CheckM" }, - "skip_report": { - "type": "boolean", - "description": "Skip summary report generation" - }, - "skip_raw_read_metrics": { - "type": "boolean", - "description": "Skip generating raw-read metrics. e.g. when data first enters the pipeline" - }, - "skip_version_gathering": { - "type": "boolean", - "description": "Skip creating a report of the final versions of tools used in mikrokondo." - }, - "skip_metagenomic_detection": { + "skip_depth_sampling": { "type": "boolean", - "description": "For samples to be analyzed as isolates." + "description": "Skip down sampling of data to a target depth. This is not supported for metagenomic samples or hybrid assemblies." }, - "skip_abricate": { + "skip_ont_header_cleaning": { "type": "boolean", - "description": "Skip running abricate for annotation" + "default": true, + "description": "Make nanopore headers unique. Only turn this on if you are worried about duplicate id's e.g. from errors in running sequencing" }, - "skip_bakta": { + "skip_polishing": { "type": "boolean", - "description": "Skip annotation with Bakta" + "description": "Skip polishing of assemblies, useful in case of errors or for metagenomic samples that fail." 
}, "skip_species_classification": { "type": "boolean", @@ -600,1760 +384,161 @@ }, "skip_mlst": { "type": "boolean", - "description": "Skip classic 7gene MLST (Uses Torstein Tseemann's mlst)" + "description": "Skip classic 7gene MLST (Uses Torsten Seemann's mlst)" }, "skip_mobrecon": { "type": "boolean", "description": "Skip running mob recon for plasmid identification." }, + "skip_metagenomic_detection": { + "type": "boolean", + "description": "For samples to be analyzed as isolates" + }, "skip_staramr": { "type": "boolean", "description": "Skip running StarAMR" } } }, - "databases_and_pre_computed_files": { - "title": "Databases and Pre-Computed Files", + "fastp_options": { + "title": "Fastp options", "type": "object", - "description": "", + "description": "Options to fastp for read qa/qc", "default": "", "properties": { - "dehosting_idx": { - "type": "string", - "default": "databases/PhiPacHum_m2.idx", - "description": "Minimpa2 index for dehosting and kitome removal" - }, - "mash_sketch": { - "type": "string", - "default": "databases/GTDBSketch_20231003.msh", - "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)" - }, - "bakta_db": { - "type": "string", - "default": "databases/db-light", - "description": "Database use for bakta, this value is optional as bakta can be skipped" - }, - "kraken2_db": { - "type": "string", - "default": "databases/k2_standard_20220607/" - }, - "staramr_db": { - "type": "string", - "description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified" - } - }, - "required": [ - "dehosting_idx", - "mash_sketch" - ] - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a 
smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "2000.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. 
These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "fa_icon": "fas fa-users-cog", - "hidden": true - } - } - }, - "seqkit": { - "title": "SeqKit", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqkit.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "seqkit.docker": { - "type": "string", - 
"default": "quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "seqkit.report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "seqkit.fasta_ext": { - "type": "string", - "default": "_filtered.fasta.gz", - "hidden": true - }, - "seqkit.filter_field": { - "type": "string", - "default": "max_len", - "hidden": true - }, - "seqkit.report_tag": { - "type": "string", - "default": "Seqkit_stats", - "hidden": true - }, - "seqkit.header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - }, - "coveragecalculation": { - "title": "CoverageCalculation", - "type": "object", - "description": "", - "default": "", - "properties": { - "coverage_calc_fields.fixed_cov": { - "type": "string", - "default": "FixedGenomeSizeDepth", - "hidden": true - }, - "coverage_calc_fields.auto_cov": { - "type": "string", - "default": "DetectedGenomeSizeDepth", - "hidden": true - }, - "coverage_calc_fields.bp_field": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'total_bp']", - "hidden": true - } - } - }, - "qcreport": { - "title": "QCReport", - "type": "object", - "description": "", - "default": "", - "properties": { - "QCReport.escherichia.search": { - "type": "string", - "default": "Escherichia coli", - "hidden": true - }, - "QCReport.escherichia.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.escherichia.min_n50": { - "type": "integer", - "default": 50000, - "hidden": true - }, - "QCReport.escherichia.max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.escherichia.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.escherichia.max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "QCReport.escherichia.min_length": { - "type": "integer", - "default": 4500000, - "hidden": true - }, - "QCReport.escherichia.max_length": { - "type": "integer", - "default": 6000000, - 
"hidden": true - }, - "QCReport.escherichia.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.escherichia.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.salmonella.search": { - "type": "string", - "default": "Salmonella", - "hidden": true - }, - "QCReport.salmonella.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.salmonella.min_n50": { - "type": "integer", - "default": 90000, - "hidden": true - }, - "QCReport.salmonella.max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.salmonella.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.salmonella.max_nr_contigs": { - "type": "integer", - "default": 200, - "hidden": true - }, - "QCReport.salmonella.min_length": { - "type": "integer", - "default": 4400000, - "hidden": true - }, - "QCReport.salmonella.max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.salmonella.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.salmonella.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.shigella.search": { - "type": "string", - "default": "Shigella", - "hidden": true - }, - "QCReport.shigella.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.shigella.min_n50": { - "type": "integer", - "default": 18000, - "hidden": true - }, - "QCReport.shigella.max_n50": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.shigella.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.shigella.max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "QCReport.shigella.min_length": { - "type": "integer", - "default": 4300000, - "hidden": true - }, - 
"QCReport.shigella.max_length": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.shigella.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.shigella.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.listeria.search": { - "type": "string", - "default": "Listeria", - "hidden": true - }, - "QCReport.listeria.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.listeria.min_n50": { - "type": "integer", - "default": 50000, - "hidden": true - }, - "QCReport.listeria.max_n50": { - "type": "integer", - "default": 3200000, - "hidden": true - }, - "QCReport.listeria.min_nr_contigs": { + "fp_average_quality": { "type": "integer", - "default": 1, - "hidden": true + "default": 25, + "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", + "minimum": 0 }, - "QCReport.listeria.max_nr_contigs": { + "fp_cut_tail_mean_quality": { "type": "integer", - "default": 200, - "hidden": true + "default": 15, + "minimum": 1, + "description": "the mean quality requirement option shared by cut_front, cut_tail or cut_sliding", + "maximum": 36 }, - "QCReport.listeria.min_length": { + "fp_cut_tail_window_size": { "type": "integer", - "default": 2700000, - "hidden": true + "default": 4, + "minimum": 1, + "description": "the window size option shared by cut_front, cut_tail or cut_sliding.", + "maximum": 1000 }, - "QCReport.listeria.max_length": { + "fp_complexity_threshold": { "type": "integer", - "default": 3200000, - "hidden": true + "default": 20, + "minimum": 0, + "description": "the threshold for low complexity filter", + "maximum": 100 }, - "QCReport.listeria.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.listeria.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - 
"QCReport.klebsiella.search": { - "type": "string", - "default": "Klebsiella", - "hidden": true - }, - "QCReport.klebsiella.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.klebsiella.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.klebsiella.max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.klebsiella.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.klebsiella.max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "QCReport.klebsiella.min_length": { - "type": "integer", - "default": 4500000, - "hidden": true - }, - "QCReport.klebsiella.max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.klebsiella.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.klebsiella.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.staphylococcus.search": { - "type": "string", - "default": "Staphylococcus", - "hidden": true - }, - "QCReport.staphylococcus.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.staphylococcus.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.staphylococcus.max_n50": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "QCReport.staphylococcus.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.staphylococcus.max_nr_contigs": { - "type": "integer", - "default": 550, - "hidden": true - }, - "QCReport.staphylococcus.min_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.staphylococcus.max_length": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "QCReport.staphylococcus.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true 
- }, - "QCReport.staphylococcus.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.fallthrough.search": { - "type": "string", - "default": "No organism specific QC data available.", - "hidden": true - }, - "QCReport.fallthrough.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.fallthrough.min_n50": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_n50": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.min_nr_contigs": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_nr_contigs": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.min_length": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_length": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.fallthrough.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReportFields.raw_average_quality.path": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'qual_mean']", - "hidden": true - }, - "QCReportFields.raw_average_quality.coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "QCReportFields.raw_average_quality.compare_fields": { - "type": "string", - "default": "['raw_average_quality']", - "hidden": true - }, - "QCReportFields.raw_average_quality.comp_type": { - "type": "string", - "default": "ge", - "hidden": true - }, - "QCReportFields.raw_average_quality.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.raw_average_quality.low_msg": { - "type": "string", - "default": "Base quality is poor, resequencing is recommended.", - "hidden": true - }, - "QCReportFields.average_coverage.path": { - "type": "string", - "default": "['FixedGenomeSizeDepth']", - "hidden": true - }, - 
"QCReportFields.average_coverage.coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "QCReportFields.average_coverage.compare_fields": { - "type": "string", - "default": "['min_average_coverage']", - "hidden": true - }, - "QCReportFields.average_coverage.comp_type": { - "type": "string", - "default": "ge", - "hidden": true - }, - "QCReportFields.average_coverage.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.average_coverage.low_msg": { - "type": "string", - "default": "Depth of coverage from assembly is lower than than expected. A top-up run is likely needed.", - "hidden": true - }, - "QCReportFields.metagenomic.path": { - "type": "string", - "default": "['MashMeta']", - "hidden": true - }, - "QCReportFields.metagenomic.coerce_type": { - "type": "string", - "default": "Bool", - "hidden": true - }, - "QCReportFields.metagenomic.compare_fields": { - "type": "string", - "default": "[]", - "hidden": true - }, - "QCReportFields.metagenomic.comp_type": { - "type": "string", - "default": "bool", - "hidden": true - }, - "QCReportFields.metagenomic.on": { - "type": "boolean", - "hidden": true - }, - "QCReportFields.n50_value.path": { - "type": "string", - "default": "['QUAST', '0', 'N50']", - "hidden": true - }, - "QCReportFields.n50_value.coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "QCReportFields.n50_value.compare_fields": { - "type": "string", - "default": "['min_n50', 'max_n50']", - "hidden": true - }, - "QCReportFields.n50_value.comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "QCReportFields.n50_value.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.n50_value.low_msg": { - "type": "string", - "default": "N50 value is low, this could be due to many reasons involving contamination, poor template quality or insufficient template quantity. 
Reisolation and reseqeuncing may be needed.", - "hidden": true - }, - "QCReportFields.n50_value.high_msg": { - "type": "string", - "default": "N50 value is high, this is likely a good thing if you have fewer contigs than expected.", - "hidden": true - }, - "QCReportFields.nr_contigs.path": { - "type": "string", - "default": "['QUAST', '0', '# contigs']", - "hidden": true - }, - "QCReportFields.nr_contigs.coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "QCReportFields.nr_contigs.compare_fields": { - "type": "string", - "default": "['min_nr_contigs', 'max_nr_contigs']", - "hidden": true - }, - "QCReportFields.nr_contigs.comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "QCReportFields.nr_contigs.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.nr_contigs.low_msg": { - "type": "string", - "default": "Fewer contigs than expected, if your genome length is of an expected size and you have a high N50 you likely just have a high quality assembly.", - "hidden": true - }, - "QCReportFields.nr_contigs.high_msg": { - "type": "string", - "default": "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample.", - "hidden": true - }, - "QCReportFields.length.path": { - "type": "string", - "default": "['QUAST', '0', 'Total length']", - "hidden": true - }, - "QCReportFields.length.coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "QCReportFields.length.compare_fields": { - "type": "string", - "default": "['min_length', 'max_length']", - "hidden": true - }, - "QCReportFields.length.comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "QCReportFields.length.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.length.low_msg": { - "type": "string", - "default": 
"Genome length lower than expected, you may need to resequence the sample.", - "hidden": true - }, - "QCReportFields.length.high_msg": { - "type": "string", - "default": "Genome length is higher than expected, contmination is potentially present.", - "hidden": true - }, - "QCReportFields.checkm_contamination.path": { - "type": "string", - "default": "['CheckM', '0', 'Contamination']", - "hidden": true - }, - "QCReportFields.checkm_contamination.coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "QCReportFields.checkm_contamination.compare_fields": { - "type": "string", - "default": "['max_checkm_contamination']", - "hidden": true - }, - "QCReportFields.checkm_contamination.comp_type": { - "type": "string", - "default": "le", - "hidden": true - }, - "QCReportFields.checkm_contamination.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.checkm_contamination.high_msg": { - "type": "string", - "default": "Potential contamination is present in your sample. 
You may need to reisolate and resequence your sample.", - "hidden": true - }, - "QCReport.escherichia.fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.salmonella.fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.shigella.fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.listeria.fixed_genome_size": { - "type": "integer", - "default": 3000000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.search": { - "type": "string", - "default": "Campylobacter jejuni", - "hidden": true - }, - "QCReport.campylobacter_jejuni.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_n50": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "QCReport.campylobacter_jejuni.fixed_genome_size": { - "type": "integer", - "default": 1800000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_length": { - "type": "integer", - "default": 1400000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.campylobacter_coli.search": { - "type": "string", - "default": "Campylobacter coli", - "hidden": true - }, - "QCReport.campylobacter_coli.raw_average_quality": { - "type": "integer", - "default": 30, - 
"hidden": true - }, - "QCReport.campylobacter_coli.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.campylobacter_coli.max_n50": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_coli.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.campylobacter_coli.max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "QCReport.campylobacter_coli.fixed_genome_size": { - "type": "integer", - "default": 1800000, - "hidden": true - }, - "QCReport.campylobacter_coli.min_length": { - "type": "integer", - "default": 1400000, - "hidden": true - }, - "QCReport.campylobacter_coli.max_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_coli.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.campylobacter_coli.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.vibrio_cholerae.search": { - "type": "string", - "default": "Vibrio cholerae", - "hidden": true - }, - "QCReport.vibrio_cholerae.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.vibrio_cholerae.max_n50": { - "type": "integer", - "default": 4300000, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.vibrio_cholerae.max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "QCReport.vibrio_cholerae.fixed_genome_size": { - "type": "integer", - "default": 4000000, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_length": { - "type": "integer", - "default": 3800000, - "hidden": true - }, - "QCReport.vibrio_cholerae.max_length": { - "type": "integer", - "default": 4300000, - "hidden": 
true - }, - "QCReport.vibrio_cholerae.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.klebsiella.fixed_genome_size": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.staphylococcus.fixed_genome_size": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "QCReport.fallthrough.fixed_genome_size": { - "type": "string", - "hidden": true - } - } - }, - "seqtk_size": { - "title": "seqtk_size", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqtk_size.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - }, - "seqtk_size.docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "seqtk_size.report_tag": { - "type": "string", - "default": "SeqtkBaseCount", - "hidden": true - } - } - }, - "medaka": { - "title": "medaka", - "type": "object", - "description": "", - "default": "", - "properties": { - "medaka.model": { - "type": "string", - "hidden": true, - "description": "This is set to the base calling model specified in the nanopore_chemistry param" - }, - "medaka.fasta_ext": { - "type": "string", - "default": ".fa.gz", - "hidden": true - }, - "medaka.outdir": { - "type": "string", - "default": "medaka", - "hidden": true - }, - "medaka.batch_size": { - "type": "integer", - "default": 5, - "description": "Batch size for medaka to use for processing.", - "hidden": true - } - } - }, - "docker": { - "title": "Docker", - "type": "object", - "description": "", - "default": "", - "properties": { - "coreutils.docker": { - "type": "string", - "default": "quay.io/biocontainers/coreutils:8.31--h14c3975_0", - "hidden": true - }, - "kat.docker": { - "type": "string", - "default": 
"quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - }, - "seqtk.docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "fastp.docker": { - "type": "string", - "default": "quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2", - "hidden": true - }, - "flye.docker": { - "type": "string", - "default": "quay.io/biocontainers/flye:2.9.2--py39h6935b12_0", - "hidden": true - }, - "spades.docker": { - "type": "string", - "default": "quay.io/biocontainers/spades:3.15.5--h95f258a_1", - "hidden": true - }, - "quast.docker": { - "type": "string", - "default": "quay.io/biocontainers/quast:5.2.0--py39pl5321h4e691d4_3", - "hidden": true - }, - "checkm.docker": { - "type": "string", - "default": "quay.io/biocontainers/checkm-genome:1.2.2--pyhdfd78af_1", - "hidden": true - }, - "kraken.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:941789bd7fe00db16531c26de8bf3c5c985242a5-0", - "hidden": true - }, - "mlst.docker": { - "type": "string", - "default": "quay.io/biocontainers/mlst:2.19.0--hdfd78af_1", - "hidden": true - }, - "mash.docker": { - "type": "string", - "default": "quay.io/biocontainers/mash:2.3--he348c14_1", - "hidden": true - }, - "r_contaminants.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0", - "hidden": true - }, - "minimap2.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0", - "hidden": true - }, - "samtools.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0", - "hidden": true - }, - "racon.docker": { - "type": "string", - "default": "quay.io/biocontainers/racon:1.4.20--h9a82719_1", - "hidden": true - 
}, - "pilon.docker": { - "type": "string", - "default": "quay.io/biocontainers/pilon:1.24--hdfd78af_0", - "hidden": true - }, - "pilon_iterative.docker": { - "type": "string", - "default": "docker.io/mwells14/pilonpolisher:0.0.1", - "hidden": true - }, - "medaka.docker": { - "type": "string", - "default": "quay.io/biocontainers/medaka:1.8.0--py38hdaa7744_0", - "hidden": true - }, - "unicycler.docker": { - "type": "string", - "default": "quay.io/biocontainers/unicycler:0.5.0--py38h3b68952_2", - "hidden": true - }, - "bakta.docker": { - "type": "string", - "hidden": true, - "default": "quay.io/biocontainers/bakta:1.8.1--pyhdfd78af_0" - }, - "bandage.docker": { - "type": "string", - "default": "quay.io/biocontainers/bandage:0.8.1--hc9558a2_2", - "hidden": true - }, - "ectyper.docker": { - "type": "string", - "default": "quay.io/biocontainers/ectyper:1.0.0--pyhdfd78af_1", - "hidden": true - }, - "kleborate.docker": { - "type": "string", - "default": "quay.io/biocontainers/kleborate:2.1.0--pyhdfd78af_1", - "hidden": true - }, - "spatyper.docker": { - "type": "string", - "default": "quay.io/biocontainers/spatyper:0.3.3--pyhdfd78af_3", - "hidden": true - }, - "sistr.docker": { - "type": "string", - "default": "quay.io/biocontainers/sistr_cmd:1.1.1--pyh864c0ab_2", - "hidden": true - }, - "lissero.docker": { - "type": "string", - "default": "quay.io/biocontainers/lissero:0.4.9--py_0", - "hidden": true - }, - "shigeifinder.docker": { - "type": "string", - "default": "quay.io/biocontainers/shigeifinder:1.3.2--pyhdfd78af_0", - "hidden": true - }, - "python3.docker": { - "type": "string", - "default": "docker.io/python:3.11.6", - "hidden": true - }, - "abricate.docker": { - "type": "string", - "default": "quay.io/biocontainers/abricate:1.0.1--ha8f3691_1", - "hidden": true - }, - "mobsuite_recon.docker": { - "type": "string", - "default": "quay.io/biocontainers/mob_suite:3.0.3--pyhdfd78af_0", - "hidden": true - } - } - }, - "singularity": { - "title": "Singularity", - "type": 
"object", - "description": "", - "default": "", - "properties": { - "coreutils.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/coreutils%3A8.31--h14c3975_0", - "hidden": true - }, - "kat.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - }, - "seqtk.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - }, - "fastp.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2", - "hidden": true - }, - "chopper.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/chopper%3A0.5.0--hd03093a_0", - "hidden": true - }, - "flye.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1", - "hidden": true - }, - "spades.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1", - "hidden": true - }, - "quast.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/quast%3A5.2.0--py39pl5321h4e691d4_3", - "hidden": true - }, - "checkm.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/checkm-genome%3A1.2.2--pyhdfd78af_1", - "hidden": true - }, - "kraken.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0", - "hidden": true - }, - "mlst.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1", - "hidden": true - }, - "mash.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1", - "hidden": true - }, - 
"r_contaminants.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0", - "hidden": true - }, - "minimap2.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0", - "hidden": true - }, - "samtools.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0", - "hidden": true - }, - "racon.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1", - "hidden": true - }, - "pilon.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/pilon%3A1.24--hdfd78af_0", - "hidden": true - }, - "pilon_iterative.singularity": { - "type": "string", - "default": "docker.io/mwells14/pilonpolisher:0.0.1", - "hidden": true - }, - "medaka.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/medaka%3A1.8.0--py38hdaa7744_0", - "hidden": true - }, - "unicycler.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/unicycler%3A0.5.0--py38h3b68952_2", - "hidden": true - }, - "bakta.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/bakta%3A1.8.1--pyhdfd78af_0", - "hidden": true - }, - "bandage.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/bandage:0.8.1--hc9558a2_2", - "hidden": true - }, - "ectyper.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/ectyper:1.0.0--pyhdfd78af_1", - "hidden": true - }, - "kleborate.singularity": { - "type": "string", - "default": 
"https://depot.galaxyproject.org/singularity/kleborate:2.1.0--pyhdfd78af_1", - "hidden": true - }, - "spatyper.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/spatyper:0.3.3--pyhdfd78af_3", - "hidden": true - }, - "sistr.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/sistr_cmd:1.1.1--pyh864c0ab_2", - "hidden": true - }, - "lissero.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/lissero:0.4.9--py_0", - "hidden": true - }, - "shigeifinder.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/shigeifinder:1.3.2--pyhdfd78af_0", - "hidden": true - }, - "python3.singularity": { - "type": "string", - "default": "docker.io/python:3.11.6", - "hidden": true - }, - "abricate.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1", - "hidden": true - }, - "mobsuite_recon.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mob_suite%3A3.0.3--pyhdfd78af_0", - "hidden": true - } - } - }, - "staramr": { - "title": "StarAMR", - "type": "object", - "description": "", - "default": "", - "properties": { - "staramr.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/staramr%3A0.9.1--pyhdfd78af_0", - "hidden": true - }, - "staramr.docker": { - "type": "string", - "default": "quay.io/biocontainers/staramr:0.9.1--pyhdfd78af_0", - "hidden": true - }, - "staramr.db": { - "type": "string", - "description": "Path to a StarAMR database, a database is included in the container." 
- }, - "staramr.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "staramr.txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "staramr.xlsx_ext": { - "type": "string", - "default": ".xlsx", - "hidden": true - }, - "staramr.args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "staramr.point_finder_dbs": { - "type": "string", - "default": "['salmonella', 'campylobacter', 'enterococcus_faecalis', 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']", - "hidden": true - }, - "staramr.report_tag": { - "type": "string", - "default": "StarAMR", - "hidden": true - }, - "staramr.header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - }, - "mobrecon": { - "title": "mobrecon", - "type": "object", - "description": "", - "default": "", - "properties": { - "mobsuite_recon.args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "mobsuite_recon.fasta_ext": { - "type": "string", - "default": ".fasta", - "hidden": true - }, - "mobsuite_recon.results_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "mobsuite_recon.mob_results_file": { - "type": "string", - "default": "mobtyper_results.txt", - "hidden": true - }, - "mobsuite_recon.contig_report": { - "type": "string", - "default": "contig_report.txt", - "hidden": true - }, - "mobsuite_recon.report_tag": { - "type": "string", - "default": "MobRecon", - "hidden": true - }, - "mobsuite_recon.header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - }, - "kat": { - "title": "Kat", - "type": "object", - "description": "", - "default": "", - "properties": { - "kat.hist_ext": { - "type": "string", - "default": ".hist", - "hidden": true - }, - "kat.json_ext": { - "type": "string", - "default": ".hist.dist_analysis.json", - "hidden": true - }, - "kat.png_ext": { - "type": "string", - "default": ".png", - "hidden": true - }, - "kat.postscript_ext": { - "type": 
"string", - "default": ".ps", - "hidden": true - }, - "kat.output_type": { - "type": "string", - "default": "png", - "hidden": true - }, - "kat.pdf_ext": { - "type": "string", - "default": ".pdf", - "hidden": true - }, - "kat.report_tag": { - "type": "string", - "default": "KatHist", - "hidden": true - }, - "kat.jfhash_ext": { - "type": "string", - "default": ".jf", - "hidden": true - } - } - }, - "platform_options": { - "title": "Platform Options", - "type": "object", - "description": "", - "default": "", - "properties": { - "opt_platforms.ont": { - "type": "string", - "default": "nanopore", - "hidden": true - }, - "opt_platforms.pacbio": { - "type": "string", - "default": "pacbio", - "hidden": true - }, - "opt_platforms.hybrid": { - "type": "string", - "default": "hybrid", - "hidden": true - }, - "opt_platforms.illumina": { - "type": "string", - "default": "illumina", - "hidden": true - } - } - }, - "seqtk": { - "title": "Seqtk", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqtk.seed": { - "type": "integer", - "default": 42, - "hidden": true - }, - "seqtk.reads_ext": { - "type": "string", - "default": "_sampled.fastq.gz", - "hidden": true - } - } - }, - "flye": { - "title": "flye", - "type": "object", - "description": "", - "default": "", - "properties": { - "flye.nanopore.corr": { - "type": "string", - "default": "--nano-corr", - "hidden": true - }, - "flye.nanopore.hq": { - "type": "string", - "default": "--nano-hq", - "hidden": true - }, - "flye.pacbio.raw": { - "type": "string", - "default": "--pacbio-raw", - "hidden": true - }, - "flye.pacbio.corr": { - "type": "string", - "default": "--pacbio-corr", - "hidden": true - }, - "flye.nanopore.raw": { - "type": "string", - "default": "--nano-raw", - "hidden": true - }, - "flye.gfa_ext": { - "type": "string", - "default": ".gfa.gz", - "hidden": true - }, - "flye.gv_ext": { - "type": "string", - "default": ".gv.gz", - "hidden": true - }, - "flye.txt_ext": { - "type": "string", - 
"default": ".txt", - "hidden": true - }, - "flye.log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "flye.json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "flye.polishing_iterations": { - "type": "integer", - "default": 1, - "hidden": true - }, - "flye.fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "flye.pacbio.hq": { - "type": "string", - "default": "--pacbio-hifi", - "hidden": true - }, - "flye.args": { - "type": "string", - "default": "{ \"--iterations ${params.flye.polishing_iterations}\" }", - "hidden": true - } - } - }, - "spades": { - "title": "spades", - "type": "object", - "description": "", - "default": "", - "properties": { - "spades.scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fasta.gz", - "hidden": true + "fp_qualified_phred": { + "type": "integer", + "default": 15, + "minimum": 0, + "description": "the quality value that a base is qualified." }, - "spades.contigs_ext": { - "type": "string", - "default": ".contigs.fasta.gz", - "hidden": true + "fp_unqualified_precent_limit": { + "type": "integer", + "default": 40, + "minimum": 0, + "maximum": 100, + "description": "how many percents of bases are allowed to be unqualified" }, - "spades.transcripts_ext": { - "type": "string", - "default": ".transcripts.fasta.gz", - "hidden": true + "fp_polyg_min_len": { + "type": "integer", + "default": 10, + "minimum": 1, + "description": "the minimum length to detect polyG in the read tail" }, - "spades.gene_clusters_ext": { - "type": "string", - "default": ".gene_clusters.fasta.gz", - "hidden": true + "fp_polyx_min_len": { + "type": "integer", + "default": 10, + "description": "the minimum length to detect polyX in the read tail", + "minimum": 1 }, - "spades.assembly_graphs_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true + "fp_illumina_length_min": { + "type": "integer", + "default": 35, + "minimum": 0, + "description": "reads 
shorter than length_required will be discarded" }, - "spades.log_ext": { - "type": "string", - "default": ".log", - "hidden": true + "fp_illumina_length_max": { + "type": "integer", + "default": 400, + "minimum": 0, + "description": "reads longer than length_limit will be discarded, 0 means no limitation." }, - "spades.outdir": { - "type": "string", - "default": "assembly", - "hidden": true + "fp_single_end_length_min": { + "type": "integer", + "default": 1000, + "minimum": 0, + "description": "same as fp_illumina_length_min but for single-end data. reads shorter than length_required will be discarded" + }, + "fp_dedup_reads": { + "type": "boolean", + "description": "enable deduplication to drop the duplicated reads/pairs" } } }, - "checkm": { - "title": "checkm", + "data_processing_thresholds": { + "title": "Data processing thresholds", "type": "object", - "description": "", + "description": "Thresholds for processing or qa/qc of data", "default": "", "properties": { - "checkm.alignment_ext": { - "type": "string", - "default": "-genes.aln", - "hidden": true - }, - "checkm.results_ext": { - "type": "string", - "default": "-results.txt", - "hidden": true - }, - "checkm.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true + "target_depth": { + "type": "integer", + "default": 100, + "description": "Target depth to sub-sample reads to", + "minimum": 1 }, - "checkm.folder_name": { - "type": "string", - "default": "checkm", - "hidden": true + "min_reads": { + "type": "integer", + "default": 1000, + "description": "Minimum number of reads a sample requires to move forward for assembly", + "minimum": 1 }, - "checkm.gzip_ext": { - "type": "string", - "default": ".gz", - "hidden": true + "ba_min_conting_length": { + "type": "integer", + "default": 200, + "description": "Minimum contig length for processing in Bakta", + "minimum": 1 }, - "checkm.lineage_ms": { - "type": "string", - "default": "lineage.ms", - "hidden": true + "qt_min_contig_length": { + 
"type": "integer", + "default": 1000, + "description": "Minimum contig length for quast", + "minimum": 1 }, - "checkm.report_tag": { - "type": "string", - "default": "CheckM", - "hidden": true + "mh_min_kmer": { + "type": "integer", + "default": 10, + "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation", + "minimum": 1 } } }, - "kraken2": { - "title": "kraken2", + "other": { + "title": "Other", "type": "object", - "description": "", + "description": "Other parameters", "default": "", "properties": { - "kraken.db": { - "type": "string", - "description": "Path to Kraken2 database (do not use symlinks)" - }, - "kraken.classified_suffix": { - "type": "string", - "default": "classified", - "hidden": true - }, - "kraken.unclassified_suffix": { - "type": "string", - "default": "unclassified", - "hidden": true - }, - "kraken.report_suffix": { - "type": "string", - "default": "report", - "hidden": true - }, - "kraken.output_suffix": { - "type": "string", - "default": "output", - "hidden": true - }, - "kraken.save_output_fastqs": { - "type": "boolean", - "hidden": true - }, - "kraken.save_reads_assignments": { - "type": "boolean", - "default": true, - "hidden": true - }, - "kraken.run_kraken_quick": { - "type": "boolean", - "hidden": true - }, - "kraken.report_tag": { - "type": "string", - "default": "KrakenReport", - "hidden": true - }, - "kraken.tophit_level": { + "nanopore_chemistry": { "type": "string", - "default": "S", - "hidden": true + "description": "The guppy base calling model. See the docs for a link of valid options" }, - "kraken_bin.taxonomic_level": { + "flye_read_type": { "type": "string", - "default": "G", - "description": "Taxonomic level to bin contigs at." + "default": "hq", + "description": "Read type for flye to use. 
hq corresponds to hifi for Pacbio data.", + "enum": [ + "hq", + "corr", + "raw" + ] } } } @@ -2362,654 +547,29 @@ { "$ref": "#/definitions/input_output_options" }, - { - "$ref": "#/definitions/bakta" - }, - { - "$ref": "#/definitions/fastp" - }, - { - "$ref": "#/definitions/mash" - }, - { - "$ref": "#/definitions/quast" - }, - { - "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/skip_options" - }, { "$ref": "#/definitions/databases_and_pre_computed_files" }, - { - "$ref": "#/definitions/max_job_request_options" - }, { "$ref": "#/definitions/institutional_config_options" }, { - "$ref": "#/definitions/seqkit" - }, - { - "$ref": "#/definitions/coveragecalculation" - }, - { - "$ref": "#/definitions/qcreport" - }, - { - "$ref": "#/definitions/seqtk_size" - }, - { - "$ref": "#/definitions/medaka" - }, - { - "$ref": "#/definitions/docker" - }, - { - "$ref": "#/definitions/singularity" - }, - { - "$ref": "#/definitions/staramr" - }, - { - "$ref": "#/definitions/mobrecon" - }, - { - "$ref": "#/definitions/kat" - }, - { - "$ref": "#/definitions/platform_options" + "$ref": "#/definitions/max_job_request_options" }, { - "$ref": "#/definitions/seqtk" + "$ref": "#/definitions/generic_options" }, { - "$ref": "#/definitions/flye" + "$ref": "#/definitions/control_flow_options" }, { - "$ref": "#/definitions/spades" + "$ref": "#/definitions/fastp_options" }, { - "$ref": "#/definitions/checkm" + "$ref": "#/definitions/data_processing_thresholds" }, { - "$ref": "#/definitions/kraken2" - } - ], - "properties": { - "chopper.quality": { - "type": "integer", - "default": 0, - "hidden": true - }, - "chopper.minlength": { - "type": "integer", - "default": 100, - "hidden": true - }, - "chopper.fastq_ext": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - }, - "fastqc.html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "fastqc.zip_ext": { - "type": "string", - "default": ".zip", - "hidden": true - }, - 
"r_contaminants.phix_fa": { - "type": "string", - "hidden": true - }, - "r_contaminants.homo_sapiens_fa": { - "type": "string", - "hidden": true - }, - "r_contaminants.pacbio_mg": { - "type": "string", - "hidden": true - }, - "r_contaminants.output_ext": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - }, - "r_contaminants.mega_mm2_idx": { - "type": "string", - "hidden": true - }, - "r_contaminants.mm2_illumina": { - "type": "string", - "default": "-x sr", - "hidden": true - }, - "r_contaminants.mm2_pac": { - "type": "string", - "default": "-x map-pb", - "hidden": true - }, - "r_contaminants.mm2_ont": { - "type": "string", - "default": "-x map-ont", - "hidden": true - }, - "r_contaminants.mm2_output_ext": { - "type": "string", - "default": ".sam", - "hidden": true - }, - "r_contaminants.samtools_output_ext": { - "type": "string", - "default": ".fastq", - "hidden": true - }, - "r_contaminants.samtools_singletons_ext": { - "type": "string", - "default": ".singleton.fq", - "hidden": true - }, - "r_contaminants.samtools_output_suffix": { - "type": "string", - "default": "deconned", - "hidden": true - }, - "r_contaminants.output_dir": { - "type": "string", - "default": "contamination/deconned_reads", - "hidden": true - }, - "minimap2.index_outdir": { - "type": "string", - "default": "indices", - "hidden": true - }, - "minimap2.index_ext": { - "type": "string", - "default": ".idx", - "hidden": true - }, - "minimap2.mapped_paf_ext": { - "type": "string", - "default": ".paf", - "hidden": true - }, - "minimap2.mapped_sam_ext": { - "type": "string", - "default": ".sam", - "hidden": true - }, - "minimap2.mapped_outdir": { - "type": "string", - "default": "mapped", - "hidden": true - }, - "samtools.bam_ext": { - "type": "string", - "default": ".bam", - "hidden": true - }, - "samtools.bai_ext": { - "type": "string", - "default": ".bai", - "hidden": true - }, - "racon.consensus_suffix": { - "type": "string", - "default": "_assembly_consensus.fasta", - "hidden": 
true - }, - "racon.consensus_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "racon.outdir": { - "type": "string", - "default": "polished", - "hidden": true - }, - "pilon.outdir": { - "type": "string", - "default": "pilon", - "hidden": true - }, - "pilon.fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "pilon.fasta_outdir": { - "type": "string", - "default": "fasta", - "hidden": true - }, - "pilon.vcf_ext": { - "type": "string", - "default": ".vcf", - "hidden": true - }, - "pilon.vcf_outdir": { - "type": "string", - "default": "vcf", - "hidden": true - }, - "pilon.changes_ext": { - "type": "string", - "default": ".changes", - "hidden": true - }, - "pilon.changes_outdir": { - "type": "string", - "default": "changes", - "hidden": true - }, - "pilon.max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - }, - "pilon_iterative.outdir": { - "type": "string", - "default": "pilon", - "hidden": true - }, - "pilon_iterative.fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "pilon_iterative.fasta_outdir": { - "type": "string", - "default": "fasta", - "hidden": true - }, - "pilon_iterative.vcf_ext": { - "type": "string", - "default": ".vcf", - "hidden": true - }, - "pilon_iterative.vcf_outdir": { - "type": "string", - "default": "vcf", - "hidden": true - }, - "pilon_iterative.bam_ext": { - "type": "string", - "default": ".bam", - "hidden": true - }, - "pilon_iterative.bai_ext": { - "type": "string", - "default": ".bai", - "hidden": true - }, - "pilon_iterative.changes_ext": { - "type": "string", - "default": ".changes", - "hidden": true - }, - "pilon_iterative.changes_outdir": { - "type": "string", - "default": "changes", - "hidden": true - }, - "pilon_iterative.max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - }, - "pilon_iterative.max_polishing_illumina": { - "type": "integer", - "default": 3, - "hidden": true - }, - 
"pilon_iterative.max_polishing_pacbio": { - "type": "integer", - "default": 4, - "hidden": true - }, - "pilon_iterative.max_polishing_nanopore": { - "type": "integer", - "default": 10, - "hidden": true - }, - "unicycler.scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fa.gz", - "hidden": true - }, - "unicycler.assembly_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true - }, - "unicycler.log_ext": { - "type": "string", - "default": ".unicycler.log", - "hidden": true - }, - "unicycler.outdir": { - "type": "string", - "default": "unicycler", - "hidden": true - }, - "unicycler.mem_modifier": { - "type": "integer", - "default": 1000, - "hidden": true - }, - "unicycler.threads_increase_factor": { - "type": "integer", - "default": 1, - "hidden": true - }, - "bandage.svg_ext": { - "type": "string", - "default": ".svg", - "hidden": true - }, - "bandage.outdir": { - "type": "string", - "default": "bandage", - "hidden": true - }, - "ectyper.log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "ectyper.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "ectyper.txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "ectyper.report_tag": { - "type": "string", - "default": "ECTyperSubtyping", - "hidden": true - }, - "sistr.tsv_ext": { - "type": "string", - "default": ".tab", - "hidden": true - }, - "sistr.allele_fasta_ext": { - "type": "string", - "default": "-allele.fasta", - "hidden": true - }, - "sistr.allele_json_ext": { - "type": "string", - "default": "-allele.json", - "hidden": true - }, - "sistr.cgmlst_ext": { - "type": "string", - "default": "-cgmlst.csv", - "hidden": true - }, - "sistr.report_tag": { - "type": "string", - "default": "SISTRSubtyping", - "hidden": true - }, - "lissero.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "lissero.report_tag": { - "type": "string", - "default": "LISSEROSubtyping", - "hidden": true - }, - 
"shigeifinder.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "raw_reads.high_precision": { - "type": "boolean", - "hidden": true - }, - "raw_reads.report_tag": { - "type": "string", - "default": "RawReadSummary", - "hidden": true - }, - "seqtk.assembly_fastq": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - }, - "seqtk.report_tag": { - "type": "string", - "default": "Seqtk", - "hidden": true - }, - "fastp.report_exclude_fields": { - "type": "string", - "default": "['content_curves', 'quality_curves', 'mean', 'kmer_count', 'histogram', 'overrepresented_sequences']", - "hidden": true - }, - "quast.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "checkm.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "kraken.header_p": { - "type": "boolean", - "hidden": true - }, - "kraken.headers": { - "type": "string", - "default": "['PercentID', 'FragmentsRecovered', 'FragmentsAssignmentTaxon', 'RankCode']", - "hidden": true - }, - "mlst.args": { - "type": "string", - "hidden": true - }, - "mlst.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "mlst.json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "mlst.report_tag": { - "type": "string", - "default": "SevenGeneMLSTReport", - "hidden": true - }, - "mash_meta.report_tag": { - "type": "string", - "default": "MashMeta", - "hidden": true - }, - "top_hit_species.report_tag": { - "type": "string", - "default": "SpeciesTopHit", - "hidden": true - }, - "kraken_species.report_tag": { - "type": "string", - "default": "Kraken2TopHit", - "hidden": true - }, - "subtyping_report.report_tag": { - "type": "string", - "default": "Subtyping", - "hidden": true - }, - "ectyper.args": { - "type": "string", - "default": "{ \"--verify\" }", - "hidden": true - }, - "ectyper.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "kleborate.txt_ext": { - "type": "string", - 
"default": ".results.txt", - "hidden": true - }, - "kleborate.report_tag": { - "type": "string", - "default": "KleborateSubtyping", - "hidden": true - }, - "kleborate.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "spatyper.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "spatyper.report_tag": { - "type": "string", - "default": "SpaTyperSubtyping", - "hidden": true - }, - "spatyper.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "spatyper.repeats": { - "type": "string", - "hidden": true - }, - "spatyper.repeat_order": { - "type": "string", - "hidden": true - }, - "sistr.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "lissero.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "shigeifinder.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "kraken_bin.fasta_ext": { - "type": "string", - "default": "_binned.fasta.gz", - "hidden": true - }, - "pointfinder_db_tag.report_tag": { - "type": "string", - "default": "PointfinderDB", - "hidden": true - }, - "abricate.args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "abricate.report_tag": { - "type": "string", - "default": "Abricate", - "hidden": true - }, - "abricate.header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "shigeifinder.container_version": { - "type": "string", - "default": "1.3.2", - "hidden": true - }, - "shigeifinder.report_tag": { - "type": "string", - "default": "ShigeifinderSubtyping", - "hidden": true - }, - "validationFailUnrecognisedParams": { - "type": "boolean" - }, - "fastp.cut_tail_mean_quality": { - "type": "integer", - "default": 15 - }, - "fastp.complexity_threshold": { - "type": "integer", - "default": 20 - }, - "fastp.polyg_min_len": { - "type": "integer", - "default": 10 - }, - "fastp.polyx_min_len": { - "type": "integer", - "default": 10 - }, - "fastp.illumina_length_max": { 
- "type": "integer", - "default": 400 - }, - "assembly_status.report_tag": { - "type": "string", - "default": "AssemblyCompleted", - "hidden": true - }, - "filtered_reads.threshold": { - "type": "integer", - "default": 1000, - "hidden": true - }, - "filtered_reads.report_tag": { - "type": "string", - "default": "MeetsReadThreshold", - "hidden": true - }, - "fastp.cut_tail_window_size": { - "type": "integer", - "default": 4 - }, - "contigs_too_short.report_tag": { - "type": "string", - "default": "MaxContigToShort" - }, - "report_aggregate.sample_flat_suffix": { - "type": "string", - "default": "_flat_sample.json", - "hidden": true + "$ref": "#/definitions/other" } - } + ] } \ No newline at end of file diff --git a/tests/main.nf.test b/tests/main.nf.test index f2fd671b..099d3274 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -180,4 +180,114 @@ nextflow_pipeline { } + test("Test validation of platform parameter") { + tag "fail_validation" + + when { + params { + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" + + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + mh_min_kmer = 1 + + dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + + // parameter to fail validation + platform = "invalid" + } + } + + then { + assert workflow.failed + assert workflow.stderr.contains("* --platform: 'invalid' is not a valid choice (Available choices: illumina, nanopore, pacbio, hybrid)") + } + } + + test("Test validation of contig minimum length parameter") { + tag "fail_validation" + + when { + params { + input = 
"https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" + + platform = "illumina" + + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + mh_min_kmer = 1 + + dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + + // parameter to fail validation + qt_min_contig_length = -1 + } + } + + then { + assert workflow.failed + assert workflow.stderr.contains("* --qt_min_contig_length: -1 is not greater or equal to 1 (-1)") + } + } + + test("Test validation of databases parameters") { + tag "fail_validation" + + when { + params { + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" + + platform = "illumina" + + mh_min_kmer = 1 + + dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + + // parameter to fail validation + mash_sketch = "invalid path" + } + } + + then { + assert workflow.failed + assert workflow.stderr.contains("* --mash_sketch: string [invalid path] does not match pattern ^\\S+\$ (invalid path)") + } + } } diff --git a/tests/pipelines/main.from_assemblies.nf.test b/tests/pipelines/main.from_assemblies.nf.test index 99d2bbcc..c4cf6b8e 100644 --- a/tests/pipelines/main.from_assemblies.nf.test +++ b/tests/pipelines/main.from_assemblies.nf.test @@ -88,6 +88,7 @@ nextflow_pipeline { // output metadata def ecoli_metadata = 
iridanext_metadata.ecoli_GCA_000947975 + assert ecoli_metadata."SpeciesTopHit" == "s__Escherichia coli" assert ecoli_metadata."QUAST.0.Total length" == "5333525" assert ecoli_metadata."QUAST.0.Largest contig" == "300823" assert ecoli_metadata."QUAST.0.# contigs" == "187" @@ -176,6 +177,10 @@ nextflow_pipeline { def salmonella_json = final_report.salmonella_GCA_000008105.salmonella_GCA_000008105 // Tests + assert salmonella_json.SpeciesTopHit == "s__Salmonella enterica" + assert iridanext_metadata.salmonella_GCA_000008105."SpeciesTopHit" == "s__Salmonella enterica" + assert final_report_tmap.SpeciesTopHit == "s__Salmonella enterica" + assert salmonella_json.QUAST."0"."Total length" == "4944000" assert iridanext_metadata.salmonella_GCA_000008105."QUAST.0.Total length" == "4944000" assert final_report_tmap."QUAST.0.Total length" == "4944000" @@ -293,6 +298,10 @@ nextflow_pipeline { def listeria_json = final_report.listeria_GCF_000196035.listeria_GCF_000196035 // Tests + assert listeria_json.SpeciesTopHit == "s__Listeria monocytogenes" + assert iridanext_metadata.listeria_GCF_000196035."SpeciesTopHit" == "s__Listeria monocytogenes" + assert final_report_tmap.SpeciesTopHit == "s__Listeria monocytogenes" + assert listeria_json.QUAST."0"."Total length" == "2944528" assert iridanext_metadata.listeria_GCF_000196035."QUAST.0.Total length" == "2944528" assert final_report_tmap."QUAST.0.Total length" == "2944528"