From 0bdee2cb728ced96324d100c104e3c64ef7e9e1a Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 18 Mar 2024 14:33:25 -0500 Subject: [PATCH 01/26] nested nextflow schema params --- nextflow_schema.json | 4845 ++++++++++++++++++------------------------ 1 file changed, 2030 insertions(+), 2815 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 822699c0..c52854df 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,3015 +1,2230 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", - "title": "mikrokondo pipeline parameters", - "description": "mikrokondo schema", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - } - } - }, - "bakta": { - "title": "Bakta", - "type": "object", - "description": "", - "default": "", - "properties": { - "bakta.output_dir": { - "type": "string", - "default": "bakta", - "hidden": true - }, - "bakta.embl_ext": { - "type": "string", - "default": ".embl", - "hidden": true - }, - "bakta.faa_ext": { - "type": "string", - "default": ".faa", - "hidden": true - }, - "bakta.ffn_ext": { - "type": "string", - "default": ".ffn", - "hidden": true - }, - "bakta.fna_ext": { - "type": "string", - "default": ".fna", - "hidden": true - }, - "bakta.gbff_ext": { - "type": "string", - "default": ".gbff", - "hidden": true - }, - "bakta.gff_ext": { - "type": "string", - "default": ".gff3", - "hidden": true - }, - "bakta.threads": { - "type": "integer", - "default": 12, - "hidden": true - }, - "bakta.hypotheticals_tsv_ext": { - "type": "string", - "default": ".hypotheticals.tsv", - "hidden": true - }, - "bakta.hypotheticals_faa_ext": { - "type": "string", - "default": ".hypotheticals.faa", - "hidden": true - }, - "bakta.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "bakta.txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "bakta.min_contig_length": { - "type": "integer", - "default": 200, - "hidden": true - }, - "bakta.db": { - "type": "string", - "hidden": true - }, - "bakta.args": { - "type": "string", - "hidden": true, - "description": 
"Additional arguments to pass to bakta", - "default": "{ \"\" }" - }, - "ba_min_conting_length": { - "type": "integer", - "default": 200, - "description": "Minimum contig length for processing in Bakta" - } - }, - "fa_icon": "fas fa-address-card" - }, - "fastp": { - "title": "FastP", - "type": "object", - "description": "", - "default": "", - "properties": { - "fastp.html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "fastp.average_quality_e": { - "type": "integer", - "default": 25, - "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", - "hidden": true - }, - "fastp.json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "fastp.qualified_quality_phred": { - "type": "integer", - "default": 15, - "description": "Phred score to be considered qualified. See FastP docs for more details.", - "hidden": true - }, - "fastp.unqualified_percent_limit": { - "type": "integer", - "default": 40, - "description": "Percent of bases in a read to be qualified for the read to be included. 
See FastP docs for more details.", - "hidden": true - }, - "fastp.fastq_ext": { - "type": "string", - "default": ".trimmed.fastq.gz", - "hidden": true - }, - "fastp.report_tag": { - "type": "string", - "default": "FastP", - "hidden": true - }, - "fastp.illumina_length_min": { - "type": "integer", - "default": 35, - "description": "Minimum length of a read to be included in later analysis for illumina data.", - "hidden": true - }, - "fastp.single_end_length_min": { - "type": "integer", - "default": 1000, - "description": "Minimum leng of a read to be used in later analysis (for Nanopore or Pacbio)", - "hidden": true - }, - "fastp.dedup_reads": { - "type": "boolean", - "description": "Option to turn on read de-duplication.", - "hidden": true - }, - "fastp.args.illumina": { - "type": "string", - "default": "--overrepresentation_analysis --trim_poly_g --poly_g_min_len 10 --trim_poly_x --poly_x_min_len 10 --cut_tail --cut_tail_window_size 4 --cut_tail_mean_quality 15 --low_complexity_filter --complexity_threshold 20 --average_qual 25 --qualified_quality_phred 15 --unqualified_percent_limit 40 --length_limit 400 --length_required 35 --detect_adapter_for_pe", - "hidden": true - }, - "fastp.args.single_end": { - "type": "string", - "default": "--overrepresentation_analysis -Q --length_required 1000", - "hidden": true - }, - "fp_average_quality": { - "type": "integer", - "default": 25 - }, - "fp_cut_tail_mean_quality": { - "type": "integer", - "default": 15 - }, - "fp_cut_tail_window_size": { - "type": "integer", - "default": 4 - }, - "fp_complexity_threshold": { - "type": "integer", - "default": 20 - }, - "fp_qualified_phred": { - "type": "integer", - "default": 15 - }, - "fp_unqualified_precent_limit": { - "type": "integer", - "default": 40 - }, - "fp_polyg_min_len": { - "type": "integer", - "default": 10 - }, - "fp_polyx_min_len": { - "type": "integer", - "default": 10 - }, - "fp_illumina_length_min": { - "type": "integer", - "default": 35 - }, - 
"fp_illumina_length_max": { - "type": "integer", - "default": 400 - }, - "fp_single_end_length_min": { - "type": "integer", - "default": 1000 - }, - "fp_dedup_reads": { - "type": "boolean" - } - }, - "fa_icon": "fas fa-filter" - }, - "mash": { - "title": "Mash", - "type": "object", - "description": "", - "default": "", - "fa_icon": "fas fa-align-left", - "properties": { - "mash.mash_ext": { - "type": "string", - "default": ".screen", - "hidden": true - }, - "mash.output_reads_ext": { - "type": "string", - "default": ".reads.screen", - "hidden": true - }, - "mash.output_taxa_ext": { - "type": "string", - "default": ".taxa.screen", - "hidden": true - }, - "mash.output_dir": { - "type": "string", - "default": "contamination", - "hidden": true - }, - "mash.mash_sketch": { - "type": "string", - "hidden": true - }, - "mash.sketch_ext": { - "type": "string", - "default": ".msh", - "hidden": true - }, - "mash.sketch_kmer_size": { - "type": "integer", - "default": 21, - "hidden": true - }, - "mash.final_sketch_name": { - "type": "string", - "default": "GTDB_sketch", - "hidden": true - }, - "mash.json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "mash.min_kmer": { - "type": "integer", - "default": 10, - "hidden": true - }, - "mash.report_tag": { - "type": "string", - "default": "Mash", - "hidden": true - }, - "mash.header_p": { - "type": "boolean", - "hidden": true - }, - "mash.headers": { - "type": "string", - "default": "['identity', 'Shared Hashes', 'Median Multiplicity', 'P-Value', 'Query ID', 'Query Note']", - "hidden": true - }, - "mh_min_kmer": { - "type": "integer", - "default": 10, - "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" - } - } - }, - "quast": { - "title": "QUAST", - "type": "object", - "description": "", - "default": "", - "properties": { - "quast.suffix": { - "type": "string", - "default": "quast", - "hidden": true - }, - "quast.report_base": { - "type": "string", - 
"default": "report", - "hidden": true - }, - "quast.report_prefix": { - "type": "string", - "default": "transposed_", - "hidden": true - }, - "quast.report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "quast.report_tag": { - "type": "string", - "default": "QUAST", - "hidden": true - }, - "quast.min_contig_length": { - "type": "integer", - "default": 1000, - "description": "Minimum contig length to be used my Quast.", - "hidden": true - }, - "quast.args": { - "type": "string", - "default": "{ \"--min-contig ${params.quast.min_contig_length} --report-all-metrics\" }", - "hidden": true - }, - "quast.contigs_field": { - "type": "string", - "default": "# contigs", - "hidden": true - }, - "qt_min_contig_length": { - "type": "integer", - "default": 1000 - } - }, - "fa_icon": "fas fa-check-circle" - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "target_depth": { - "type": "integer", - "default": 100, - "description": "Target depth to sub-sample reads to." - }, - "platform": { - "type": "string", - "enum": [ - "illumina", - "nanopore", - "pacbio", - "hybrid" - ], - "description": "Sequencing platform used.", - "default": "illumina" - }, - "nanopore_chemistry": { - "type": "string", - "description": "The guppy base calling model. See the docs for a link of valid options." 
- }, - "run_kraken": { - "type": "boolean", - "description": "Use Kraken2 instead of Mash for sample speciation (Useful if you have Eukaryotic data or Archae)" - }, - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle" - }, - "hybrid_unicycler": { - "type": "boolean", - "description": "Use unicycler for hybrid assembly." - }, - "long_read_opt": { - "type": "string", - "default": "nanopore", - "enum": [ - "nanopore", - "pacbio" - ], - "description": "Specify which longread platform your data is from (nanopore or pacbio). This option must be specified if performing a hybrid assembly." - }, - "min_reads": { - "type": "integer", - "default": 1000, - "description": "Minimum number of reads a sample requires to move forward for assembly." - }, - "output_idx_name": { - "type": "string", - "hidden": true - }, - "metagenomic_run": { - "type": "boolean", - "description": "Label all samples as metagenomic (Skip autodetection)" - }, - "flye_read_type": { - "type": "string", - "default": "hq", - "enum": [ - "hq", - "corr", - "raw" - ], - "description": "Read type for flye to use. hq corresponds to hifi for Pacbio data." - }, - "stage_in_mode": { - "type": "string", - "default": "symlink", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle" - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], - "hidden": true - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format" - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true - }, - "hook_url": { - "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true - }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "null/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "fa_icon": "fas fa-check-square", - "default": true, - "hidden": true - }, - "show_hidden_params": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
- }, - "slurm_p": { - "type": "boolean", - "description": "Use slurm to execute your pipeline" - }, - "slurm_profile": { - "type": "string", - "description": "Slurm partition" - }, - "validationS3PathCheck": { - "type": "boolean", - "default": true, - "description": "Validate s3 sample sheet." - } - }, - "required": [ - "platform" - ] - }, - "skip_options": { - "title": "Skip Options", - "type": "object", - "description": "Options to alter control flow of the pipeline", - "default": "", - "properties": { - "skip_depth_sampling": { - "type": "boolean", - "description": "Skip down sampling of data to a target depth. This is not supported for metagenomic samples or hybrid assemblies." - }, - "skip_subtyping": { - "type": "boolean", - "description": "Do not enter the subtyping workflow, e.g. ECTyper, SISTR etc will not be ran." - }, - "skip_polishing": { - "type": "boolean", - "description": "Skip polishing of assemblies, useful in case of errors or for metagenomic samples that fail." - }, - "skip_ont_header_cleaning": { - "type": "boolean", - "description": "Make nanopore headers unique. Only turn this on if you are worried about duplicate id's e.g. from errors in running sequencing.", - "default": true - }, - "skip_checkm": { - "type": "boolean", - "description": "Skip running CheckM" - }, - "skip_report": { - "type": "boolean", - "description": "Skip summary report generation" - }, - "skip_raw_read_metrics": { - "type": "boolean", - "description": "Skip generating raw-read metrics. e.g. when data first enters the pipeline" - }, - "skip_version_gathering": { - "type": "boolean", - "description": "Skip creating a report of the final versions of tools used in mikrokondo." - }, - "skip_metagenomic_detection": { - "type": "boolean", - "description": "For samples to be analyzed as isolates." 
- }, - "skip_abricate": { - "type": "boolean", - "description": "Skip running abricate for annotation" - }, - "skip_bakta": { - "type": "boolean", - "description": "Skip annotation with Bakta" - }, - "skip_species_classification": { - "type": "boolean", - "description": "Skip determining what your species is (with Kraken2 or Mash)" - }, - "skip_mlst": { - "type": "boolean", - "description": "Skip classic 7gene MLST (Uses Torstein Tseemann's mlst)" - }, - "skip_mobrecon": { - "type": "boolean", - "description": "Skip running mob recon for plasmid identification." - }, - "skip_staramr": { - "type": "boolean", - "description": "Skip running StarAMR" - } - } - }, - "databases_and_pre_computed_files": { - "title": "Databases and Pre-Computed Files", - "type": "object", - "description": "", - "default": "", - "properties": { - "dehosting_idx": { - "type": "string", - "default": "databases/PhiPacHum_m2.idx", - "description": "Minimpa2 index for dehosting and kitome removal" - }, - "mash_sketch": { - "type": "string", - "default": "databases/GTDBSketch_20231003.msh", - "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)" - }, - "bakta_db": { - "type": "string", - "default": "databases/db-light", - "description": "Database use for bakta, this value is optional as bakta can be skipped" - }, - "kraken2_db": { - "type": "string", - "default": "databases/k2_standard_20220607/" - }, - "staramr_db": { - "type": "string", - "description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified" - } - }, - "required": [ - "dehosting_idx", - "mash_sketch" - ] - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a 
smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "2000.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. 
These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "fa_icon": "fas fa-users-cog", - "hidden": true - } - } - }, - "seqkit": { - "title": "SeqKit", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqkit.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "seqkit.docker": { - "type": "string", - 
"default": "quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "seqkit.report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "seqkit.fasta_ext": { - "type": "string", - "default": "_filtered.fasta.gz", - "hidden": true - }, - "seqkit.filter_field": { - "type": "string", - "default": "max_len", - "hidden": true - }, - "seqkit.report_tag": { - "type": "string", - "default": "Seqkit_stats", - "hidden": true - }, - "seqkit.header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - }, - "coveragecalculation": { - "title": "CoverageCalculation", - "type": "object", - "description": "", - "default": "", - "properties": { - "coverage_calc_fields.fixed_cov": { - "type": "string", - "default": "FixedGenomeSizeDepth", - "hidden": true - }, - "coverage_calc_fields.auto_cov": { - "type": "string", - "default": "DetectedGenomeSizeDepth", - "hidden": true - }, - "coverage_calc_fields.bp_field": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'total_bp']", - "hidden": true - } - } - }, - "qcreport": { - "title": "QCReport", - "type": "object", - "description": "", - "default": "", - "properties": { - "QCReport.escherichia.search": { - "type": "string", - "default": "Escherichia coli", - "hidden": true - }, - "QCReport.escherichia.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.escherichia.min_n50": { - "type": "integer", - "default": 50000, - "hidden": true - }, - "QCReport.escherichia.max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.escherichia.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.escherichia.max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "QCReport.escherichia.min_length": { - "type": "integer", - "default": 4500000, - "hidden": true - }, - "QCReport.escherichia.max_length": { - "type": "integer", - "default": 6000000, - 
"hidden": true - }, - "QCReport.escherichia.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.escherichia.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.salmonella.search": { - "type": "string", - "default": "Salmonella", - "hidden": true - }, - "QCReport.salmonella.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.salmonella.min_n50": { - "type": "integer", - "default": 90000, - "hidden": true - }, - "QCReport.salmonella.max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.salmonella.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.salmonella.max_nr_contigs": { - "type": "integer", - "default": 200, - "hidden": true - }, - "QCReport.salmonella.min_length": { - "type": "integer", - "default": 4400000, - "hidden": true - }, - "QCReport.salmonella.max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.salmonella.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.salmonella.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.shigella.search": { - "type": "string", - "default": "Shigella", - "hidden": true - }, - "QCReport.shigella.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.shigella.min_n50": { - "type": "integer", - "default": 18000, - "hidden": true - }, - "QCReport.shigella.max_n50": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.shigella.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.shigella.max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "QCReport.shigella.min_length": { - "type": "integer", - "default": 4300000, - "hidden": true - }, - 
"QCReport.shigella.max_length": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.shigella.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.shigella.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.listeria.search": { - "type": "string", - "default": "Listeria", - "hidden": true - }, - "QCReport.listeria.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.listeria.min_n50": { - "type": "integer", - "default": 50000, - "hidden": true - }, - "QCReport.listeria.max_n50": { - "type": "integer", - "default": 3200000, - "hidden": true - }, - "QCReport.listeria.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.listeria.max_nr_contigs": { - "type": "integer", - "default": 200, - "hidden": true - }, - "QCReport.listeria.min_length": { - "type": "integer", - "default": 2700000, - "hidden": true - }, - "QCReport.listeria.max_length": { - "type": "integer", - "default": 3200000, - "hidden": true - }, - "QCReport.listeria.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.listeria.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.klebsiella.search": { - "type": "string", - "default": "Klebsiella", - "hidden": true - }, - "QCReport.klebsiella.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.klebsiella.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.klebsiella.max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.klebsiella.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.klebsiella.max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "QCReport.klebsiella.min_length": { - "type": "integer", 
- "default": 4500000, - "hidden": true - }, - "QCReport.klebsiella.max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.klebsiella.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.klebsiella.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.staphylococcus.search": { - "type": "string", - "default": "Staphylococcus", - "hidden": true - }, - "QCReport.staphylococcus.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.staphylococcus.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.staphylococcus.max_n50": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "QCReport.staphylococcus.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.staphylococcus.max_nr_contigs": { - "type": "integer", - "default": 550, - "hidden": true - }, - "QCReport.staphylococcus.min_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.staphylococcus.max_length": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "QCReport.staphylococcus.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.staphylococcus.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.fallthrough.search": { - "type": "string", - "default": "No organism specific QC data available.", - "hidden": true - }, - "QCReport.fallthrough.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.fallthrough.min_n50": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_n50": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.min_nr_contigs": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_nr_contigs": { - "type": "integer", - 
"hidden": true - }, - "QCReport.fallthrough.min_length": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_length": { - "type": "integer", - "hidden": true - }, - "QCReport.fallthrough.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.fallthrough.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReportFields.raw_average_quality.path": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'qual_mean']", - "hidden": true - }, - "QCReportFields.raw_average_quality.coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "QCReportFields.raw_average_quality.compare_fields": { - "type": "string", - "default": "['raw_average_quality']", - "hidden": true - }, - "QCReportFields.raw_average_quality.comp_type": { - "type": "string", - "default": "ge", - "hidden": true - }, - "QCReportFields.raw_average_quality.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.raw_average_quality.low_msg": { - "type": "string", - "default": "Base quality is poor, resequencing is recommended.", - "hidden": true - }, - "QCReportFields.average_coverage.path": { - "type": "string", - "default": "['FixedGenomeSizeDepth']", - "hidden": true - }, - "QCReportFields.average_coverage.coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "QCReportFields.average_coverage.compare_fields": { - "type": "string", - "default": "['min_average_coverage']", - "hidden": true - }, - "QCReportFields.average_coverage.comp_type": { - "type": "string", - "default": "ge", - "hidden": true - }, - "QCReportFields.average_coverage.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.average_coverage.low_msg": { - "type": "string", - "default": "Depth of coverage from assembly is lower than than expected. 
A top-up run is likely needed.", - "hidden": true - }, - "QCReportFields.metagenomic.path": { - "type": "string", - "default": "['MashMeta']", - "hidden": true - }, - "QCReportFields.metagenomic.coerce_type": { - "type": "string", - "default": "Bool", - "hidden": true - }, - "QCReportFields.metagenomic.compare_fields": { - "type": "string", - "default": "[]", - "hidden": true - }, - "QCReportFields.metagenomic.comp_type": { - "type": "string", - "default": "bool", - "hidden": true - }, - "QCReportFields.metagenomic.on": { - "type": "boolean", - "hidden": true - }, - "QCReportFields.n50_value.path": { - "type": "string", - "default": "['QUAST', '0', 'N50']", - "hidden": true - }, - "QCReportFields.n50_value.coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "QCReportFields.n50_value.compare_fields": { - "type": "string", - "default": "['min_n50', 'max_n50']", - "hidden": true - }, - "QCReportFields.n50_value.comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "QCReportFields.n50_value.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.n50_value.low_msg": { - "type": "string", - "default": "N50 value is low, this could be due to many reasons involving contamination, poor template quality or insufficient template quantity. 
Reisolation and reseqeuncing may be needed.", - "hidden": true - }, - "QCReportFields.n50_value.high_msg": { - "type": "string", - "default": "N50 value is high, this is likely a good thing if you have fewer contigs than expected.", - "hidden": true - }, - "QCReportFields.nr_contigs.path": { - "type": "string", - "default": "['QUAST', '0', '# contigs']", - "hidden": true - }, - "QCReportFields.nr_contigs.coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "QCReportFields.nr_contigs.compare_fields": { - "type": "string", - "default": "['min_nr_contigs', 'max_nr_contigs']", - "hidden": true - }, - "QCReportFields.nr_contigs.comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "QCReportFields.nr_contigs.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.nr_contigs.low_msg": { - "type": "string", - "default": "Fewer contigs than expected, if your genome length is of an expected size and you have a high N50 you likely just have a high quality assembly.", - "hidden": true - }, - "QCReportFields.nr_contigs.high_msg": { - "type": "string", - "default": "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample.", - "hidden": true - }, - "QCReportFields.length.path": { - "type": "string", - "default": "['QUAST', '0', 'Total length']", - "hidden": true - }, - "QCReportFields.length.coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "QCReportFields.length.compare_fields": { - "type": "string", - "default": "['min_length', 'max_length']", - "hidden": true - }, - "QCReportFields.length.comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "QCReportFields.length.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.length.low_msg": { - "type": "string", - "default": 
"Genome length lower than expected, you may need to resequence the sample.", - "hidden": true - }, - "QCReportFields.length.high_msg": { - "type": "string", - "default": "Genome length is higher than expected, contmination is potentially present.", - "hidden": true - }, - "QCReportFields.checkm_contamination.path": { - "type": "string", - "default": "['CheckM', '0', 'Contamination']", - "hidden": true - }, - "QCReportFields.checkm_contamination.coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "QCReportFields.checkm_contamination.compare_fields": { - "type": "string", - "default": "['max_checkm_contamination']", - "hidden": true - }, - "QCReportFields.checkm_contamination.comp_type": { - "type": "string", - "default": "le", - "hidden": true - }, - "QCReportFields.checkm_contamination.on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "QCReportFields.checkm_contamination.high_msg": { - "type": "string", - "default": "Potential contamination is present in your sample. 
You may need to reisolate and resequence your sample.", - "hidden": true - }, - "QCReport.escherichia.fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.salmonella.fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.shigella.fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "QCReport.listeria.fixed_genome_size": { - "type": "integer", - "default": 3000000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.search": { - "type": "string", - "default": "Campylobacter jejuni", - "hidden": true - }, - "QCReport.campylobacter_jejuni.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_n50": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "QCReport.campylobacter_jejuni.fixed_genome_size": { - "type": "integer", - "default": 1800000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_length": { - "type": "integer", - "default": 1400000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_jejuni.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.campylobacter_jejuni.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.campylobacter_coli.search": { - "type": "string", - "default": "Campylobacter coli", - "hidden": true - }, - "QCReport.campylobacter_coli.raw_average_quality": { - "type": "integer", - "default": 30, - 
"hidden": true - }, - "QCReport.campylobacter_coli.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.campylobacter_coli.max_n50": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_coli.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.campylobacter_coli.max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "QCReport.campylobacter_coli.fixed_genome_size": { - "type": "integer", - "default": 1800000, - "hidden": true - }, - "QCReport.campylobacter_coli.min_length": { - "type": "integer", - "default": 1400000, - "hidden": true - }, - "QCReport.campylobacter_coli.max_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "QCReport.campylobacter_coli.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.campylobacter_coli.min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.vibrio_cholerae.search": { - "type": "string", - "default": "Vibrio cholerae", - "hidden": true - }, - "QCReport.vibrio_cholerae.raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "QCReport.vibrio_cholerae.max_n50": { - "type": "integer", - "default": 4300000, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "QCReport.vibrio_cholerae.max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "QCReport.vibrio_cholerae.fixed_genome_size": { - "type": "integer", - "default": 4000000, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_length": { - "type": "integer", - "default": 3800000, - "hidden": true - }, - "QCReport.vibrio_cholerae.max_length": { - "type": "integer", - "default": 4300000, - "hidden": 
true - }, - "QCReport.vibrio_cholerae.max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "QCReport.vibrio_cholerae.min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "QCReport.klebsiella.fixed_genome_size": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "QCReport.staphylococcus.fixed_genome_size": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "QCReport.fallthrough.fixed_genome_size": { - "type": "string", - "hidden": true - } - } - }, - "seqtk_size": { - "title": "seqtk_size", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqtk_size.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - }, - "seqtk_size.docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "seqtk_size.report_tag": { - "type": "string", - "default": "SeqtkBaseCount", - "hidden": true - } - } - }, - "medaka": { - "title": "medaka", - "type": "object", - "description": "", - "default": "", - "properties": { - "medaka.model": { - "type": "string", - "hidden": true, - "description": "This is set to the base calling model specified in the nanopore_chemistry param" - }, - "medaka.fasta_ext": { - "type": "string", - "default": ".fa.gz", - "hidden": true - }, - "medaka.outdir": { - "type": "string", - "default": "medaka", - "hidden": true - }, - "medaka.batch_size": { - "type": "integer", - "default": 5, - "description": "Batch size for medaka to use for processing.", - "hidden": true - } - } - }, - "docker": { - "title": "Docker", - "type": "object", - "description": "", - "default": "", - "properties": { - "coreutils.docker": { - "type": "string", - "default": "quay.io/biocontainers/coreutils:8.31--h14c3975_0", - "hidden": true - }, - "kat.docker": { - "type": "string", - "default": 
"quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - }, - "seqtk.docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "fastp.docker": { - "type": "string", - "default": "quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2", - "hidden": true - }, - "flye.docker": { - "type": "string", - "default": "quay.io/biocontainers/flye:2.9.2--py39h6935b12_0", - "hidden": true - }, - "spades.docker": { - "type": "string", - "default": "quay.io/biocontainers/spades:3.15.5--h95f258a_1", - "hidden": true - }, - "quast.docker": { - "type": "string", - "default": "quay.io/biocontainers/quast:5.2.0--py39pl5321h4e691d4_3", - "hidden": true - }, - "checkm.docker": { - "type": "string", - "default": "quay.io/biocontainers/checkm-genome:1.2.2--pyhdfd78af_1", - "hidden": true - }, - "kraken.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:941789bd7fe00db16531c26de8bf3c5c985242a5-0", - "hidden": true - }, - "mlst.docker": { - "type": "string", - "default": "quay.io/biocontainers/mlst:2.19.0--hdfd78af_1", - "hidden": true - }, - "mash.docker": { - "type": "string", - "default": "quay.io/biocontainers/mash:2.3--he348c14_1", - "hidden": true - }, - "r_contaminants.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0", - "hidden": true - }, - "minimap2.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0", - "hidden": true - }, - "samtools.docker": { - "type": "string", - "default": "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0", - "hidden": true - }, - "racon.docker": { - "type": "string", - "default": "quay.io/biocontainers/racon:1.4.20--h9a82719_1", - "hidden": true - 
}, - "pilon.docker": { - "type": "string", - "default": "quay.io/biocontainers/pilon:1.24--hdfd78af_0", - "hidden": true - }, - "pilon_iterative.docker": { - "type": "string", - "default": "docker.io/mwells14/pilonpolisher:0.0.1", - "hidden": true - }, - "medaka.docker": { - "type": "string", - "default": "quay.io/biocontainers/medaka:1.8.0--py38hdaa7744_0", - "hidden": true - }, - "unicycler.docker": { - "type": "string", - "default": "quay.io/biocontainers/unicycler:0.5.0--py38h3b68952_2", - "hidden": true - }, - "bakta.docker": { - "type": "string", - "hidden": true, - "default": "quay.io/biocontainers/bakta:1.8.1--pyhdfd78af_0" - }, - "bandage.docker": { - "type": "string", - "default": "quay.io/biocontainers/bandage:0.8.1--hc9558a2_2", - "hidden": true - }, - "ectyper.docker": { - "type": "string", - "default": "quay.io/biocontainers/ectyper:1.0.0--pyhdfd78af_1", - "hidden": true - }, - "kleborate.docker": { - "type": "string", - "default": "quay.io/biocontainers/kleborate:2.1.0--pyhdfd78af_1", - "hidden": true - }, - "spatyper.docker": { - "type": "string", - "default": "quay.io/biocontainers/spatyper:0.3.3--pyhdfd78af_3", - "hidden": true - }, - "sistr.docker": { - "type": "string", - "default": "quay.io/biocontainers/sistr_cmd:1.1.1--pyh864c0ab_2", - "hidden": true - }, - "lissero.docker": { - "type": "string", - "default": "quay.io/biocontainers/lissero:0.4.9--py_0", - "hidden": true - }, - "shigeifinder.docker": { - "type": "string", - "default": "quay.io/biocontainers/shigeifinder:1.3.2--pyhdfd78af_0", - "hidden": true - }, - "python3.docker": { - "type": "string", - "default": "docker.io/python:3.11.6", - "hidden": true - }, - "abricate.docker": { - "type": "string", - "default": "quay.io/biocontainers/abricate:1.0.1--ha8f3691_1", - "hidden": true - }, - "mobsuite_recon.docker": { - "type": "string", - "default": "quay.io/biocontainers/mob_suite:3.0.3--pyhdfd78af_0", - "hidden": true - } - } - }, - "singularity": { - "title": "Singularity", - "type": 
"object", - "description": "", - "default": "", - "properties": { - "coreutils.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/coreutils%3A8.31--h14c3975_0", - "hidden": true - }, - "kat.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - }, - "seqtk.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - }, - "fastp.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2", - "hidden": true - }, - "chopper.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/chopper%3A0.5.0--hd03093a_0", - "hidden": true - }, - "flye.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1", - "hidden": true - }, - "spades.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1", - "hidden": true - }, - "quast.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/quast%3A5.2.0--py39pl5321h4e691d4_3", - "hidden": true - }, - "checkm.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/checkm-genome%3A1.2.2--pyhdfd78af_1", - "hidden": true - }, - "kraken.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0", - "hidden": true - }, - "mlst.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1", - "hidden": true - }, - "mash.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1", - "hidden": true - }, - 
"r_contaminants.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0", - "hidden": true - }, - "minimap2.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0", - "hidden": true - }, - "samtools.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0", - "hidden": true - }, - "racon.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1", - "hidden": true - }, - "pilon.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/pilon%3A1.24--hdfd78af_0", - "hidden": true - }, - "pilon_iterative.singularity": { - "type": "string", - "default": "docker.io/mwells14/pilonpolisher:0.0.1", - "hidden": true - }, - "medaka.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/medaka%3A1.8.0--py38hdaa7744_0", - "hidden": true - }, - "unicycler.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/unicycler%3A0.5.0--py38h3b68952_2", - "hidden": true - }, - "bakta.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/bakta%3A1.8.1--pyhdfd78af_0", - "hidden": true - }, - "bandage.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/bandage:0.8.1--hc9558a2_2", - "hidden": true - }, - "ectyper.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/ectyper:1.0.0--pyhdfd78af_1", - "hidden": true - }, - "kleborate.singularity": { - "type": "string", - "default": 
"https://depot.galaxyproject.org/singularity/kleborate:2.1.0--pyhdfd78af_1", - "hidden": true - }, - "spatyper.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/spatyper:0.3.3--pyhdfd78af_3", - "hidden": true - }, - "sistr.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/sistr_cmd:1.1.1--pyh864c0ab_2", - "hidden": true - }, - "lissero.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/lissero:0.4.9--py_0", - "hidden": true - }, - "shigeifinder.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/shigeifinder:1.3.2--pyhdfd78af_0", - "hidden": true - }, - "python3.singularity": { - "type": "string", - "default": "docker.io/python:3.11.6", - "hidden": true - }, - "abricate.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1", - "hidden": true - }, - "mobsuite_recon.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mob_suite%3A3.0.3--pyhdfd78af_0", - "hidden": true - } - } - }, - "staramr": { - "title": "StarAMR", - "type": "object", - "description": "", - "default": "", - "properties": { - "staramr.singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/staramr%3A0.9.1--pyhdfd78af_0", - "hidden": true - }, - "staramr.docker": { - "type": "string", - "default": "quay.io/biocontainers/staramr:0.9.1--pyhdfd78af_0", - "hidden": true - }, - "staramr.db": { - "type": "string", - "description": "Path to a StarAMR database, a database is included in the container." 
- }, - "staramr.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "staramr.txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "staramr.xlsx_ext": { - "type": "string", - "default": ".xlsx", - "hidden": true - }, - "staramr.args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "staramr.point_finder_dbs": { - "type": "string", - "default": "['salmonella', 'campylobacter', 'enterococcus_faecalis', 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']", - "hidden": true - }, - "staramr.report_tag": { - "type": "string", - "default": "StarAMR", - "hidden": true - }, - "staramr.header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - }, - "mobrecon": { - "title": "mobrecon", - "type": "object", - "description": "", - "default": "", - "properties": { - "mobsuite_recon.args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "mobsuite_recon.fasta_ext": { - "type": "string", - "default": ".fasta", - "hidden": true - }, - "mobsuite_recon.results_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "mobsuite_recon.mob_results_file": { - "type": "string", - "default": "mobtyper_results.txt", - "hidden": true - }, - "mobsuite_recon.contig_report": { - "type": "string", - "default": "contig_report.txt", - "hidden": true - }, - "mobsuite_recon.report_tag": { - "type": "string", - "default": "MobRecon", - "hidden": true - }, - "mobsuite_recon.header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - }, - "kat": { - "title": "Kat", - "type": "object", - "description": "", - "default": "", - "properties": { - "kat.hist_ext": { - "type": "string", - "default": ".hist", - "hidden": true - }, - "kat.json_ext": { - "type": "string", - "default": ".hist.dist_analysis.json", - "hidden": true - }, - "kat.png_ext": { - "type": "string", - "default": ".png", - "hidden": true - }, - "kat.postscript_ext": { - "type": 
"string", - "default": ".ps", - "hidden": true - }, - "kat.output_type": { - "type": "string", - "default": "png", - "hidden": true - }, - "kat.pdf_ext": { - "type": "string", - "default": ".pdf", - "hidden": true - }, - "kat.report_tag": { - "type": "string", - "default": "KatHist", - "hidden": true - }, - "kat.jfhash_ext": { - "type": "string", - "default": ".jf", - "hidden": true - } - } - }, - "platform_options": { - "title": "Platform Options", - "type": "object", - "description": "", - "default": "", - "properties": { - "opt_platforms.ont": { - "type": "string", - "default": "nanopore", - "hidden": true - }, - "opt_platforms.pacbio": { - "type": "string", - "default": "pacbio", - "hidden": true - }, - "opt_platforms.hybrid": { - "type": "string", - "default": "hybrid", - "hidden": true - }, - "opt_platforms.illumina": { - "type": "string", - "default": "illumina", - "hidden": true - } - } - }, - "seqtk": { - "title": "Seqtk", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqtk.seed": { - "type": "integer", - "default": 42, - "hidden": true - }, - "seqtk.reads_ext": { - "type": "string", - "default": "_sampled.fastq.gz", - "hidden": true - } - } - }, - "flye": { - "title": "flye", - "type": "object", - "description": "", - "default": "", - "properties": { - "flye.nanopore.corr": { - "type": "string", - "default": "--nano-corr", - "hidden": true - }, - "flye.nanopore.hq": { - "type": "string", - "default": "--nano-hq", - "hidden": true - }, - "flye.pacbio.raw": { - "type": "string", - "default": "--pacbio-raw", - "hidden": true - }, - "flye.pacbio.corr": { - "type": "string", - "default": "--pacbio-corr", - "hidden": true - }, - "flye.nanopore.raw": { - "type": "string", - "default": "--nano-raw", - "hidden": true - }, - "flye.gfa_ext": { - "type": "string", - "default": ".gfa.gz", - "hidden": true - }, - "flye.gv_ext": { - "type": "string", - "default": ".gv.gz", - "hidden": true - }, - "flye.txt_ext": { - "type": "string", - 
"default": ".txt", - "hidden": true - }, - "flye.log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "flye.json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "flye.polishing_iterations": { - "type": "integer", - "default": 1, - "hidden": true - }, - "flye.fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "flye.pacbio.hq": { - "type": "string", - "default": "--pacbio-hifi", - "hidden": true - }, - "flye.args": { - "type": "string", - "default": "{ \"--iterations ${params.flye.polishing_iterations}\" }", - "hidden": true - } - } - }, - "spades": { - "title": "spades", - "type": "object", - "description": "", - "default": "", - "properties": { - "spades.scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fasta.gz", - "hidden": true - }, - "spades.contigs_ext": { - "type": "string", - "default": ".contigs.fasta.gz", - "hidden": true - }, - "spades.transcripts_ext": { - "type": "string", - "default": ".transcripts.fasta.gz", - "hidden": true - }, - "spades.gene_clusters_ext": { - "type": "string", - "default": ".gene_clusters.fasta.gz", - "hidden": true - }, - "spades.assembly_graphs_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true - }, - "spades.log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "spades.outdir": { - "type": "string", - "default": "assembly", - "hidden": true - } - } - }, - "checkm": { - "title": "checkm", - "type": "object", - "description": "", - "default": "", - "properties": { - "checkm.alignment_ext": { - "type": "string", - "default": "-genes.aln", - "hidden": true - }, - "checkm.results_ext": { - "type": "string", - "default": "-results.txt", - "hidden": true - }, - "checkm.tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "checkm.folder_name": { - "type": "string", - "default": "checkm", - "hidden": true - }, - "checkm.gzip_ext": { - "type": "string", - "default": 
".gz", - "hidden": true - }, - "checkm.lineage_ms": { - "type": "string", - "default": "lineage.ms", - "hidden": true - }, - "checkm.report_tag": { - "type": "string", - "default": "CheckM", - "hidden": true - } - } - }, - "kraken2": { - "title": "kraken2", - "type": "object", - "description": "", - "default": "", - "properties": { - "kraken.db": { - "type": "string", - "description": "Path to Kraken2 database (do not use symlinks)" - }, - "kraken.classified_suffix": { - "type": "string", - "default": "classified", - "hidden": true - }, - "kraken.unclassified_suffix": { - "type": "string", - "default": "unclassified", - "hidden": true - }, - "kraken.report_suffix": { - "type": "string", - "default": "report", - "hidden": true - }, - "kraken.output_suffix": { - "type": "string", - "default": "output", - "hidden": true - }, - "kraken.save_output_fastqs": { - "type": "boolean", - "hidden": true - }, - "kraken.save_reads_assignments": { - "type": "boolean", - "default": true, - "hidden": true - }, - "kraken.run_kraken_quick": { - "type": "boolean", - "hidden": true - }, - "kraken.report_tag": { - "type": "string", - "default": "KrakenReport", - "hidden": true - }, - "kraken.tophit_level": { - "type": "string", - "default": "S", - "hidden": true - }, - "kraken_bin.taxonomic_level": { - "type": "string", - "default": "G", - "description": "Taxonomic level to bin contigs at." 
- } - } + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", + "title": "phac-nml/mikrokondo pipeline parameters", + "description": "mikrokondo Beta", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": [ + "input", + "outdir" + ], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" } + } }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/bakta" - }, - { - "$ref": "#/definitions/fastp" - }, - { - "$ref": "#/definitions/mash" - }, - { - "$ref": "#/definitions/quast" - }, - { - "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/skip_options" - }, - { - "$ref": "#/definitions/databases_and_pre_computed_files" - }, - { - "$ref": "#/definitions/max_job_request_options" + "bakta": { + "title": "Bakta", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-address-card", + "properties": { + "ba_min_conting_length": { + "type": "integer", + "default": 200, + "description": "Minimum contig length for processing in Bakta" }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/seqkit" - }, - { - "$ref": "#/definitions/coveragecalculation" - }, - { - "$ref": "#/definitions/qcreport" - }, - { - "$ref": "#/definitions/seqtk_size" - }, - { - "$ref": "#/definitions/medaka" - }, - { - "$ref": "#/definitions/docker" - }, - { - "$ref": "#/definitions/singularity" - }, - { - "$ref": "#/definitions/staramr" - }, - { - "$ref": "#/definitions/mobrecon" - }, - { - "$ref": "#/definitions/kat" - }, - { - "$ref": "#/definitions/platform_options" - }, - { - "$ref": "#/definitions/seqtk" - }, - { - "$ref": "#/definitions/flye" - }, - { - "$ref": "#/definitions/spades" - }, - { - "$ref": "#/definitions/checkm" - }, - { - "$ref": "#/definitions/kraken2" - } - ], - "properties": { - "chopper.quality": { - "type": "integer", - "default": 0, - "hidden": true - }, - "chopper.minlength": { - "type": "integer", - "default": 100, - "hidden": true - }, - "chopper.fastq_ext": { + "bakta": { + "output_dir": { "type": "string", - "default": 
".fastq.gz", + "default": "bakta", "hidden": true - }, - "fastqc.html_ext": { + }, + "embl_ext": { "type": "string", - "default": ".html", + "default": ".embl", "hidden": true - }, - "fastqc.zip_ext": { + }, + "faa_ext": { "type": "string", - "default": ".zip", + "default": ".faa", "hidden": true - }, - "r_contaminants.phix_fa": { + }, + "ffn_ext": { "type": "string", + "default": ".ffn", "hidden": true - }, - "r_contaminants.homo_sapiens_fa": { + }, + "fna_ext": { "type": "string", + "default": ".fna", "hidden": true - }, - "r_contaminants.pacbio_mg": { + }, + "gbff_ext": { "type": "string", + "default": ".gbff", "hidden": true - }, - "r_contaminants.output_ext": { + }, + "gff_ext": { "type": "string", - "default": ".fastq.gz", + "default": ".gff3", "hidden": true - }, - "r_contaminants.mega_mm2_idx": { - "type": "string", + }, + "threads": { + "type": "integer", + "default": 12, "hidden": true - }, - "r_contaminants.mm2_illumina": { + }, + "hypotheticals_tsv_ext": { "type": "string", - "default": "-x sr", + "default": ".hypotheticals.tsv", "hidden": true - }, - "r_contaminants.mm2_pac": { + }, + "hypotheticals_faa_ext": { "type": "string", - "default": "-x map-pb", + "default": ".hypotheticals.faa", "hidden": true - }, - "r_contaminants.mm2_ont": { + }, + "tsv_ext": { "type": "string", - "default": "-x map-ont", + "default": ".tsv", "hidden": true - }, - "r_contaminants.mm2_output_ext": { + }, + "txt_ext": { "type": "string", - "default": ".sam", + "default": ".txt", "hidden": true - }, - "r_contaminants.samtools_output_ext": { - "type": "string", - "default": ".fastq", + }, + "min_contig_length": { + "type": "integer", + "default": 200, "hidden": true - }, - "r_contaminants.samtools_singletons_ext": { + }, + "db": { "type": "string", - "default": ".singleton.fq", "hidden": true - }, - "r_contaminants.samtools_output_suffix": { + }, + "args": { "type": "string", - "default": "deconned", - "hidden": true - }, - "r_contaminants.output_dir": { + "hidden": true, + 
"description": "Additional arguments to pass to bakta", + "default": "{ \"\" }" + }, + "docker": { "type": "string", - "default": "contamination/deconned_reads", - "hidden": true - }, - "minimap2.index_outdir": { + "hidden": true, + "default": "quay.io/biocontainers/bakta:1.8.1--pyhdfd78af_0" + }, + "singularity": { "type": "string", - "default": "indices", + "default": "https://depot.galaxyproject.org/singularity/bakta%3A1.8.1--pyhdfd78af_0", "hidden": true + } + } + } + }, + "fastp": { + "title": "FastP", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-filter", + "properties": { + "fp_average_quality": { + "type": "integer", + "default": 25 + }, + "fp_cut_tail_mean_quality": { + "type": "integer", + "default": 15 + }, + "fp_cut_tail_window_size": { + "type": "integer", + "default": 4 + }, + "fp_complexity_threshold": { + "type": "integer", + "default": 20 + }, + "fp_qualified_phred": { + "type": "integer", + "default": 15 + }, + "fp_unqualified_precent_limit": { + "type": "integer", + "default": 40 + }, + "fp_polyg_min_len": { + "type": "integer", + "default": 10 + }, + "fp_polyx_min_len": { + "type": "integer", + "default": 10 + }, + "fp_illumina_length_min": { + "type": "integer", + "default": 35 + }, + "fp_illumina_length_max": { + "type": "integer", + "default": 400 + }, + "fp_single_end_length_min": { + "type": "integer", + "default": 1000 + }, + "fp_dedup_reads": { + "type": "boolean" }, - "minimap2.index_ext": { + "fastp": { + "html_ext": { "type": "string", - "default": ".idx", + "default": ".html", "hidden": true - }, - "minimap2.mapped_paf_ext": { - "type": "string", - "default": ".paf", + }, + "average_quality_e": { + "type": "integer", + "default": 25, + "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", "hidden": true - }, - "minimap2.mapped_sam_ext": { + }, + "json_ext": { "type": "string", - "default": ".sam", + "default": ".json", "hidden": true - }, - 
"minimap2.mapped_outdir": { - "type": "string", - "default": "mapped", + }, + "qualified_quality_phred": { + "type": "integer", + "default": 15, + "description": "Phred score to be considered qualified. See FastP docs for more details.", "hidden": true - }, - "samtools.bam_ext": { - "type": "string", - "default": ".bam", + }, + "unqualified_percent_limit": { + "type": "integer", + "default": 40, + "description": "Percent of bases in a read to be qualified for the read to be included. See FastP docs for more details.", "hidden": true - }, - "samtools.bai_ext": { + }, + "fastq_ext": { "type": "string", - "default": ".bai", + "default": ".trimmed.fastq.gz", "hidden": true - }, - "racon.consensus_suffix": { + }, + "report_tag": { "type": "string", - "default": "_assembly_consensus.fasta", + "default": "FastP", "hidden": true - }, - "racon.consensus_ext": { - "type": "string", - "default": ".fasta.gz", + }, + "illumina_length_min": { + "type": "integer", + "default": 35, + "description": "Minimum length of a read to be included in later analysis for illumina data.", "hidden": true - }, - "racon.outdir": { + }, + "single_end_length_min": { + "type": "integer", + "default": 1000, + "description": "Minimum length of a read to be used in later analysis (for Nanopore or Pacbio)", + "hidden": true + }, + "dedup_reads": { + "type": "boolean", + "description": "Option to turn on read de-duplication.", + "hidden": true + }, + "args": { + "single_end": { + "type": "string", + "default": "--overrepresentation_analysis -Q --length_required 1000", + "hidden": true + } + }, + "docker": { "type": "string", - "default": "polished", + "default": "quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2", "hidden": true - }, - "pilon.outdir": { + }, + "singularity": { "type": "string", - "default": "pilon", + "default": "https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2", "hidden": true + } + } + } + }, + "mash": { + "title": "Mash", + "type": "object", + "description": "", + 
"default": "", + "fa_icon": "fas fa-align-left", + "properties": { + "mh_min_kmer": { + "type": "integer", + "default": 10, + "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" }, - "pilon.fasta_ext": { + "mash": { + "mash_ext": { "type": "string", - "default": ".fasta.gz", + "default": ".screen", "hidden": true - }, - "pilon.fasta_outdir": { + }, + "output_reads_ext": { "type": "string", - "default": "fasta", + "default": ".reads.screen", "hidden": true - }, - "pilon.vcf_ext": { + }, + "output_taxa_ext": { "type": "string", - "default": ".vcf", + "default": ".taxa.screen", "hidden": true - }, - "pilon.vcf_outdir": { + }, + "output_dir": { "type": "string", - "default": "vcf", + "default": "contamination", "hidden": true - }, - "pilon.changes_ext": { + }, + "mash_sketch": { "type": "string", - "default": ".changes", "hidden": true - }, - "pilon.changes_outdir": { + }, + "sketch_ext": { "type": "string", - "default": "changes", + "default": ".msh", "hidden": true - }, - "pilon.max_memory_multiplier": { + }, + "sketch_kmer_size": { "type": "integer", - "default": 3, + "default": 21, "hidden": true - }, - "pilon_iterative.outdir": { + }, + "final_sketch_name": { "type": "string", - "default": "pilon", + "default": "GTDB_sketch", "hidden": true - }, - "pilon_iterative.fasta_ext": { + }, + "json_ext": { "type": "string", - "default": ".fasta.gz", + "default": ".json", "hidden": true - }, - "pilon_iterative.fasta_outdir": { - "type": "string", - "default": "fasta", + }, + "min_kmer": { + "type": "integer", + "default": 10, "hidden": true - }, - "pilon_iterative.vcf_ext": { + }, + "report_tag": { "type": "string", - "default": ".vcf", + "default": "Mash", "hidden": true - }, - "pilon_iterative.vcf_outdir": { - "type": "string", - "default": "vcf", + }, + "header_p": { + "type": "boolean", "hidden": true - }, - "pilon_iterative.bam_ext": { + }, + "headers": { "type": "string", - "default": ".bam", + "default": "['identity', 
'Shared Hashes', 'Median Multiplicity', 'P-Value', 'Query ID', 'Query Note']", "hidden": true - }, - "pilon_iterative.bai_ext": { + }, + "docker": { "type": "string", - "default": ".bai", + "default": "quay.io/biocontainers/mash:2.3--he348c14_1", "hidden": true - }, - "pilon_iterative.changes_ext": { + }, + "singularity": { "type": "string", - "default": ".changes", + "default": "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1", "hidden": true + } + } + } + }, + "quast": { + "title": "QUAST", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-check-circle", + "properties": { + "qt_min_contig_length": { + "type": "integer", + "default": 1000 }, - "pilon_iterative.changes_outdir": { + "quast": { + "suffix": { "type": "string", - "default": "changes", - "hidden": true - }, - "pilon_iterative.max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - }, - "pilon_iterative.max_polishing_illumina": { - "type": "integer", - "default": 3, - "hidden": true - }, - "pilon_iterative.max_polishing_pacbio": { - "type": "integer", - "default": 4, - "hidden": true - }, - "pilon_iterative.max_polishing_nanopore": { - "type": "integer", - "default": 10, + "default": "quast", "hidden": true - }, - "unicycler.scaffolds_ext": { + }, + "report_base": { "type": "string", - "default": ".scaffolds.fa.gz", + "default": "report", "hidden": true - }, - "unicycler.assembly_ext": { + }, + "report_prefix": { "type": "string", - "default": ".assembly.gfa.gz", + "default": "transposed_", "hidden": true - }, - "unicycler.log_ext": { + }, + "report_ext": { "type": "string", - "default": ".unicycler.log", + "default": ".tsv", "hidden": true - }, - "unicycler.outdir": { + }, + "report_tag": { "type": "string", - "default": "unicycler", + "default": "QUAST", "hidden": true - }, - "unicycler.mem_modifier": { + }, + "min_contig_length": { "type": "integer", "default": 1000, + "description": "Minimum contig length to be used by Quast.", 
"hidden": true - }, - "unicycler.threads_increase_factor": { - "type": "integer", - "default": 1, + }, + "args": { + "type": "string", + "default": "{ \"--min-contig ${params.quast.min_contig_length} --report-all-metrics\" }", "hidden": true - }, - "bandage.svg_ext": { + }, + "contigs_field": { "type": "string", - "default": ".svg", + "default": "# contigs", "hidden": true - }, - "bandage.outdir": { + }, + "docker": { "type": "string", - "default": "bandage", + "default": "quay.io/biocontainers/quast:5.2.0--py39pl5321h4e691d4_3", "hidden": true - }, - "ectyper.log_ext": { + }, + "singularity": { "type": "string", - "default": ".log", + "default": "https://depot.galaxyproject.org/singularity/quast%3A5.2.0--py39pl5321h4e691d4_3", "hidden": true - }, - "ectyper.tsv_ext": { + } + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "required": [ + "platform" + ], + "properties": { + "target_depth": { + "type": "integer", + "default": 100, + "description": "Target depth to sub-sample reads to." + }, + "platform": { + "type": "string", + "enum": [ + "illumina", + "nanopore", + "pacbio", + "hybrid" + ], + "description": "Sequencing platform used.", + "default": "illumina" + }, + "nanopore_chemistry": { + "type": "string", + "description": "The guppy base calling model. See the docs for a link of valid options." 
+ }, + "run_kraken": { + "type": "boolean", + "description": "Use Kraken2 instead of Mash for sample speciation (Useful if you have Eukaryotic data or Archaea)" + }, + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle" + }, + "hybrid_unicycler": { + "type": "boolean", + "description": "Use unicycler for hybrid assembly." + }, + "long_read_opt": { + "type": "string", + "default": "nanopore", + "enum": [ + "nanopore", + "pacbio" + ], + "description": "Specify which longread platform your data is from (nanopore or pacbio). This option must be specified if performing a hybrid assembly." + }, + "min_reads": { + "type": "integer", + "default": 1000, + "description": "Minimum number of reads a sample requires to move forward for assembly." + }, + "output_idx_name": { + "type": "string", + "hidden": true + }, + "metagenomic_run": { + "type": "boolean", + "description": "Label all samples as metagenomic (Skip autodetection)" + }, + "flye_read_type": { + "type": "string", + "default": "hq", + "enum": [ + "hq", + "corr", + "raw" + ], + "description": "Read type for flye to use. hq corresponds to hifi for Pacbio data." + }, + "stage_in_mode": { + "type": "string", + "default": "symlink", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle" + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format" + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "null/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "fa_icon": "fas fa-check-square", + "default": true, + "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
+ }, + "slurm_p": { + "type": "boolean", + "description": "Use slurm to execute your pipeline" + }, + "slurm_profile": { + "type": "string", + "description": "Slurm partition" + }, + "validationS3PathCheck": { + "type": "boolean", + "default": true, + "description": "Validate s3 sample sheet." + } + } + }, + "skip_options": { + "title": "Skip Options", + "type": "object", + "description": "Options to alter control flow of the pipeline", + "default": "", + "properties": { + "skip_depth_sampling": { + "type": "boolean", + "description": "Skip down sampling of data to a target depth. This is not supported for metagenomic samples or hybrid assemblies." + }, + "skip_subtyping": { + "type": "boolean", + "description": "Do not enter the subtyping workflow, e.g. ECTyper, SISTR etc will not be run." + }, + "skip_polishing": { + "type": "boolean", + "description": "Skip polishing of assemblies, useful in case of errors or for metagenomic samples that fail." + }, + "skip_ont_header_cleaning": { + "type": "boolean", + "description": "Make nanopore headers unique. Only turn this on if you are worried about duplicate id's e.g. from errors in running sequencing.", + "default": true + }, + "skip_checkm": { + "type": "boolean", + "description": "Skip running CheckM" + }, + "skip_report": { + "type": "boolean", + "description": "Skip summary report generation" + }, + "skip_raw_read_metrics": { + "type": "boolean", + "description": "Skip generating raw-read metrics. e.g. when data first enters the pipeline" + }, + "skip_version_gathering": { + "type": "boolean", + "description": "Skip creating a report of the final versions of tools used in mikrokondo." + }, + "skip_metagenomic_detection": { + "type": "boolean", + "description": "For samples to be analyzed as isolates." 
+ }, + "skip_abricate": { + "type": "boolean", + "description": "Skip running abricate for annotation" + }, + "skip_bakta": { + "type": "boolean", + "description": "Skip annotation with Bakta" + }, + "skip_species_classification": { + "type": "boolean", + "description": "Skip determining what your species is (with Kraken2 or Mash)" + }, + "skip_mlst": { + "type": "boolean", + "description": "Skip classic 7gene MLST (Uses Torsten Seemann's mlst)" + }, + "skip_mobrecon": { + "type": "boolean", + "description": "Skip running mob recon for plasmid identification." + }, + "skip_staramr": { + "type": "boolean", + "description": "Skip running StarAMR" + } + } + }, + "databases_and_pre_computed_files": { + "title": "Databases and Pre-Computed Files", + "type": "object", + "description": "", + "default": "", + "required": [ + "dehosting_idx", + "mash_sketch" + ], + "properties": { + "dehosting_idx": { + "type": "string", + "default": "databases/PhiPacHum_m2.idx", + "description": "Minimap2 index for dehosting and kitome removal" + }, + "mash_sketch": { + "type": "string", + "default": "databases/GTDBSketch_20231003.msh", + "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)" + }, + "bakta_db": { + "type": "string", + "default": "databases/db-light", + "description": "Database used for bakta, this value is optional as bakta can be skipped" + }, + "kraken2_db": { + "type": "string", + "default": "databases/k2_standard_20220607/" + }, + "staramr_db": { + "type": "string", + "description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a 
smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "2000.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. 
These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "fa_icon": "fas fa-users-cog", + "hidden": true + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog", + "hidden": true + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "fa_icon": "fas fa-users-cog", + "hidden": true + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "fa_icon": "fas fa-users-cog", + "hidden": true + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "fa_icon": "fas fa-users-cog", + "hidden": true + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "fa_icon": "fas fa-users-cog", + "hidden": true + } + } + }, + "seqkit": { + "title": "SeqKit", + "type": "object", + "description": "", + "default": "", + "properties": { + "seqkit": { + "singularity": { "type": "string", - "default": ".tsv", + "default": "https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0", "hidden": true - }, - "ectyper.txt_ext": { 
+ }, + "docker": { "type": "string", - "default": ".txt", + "default": "quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0", "hidden": true - }, - "ectyper.report_tag": { + }, + "report_ext": { "type": "string", - "default": "ECTyperSubtyping", + "default": ".tsv", "hidden": true - }, - "sistr.tsv_ext": { + }, + "fasta_ext": { "type": "string", - "default": ".tab", + "default": "_filtered.fasta.gz", "hidden": true - }, - "sistr.allele_fasta_ext": { + }, + "filter_field": { "type": "string", - "default": "-allele.fasta", + "default": "max_len", "hidden": true - }, - "sistr.allele_json_ext": { + }, + "report_tag": { "type": "string", - "default": "-allele.json", + "default": "Seqkit_stats", "hidden": true - }, - "sistr.cgmlst_ext": { + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + } + } + }, + "coveragecalculation": { + "title": "CoverageCalculation", + "type": "object", + "description": "", + "default": "", + "properties": { + "coverage_calc_fields": { + "fixed_cov": { "type": "string", - "default": "-cgmlst.csv", + "default": "FixedGenomeSizeDepth", "hidden": true - }, - "sistr.report_tag": { + }, + "auto_cov": { "type": "string", - "default": "SISTRSubtyping", + "default": "DetectedGenomeSizeDepth", "hidden": true - }, - "lissero.tsv_ext": { + }, + "bp_field": { "type": "string", - "default": ".tsv", + "default": "['RawReadSummary', 'combined', 'total_bp']", "hidden": true - }, - "lissero.report_tag": { + } + } + } + }, + "qcreport": { + "title": "QCReport", + "type": "object", + "description": "", + "default": "", + "properties": { + "QCReport": { + "escherichia": { + "fixed_genome_size": { + "type": "integer", + "default": 5000000, + "hidden": true + } + }, + "salmonella": { + "fixed_genome_size": { + "type": "integer", + "default": 5000000, + "hidden": true + } + }, + "shigella": { + "fixed_genome_size": { + "type": "integer", + "default": 5000000, + "hidden": true + } + }, + "listeria": { + "fixed_genome_size": { + "type": 
"integer", + "default": 3000000, + "hidden": true + } + }, + "klebsiella": { + "fixed_genome_size": { + "type": "integer", + "default": 6000000, + "hidden": true + } + }, + "staphylococcus": { + "fixed_genome_size": { + "type": "integer", + "default": 3500000, + "hidden": true + } + }, + "fallthrough": { + "fixed_genome_size": { + "type": "string", + "hidden": true + } + }, + "campylobacter_jejuni": { + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + } + }, + "campylobacter_coli": { + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + } + }, + "vibrio_cholerae": { + "min_average_coverage": { + "type": "integer", + "default": 40, + "hidden": true + } + } + }, + "QCReportFields": { + "raw_average_quality": { + "low_msg": { + "type": "string", + "default": "Base quality is poor, resequencing is recommended.", + "hidden": true + } + }, + "average_coverage": { + "low_msg": { + "type": "string", + "default": "Depth of coverage from assembly is lower than than expected. A top-up run is likely needed.", + "hidden": true + } + }, + "metagenomic": { + "on": { + "type": "boolean", + "hidden": true + } + }, + "n50_value": { + "high_msg": { + "type": "string", + "default": "N50 value is high, this is likely a good thing if you have fewer contigs than expected.", + "hidden": true + } + }, + "nr_contigs": { + "high_msg": { + "type": "string", + "default": "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample.", + "hidden": true + } + }, + "length": { + "high_msg": { + "type": "string", + "default": "Genome length is higher than expected, contmination is potentially present.", + "hidden": true + } + }, + "checkm_contamination": { + "high_msg": { + "type": "string", + "default": "Potential contamination is present in your sample. 
You may need to reisolate and resequence your sample.", + "hidden": true + } + } + } + } + }, + "seqtk_size": { + "title": "seqtk_size", + "type": "object", + "description": "", + "default": "", + "properties": { + "seqtk_size": { + "singularity": { "type": "string", - "default": "LISSEROSubtyping", + "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", "hidden": true - }, - "shigeifinder.tsv_ext": { + }, + "docker": { "type": "string", - "default": ".tsv", + "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", "hidden": true - }, - "raw_reads.high_precision": { - "type": "boolean", + }, + "report_tag": { + "type": "string", + "default": "SeqtkBaseCount", "hidden": true - }, - "raw_reads.report_tag": { + } + } + } + }, + "medaka": { + "title": "medaka", + "type": "object", + "description": "", + "default": "", + "properties": { + "medaka": { + "model": { + "type": "string", + "hidden": true, + "description": "This is set to the base calling model specified in the nanopore_chemistry param" + }, + "fasta_ext": { "type": "string", - "default": "RawReadSummary", + "default": ".fa.gz", "hidden": true - }, - "seqtk.assembly_fastq": { + }, + "outdir": { "type": "string", - "default": ".fastq.gz", + "default": "medaka", "hidden": true - }, - "seqtk.report_tag": { + }, + "batch_size": { + "type": "integer", + "default": 5, + "description": "Batch size for medaka to use for processing.", + "hidden": true + }, + "docker": { "type": "string", - "default": "Seqtk", + "default": "quay.io/biocontainers/medaka:1.8.0--py38hdaa7744_0", "hidden": true - }, - "fastp.report_exclude_fields": { + }, + "singularity": { "type": "string", - "default": "['content_curves', 'quality_curves', 'mean', 'kmer_count', 'histogram', 'overrepresented_sequences']", + "default": "https://depot.galaxyproject.org/singularity/medaka%3A1.8.0--py38hdaa7744_0", "hidden": true - }, - "quast.header_p": { + } + } + } + }, + "staramr": { + "title": "StarAMR", + "type": "object", 
+ "description": "", + "default": "", + "properties": { + "staramr": { + "singularity": { + "type": "string", + "default": "https://depot.galaxyproject.org/singularity/staramr%3A0.9.1--pyhdfd78af_0", + "hidden": true + }, + "docker": { + "type": "string", + "default": "quay.io/biocontainers/staramr:0.9.1--pyhdfd78af_0", + "hidden": true + }, + "db": { + "type": "string", + "description": "Path to a StarAMR database, a database is included in the container." + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "xlsx_ext": { + "type": "string", + "default": ".xlsx", + "hidden": true + }, + "args": { + "type": "string", + "hidden": true, + "default": "{ \"\" }" + }, + "point_finder_dbs": { + "type": "string", + "default": "['salmonella', 'campylobacter', 'enterococcus_faecalis', 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "StarAMR", + "hidden": true + }, + "header_p": { "type": "boolean", "default": true, "hidden": true - }, - "checkm.header_p": { + } + } + } + }, + "mobrecon": { + "title": "mobrecon", + "type": "object", + "description": "", + "default": "", + "properties": { + "mobsuite_recon": { + "args": { + "type": "string", + "hidden": true, + "default": "{ \"\" }" + }, + "fasta_ext": { + "type": "string", + "default": ".fasta", + "hidden": true + }, + "results_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "mob_results_file": { + "type": "string", + "default": "mobtyper_results.txt", + "hidden": true + }, + "contig_report": { + "type": "string", + "default": "contig_report.txt", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "MobRecon", + "hidden": true + }, + "header_p": { "type": "boolean", "default": true, "hidden": true - }, - "kraken.header_p": { - "type": "boolean", + } + } + } + }, + "kat": { + 
"title": "Kat", + "type": "object", + "description": "", + "default": "", + "properties": { + "kat": { + "hist_ext": { + "type": "string", + "default": ".hist", "hidden": true - }, - "kraken.headers": { + }, + "json_ext": { "type": "string", - "default": "['PercentID', 'FragmentsRecovered', 'FragmentsAssignmentTaxon', 'RankCode']", + "default": ".hist.dist_analysis.json", "hidden": true - }, - "mlst.args": { + }, + "png_ext": { "type": "string", + "default": ".png", "hidden": true - }, - "mlst.tsv_ext": { + }, + "postscript_ext": { "type": "string", - "default": ".tsv", + "default": ".ps", "hidden": true - }, - "mlst.json_ext": { + }, + "output_type": { + "type": "string", + "default": "png", + "hidden": true + }, + "pdf_ext": { + "type": "string", + "default": ".pdf", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "KatHist", + "hidden": true + }, + "jfhash_ext": { + "type": "string", + "default": ".jf", + "hidden": true + }, + "docker": { + "type": "string", + "default": "quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", + "hidden": true + }, + "singularity": { + "type": "string", + "default": "https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2", + "hidden": true + } + } + } + }, + "platform_options": { + "title": "Platform Options", + "type": "object", + "description": "", + "default": "", + "properties": { + "opt_platforms": { + "ont": { + "type": "string", + "default": "nanopore", + "hidden": true + }, + "pacbio": { + "type": "string", + "default": "pacbio", + "hidden": true + }, + "hybrid": { + "type": "string", + "default": "hybrid", + "hidden": true + }, + "illumina": { + "type": "string", + "default": "illumina", + "hidden": true + } + } + } + }, + "seqtk": { + "title": "Seqtk", + "type": "object", + "description": "", + "default": "", + "properties": { + "seqtk": { + "seed": { + "type": "integer", + "default": 42, + "hidden": true + }, + "reads_ext": { + "type": "string", + "default": "_sampled.fastq.gz", + 
"hidden": true + }, + "docker": { + "type": "string", + "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", + "hidden": true + }, + "singularity": { + "type": "string", + "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", + "hidden": true + } + } + } + }, + "flye": { + "title": "flye", + "type": "object", + "description": "", + "default": "", + "properties": { + "flye": { + "nanopore": { + "raw": { + "type": "string", + "default": "--nano-raw", + "hidden": true + } + }, + "pacbio": { + "hq": { + "type": "string", + "default": "--pacbio-hifi", + "hidden": true + } + }, + "gfa_ext": { + "type": "string", + "default": ".gfa.gz", + "hidden": true + }, + "gv_ext": { + "type": "string", + "default": ".gv.gz", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "log_ext": { + "type": "string", + "default": ".log", + "hidden": true + }, + "json_ext": { "type": "string", "default": ".json", "hidden": true - }, - "mlst.report_tag": { + }, + "polishing_iterations": { + "type": "integer", + "default": 1, + "hidden": true + }, + "fasta_ext": { "type": "string", - "default": "SevenGeneMLSTReport", + "default": ".fasta.gz", "hidden": true - }, - "mash_meta.report_tag": { + }, + "args": { "type": "string", - "default": "MashMeta", + "default": "{ \"--iterations ${params.flye.polishing_iterations}\" }", "hidden": true - }, - "top_hit_species.report_tag": { + }, + "docker": { "type": "string", - "default": "SpeciesTopHit", + "default": "quay.io/biocontainers/flye:2.9.2--py39h6935b12_0", "hidden": true - }, - "kraken_species.report_tag": { + }, + "singularity": { "type": "string", - "default": "Kraken2TopHit", + "default": "https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1", "hidden": true - }, - "subtyping_report.report_tag": { + } + } + } + }, + "spades": { + "title": "spades", + "type": "object", + "description": "", + "default": "", + "properties": { + "spades": { + 
"scaffolds_ext": { "type": "string", - "default": "Subtyping", + "default": ".scaffolds.fasta.gz", "hidden": true - }, - "ectyper.args": { + }, + "contigs_ext": { "type": "string", - "default": "{ \"--verify\" }", + "default": ".contigs.fasta.gz", "hidden": true - }, - "ectyper.header_p": { - "type": "boolean", - "default": true, + }, + "transcripts_ext": { + "type": "string", + "default": ".transcripts.fasta.gz", "hidden": true - }, - "kleborate.txt_ext": { + }, + "gene_clusters_ext": { "type": "string", - "default": ".results.txt", + "default": ".gene_clusters.fasta.gz", "hidden": true - }, - "kleborate.report_tag": { + }, + "assembly_graphs_ext": { "type": "string", - "default": "KleborateSubtyping", + "default": ".assembly.gfa.gz", "hidden": true - }, - "kleborate.header_p": { - "type": "boolean", - "default": true, + }, + "log_ext": { + "type": "string", + "default": ".log", "hidden": true - }, - "spatyper.tsv_ext": { + }, + "outdir": { "type": "string", - "default": ".tsv", + "default": "assembly", "hidden": true - }, - "spatyper.report_tag": { + }, + "docker": { "type": "string", - "default": "SpaTyperSubtyping", + "default": "quay.io/biocontainers/spades:3.15.5--h95f258a_1", "hidden": true - }, - "spatyper.header_p": { - "type": "boolean", - "default": true, + }, + "singularity": { + "type": "string", + "default": "https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1", "hidden": true - }, - "spatyper.repeats": { + } + } + } + }, + "checkm": { + "title": "checkm", + "type": "object", + "description": "", + "default": "", + "properties": { + "checkm": { + "alignment_ext": { "type": "string", + "default": "-genes.aln", "hidden": true - }, - "spatyper.repeat_order": { + }, + "results_ext": { "type": "string", + "default": "-results.txt", "hidden": true - }, - "sistr.header_p": { - "type": "boolean", - "default": true, + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", "hidden": true - }, - "lissero.header_p": { - "type": "boolean", - 
"default": true, + }, + "folder_name": { + "type": "string", + "default": "checkm", "hidden": true - }, - "shigeifinder.header_p": { - "type": "boolean", - "default": true, + }, + "gzip_ext": { + "type": "string", + "default": ".gz", "hidden": true - }, - "kraken_bin.fasta_ext": { + }, + "lineage_ms": { "type": "string", - "default": "_binned.fasta.gz", + "default": "lineage.ms", "hidden": true - }, - "pointfinder_db_tag.report_tag": { + }, + "report_tag": { "type": "string", - "default": "PointfinderDB", + "default": "CheckM", "hidden": true - }, - "abricate.args": { + }, + "docker": { "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "abricate.report_tag": { + "default": "quay.io/biocontainers/checkm-genome:1.2.2--pyhdfd78af_1", + "hidden": true + }, + "singularity": { "type": "string", - "default": "Abricate", + "default": "https://depot.galaxyproject.org/singularity/checkm-genome%3A1.2.2--pyhdfd78af_1", "hidden": true - }, - "abricate.header_p": { - "type": "boolean", - "default": true, + } + } + } + }, + "kraken2": { + "title": "kraken2", + "type": "object", + "description": "", + "default": "", + "properties": { + "kraken": { + "db": { + "type": "string", + "description": "Path to Kraken2 database (do not use symlinks)" + }, + "classified_suffix": { + "type": "string", + "default": "classified", "hidden": true - }, - "shigeifinder.container_version": { + }, + "unclassified_suffix": { "type": "string", - "default": "1.3.2", + "default": "unclassified", "hidden": true - }, - "shigeifinder.report_tag": { + }, + "report_suffix": { "type": "string", - "default": "ShigeifinderSubtyping", + "default": "report", "hidden": true - }, - "validationFailUnrecognisedParams": { - "type": "boolean" - }, - "fastp.cut_tail_mean_quality": { - "type": "integer", - "default": 15 - }, - "fastp.complexity_threshold": { - "type": "integer", - "default": 20 - }, - "fastp.polyg_min_len": { - "type": "integer", - "default": 10 - }, - "fastp.polyx_min_len": { - "type": 
"integer", - "default": 10 - }, - "fastp.illumina_length_max": { - "type": "integer", - "default": 400 - }, - "assembly_status.report_tag": { + }, + "output_suffix": { "type": "string", - "default": "AssemblyCompleted", + "default": "output", "hidden": true - }, - "filtered_reads.threshold": { - "type": "integer", - "default": 1000, + }, + "save_output_fastqs": { + "type": "boolean", "hidden": true - }, - "filtered_reads.report_tag": { + }, + "save_reads_assignments": { + "type": "boolean", + "default": true, + "hidden": true + }, + "run_kraken_quick": { + "type": "boolean", + "hidden": true + }, + "report_tag": { "type": "string", - "default": "MeetsReadThreshold", + "default": "KrakenReport", "hidden": true - }, - "fastp.cut_tail_window_size": { - "type": "integer", - "default": 4 - }, - "contigs_too_short.report_tag": { + }, + "tophit_level": { "type": "string", - "default": "MaxContigToShort" + "default": "S", + "hidden": true + } }, - "report_aggregate.sample_flat_suffix": { + "kraken_bin": { + "taxonomic_level": { "type": "string", - "default": "_flat_sample.json", - "hidden": true + "default": "G", + "description": "Taxonomic level to bin contigs at." 
+ } } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/bakta" + }, + { + "$ref": "#/definitions/fastp" + }, + { + "$ref": "#/definitions/mash" + }, + { + "$ref": "#/definitions/quast" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/skip_options" + }, + { + "$ref": "#/definitions/databases_and_pre_computed_files" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/seqkit" + }, + { + "$ref": "#/definitions/coveragecalculation" + }, + { + "$ref": "#/definitions/qcreport" + }, + { + "$ref": "#/definitions/seqtk_size" + }, + { + "$ref": "#/definitions/medaka" + }, + { + "$ref": "#/definitions/staramr" + }, + { + "$ref": "#/definitions/mobrecon" + }, + { + "$ref": "#/definitions/kat" + }, + { + "$ref": "#/definitions/platform_options" + }, + { + "$ref": "#/definitions/seqtk" + }, + { + "$ref": "#/definitions/flye" + }, + { + "$ref": "#/definitions/spades" + }, + { + "$ref": "#/definitions/checkm" + }, + { + "$ref": "#/definitions/kraken2" + } + ], + "validationFailUnrecognisedParams": { + "type": "boolean" + }, + "chopper": { + "quality": { + "type": "integer", + "default": 0, + "hidden": true + }, + "minlength": { + "type": "integer", + "default": 100, + "hidden": true + }, + "fastq_ext": { + "type": "string", + "default": ".fastq.gz", + "hidden": true + } + }, + "fastqc": { + "html_ext": { + "type": "string", + "default": ".html", + "hidden": true + }, + "zip_ext": { + "type": "string", + "default": ".zip", + "hidden": true + } + }, + "r_contaminants": { + "phix_fa": { + "type": "string", + "hidden": true + }, + "homo_sapiens_fa": { + "type": "string", + "hidden": true + }, + "pacbio_mg": { + "type": "string", + "hidden": true + }, + "output_ext": { + "type": "string", + "default": ".fastq.gz", + "hidden": true + }, + "mega_mm2_idx": { + "type": "string", + "hidden": 
true + }, + "mm2_illumina": { + "type": "string", + "default": "-x sr", + "hidden": true + }, + "mm2_pac": { + "type": "string", + "default": "-x map-pb", + "hidden": true + }, + "mm2_ont": { + "type": "string", + "default": "-x map-ont", + "hidden": true + }, + "mm2_output_ext": { + "type": "string", + "default": ".sam", + "hidden": true + }, + "samtools_output_ext": { + "type": "string", + "default": ".fastq", + "hidden": true + }, + "samtools_singletons_ext": { + "type": "string", + "default": ".singleton.fq", + "hidden": true + }, + "samtools_output_suffix": { + "type": "string", + "default": "deconned", + "hidden": true + }, + "output_dir": { + "type": "string", + "default": "contamination/deconned_reads", + "hidden": true + } + }, + "minimap2": { + "index_outdir": { + "type": "string", + "default": "indices", + "hidden": true + }, + "index_ext": { + "type": "string", + "default": ".idx", + "hidden": true + }, + "mapped_paf_ext": { + "type": "string", + "default": ".paf", + "hidden": true + }, + "mapped_sam_ext": { + "type": "string", + "default": ".sam", + "hidden": true + }, + "mapped_outdir": { + "type": "string", + "default": "mapped", + "hidden": true + } + }, + "samtools": { + "bam_ext": { + "type": "string", + "default": ".bam", + "hidden": true + }, + "bai_ext": { + "type": "string", + "default": ".bai", + "hidden": true + } + }, + "racon": { + "consensus_suffix": { + "type": "string", + "default": "_assembly_consensus.fasta", + "hidden": true + }, + "consensus_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "polished", + "hidden": true + } + }, + "pilon": { + "outdir": { + "type": "string", + "default": "pilon", + "hidden": true + }, + "fasta_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "fasta_outdir": { + "type": "string", + "default": "fasta", + "hidden": true + }, + "vcf_ext": { + "type": "string", + "default": ".vcf", + "hidden": true + }, + 
"vcf_outdir": { + "type": "string", + "default": "vcf", + "hidden": true + }, + "changes_ext": { + "type": "string", + "default": ".changes", + "hidden": true + }, + "changes_outdir": { + "type": "string", + "default": "changes", + "hidden": true + }, + "max_memory_multiplier": { + "type": "integer", + "default": 3, + "hidden": true + } + }, + "pilon_iterative": { + "outdir": { + "type": "string", + "default": "pilon", + "hidden": true + }, + "fasta_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "fasta_outdir": { + "type": "string", + "default": "fasta", + "hidden": true + }, + "vcf_ext": { + "type": "string", + "default": ".vcf", + "hidden": true + }, + "vcf_outdir": { + "type": "string", + "default": "vcf", + "hidden": true + }, + "bam_ext": { + "type": "string", + "default": ".bam", + "hidden": true + }, + "bai_ext": { + "type": "string", + "default": ".bai", + "hidden": true + }, + "changes_ext": { + "type": "string", + "default": ".changes", + "hidden": true + }, + "changes_outdir": { + "type": "string", + "default": "changes", + "hidden": true + }, + "max_memory_multiplier": { + "type": "integer", + "default": 3, + "hidden": true + }, + "max_polishing_illumina": { + "type": "integer", + "default": 3, + "hidden": true + }, + "max_polishing_pacbio": { + "type": "integer", + "default": 4, + "hidden": true + }, + "max_polishing_nanopore": { + "type": "integer", + "default": 10, + "hidden": true + } + }, + "unicycler": { + "scaffolds_ext": { + "type": "string", + "default": ".scaffolds.fa.gz", + "hidden": true + }, + "assembly_ext": { + "type": "string", + "default": ".assembly.gfa.gz", + "hidden": true + }, + "log_ext": { + "type": "string", + "default": ".unicycler.log", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "unicycler", + "hidden": true + }, + "mem_modifier": { + "type": "integer", + "default": 1000, + "hidden": true + }, + "threads_increase_factor": { + "type": "integer", + "default": 1, + "hidden": 
true + } + }, + "bandage": { + "svg_ext": { + "type": "string", + "default": ".svg", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "bandage", + "hidden": true + } + }, + "ectyper": { + "log_ext": { + "type": "string", + "default": ".log", + "hidden": true + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "ECTyperSubtyping", + "hidden": true + }, + "args": { + "type": "string", + "default": "{ \"--verify\" }", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "sistr": { + "tsv_ext": { + "type": "string", + "default": ".tab", + "hidden": true + }, + "allele_fasta_ext": { + "type": "string", + "default": "-allele.fasta", + "hidden": true + }, + "allele_json_ext": { + "type": "string", + "default": "-allele.json", + "hidden": true + }, + "cgmlst_ext": { + "type": "string", + "default": "-cgmlst.csv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SISTRSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "lissero": { + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "LISSEROSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "shigeifinder": { + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + }, + "container_version": { + "type": "string", + "default": "1.3.2", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "ShigeifinderSubtyping", + "hidden": true + } + }, + "raw_reads": { + "high_precision": { + "type": "boolean", + "hidden": true + }, + "report_tag": { + "type": "string", 
+ "default": "RawReadSummary", + "hidden": true + } + }, + "seqtk": { + "assembly_fastq": { + "type": "string", + "default": ".fastq.gz", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "Seqtk", + "hidden": true + } + }, + "fastp": { + "report_exclude_fields": { + "type": "string", + "default": "['content_curves', 'quality_curves', 'mean', 'kmer_count', 'histogram', 'overrepresented_sequences']", + "hidden": true + }, + "cut_tail_mean_quality": { + "type": "integer", + "default": 15 + }, + "complexity_threshold": { + "type": "integer", + "default": 20 + }, + "polyg_min_len": { + "type": "integer", + "default": 10 + }, + "polyx_min_len": { + "type": "integer", + "default": 10 + }, + "illumina_length_max": { + "type": "integer", + "default": 400 + }, + "cut_tail_window_size": { + "type": "integer", + "default": 4 + } + }, + "quast": { + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "checkm": { + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "kraken": { + "header_p": { + "type": "boolean", + "hidden": true + }, + "headers": { + "type": "string", + "default": "['PercentID', 'FragmentsRecovered', 'FragmentsAssignmentTaxon', 'RankCode']", + "hidden": true + } + }, + "mlst": { + "args": { + "type": "string", + "hidden": true + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "json_ext": { + "type": "string", + "default": ".json", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SevenGeneMLSTReport", + "hidden": true + } + }, + "mash_meta": { + "report_tag": { + "type": "string", + "default": "MashMeta", + "hidden": true + } + }, + "top_hit_species": { + "report_tag": { + "type": "string", + "default": "SpeciesTopHit", + "hidden": true + } + }, + "kraken_species": { + "report_tag": { + "type": "string", + "default": "Kraken2TopHit", + "hidden": true + } + }, + "subtyping_report": { + "report_tag": { + "type": "string", 
+ "default": "Subtyping", + "hidden": true + } + }, + "kleborate": { + "txt_ext": { + "type": "string", + "default": ".results.txt", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "KleborateSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "spatyper": { + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SpaTyperSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + }, + "repeats": { + "type": "string", + "hidden": true + }, + "repeat_order": { + "type": "string", + "hidden": true + } + }, + "kraken_bin": { + "fasta_ext": { + "type": "string", + "default": "_binned.fasta.gz", + "hidden": true + } + }, + "pointfinder_db_tag": { + "report_tag": { + "type": "string", + "default": "PointfinderDB", + "hidden": true + } + }, + "abricate": { + "args": { + "type": "string", + "hidden": true, + "default": "{ \"\" }" + }, + "report_tag": { + "type": "string", + "default": "Abricate", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + }, + "assembly_status": { + "report_tag": { + "type": "string", + "default": "AssemblyCompleted", + "hidden": true + } + }, + "filtered_reads": { + "threshold": { + "type": "integer", + "default": 1000, + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "MeetsReadThreshold", + "hidden": true + } + }, + "contigs_too_short": { + "report_tag": { + "type": "string", + "default": "MaxContigToShort" + } + }, + "report_aggregate": { + "sample_flat_suffix": { + "type": "string", + "default": "_flat_sample.json", + "hidden": true } -} \ No newline at end of file + } +} From 5cb4fc0e49315459beb87a91b2551013d1e41d23 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 18 Mar 2024 15:02:26 -0500 Subject: [PATCH 02/26] added utility config script --- 
utils/format.py | 194 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100755 utils/format.py diff --git a/utils/format.py b/utils/format.py new file mode 100755 index 00000000..0117971c --- /dev/null +++ b/utils/format.py @@ -0,0 +1,194 @@ +""" +Reformat a mikrokondo json to un-nest dotter parameters + +2024-03-14: Matthew Wells +""" + +from __future__ import annotations +import argparse +import json +import logging +import os +import sys +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + +@dataclass(frozen=True) +class Constants: + delimiter: str = "." + extraction_field: str = "properties" + properties_type: str = "object" + type_field: str = "type" + nesting_field: str = "definitions" + allof_field: str = "allOf" + ref_key: str = "$ref" + + +def drop_all_of_fields(schema_all_of: list, fields: set): + """ + Drop the fields in allOf of the schema that are erased + + schema_all_of list: The allOf list from the schema.json file id'd in the ref keys + fields set: the field values to delete + """ + defs_delete = frozenset([create_all_of_ref(i) for i in fields]) + + return list(filter(lambda x: x[Constants.ref_key] not in defs_delete, schema_all_of)) + +def create_all_of_ref(field): + return f"#/{Constants.nesting_field}/{field}" + + +def denested_information(keys: list[str], last_value: dict) -> dict: + """ + Recursively append new dictionaries with sub information being propagated throughout the dictionary + chain + + keys List[str]: list of keys to recursively implement as dictionaries are chained together + last_value dict: Last data entry to be appended to the chained dictionaries + """ + + if len(keys) == 1: + return last_value + + new_chain: dict = {} + temp = new_chain + + for i in keys[1:-1]: + temp[i] = {} + temp = temp[i] + temp[keys[-1]] = last_value + return new_chain + + +def nest_schema(properties: dict) -> dict: + """Convert a 'dotted' schema into a nested json + e.g. 
+ properties: { + "seqkit.singularity": { + type: "string", + } + } + + into + "properties" : { + "singularity" : { + "type": string + } + } + + properties (dict): an existing list of json properties + """ + + new_dict: dict = {} + poisoned_keys = [] + for key, values in properties.items(): + if Constants.delimiter not in key: + continue + split_key = key.split(Constants.delimiter) + if new_dict.get(split_key[0]) is None: + new_dict[split_key[0]] = {} + denested_data = denested_information(split_key[1:], values) + new_dict[split_key[0]][split_key[1]] = denested_data + poisoned_keys.append(key) + + for i in poisoned_keys: + del properties[i] + + properties.update(new_dict) + return properties + + +def read_json(fp: str) -> json: + """ + Read and return json file. + + input + """ + if not os.path.isfile(fp): + logger.critical("File not found: %s, Bailing.", fp) + sys.exit(1) + with open(fp, encoding="utf8") as in_file: + return json.load(in_file) + + +def nest_properties(schema: dict) -> dict: + """ + Extract all + """ + type_field = schema.get(Constants.type_field) + properties = None + if type_field and type_field == Constants.properties_type: + properties = schema.get(Constants.extraction_field) + if properties is None: + raise KeyError("No properties field in json schema.") + + for k, props in schema[Constants.nesting_field].items(): + new_properties = nest_schema(properties=props[Constants.extraction_field]) + del schema[Constants.nesting_field][k][Constants.extraction_field] + schema[Constants.nesting_field][k][Constants.extraction_field] = new_properties + + new_properties = nest_schema(properties=properties) + del schema[Constants.extraction_field] + schema.update(new_properties) + drop_keys = reorganize_schema(schema[Constants.nesting_field]) + schema[Constants.allof_field] = drop_all_of_fields(schema[Constants.allof_field], drop_keys) + return schema + + +def reorganize_schema(definitions) -> set: + """Take a newly nested schema and merge paramter 
definitions together to prevent errors + + definitions dict: Updated definitions field in a json schema + return drop_keys set: Additional fields to delete from the schema after processing + """ + sub_key_fields = [] + for k, v in definitions.items(): + sub_key_fields.extend([(k, i) for i in v[Constants.extraction_field].keys()]) + main_keys = {i[0] for i in sub_key_fields} + drop_keys: set = set() + for i in sub_key_fields: + if i[1] in main_keys and i[1] != i[0]: + drop_keys.add(i[0]) + definitions[i[1]][Constants.extraction_field][i[1]].update(definitions[i[0]][Constants.extraction_field][i[1]]) + + for i in drop_keys: + del definitions[i] + return drop_keys + +def dump_schema(schema: dict, output_fp: str): + """Dump the updated schema + + schema dict: The updated json schema + output_fp: the location for the new json schema + """ + with open(output_fp, 'w', encoding='utf8') as output_file: + json.dump(schema, output_file, indent=2) + + +def reformat_schema(input_json, output): + """Resolve issues with nested paramters in a nextflow schema.json + + input os.Path: file path to input file + output os.Path: file path to output file + """ + schema_in = read_json(input_json) + updated_schema = nest_properties(schema_in) + dump_schema(updated_schema, output) + +def main(argv=None): + parser = argparse.ArgumentParser(prog=__file__, description="Fromat a nextflow") + parser.add_argument("-i", "--input-file", + type=str, + help="input file", + default=None, + required=True + ) + parser.add_argument("-o", "--output", + required=True) + args = parser.parse_args(argv) + reformat_schema(args.input_file, args.output) + +if __name__ == "__main__": + sys.exit(main()) From 2cd9c6e8d1db0ba536f7fc796e4a6e34f5d6b719 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 18 Mar 2024 15:37:59 -0500 Subject: [PATCH 03/26] commited yml --- .nf-core.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.nf-core.yml b/.nf-core.yml index b03bcea5..2abd222f 100644 --- 
a/.nf-core.yml +++ b/.nf-core.yml @@ -23,7 +23,7 @@ lint: - assets/email_template.txt - assets/sendmail_template.txt - .gitignore - schema_params: False + schema_params: True nextflow_config: False multiqc_config: False template: From 7faa3525feb25f9c5abc1f2e2470007ad81ace57 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 18 Mar 2024 16:03:07 -0500 Subject: [PATCH 04/26] updated schema generic options --- nextflow_schema.json | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c52854df..96f25a93 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -566,6 +566,12 @@ "default": true, "hidden": true }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "fa_icon": "far fa-check-circle", + "hidden": true + }, "show_hidden_params": { "type": "boolean", "fa_icon": "far fa-eye-slash", @@ -1587,9 +1593,6 @@ "$ref": "#/definitions/kraken2" } ], - "validationFailUnrecognisedParams": { - "type": "boolean" - }, "chopper": { "quality": { "type": "integer", From 8bac17e5ccdea825efc6e6b9032bafb411a4c382 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 19 Mar 2024 15:52:44 -0500 Subject: [PATCH 05/26] updated schema, and script for flattening --- nextflow_schema.json | 3628 ++++++++++++++++++++++++++---------------- utils/format.py | 101 +- 2 files changed, 2317 insertions(+), 1412 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 96f25a93..00357973 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", "title": "phac-nml/mikrokondo pipeline parameters", - "description": "mikrokondo Beta", + "description": "mikrokondo schema", "type": "object", "definitions": { "input_output_options": { @@ 
-53,80 +53,83 @@ "description": "Minimum contig length for processing in Bakta" }, "bakta": { - "output_dir": { - "type": "string", - "default": "bakta", - "hidden": true - }, - "embl_ext": { - "type": "string", - "default": ".embl", - "hidden": true - }, - "faa_ext": { - "type": "string", - "default": ".faa", - "hidden": true - }, - "ffn_ext": { - "type": "string", - "default": ".ffn", - "hidden": true - }, - "fna_ext": { - "type": "string", - "default": ".fna", - "hidden": true - }, - "gbff_ext": { - "type": "string", - "default": ".gbff", - "hidden": true - }, - "gff_ext": { - "type": "string", - "default": ".gff3", - "hidden": true - }, - "threads": { - "type": "integer", - "default": 12, - "hidden": true - }, - "hypotheticals_tsv_ext": { - "type": "string", - "default": ".hypotheticals.tsv", - "hidden": true - }, - "hypotheticals_faa_ext": { - "type": "string", - "default": ".hypotheticals.faa", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "min_contig_length": { - "type": "integer", - "default": 200, - "hidden": true - }, - "db": { - "type": "string", - "hidden": true - }, - "args": { - "type": "string", - "hidden": true, - "description": "Additional arguments to pass to bakta", - "default": "{ \"\" }" + "type": "object", + "properties": { + "output_dir": { + "type": "string", + "default": "bakta", + "hidden": true + }, + "embl_ext": { + "type": "string", + "default": ".embl", + "hidden": true + }, + "faa_ext": { + "type": "string", + "default": ".faa", + "hidden": true + }, + "ffn_ext": { + "type": "string", + "default": ".ffn", + "hidden": true + }, + "fna_ext": { + "type": "string", + "default": ".fna", + "hidden": true + }, + "gbff_ext": { + "type": "string", + "default": ".gbff", + "hidden": true + }, + "gff_ext": { + "type": "string", + "default": ".gff3", + "hidden": true + }, + "threads": { + "type": "integer", + 
"default": 12, + "hidden": true + }, + "hypotheticals_tsv_ext": { + "type": "string", + "default": ".hypotheticals.tsv", + "hidden": true + }, + "hypotheticals_faa_ext": { + "type": "string", + "default": ".hypotheticals.faa", + "hidden": true + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "min_contig_length": { + "type": "integer", + "default": 200, + "hidden": true + }, + "db": { + "type": "string", + "hidden": true + }, + "args": { + "type": "string", + "hidden": true, + "description": "Additional arguments to pass to bakta", + "default": "{ \"\" }" + } }, "docker": { "type": "string", @@ -196,66 +199,77 @@ "type": "boolean" }, "fastp": { - "html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "average_quality_e": { - "type": "integer", - "default": 25, - "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "qualified_quality_phred": { - "type": "integer", - "default": 15, - "description": "Phred score to be considered qualified. See FastP docs for more details.", - "hidden": true - }, - "unqualified_percent_limit": { - "type": "integer", - "default": 40, - "description": "Percent of bases in a read to be qualified for the read to be included. 
See FastP docs for more details.", - "hidden": true - }, - "fastq_ext": { - "type": "string", - "default": ".trimmed.fastq.gz", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "FastP", - "hidden": true - }, - "illumina_length_min": { - "type": "integer", - "default": 35, - "description": "Minimum length of a read to be included in later analysis for illumina data.", - "hidden": true - }, - "single_end_length_min": { - "type": "integer", - "default": 1000, - "description": "Minimum leng of a read to be used in later analysis (for Nanopore or Pacbio)", - "hidden": true - }, - "dedup_reads": { - "type": "boolean", - "description": "Option to turn on read de-duplication.", - "hidden": true - }, - "args": { - "single_end": { + "type": "object", + "properties": { + "html_ext": { + "type": "string", + "default": ".html", + "hidden": true + }, + "average_quality_e": { + "type": "integer", + "default": 25, + "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", + "hidden": true + }, + "json_ext": { + "type": "string", + "default": ".json", + "hidden": true + }, + "qualified_quality_phred": { + "type": "integer", + "default": 15, + "description": "Phred score to be considered qualified. See FastP docs for more details.", + "hidden": true + }, + "unqualified_percent_limit": { + "type": "integer", + "default": 40, + "description": "Percent of bases in a read to be qualified for the read to be included. 
See FastP docs for more details.", + "hidden": true + }, + "fastq_ext": { "type": "string", - "default": "--overrepresentation_analysis -Q --length_required 1000", + "default": ".trimmed.fastq.gz", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "FastP", + "hidden": true + }, + "illumina_length_min": { + "type": "integer", + "default": 35, + "description": "Minimum length of a read to be included in later analysis for illumina data.", "hidden": true + }, + "single_end_length_min": { + "type": "integer", + "default": 1000, + "description": "Minimum leng of a read to be used in later analysis (for Nanopore or Pacbio)", + "hidden": true + }, + "dedup_reads": { + "type": "boolean", + "description": "Option to turn on read de-duplication.", + "hidden": true + }, + "args": { + "type": "object", + "properties": { + "illumina": { + "type": "string", + "default": "--overrepresentation_analysis --trim_poly_g --poly_g_min_len 10 --trim_poly_x --poly_x_min_len 10 --cut_tail --cut_tail_window_size 4 --cut_tail_mean_quality 15 --low_complexity_filter --complexity_threshold 20 --average_qual 25 --qualified_quality_phred 15 --unqualified_percent_limit 40 --length_limit 400 --length_required 35 --detect_adapter_for_pe", + "hidden": true + }, + "single_end": { + "type": "string", + "default": "--overrepresentation_analysis -Q --length_required 1000", + "hidden": true + } + } } }, "docker": { @@ -284,68 +298,71 @@ "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" }, "mash": { - "mash_ext": { - "type": "string", - "default": ".screen", - "hidden": true - }, - "output_reads_ext": { - "type": "string", - "default": ".reads.screen", - "hidden": true - }, - "output_taxa_ext": { - "type": "string", - "default": ".taxa.screen", - "hidden": true - }, - "output_dir": { - "type": "string", - "default": "contamination", - "hidden": true - }, - "mash_sketch": { - "type": "string", - "hidden": true - }, - "sketch_ext": { - 
"type": "string", - "default": ".msh", - "hidden": true - }, - "sketch_kmer_size": { - "type": "integer", - "default": 21, - "hidden": true - }, - "final_sketch_name": { - "type": "string", - "default": "GTDB_sketch", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "min_kmer": { - "type": "integer", - "default": 10, - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "Mash", - "hidden": true - }, - "header_p": { - "type": "boolean", - "hidden": true - }, - "headers": { - "type": "string", - "default": "['identity', 'Shared Hashes', 'Median Multiplicity', 'P-Value', 'Query ID', 'Query Note']", - "hidden": true + "type": "object", + "properties": { + "mash_ext": { + "type": "string", + "default": ".screen", + "hidden": true + }, + "output_reads_ext": { + "type": "string", + "default": ".reads.screen", + "hidden": true + }, + "output_taxa_ext": { + "type": "string", + "default": ".taxa.screen", + "hidden": true + }, + "output_dir": { + "type": "string", + "default": "contamination", + "hidden": true + }, + "mash_sketch": { + "type": "string", + "hidden": true + }, + "sketch_ext": { + "type": "string", + "default": ".msh", + "hidden": true + }, + "sketch_kmer_size": { + "type": "integer", + "default": 21, + "hidden": true + }, + "final_sketch_name": { + "type": "string", + "default": "GTDB_sketch", + "hidden": true + }, + "json_ext": { + "type": "string", + "default": ".json", + "hidden": true + }, + "min_kmer": { + "type": "integer", + "default": 10, + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "Mash", + "hidden": true + }, + "header_p": { + "type": "boolean", + "hidden": true + }, + "headers": { + "type": "string", + "default": "['identity', 'Shared Hashes', 'Median Multiplicity', 'P-Value', 'Query ID', 'Query Note']", + "hidden": true + } }, "docker": { "type": "string", @@ -372,46 +389,49 @@ "default": 1000 }, "quast": { - "suffix": { - "type": "string", - 
"default": "quast", - "hidden": true - }, - "report_base": { - "type": "string", - "default": "report", - "hidden": true - }, - "report_prefix": { - "type": "string", - "default": "transposed_", - "hidden": true - }, - "report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "QUAST", - "hidden": true - }, - "min_contig_length": { - "type": "integer", - "default": 1000, - "description": "Minimum contig length to be used my Quast.", - "hidden": true - }, - "args": { - "type": "string", - "default": "{ \"--min-contig ${params.quast.min_contig_length} --report-all-metrics\" }", - "hidden": true - }, - "contigs_field": { - "type": "string", - "default": "# contigs", - "hidden": true + "type": "object", + "properties": { + "suffix": { + "type": "string", + "default": "quast", + "hidden": true + }, + "report_base": { + "type": "string", + "default": "report", + "hidden": true + }, + "report_prefix": { + "type": "string", + "default": "transposed_", + "hidden": true + }, + "report_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "QUAST", + "hidden": true + }, + "min_contig_length": { + "type": "integer", + "default": 1000, + "description": "Minimum contig length to be used my Quast.", + "hidden": true + }, + "args": { + "type": "string", + "default": "{ \"--min-contig ${params.quast.min_contig_length} --report-all-metrics\" }", + "hidden": true + }, + "contigs_field": { + "type": "string", + "default": "# contigs", + "hidden": true + } }, "docker": { "type": "string", @@ -566,12 +586,6 @@ "default": true, "hidden": true }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "fa_icon": "far fa-check-circle", - "hidden": true - }, "show_hidden_params": { "type": "boolean", "fa_icon": "far fa-eye-slash", @@ -788,193 +802,935 @@ 
"default": "", "properties": { "seqkit": { - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "fasta_ext": { - "type": "string", - "default": "_filtered.fasta.gz", - "hidden": true - }, - "filter_field": { - "type": "string", - "default": "max_len", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "Seqkit_stats", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - } - } - }, - "coveragecalculation": { - "title": "CoverageCalculation", - "type": "object", - "description": "", - "default": "", - "properties": { - "coverage_calc_fields": { - "fixed_cov": { - "type": "string", - "default": "FixedGenomeSizeDepth", - "hidden": true - }, - "auto_cov": { - "type": "string", - "default": "DetectedGenomeSizeDepth", - "hidden": true - }, - "bp_field": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'total_bp']", - "hidden": true - } - } - } - }, - "qcreport": { - "title": "QCReport", - "type": "object", - "description": "", - "default": "", - "properties": { - "QCReport": { - "escherichia": { - "fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - } - }, - "salmonella": { - "fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - } - }, - "shigella": { - "fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - } - }, - "listeria": { - "fixed_genome_size": { - "type": "integer", - "default": 3000000, - "hidden": true - } - }, - "klebsiella": { - "fixed_genome_size": { - "type": "integer", - "default": 6000000, - "hidden": true - } - }, - "staphylococcus": { - "fixed_genome_size": { - "type": "integer", - 
"default": 3500000, - "hidden": true - } - }, - "fallthrough": { - "fixed_genome_size": { + "type": "object", + "properties": { + "singularity": { "type": "string", + "default": "https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0", "hidden": true - } - }, - "campylobacter_jejuni": { - "min_average_coverage": { - "type": "integer", - "default": 30, + }, + "docker": { + "type": "string", + "default": "quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0", "hidden": true - } - }, - "campylobacter_coli": { - "min_average_coverage": { - "type": "integer", - "default": 30, + }, + "report_ext": { + "type": "string", + "default": ".tsv", "hidden": true - } - }, - "vibrio_cholerae": { - "min_average_coverage": { - "type": "integer", - "default": 40, + }, + "fasta_ext": { + "type": "string", + "default": "_filtered.fasta.gz", "hidden": true - } - } - }, - "QCReportFields": { - "raw_average_quality": { - "low_msg": { + }, + "filter_field": { "type": "string", - "default": "Base quality is poor, resequencing is recommended.", + "default": "max_len", "hidden": true - } - }, - "average_coverage": { - "low_msg": { + }, + "report_tag": { "type": "string", - "default": "Depth of coverage from assembly is lower than than expected. 
A top-up run is likely needed.", + "default": "Seqkit_stats", "hidden": true - } - }, - "metagenomic": { - "on": { + }, + "header_p": { "type": "boolean", + "default": true, "hidden": true } - }, - "n50_value": { - "high_msg": { + } + } + } + }, + "coveragecalculation": { + "title": "CoverageCalculation", + "type": "object", + "description": "", + "default": "", + "properties": { + "coverage_calc_fields": { + "type": "object", + "properties": { + "fixed_cov": { "type": "string", - "default": "N50 value is high, this is likely a good thing if you have fewer contigs than expected.", + "default": "FixedGenomeSizeDepth", "hidden": true - } - }, - "nr_contigs": { - "high_msg": { + }, + "auto_cov": { "type": "string", - "default": "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample.", + "default": "DetectedGenomeSizeDepth", "hidden": true - } - }, - "length": { - "high_msg": { + }, + "bp_field": { "type": "string", - "default": "Genome length is higher than expected, contmination is potentially present.", + "default": "['RawReadSummary', 'combined', 'total_bp']", "hidden": true } - }, - "checkm_contamination": { - "high_msg": { - "type": "string", - "default": "Potential contamination is present in your sample. 
You may need to reisolate and resequence your sample.", - "hidden": true + } + } + } + }, + "qcreport": { + "title": "QCReport", + "type": "object", + "description": "", + "default": "", + "properties": { + "QCReport": { + "type": "object", + "properties": { + "escherichia": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Escherichia coli", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 50000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 6000000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 500, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 4500000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 6000000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + "default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 40, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 5000000, + "hidden": true + } + } + }, + "salmonella": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Salmonella", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 90000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 6000000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 200, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 4400000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 6000000, + "hidden": true + }, + "max_checkm_contamination": 
{ + "type": "number", + "default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 40, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 5000000, + "hidden": true + } + } + }, + "shigella": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Shigella", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 18000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 5000000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 500, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 4300000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 5000000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + "default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 40, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 5000000, + "hidden": true + } + } + }, + "listeria": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Listeria", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 50000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 3200000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 200, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 2700000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 3200000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + 
"default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 3000000, + "hidden": true + } + } + }, + "klebsiella": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Klebsiella", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 100000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 6000000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 500, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 4500000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 6000000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + "default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 6000000, + "hidden": true + } + } + }, + "staphylococcus": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Staphylococcus", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 100000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 3500000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 550, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 2000000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 3500000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + 
"default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 3500000, + "hidden": true + } + } + }, + "fallthrough": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "No organism specific QC data available.", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "hidden": true + }, + "max_n50": { + "type": "integer", + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "hidden": true + }, + "min_length": { + "type": "integer", + "hidden": true + }, + "max_length": { + "type": "integer", + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + "default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + }, + "fixed_genome_size": { + "type": "string", + "hidden": true + } + } + }, + "campylobacter_jejuni": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Campylobacter jejuni", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 100000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 2000000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 150, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 1800000, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 1400000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 2000000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + "default": 3, + 
"hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + } + } + }, + "campylobacter_coli": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Campylobacter coli", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 100000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 2000000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 150, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 1800000, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 1400000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 2000000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + "default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 30, + "hidden": true + } + } + }, + "vibrio_cholerae": { + "type": "object", + "properties": { + "search": { + "type": "string", + "default": "Vibrio cholerae", + "hidden": true + }, + "raw_average_quality": { + "type": "integer", + "default": 30, + "hidden": true + }, + "min_n50": { + "type": "integer", + "default": 100000, + "hidden": true + }, + "max_n50": { + "type": "integer", + "default": 4300000, + "hidden": true + }, + "min_nr_contigs": { + "type": "integer", + "default": 1, + "hidden": true + }, + "max_nr_contigs": { + "type": "integer", + "default": 150, + "hidden": true + }, + "fixed_genome_size": { + "type": "integer", + "default": 4000000, + "hidden": true + }, + "min_length": { + "type": "integer", + "default": 3800000, + "hidden": true + }, + "max_length": { + "type": "integer", + "default": 4300000, + "hidden": true + }, + "max_checkm_contamination": { + "type": "number", + 
"default": 3, + "hidden": true + }, + "min_average_coverage": { + "type": "integer", + "default": 40, + "hidden": true + } + } + } + } + }, + "QCReportFields": { + "type": "object", + "properties": { + "raw_average_quality": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['RawReadSummary', 'combined', 'qual_mean']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Float", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "['raw_average_quality']", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "ge", + "hidden": true + }, + "on": { + "type": "boolean", + "default": true, + "hidden": true + }, + "low_msg": { + "type": "string", + "default": "Base quality is poor, resequencing is recommended.", + "hidden": true + } + } + }, + "average_coverage": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['FixedGenomeSizeDepth']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Float", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "['min_average_coverage']", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "ge", + "hidden": true + }, + "on": { + "type": "boolean", + "default": true, + "hidden": true + }, + "low_msg": { + "type": "string", + "default": "Depth of coverage from assembly is lower than than expected. 
A top-up run is likely needed.", + "hidden": true + } + } + }, + "metagenomic": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['MashMeta']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Bool", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "[]", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "bool", + "hidden": true + }, + "on": { + "type": "boolean", + "hidden": true + } + } + }, + "n50_value": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['QUAST', '0', 'N50']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Integer", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "['min_n50', 'max_n50']", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "range", + "hidden": true + }, + "on": { + "type": "boolean", + "default": true, + "hidden": true + }, + "low_msg": { + "type": "string", + "default": "N50 value is low, this could be due to many reasons involving contamination, poor template quality or insufficient template quantity. 
Reisolation and reseqeuncing may be needed.", + "hidden": true + }, + "high_msg": { + "type": "string", + "default": "N50 value is high, this is likely a good thing if you have fewer contigs than expected.", + "hidden": true + } + } + }, + "nr_contigs": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['QUAST', '0', '# contigs']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Integer", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "['min_nr_contigs', 'max_nr_contigs']", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "range", + "hidden": true + }, + "on": { + "type": "boolean", + "default": true, + "hidden": true + }, + "low_msg": { + "type": "string", + "default": "Fewer contigs than expected, if your genome length is of an expected size and you have a high N50 you likely just have a high quality assembly.", + "hidden": true + }, + "high_msg": { + "type": "string", + "default": "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample.", + "hidden": true + } + } + }, + "length": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['QUAST', '0', 'Total length']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Integer", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "['min_length', 'max_length']", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "range", + "hidden": true + }, + "on": { + "type": "boolean", + "default": true, + "hidden": true + }, + "low_msg": { + "type": "string", + "default": "Genome length lower than expected, you may need to resequence the sample.", + "hidden": true + }, + "high_msg": { + "type": "string", + "default": "Genome length is higher than expected, contmination is 
potentially present.", + "hidden": true + } + } + }, + "checkm_contamination": { + "type": "object", + "properties": { + "path": { + "type": "string", + "default": "['CheckM', '0', 'Contamination']", + "hidden": true + }, + "coerce_type": { + "type": "string", + "default": "Float", + "hidden": true + }, + "compare_fields": { + "type": "string", + "default": "['max_checkm_contamination']", + "hidden": true + }, + "comp_type": { + "type": "string", + "default": "le", + "hidden": true + }, + "on": { + "type": "boolean", + "default": true, + "hidden": true + }, + "high_msg": { + "type": "string", + "default": "Potential contamination is present in your sample. You may need to reisolate and resequence your sample.", + "hidden": true + } + } } } } @@ -987,20 +1743,23 @@ "default": "", "properties": { "seqtk_size": { - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - }, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "SeqtkBaseCount", - "hidden": true + "type": "object", + "properties": { + "singularity": { + "type": "string", + "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", + "hidden": true + }, + "docker": { + "type": "string", + "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SeqtkBaseCount", + "hidden": true + } } } } @@ -1012,26 +1771,29 @@ "default": "", "properties": { "medaka": { - "model": { - "type": "string", - "hidden": true, - "description": "This is set to the base calling model specified in the nanopore_chemistry param" - }, - "fasta_ext": { - "type": "string", - "default": ".fa.gz", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "medaka", - "hidden": true - }, - "batch_size": { - "type": "integer", - "default": 
5, - "description": "Batch size for medaka to use for processing.", - "hidden": true + "type": "object", + "properties": { + "model": { + "type": "string", + "hidden": true, + "description": "This is set to the base calling model specified in the nanopore_chemistry param" + }, + "fasta_ext": { + "type": "string", + "default": ".fa.gz", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "medaka", + "hidden": true + }, + "batch_size": { + "type": "integer", + "default": 5, + "description": "Batch size for medaka to use for processing.", + "hidden": true + } }, "docker": { "type": "string", @@ -1053,54 +1815,57 @@ "default": "", "properties": { "staramr": { - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/staramr%3A0.9.1--pyhdfd78af_0", - "hidden": true - }, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/staramr:0.9.1--pyhdfd78af_0", - "hidden": true - }, - "db": { - "type": "string", - "description": "Path to a StarAMR database, a database is included in the container." 
- }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "xlsx_ext": { - "type": "string", - "default": ".xlsx", - "hidden": true - }, - "args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "point_finder_dbs": { - "type": "string", - "default": "['salmonella', 'campylobacter', 'enterococcus_faecalis', 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "StarAMR", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true + "type": "object", + "properties": { + "singularity": { + "type": "string", + "default": "https://depot.galaxyproject.org/singularity/staramr%3A0.9.1--pyhdfd78af_0", + "hidden": true + }, + "docker": { + "type": "string", + "default": "quay.io/biocontainers/staramr:0.9.1--pyhdfd78af_0", + "hidden": true + }, + "db": { + "type": "string", + "description": "Path to a StarAMR database, a database is included in the container." 
+ }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "xlsx_ext": { + "type": "string", + "default": ".xlsx", + "hidden": true + }, + "args": { + "type": "string", + "hidden": true, + "default": "{ \"\" }" + }, + "point_finder_dbs": { + "type": "string", + "default": "['salmonella', 'campylobacter', 'enterococcus_faecalis', 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "StarAMR", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } } } } @@ -1112,40 +1877,43 @@ "default": "", "properties": { "mobsuite_recon": { - "args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "fasta_ext": { - "type": "string", - "default": ".fasta", - "hidden": true - }, - "results_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "mob_results_file": { - "type": "string", - "default": "mobtyper_results.txt", - "hidden": true - }, - "contig_report": { - "type": "string", - "default": "contig_report.txt", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "MobRecon", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true + "type": "object", + "properties": { + "args": { + "type": "string", + "hidden": true, + "default": "{ \"\" }" + }, + "fasta_ext": { + "type": "string", + "default": ".fasta", + "hidden": true + }, + "results_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "mob_results_file": { + "type": "string", + "default": "mobtyper_results.txt", + "hidden": true + }, + "contig_report": { + "type": "string", + "default": "contig_report.txt", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "MobRecon", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + 
"hidden": true + } } } } @@ -1157,54 +1925,57 @@ "default": "", "properties": { "kat": { - "hist_ext": { - "type": "string", - "default": ".hist", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".hist.dist_analysis.json", - "hidden": true - }, - "png_ext": { - "type": "string", - "default": ".png", - "hidden": true - }, - "postscript_ext": { - "type": "string", - "default": ".ps", - "hidden": true + "type": "object", + "properties": { + "hist_ext": { + "type": "string", + "default": ".hist", + "hidden": true + }, + "json_ext": { + "type": "string", + "default": ".hist.dist_analysis.json", + "hidden": true + }, + "png_ext": { + "type": "string", + "default": ".png", + "hidden": true + }, + "postscript_ext": { + "type": "string", + "default": ".ps", + "hidden": true + }, + "output_type": { + "type": "string", + "default": "png", + "hidden": true + }, + "pdf_ext": { + "type": "string", + "default": ".pdf", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "KatHist", + "hidden": true + }, + "jfhash_ext": { + "type": "string", + "default": ".jf", + "hidden": true + } }, - "output_type": { + "docker": { "type": "string", - "default": "png", + "default": "quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", "hidden": true }, - "pdf_ext": { + "singularity": { "type": "string", - "default": ".pdf", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "KatHist", - "hidden": true - }, - "jfhash_ext": { - "type": "string", - "default": ".jf", - "hidden": true - }, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2", + "default": "https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2", "hidden": true } } @@ -1217,25 +1988,28 @@ "default": "", "properties": { "opt_platforms": { - "ont": { - "type": "string", - 
"default": "nanopore", - "hidden": true - }, - "pacbio": { - "type": "string", - "default": "pacbio", - "hidden": true - }, - "hybrid": { - "type": "string", - "default": "hybrid", - "hidden": true - }, - "illumina": { - "type": "string", - "default": "illumina", - "hidden": true + "type": "object", + "properties": { + "ont": { + "type": "string", + "default": "nanopore", + "hidden": true + }, + "pacbio": { + "type": "string", + "default": "pacbio", + "hidden": true + }, + "hybrid": { + "type": "string", + "default": "hybrid", + "hidden": true + }, + "illumina": { + "type": "string", + "default": "illumina", + "hidden": true + } } } } @@ -1247,15 +2021,18 @@ "default": "", "properties": { "seqtk": { - "seed": { - "type": "integer", - "default": 42, - "hidden": true - }, - "reads_ext": { - "type": "string", - "default": "_sampled.fastq.gz", - "hidden": true + "type": "object", + "properties": { + "seed": { + "type": "integer", + "default": 42, + "hidden": true + }, + "reads_ext": { + "type": "string", + "default": "_sampled.fastq.gz", + "hidden": true + } }, "docker": { "type": "string", @@ -1277,60 +2054,89 @@ "default": "", "properties": { "flye": { - "nanopore": { - "raw": { + "type": "object", + "properties": { + "nanopore": { + "type": "object", + "properties": { + "corr": { + "type": "string", + "default": "--nano-corr", + "hidden": true + }, + "hq": { + "type": "string", + "default": "--nano-hq", + "hidden": true + }, + "raw": { + "type": "string", + "default": "--nano-raw", + "hidden": true + } + } + }, + "pacbio": { + "type": "object", + "properties": { + "raw": { + "type": "string", + "default": "--pacbio-raw", + "hidden": true + }, + "corr": { + "type": "string", + "default": "--pacbio-corr", + "hidden": true + }, + "hq": { + "type": "string", + "default": "--pacbio-hifi", + "hidden": true + } + } + }, + "gfa_ext": { "type": "string", - "default": "--nano-raw", + "default": ".gfa.gz", "hidden": true - } - }, - "pacbio": { - "hq": { + }, + "gv_ext": { + 
"type": "string", + "default": ".gv.gz", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "log_ext": { + "type": "string", + "default": ".log", + "hidden": true + }, + "json_ext": { + "type": "string", + "default": ".json", + "hidden": true + }, + "polishing_iterations": { + "type": "integer", + "default": 1, + "hidden": true + }, + "fasta_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "args": { "type": "string", - "default": "--pacbio-hifi", + "default": "{ \"--iterations ${params.flye.polishing_iterations}\" }", "hidden": true } }, - "gfa_ext": { - "type": "string", - "default": ".gfa.gz", - "hidden": true - }, - "gv_ext": { - "type": "string", - "default": ".gv.gz", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "polishing_iterations": { - "type": "integer", - "default": 1, - "hidden": true - }, - "fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "args": { - "type": "string", - "default": "{ \"--iterations ${params.flye.polishing_iterations}\" }", - "hidden": true - }, "docker": { "type": "string", "default": "quay.io/biocontainers/flye:2.9.2--py39h6935b12_0", @@ -1351,40 +2157,43 @@ "default": "", "properties": { "spades": { - "scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fasta.gz", - "hidden": true - }, - "contigs_ext": { - "type": "string", - "default": ".contigs.fasta.gz", - "hidden": true - }, - "transcripts_ext": { - "type": "string", - "default": ".transcripts.fasta.gz", - "hidden": true - }, - "gene_clusters_ext": { - "type": "string", - "default": ".gene_clusters.fasta.gz", - "hidden": true - }, - "assembly_graphs_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true - }, - "log_ext": { 
- "type": "string", - "default": ".log", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "assembly", - "hidden": true + "type": "object", + "properties": { + "scaffolds_ext": { + "type": "string", + "default": ".scaffolds.fasta.gz", + "hidden": true + }, + "contigs_ext": { + "type": "string", + "default": ".contigs.fasta.gz", + "hidden": true + }, + "transcripts_ext": { + "type": "string", + "default": ".transcripts.fasta.gz", + "hidden": true + }, + "gene_clusters_ext": { + "type": "string", + "default": ".gene_clusters.fasta.gz", + "hidden": true + }, + "assembly_graphs_ext": { + "type": "string", + "default": ".assembly.gfa.gz", + "hidden": true + }, + "log_ext": { + "type": "string", + "default": ".log", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "assembly", + "hidden": true + } }, "docker": { "type": "string", @@ -1406,40 +2215,43 @@ "default": "", "properties": { "checkm": { - "alignment_ext": { - "type": "string", - "default": "-genes.aln", - "hidden": true - }, - "results_ext": { - "type": "string", - "default": "-results.txt", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "folder_name": { - "type": "string", - "default": "checkm", - "hidden": true - }, - "gzip_ext": { - "type": "string", - "default": ".gz", - "hidden": true - }, - "lineage_ms": { - "type": "string", - "default": "lineage.ms", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "CheckM", - "hidden": true + "type": "object", + "properties": { + "alignment_ext": { + "type": "string", + "default": "-genes.aln", + "hidden": true + }, + "results_ext": { + "type": "string", + "default": "-results.txt", + "hidden": true + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "folder_name": { + "type": "string", + "default": "checkm", + "hidden": true + }, + "gzip_ext": { + "type": "string", + "default": ".gz", + "hidden": true + }, + "lineage_ms": 
{ + "type": "string", + "default": "lineage.ms", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "CheckM", + "hidden": true + } }, "docker": { "type": "string", @@ -1461,59 +2273,79 @@ "default": "", "properties": { "kraken": { - "db": { - "type": "string", - "description": "Path to Kraken2 database (do not use symlinks)" - }, - "classified_suffix": { - "type": "string", - "default": "classified", - "hidden": true - }, - "unclassified_suffix": { - "type": "string", - "default": "unclassified", - "hidden": true - }, - "report_suffix": { - "type": "string", - "default": "report", - "hidden": true - }, - "output_suffix": { - "type": "string", - "default": "output", - "hidden": true - }, - "save_output_fastqs": { - "type": "boolean", - "hidden": true - }, - "save_reads_assignments": { - "type": "boolean", - "default": true, - "hidden": true + "type": "object", + "properties": { + "db": { + "type": "string", + "description": "Path to Kraken2 database (do not use symlinks)" + }, + "classified_suffix": { + "type": "string", + "default": "classified", + "hidden": true + }, + "unclassified_suffix": { + "type": "string", + "default": "unclassified", + "hidden": true + }, + "report_suffix": { + "type": "string", + "default": "report", + "hidden": true + }, + "output_suffix": { + "type": "string", + "default": "output", + "hidden": true + }, + "save_output_fastqs": { + "type": "boolean", + "hidden": true + }, + "save_reads_assignments": { + "type": "boolean", + "default": true, + "hidden": true + }, + "run_kraken_quick": { + "type": "boolean", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "KrakenReport", + "hidden": true + }, + "tophit_level": { + "type": "string", + "default": "S", + "hidden": true + } }, - "run_kraken_quick": { + "header_p": { "type": "boolean", "hidden": true }, - "report_tag": { - "type": "string", - "default": "KrakenReport", - "hidden": true - }, - "tophit_level": { + "headers": { "type": "string", - 
"default": "S", + "default": "['PercentID', 'FragmentsRecovered', 'FragmentsAssignmentTaxon', 'RankCode']", "hidden": true } }, "kraken_bin": { - "taxonomic_level": { + "type": "object", + "properties": { + "taxonomic_level": { + "type": "string", + "default": "G", + "description": "Taxonomic level to bin contigs at." + } + }, + "fasta_ext": { "type": "string", - "default": "G", - "description": "Taxonomic level to bin contigs at." + "default": "_binned.fasta.gz", + "hidden": true } } } @@ -1593,641 +2425,655 @@ "$ref": "#/definitions/kraken2" } ], - "chopper": { - "quality": { - "type": "integer", - "default": 0, - "hidden": true - }, - "minlength": { - "type": "integer", - "default": 100, - "hidden": true - }, - "fastq_ext": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - } - }, - "fastqc": { - "html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "zip_ext": { - "type": "string", - "default": ".zip", - "hidden": true - } - }, - "r_contaminants": { - "phix_fa": { - "type": "string", - "hidden": true - }, - "homo_sapiens_fa": { - "type": "string", - "hidden": true - }, - "pacbio_mg": { - "type": "string", - "hidden": true - }, - "output_ext": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - }, - "mega_mm2_idx": { - "type": "string", - "hidden": true - }, - "mm2_illumina": { - "type": "string", - "default": "-x sr", - "hidden": true - }, - "mm2_pac": { - "type": "string", - "default": "-x map-pb", - "hidden": true - }, - "mm2_ont": { - "type": "string", - "default": "-x map-ont", - "hidden": true - }, - "mm2_output_ext": { - "type": "string", - "default": ".sam", - "hidden": true - }, - "samtools_output_ext": { - "type": "string", - "default": ".fastq", - "hidden": true - }, - "samtools_singletons_ext": { - "type": "string", - "default": ".singleton.fq", - "hidden": true - }, - "samtools_output_suffix": { - "type": "string", - "default": "deconned", - "hidden": true - }, - "output_dir": { - "type": 
"string", - "default": "contamination/deconned_reads", - "hidden": true - } - }, - "minimap2": { - "index_outdir": { - "type": "string", - "default": "indices", - "hidden": true - }, - "index_ext": { - "type": "string", - "default": ".idx", - "hidden": true - }, - "mapped_paf_ext": { - "type": "string", - "default": ".paf", - "hidden": true - }, - "mapped_sam_ext": { - "type": "string", - "default": ".sam", - "hidden": true - }, - "mapped_outdir": { - "type": "string", - "default": "mapped", - "hidden": true - } - }, - "samtools": { - "bam_ext": { - "type": "string", - "default": ".bam", - "hidden": true - }, - "bai_ext": { - "type": "string", - "default": ".bai", - "hidden": true - } - }, - "racon": { - "consensus_suffix": { - "type": "string", - "default": "_assembly_consensus.fasta", - "hidden": true - }, - "consensus_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "polished", - "hidden": true - } - }, - "pilon": { - "outdir": { - "type": "string", - "default": "pilon", - "hidden": true - }, - "fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "fasta_outdir": { - "type": "string", - "default": "fasta", - "hidden": true - }, - "vcf_ext": { - "type": "string", - "default": ".vcf", - "hidden": true - }, - "vcf_outdir": { - "type": "string", - "default": "vcf", - "hidden": true - }, - "changes_ext": { - "type": "string", - "default": ".changes", - "hidden": true - }, - "changes_outdir": { - "type": "string", - "default": "changes", - "hidden": true - }, - "max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - } - }, - "pilon_iterative": { - "outdir": { - "type": "string", - "default": "pilon", - "hidden": true - }, - "fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "fasta_outdir": { - "type": "string", - "default": "fasta", - "hidden": true - }, - "vcf_ext": { - "type": "string", - "default": 
".vcf", - "hidden": true - }, - "vcf_outdir": { - "type": "string", - "default": "vcf", - "hidden": true - }, - "bam_ext": { - "type": "string", - "default": ".bam", - "hidden": true - }, - "bai_ext": { - "type": "string", - "default": ".bai", - "hidden": true - }, - "changes_ext": { - "type": "string", - "default": ".changes", - "hidden": true - }, - "changes_outdir": { - "type": "string", - "default": "changes", - "hidden": true - }, - "max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - }, - "max_polishing_illumina": { - "type": "integer", - "default": 3, - "hidden": true - }, - "max_polishing_pacbio": { - "type": "integer", - "default": 4, - "hidden": true - }, - "max_polishing_nanopore": { - "type": "integer", - "default": 10, - "hidden": true - } - }, - "unicycler": { - "scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fa.gz", - "hidden": true - }, - "assembly_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true - }, - "log_ext": { - "type": "string", - "default": ".unicycler.log", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "unicycler", - "hidden": true - }, - "mem_modifier": { - "type": "integer", - "default": 1000, - "hidden": true - }, - "threads_increase_factor": { - "type": "integer", - "default": 1, - "hidden": true - } - }, - "bandage": { - "svg_ext": { - "type": "string", - "default": ".svg", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "bandage", - "hidden": true - } - }, - "ectyper": { - "log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "ECTyperSubtyping", - "hidden": true - }, - "args": { - "type": "string", - "default": "{ \"--verify\" }", - "hidden": true - }, - "header_p": { - "type": "boolean", - 
"default": true, - "hidden": true - } - }, - "sistr": { - "tsv_ext": { - "type": "string", - "default": ".tab", - "hidden": true - }, - "allele_fasta_ext": { - "type": "string", - "default": "-allele.fasta", - "hidden": true - }, - "allele_json_ext": { - "type": "string", - "default": "-allele.json", - "hidden": true - }, - "cgmlst_ext": { - "type": "string", - "default": "-cgmlst.csv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "SISTRSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "lissero": { - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "LISSEROSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "shigeifinder": { - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "container_version": { - "type": "string", - "default": "1.3.2", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "ShigeifinderSubtyping", - "hidden": true - } - }, - "raw_reads": { - "high_precision": { - "type": "boolean", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "RawReadSummary", - "hidden": true - } - }, - "seqtk": { - "assembly_fastq": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "Seqtk", - "hidden": true - } - }, - "fastp": { - "report_exclude_fields": { - "type": "string", - "default": "['content_curves', 'quality_curves', 'mean', 'kmer_count', 'histogram', 'overrepresented_sequences']", - "hidden": true - }, - "cut_tail_mean_quality": { - "type": "integer", - "default": 15 - }, - "complexity_threshold": { - "type": "integer", - "default": 20 - }, - "polyg_min_len": { - "type": "integer", - "default": 10 - }, - 
"polyx_min_len": { - "type": "integer", - "default": 10 - }, - "illumina_length_max": { - "type": "integer", - "default": 400 - }, - "cut_tail_window_size": { - "type": "integer", - "default": 4 - } - }, - "quast": { - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "checkm": { - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "kraken": { - "header_p": { - "type": "boolean", - "hidden": true - }, - "headers": { - "type": "string", - "default": "['PercentID', 'FragmentsRecovered', 'FragmentsAssignmentTaxon', 'RankCode']", - "hidden": true - } - }, - "mlst": { - "args": { - "type": "string", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "SevenGeneMLSTReport", - "hidden": true - } - }, - "mash_meta": { - "report_tag": { - "type": "string", - "default": "MashMeta", - "hidden": true - } - }, - "top_hit_species": { - "report_tag": { - "type": "string", - "default": "SpeciesTopHit", - "hidden": true - } - }, - "kraken_species": { - "report_tag": { - "type": "string", - "default": "Kraken2TopHit", - "hidden": true - } - }, - "subtyping_report": { - "report_tag": { - "type": "string", - "default": "Subtyping", - "hidden": true - } - }, - "kleborate": { - "txt_ext": { - "type": "string", - "default": ".results.txt", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "KleborateSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "spatyper": { - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "SpaTyperSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "repeats": { - "type": "string", - "hidden": true - 
}, - "repeat_order": { - "type": "string", - "hidden": true - } - }, - "kraken_bin": { - "fasta_ext": { - "type": "string", - "default": "_binned.fasta.gz", - "hidden": true - } - }, - "pointfinder_db_tag": { - "report_tag": { - "type": "string", - "default": "PointfinderDB", - "hidden": true - } - }, - "abricate": { - "args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "report_tag": { - "type": "string", - "default": "Abricate", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "assembly_status": { - "report_tag": { - "type": "string", - "default": "AssemblyCompleted", - "hidden": true - } - }, - "filtered_reads": { - "threshold": { - "type": "integer", - "default": 1000, - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "MeetsReadThreshold", - "hidden": true - } - }, - "contigs_too_short": { - "report_tag": { - "type": "string", - "default": "MaxContigToShort" - } - }, - "report_aggregate": { - "sample_flat_suffix": { - "type": "string", - "default": "_flat_sample.json", - "hidden": true + "properties": { + "validationFailUnrecognisedParams": { + "type": "boolean" + }, + "chopper": { + "type": "object", + "properties": { + "quality": { + "type": "integer", + "default": 0, + "hidden": true + }, + "minlength": { + "type": "integer", + "default": 100, + "hidden": true + }, + "fastq_ext": { + "type": "string", + "default": ".fastq.gz", + "hidden": true + } + } + }, + "fastqc": { + "type": "object", + "properties": { + "html_ext": { + "type": "string", + "default": ".html", + "hidden": true + }, + "zip_ext": { + "type": "string", + "default": ".zip", + "hidden": true + } + } + }, + "r_contaminants": { + "type": "object", + "properties": { + "phix_fa": { + "type": "string", + "hidden": true + }, + "homo_sapiens_fa": { + "type": "string", + "hidden": true + }, + "pacbio_mg": { + "type": "string", + "hidden": true + }, + "output_ext": { + "type": "string", + 
"default": ".fastq.gz", + "hidden": true + }, + "mega_mm2_idx": { + "type": "string", + "hidden": true + }, + "mm2_illumina": { + "type": "string", + "default": "-x sr", + "hidden": true + }, + "mm2_pac": { + "type": "string", + "default": "-x map-pb", + "hidden": true + }, + "mm2_ont": { + "type": "string", + "default": "-x map-ont", + "hidden": true + }, + "mm2_output_ext": { + "type": "string", + "default": ".sam", + "hidden": true + }, + "samtools_output_ext": { + "type": "string", + "default": ".fastq", + "hidden": true + }, + "samtools_singletons_ext": { + "type": "string", + "default": ".singleton.fq", + "hidden": true + }, + "samtools_output_suffix": { + "type": "string", + "default": "deconned", + "hidden": true + }, + "output_dir": { + "type": "string", + "default": "contamination/deconned_reads", + "hidden": true + } + } + }, + "minimap2": { + "type": "object", + "properties": { + "index_outdir": { + "type": "string", + "default": "indices", + "hidden": true + }, + "index_ext": { + "type": "string", + "default": ".idx", + "hidden": true + }, + "mapped_paf_ext": { + "type": "string", + "default": ".paf", + "hidden": true + }, + "mapped_sam_ext": { + "type": "string", + "default": ".sam", + "hidden": true + }, + "mapped_outdir": { + "type": "string", + "default": "mapped", + "hidden": true + } + } + }, + "samtools": { + "type": "object", + "properties": { + "bam_ext": { + "type": "string", + "default": ".bam", + "hidden": true + }, + "bai_ext": { + "type": "string", + "default": ".bai", + "hidden": true + } + } + }, + "racon": { + "type": "object", + "properties": { + "consensus_suffix": { + "type": "string", + "default": "_assembly_consensus.fasta", + "hidden": true + }, + "consensus_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "polished", + "hidden": true + } + } + }, + "pilon": { + "type": "object", + "properties": { + "outdir": { + "type": "string", + "default": "pilon", + 
"hidden": true + }, + "fasta_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "fasta_outdir": { + "type": "string", + "default": "fasta", + "hidden": true + }, + "vcf_ext": { + "type": "string", + "default": ".vcf", + "hidden": true + }, + "vcf_outdir": { + "type": "string", + "default": "vcf", + "hidden": true + }, + "changes_ext": { + "type": "string", + "default": ".changes", + "hidden": true + }, + "changes_outdir": { + "type": "string", + "default": "changes", + "hidden": true + }, + "max_memory_multiplier": { + "type": "integer", + "default": 3, + "hidden": true + } + } + }, + "pilon_iterative": { + "type": "object", + "properties": { + "outdir": { + "type": "string", + "default": "pilon", + "hidden": true + }, + "fasta_ext": { + "type": "string", + "default": ".fasta.gz", + "hidden": true + }, + "fasta_outdir": { + "type": "string", + "default": "fasta", + "hidden": true + }, + "vcf_ext": { + "type": "string", + "default": ".vcf", + "hidden": true + }, + "vcf_outdir": { + "type": "string", + "default": "vcf", + "hidden": true + }, + "bam_ext": { + "type": "string", + "default": ".bam", + "hidden": true + }, + "bai_ext": { + "type": "string", + "default": ".bai", + "hidden": true + }, + "changes_ext": { + "type": "string", + "default": ".changes", + "hidden": true + }, + "changes_outdir": { + "type": "string", + "default": "changes", + "hidden": true + }, + "max_memory_multiplier": { + "type": "integer", + "default": 3, + "hidden": true + }, + "max_polishing_illumina": { + "type": "integer", + "default": 3, + "hidden": true + }, + "max_polishing_pacbio": { + "type": "integer", + "default": 4, + "hidden": true + }, + "max_polishing_nanopore": { + "type": "integer", + "default": 10, + "hidden": true + } + } + }, + "unicycler": { + "type": "object", + "properties": { + "scaffolds_ext": { + "type": "string", + "default": ".scaffolds.fa.gz", + "hidden": true + }, + "assembly_ext": { + "type": "string", + "default": ".assembly.gfa.gz", 
+ "hidden": true + }, + "log_ext": { + "type": "string", + "default": ".unicycler.log", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "unicycler", + "hidden": true + }, + "mem_modifier": { + "type": "integer", + "default": 1000, + "hidden": true + }, + "threads_increase_factor": { + "type": "integer", + "default": 1, + "hidden": true + } + } + }, + "bandage": { + "type": "object", + "properties": { + "svg_ext": { + "type": "string", + "default": ".svg", + "hidden": true + }, + "outdir": { + "type": "string", + "default": "bandage", + "hidden": true + } + } + }, + "ectyper": { + "type": "object", + "properties": { + "log_ext": { + "type": "string", + "default": ".log", + "hidden": true + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "txt_ext": { + "type": "string", + "default": ".txt", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "ECTyperSubtyping", + "hidden": true + }, + "args": { + "type": "string", + "default": "{ \"--verify\" }", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + } + }, + "sistr": { + "type": "object", + "properties": { + "tsv_ext": { + "type": "string", + "default": ".tab", + "hidden": true + }, + "allele_fasta_ext": { + "type": "string", + "default": "-allele.fasta", + "hidden": true + }, + "allele_json_ext": { + "type": "string", + "default": "-allele.json", + "hidden": true + }, + "cgmlst_ext": { + "type": "string", + "default": "-cgmlst.csv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SISTRSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + } + }, + "lissero": { + "type": "object", + "properties": { + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "LISSEROSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + 
"default": true, + "hidden": true + } + } + }, + "shigeifinder": { + "type": "object", + "properties": { + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + }, + "container_version": { + "type": "string", + "default": "1.3.2", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "ShigeifinderSubtyping", + "hidden": true + } + } + }, + "raw_reads": { + "type": "object", + "properties": { + "high_precision": { + "type": "boolean", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "RawReadSummary", + "hidden": true + } + } + }, + "mlst": { + "type": "object", + "properties": { + "args": { + "type": "string", + "hidden": true + }, + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "json_ext": { + "type": "string", + "default": ".json", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SevenGeneMLSTReport", + "hidden": true + } + } + }, + "mash_meta": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "MashMeta", + "hidden": true + } + } + }, + "top_hit_species": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "SpeciesTopHit", + "hidden": true + } + } + }, + "kraken_species": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "Kraken2TopHit", + "hidden": true + } + } + }, + "subtyping_report": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "Subtyping", + "hidden": true + } + } + }, + "kleborate": { + "type": "object", + "properties": { + "txt_ext": { + "type": "string", + "default": ".results.txt", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "KleborateSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + } + }, + "spatyper": { 
+ "type": "object", + "properties": { + "tsv_ext": { + "type": "string", + "default": ".tsv", + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "SpaTyperSubtyping", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + }, + "repeats": { + "type": "string", + "hidden": true + }, + "repeat_order": { + "type": "string", + "hidden": true + } + } + }, + "pointfinder_db_tag": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "PointfinderDB", + "hidden": true + } + } + }, + "abricate": { + "type": "object", + "properties": { + "args": { + "type": "string", + "hidden": true, + "default": "{ \"\" }" + }, + "report_tag": { + "type": "string", + "default": "Abricate", + "hidden": true + }, + "header_p": { + "type": "boolean", + "default": true, + "hidden": true + } + } + }, + "assembly_status": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "AssemblyCompleted", + "hidden": true + } + } + }, + "filtered_reads": { + "type": "object", + "properties": { + "threshold": { + "type": "integer", + "default": 1000, + "hidden": true + }, + "report_tag": { + "type": "string", + "default": "MeetsReadThreshold", + "hidden": true + } + } + }, + "contigs_too_short": { + "type": "object", + "properties": { + "report_tag": { + "type": "string", + "default": "MaxContigToShort" + } + } + }, + "report_aggregate": { + "type": "object", + "properties": { + "sample_flat_suffix": { + "type": "string", + "default": "_flat_sample.json", + "hidden": true + } + } } } -} +} \ No newline at end of file diff --git a/utils/format.py b/utils/format.py index 0117971c..bfdf5e61 100755 --- a/utils/format.py +++ b/utils/format.py @@ -5,15 +5,17 @@ """ from __future__ import annotations -import argparse + import json import logging import os import sys +import argparse from dataclasses import dataclass logger = logging.getLogger(__name__) + @dataclass(frozen=True) 
class Constants: delimiter: str = "." @@ -54,14 +56,20 @@ def denested_information(keys: list[str], last_value: dict) -> dict: new_chain: dict = {} temp = new_chain - - for i in keys[1:-1]: + for i in keys[0:-1]: temp[i] = {} - temp = temp[i] - temp[keys[-1]] = last_value + temp[i][Constants.type_field] = Constants.properties_type + temp[i][Constants.extraction_field] = {} + temp = temp[i][Constants.extraction_field] + + temp[Constants.type_field] = Constants.properties_type + temp[Constants.extraction_field] = {keys[-1]: last_value} + return new_chain + + def nest_schema(properties: dict) -> dict: """Convert a 'dotted' schema into a nested json e.g. @@ -87,15 +95,36 @@ def nest_schema(properties: dict) -> dict: if Constants.delimiter not in key: continue split_key = key.split(Constants.delimiter) + if new_dict.get(split_key[0]) is None: new_dict[split_key[0]] = {} + new_dict[split_key[0]][Constants.type_field] = Constants.properties_type + new_dict[split_key[0]][Constants.extraction_field] = {} + denested_data = denested_information(split_key[1:], values) - new_dict[split_key[0]][split_key[1]] = denested_data + if denested_data != values: + # multiple fields to be set, update the properties instead of overwriting it + + temp = denested_data + nd_temp = new_dict[split_key[0]][Constants.extraction_field] + for i in split_key[1:-1]: + if nd_temp.get(i) is None: + nd_temp[i] = {} + nd_temp[i][Constants.type_field] = temp[i][Constants.type_field] + nd_temp[i][Constants.extraction_field] = {} + + nd_temp = nd_temp[i][Constants.extraction_field] + temp = temp[i][Constants.extraction_field] + + nd_temp[split_key[-1]] = temp[Constants.extraction_field][split_key[-1]] + + else: + new_dict[split_key[0]][Constants.extraction_field][split_key[1]] = denested_data + poisoned_keys.append(key) for i in poisoned_keys: del properties[i] - properties.update(new_dict) return properties @@ -129,32 +158,62 @@ def nest_properties(schema: dict) -> dict: del 
schema[Constants.nesting_field][k][Constants.extraction_field] schema[Constants.nesting_field][k][Constants.extraction_field] = new_properties + new_properties = nest_schema(properties=properties) del schema[Constants.extraction_field] - schema.update(new_properties) - drop_keys = reorganize_schema(schema[Constants.nesting_field]) + schema[Constants.extraction_field] = new_properties + drop_keys = reorganize_schema(schema) schema[Constants.allof_field] = drop_all_of_fields(schema[Constants.allof_field], drop_keys) + return schema +def drop_keys_repeated(schema): + """ + Keys in the properties section need may be repeated in the definitions, those in the definitions + need to be removed from the properties field + """ + common_keys = schema[Constants.nesting_field].keys() & schema[Constants.extraction_field].keys() + for key in common_keys: + del schema[Constants.extraction_field][key] + return schema -def reorganize_schema(definitions) -> set: +def reorganize_schema(schema) -> set: """Take a newly nested schema and merge paramter definitions together to prevent errors + TODO break this function up into smaller sections definitions dict: Updated definitions field in a json schema return drop_keys set: Additional fields to delete from the schema after processing """ - sub_key_fields = [] + + + definitions = schema[Constants.nesting_field] + top_lvl_keys = frozenset(definitions.keys()) + properties_keys = frozenset(schema[Constants.extraction_field].keys()) + drop_keys = set() for k, v in definitions.items(): - sub_key_fields.extend([(k, i) for i in v[Constants.extraction_field].keys()]) - main_keys = {i[0] for i in sub_key_fields} - drop_keys: set = set() - for i in sub_key_fields: - if i[1] in main_keys and i[1] != i[0]: - drop_keys.add(i[0]) - definitions[i[1]][Constants.extraction_field][i[1]].update(definitions[i[0]][Constants.extraction_field][i[1]]) - - for i in drop_keys: - del definitions[i] + tpl_keys = [i for i in v[Constants.extraction_field].keys() if i 
in top_lvl_keys] + if not tpl_keys or len(tpl_keys) == 1: + continue + for i in tpl_keys: + definitions[i][Constants.extraction_field][i].update(v[Constants.extraction_field][i][Constants.extraction_field]) + del v[Constants.extraction_field][i] + if schema[Constants.extraction_field].get(i): + del schema[Constants.extraction_field][i] + drop_keys.add(k) + + for k in drop_keys: + del definitions[k] + + for k, v in definitions.items(): + common_keys = v[Constants.extraction_field].keys() & properties_keys + if not common_keys: + continue + for i in common_keys: + props = schema[Constants.extraction_field].get(i) + if props: + v[Constants.extraction_field][i].update(props[Constants.extraction_field]) + del schema[Constants.extraction_field][i] + return drop_keys def dump_schema(schema: dict, output_fp: str): From b1314a9e405f42d84c40d1e69178b9123675ca9c Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 19 Mar 2024 16:21:06 -0500 Subject: [PATCH 06/26] propogated hidden message to higher params --- nextflow_schema.json | 116 ++++++++++++++++++++++++++++++------------- utils/format.py | 8 ++- 2 files changed, 87 insertions(+), 37 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 00357973..d57080db 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -131,6 +131,7 @@ "default": "{ \"\" }" } }, + "hidden": true, "docker": { "type": "string", "hidden": true, @@ -272,6 +273,7 @@ } } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2", @@ -364,6 +366,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/mash:2.3--he348c14_1", @@ -433,6 +436,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/quast:5.2.0--py39pl5321h4e691d4_3", @@ -839,7 +843,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true } } }, @@ -867,7 +872,8 @@ "default": "['RawReadSummary', 
'combined', 'total_bp']", "hidden": true } - } + }, + "hidden": true } } }, @@ -1473,7 +1479,8 @@ } } } - } + }, + "hidden": true }, "QCReportFields": { "type": "object", @@ -1732,7 +1739,8 @@ } } } - } + }, + "hidden": true } } }, @@ -1760,7 +1768,8 @@ "default": "SeqtkBaseCount", "hidden": true } - } + }, + "hidden": true } } }, @@ -1795,6 +1804,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/medaka:1.8.0--py38hdaa7744_0", @@ -1866,7 +1876,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true } } }, @@ -1914,7 +1925,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true } } }, @@ -1968,6 +1980,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", @@ -2010,7 +2023,8 @@ "default": "illumina", "hidden": true } - } + }, + "hidden": true } } }, @@ -2034,6 +2048,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", @@ -2137,6 +2152,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/flye:2.9.2--py39h6935b12_0", @@ -2195,6 +2211,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/spades:3.15.5--h95f258a_1", @@ -2253,6 +2270,7 @@ "hidden": true } }, + "hidden": true, "docker": { "type": "string", "default": "quay.io/biocontainers/checkm-genome:1.2.2--pyhdfd78af_1", @@ -2323,6 +2341,7 @@ "hidden": true } }, + "hidden": true, "header_p": { "type": "boolean", "hidden": true @@ -2447,7 +2466,8 @@ "default": ".fastq.gz", "hidden": true } - } + }, + "hidden": true }, "fastqc": { "type": "object", @@ -2462,7 +2482,8 @@ "default": ".zip", "hidden": true } - } + }, + "hidden": true }, "r_contaminants": { "type": "object", @@ -2528,7 +2549,8 @@ "default": "contamination/deconned_reads", "hidden": true } - } + }, + "hidden": true }, 
"minimap2": { "type": "object", @@ -2558,7 +2580,8 @@ "default": "mapped", "hidden": true } - } + }, + "hidden": true }, "samtools": { "type": "object", @@ -2573,7 +2596,8 @@ "default": ".bai", "hidden": true } - } + }, + "hidden": true }, "racon": { "type": "object", @@ -2593,7 +2617,8 @@ "default": "polished", "hidden": true } - } + }, + "hidden": true }, "pilon": { "type": "object", @@ -2638,7 +2663,8 @@ "default": 3, "hidden": true } - } + }, + "hidden": true }, "pilon_iterative": { "type": "object", @@ -2708,7 +2734,8 @@ "default": 10, "hidden": true } - } + }, + "hidden": true }, "unicycler": { "type": "object", @@ -2743,7 +2770,8 @@ "default": 1, "hidden": true } - } + }, + "hidden": true }, "bandage": { "type": "object", @@ -2758,7 +2786,8 @@ "default": "bandage", "hidden": true } - } + }, + "hidden": true }, "ectyper": { "type": "object", @@ -2793,7 +2822,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true }, "sistr": { "type": "object", @@ -2828,7 +2858,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true }, "lissero": { "type": "object", @@ -2848,7 +2879,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true }, "shigeifinder": { "type": "object", @@ -2873,7 +2905,8 @@ "default": "ShigeifinderSubtyping", "hidden": true } - } + }, + "hidden": true }, "raw_reads": { "type": "object", @@ -2887,7 +2920,8 @@ "default": "RawReadSummary", "hidden": true } - } + }, + "hidden": true }, "mlst": { "type": "object", @@ -2911,7 +2945,8 @@ "default": "SevenGeneMLSTReport", "hidden": true } - } + }, + "hidden": true }, "mash_meta": { "type": "object", @@ -2921,7 +2956,8 @@ "default": "MashMeta", "hidden": true } - } + }, + "hidden": true }, "top_hit_species": { "type": "object", @@ -2931,7 +2967,8 @@ "default": "SpeciesTopHit", "hidden": true } - } + }, + "hidden": true }, "kraken_species": { "type": "object", @@ -2941,7 +2978,8 @@ "default": "Kraken2TopHit", "hidden": true } - } + }, + "hidden": true }, "subtyping_report": { "type": 
"object", @@ -2951,7 +2989,8 @@ "default": "Subtyping", "hidden": true } - } + }, + "hidden": true }, "kleborate": { "type": "object", @@ -2971,7 +3010,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true }, "spatyper": { "type": "object", @@ -2999,7 +3039,8 @@ "type": "string", "hidden": true } - } + }, + "hidden": true }, "pointfinder_db_tag": { "type": "object", @@ -3009,7 +3050,8 @@ "default": "PointfinderDB", "hidden": true } - } + }, + "hidden": true }, "abricate": { "type": "object", @@ -3029,7 +3071,8 @@ "default": true, "hidden": true } - } + }, + "hidden": true }, "assembly_status": { "type": "object", @@ -3039,7 +3082,8 @@ "default": "AssemblyCompleted", "hidden": true } - } + }, + "hidden": true }, "filtered_reads": { "type": "object", @@ -3054,7 +3098,8 @@ "default": "MeetsReadThreshold", "hidden": true } - } + }, + "hidden": true }, "contigs_too_short": { "type": "object", @@ -3073,7 +3118,8 @@ "default": "_flat_sample.json", "hidden": true } - } + }, + "hidden": true } } } \ No newline at end of file diff --git a/utils/format.py b/utils/format.py index bfdf5e61..da7b6028 100755 --- a/utils/format.py +++ b/utils/format.py @@ -25,6 +25,7 @@ class Constants: nesting_field: str = "definitions" allof_field: str = "allOf" ref_key: str = "$ref" + hidden: str = "hidden" def drop_all_of_fields(schema_all_of: list, fields: set): @@ -68,8 +69,6 @@ def denested_information(keys: list[str], last_value: dict) -> dict: return new_chain - - def nest_schema(properties: dict) -> dict: """Convert a 'dotted' schema into a nested json e.g. 
@@ -106,6 +105,7 @@ def nest_schema(properties: dict) -> dict: # multiple fields to be set, update the properties instead of overwriting it temp = denested_data + nd_temp = new_dict[split_key[0]][Constants.extraction_field] for i in split_key[1:-1]: if nd_temp.get(i) is None: @@ -117,9 +117,13 @@ def nest_schema(properties: dict) -> dict: temp = temp[i][Constants.extraction_field] nd_temp[split_key[-1]] = temp[Constants.extraction_field][split_key[-1]] + if hidden := nd_temp[split_key[-1]].get(Constants.hidden): + new_dict[split_key[0]][Constants.hidden] = hidden else: new_dict[split_key[0]][Constants.extraction_field][split_key[1]] = denested_data + if hidden := denested_data.get(Constants.hidden): + new_dict[split_key[0]][Constants.hidden] = hidden poisoned_keys.append(key) From 3bf5d87b3a4fd6806346f4bfcdf32091371c61d3 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 19 Mar 2024 16:41:09 -0500 Subject: [PATCH 07/26] added report_aggreagate to ignored params --- nextflow.config | 2 +- nextflow_schema.json | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 52dcacef..62400c61 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,7 @@ params { validate_params = true show_hidden_params = false validationS3PathCheck = true - validationSchemaIgnoreParams = 
'seqkit,contigs_too_short,output_idx_name,filtered_reads,coverage_calc_fields,assembly_status,coverage_calc_fields.fixed_cov,coverage_calc_fields.auto_cov,assembly_status.report_tag,medaka.model,medaka,validation-S3Path-check,validationS3PathCheck,abricate,python3,pointfinder_db_tag,staramr,mobsuite_recon,skip_staramr,genomes,flye_read_type,shigeifinder,lissero,sistr,ectyper,bandage,bakta,unicycler,medaka,pilon_iterative,pilon,racon,samtools,minimap2,r_contaminants,mash,kraken,checkm,quast_filter,quast,fastqc,spades,flye,chopper,fastp,seqtk,seqtk_size,kat,coreutils,opt_platforms,QCReportFields,QCReport-fields,QCReport,kraken_bin,shigatyper,spatyper,kleborate,subtyping_report,kraken_species,top_hit_species,mash_meta,mlst,raw_reads,abricate_params,target_depth' + validationSchemaIgnoreParams = 'report_aggregate,seqkit,contigs_too_short,output_idx_name,filtered_reads,coverage_calc_fields,assembly_status,coverage_calc_fields.fixed_cov,coverage_calc_fields.auto_cov,assembly_status.report_tag,medaka.model,medaka,validation-S3Path-check,validationS3PathCheck,abricate,python3,pointfinder_db_tag,staramr,mobsuite_recon,skip_staramr,genomes,flye_read_type,shigeifinder,lissero,sistr,ectyper,bandage,bakta,unicycler,medaka,pilon_iterative,pilon,racon,samtools,minimap2,r_contaminants,mash,kraken,checkm,quast_filter,quast,fastqc,spades,flye,chopper,fastp,seqtk,seqtk_size,kat,coreutils,opt_platforms,QCReportFields,QCReport-fields,QCReport,kraken_bin,shigatyper,spatyper,kleborate,subtyping_report,kraken_species,top_hit_species,mash_meta,mlst,raw_reads,abricate_params,target_depth' validationFailUnrecognisedParams = false // for the qcreport fields // SKIP options diff --git a/nextflow_schema.json b/nextflow_schema.json index d57080db..9e28f92c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -3112,14 +3112,14 @@ }, "report_aggregate": { "type": "object", + "hidden": true, "properties": { "sample_flat_suffix": { "type": "string", "default": "_flat_sample.json", 
"hidden": true } - }, - "hidden": true + } } } -} \ No newline at end of file +} From 66194e5113a1a3db2b861230fed584697f70d1c0 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 20:50:29 -0500 Subject: [PATCH 08/26] Re-enabled some linting --- .nf-core.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 2abd222f..7604c855 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -23,8 +23,6 @@ lint: - assets/email_template.txt - assets/sendmail_template.txt - .gitignore - schema_params: True - nextflow_config: False multiqc_config: False template: prefix: phac-nml From 1e7363969e91d8fa9d81ab1f263ced27c88f211e Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 20:59:42 -0500 Subject: [PATCH 09/26] Fixed up linting of nextflow config --- .nf-core.yml | 3 +++ nextflow.config | 1 + 2 files changed, 4 insertions(+) diff --git a/.nf-core.yml b/.nf-core.yml index 7604c855..4c14d399 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -23,6 +23,9 @@ lint: - assets/email_template.txt - assets/sendmail_template.txt - .gitignore + nextflow_config: + - manifest.name + - manifest.homePage multiqc_config: False template: prefix: phac-nml diff --git a/nextflow.config b/nextflow.config index 62400c61..12af36e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { validate_params = true show_hidden_params = false validationS3PathCheck = true + validationShowHiddenParams = false validationSchemaIgnoreParams = 
'report_aggregate,seqkit,contigs_too_short,output_idx_name,filtered_reads,coverage_calc_fields,assembly_status,coverage_calc_fields.fixed_cov,coverage_calc_fields.auto_cov,assembly_status.report_tag,medaka.model,medaka,validation-S3Path-check,validationS3PathCheck,abricate,python3,pointfinder_db_tag,staramr,mobsuite_recon,skip_staramr,genomes,flye_read_type,shigeifinder,lissero,sistr,ectyper,bandage,bakta,unicycler,medaka,pilon_iterative,pilon,racon,samtools,minimap2,r_contaminants,mash,kraken,checkm,quast_filter,quast,fastqc,spades,flye,chopper,fastp,seqtk,seqtk_size,kat,coreutils,opt_platforms,QCReportFields,QCReport-fields,QCReport,kraken_bin,shigatyper,spatyper,kleborate,subtyping_report,kraken_species,top_hit_species,mash_meta,mlst,raw_reads,abricate_params,target_depth' validationFailUnrecognisedParams = false // for the qcreport fields From 76a0d17fcb6a074d401d98d3b837434b260f26d6 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 21:33:39 -0500 Subject: [PATCH 10/26] Used schema builder to generate nextflow schema --- nextflow_schema.json | 3527 ++++++------------------------------------ 1 file changed, 444 insertions(+), 3083 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 9e28f92c..f2f353ef 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,3125 +1,486 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", - "title": "phac-nml/mikrokondo pipeline parameters", - "description": "mikrokondo schema", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "pattern": 
"^\\S+\\.csv$", - "schema": "assets/schema_input.json", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - } - } - }, - "bakta": { - "title": "Bakta", - "type": "object", - "description": "", - "default": "", - "fa_icon": "fas fa-address-card", - "properties": { - "ba_min_conting_length": { - "type": "integer", - "default": 200, - "description": "Minimum contig length for processing in Bakta" - }, - "bakta": { - "type": "object", - "properties": { - "output_dir": { - "type": "string", - "default": "bakta", - "hidden": true - }, - "embl_ext": { - "type": "string", - "default": ".embl", - "hidden": true - }, - "faa_ext": { - "type": "string", - "default": ".faa", - "hidden": true - }, - "ffn_ext": { - "type": "string", - "default": ".ffn", - "hidden": true - }, - "fna_ext": { - "type": "string", - "default": ".fna", - "hidden": true - }, - "gbff_ext": { - "type": "string", - "default": ".gbff", - "hidden": true - }, - 
"gff_ext": { - "type": "string", - "default": ".gff3", - "hidden": true - }, - "threads": { - "type": "integer", - "default": 12, - "hidden": true - }, - "hypotheticals_tsv_ext": { - "type": "string", - "default": ".hypotheticals.tsv", - "hidden": true - }, - "hypotheticals_faa_ext": { - "type": "string", - "default": ".hypotheticals.faa", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "min_contig_length": { - "type": "integer", - "default": 200, - "hidden": true - }, - "db": { - "type": "string", - "hidden": true - }, - "args": { - "type": "string", - "hidden": true, - "description": "Additional arguments to pass to bakta", - "default": "{ \"\" }" - } - }, - "hidden": true, - "docker": { - "type": "string", - "hidden": true, - "default": "quay.io/biocontainers/bakta:1.8.1--pyhdfd78af_0" - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/bakta%3A1.8.1--pyhdfd78af_0", - "hidden": true - } - } - } - }, - "fastp": { - "title": "FastP", - "type": "object", - "description": "", - "default": "", - "fa_icon": "fas fa-filter", - "properties": { - "fp_average_quality": { - "type": "integer", - "default": 25 - }, - "fp_cut_tail_mean_quality": { - "type": "integer", - "default": 15 - }, - "fp_cut_tail_window_size": { - "type": "integer", - "default": 4 - }, - "fp_complexity_threshold": { - "type": "integer", - "default": 20 - }, - "fp_qualified_phred": { - "type": "integer", - "default": 15 - }, - "fp_unqualified_precent_limit": { - "type": "integer", - "default": 40 - }, - "fp_polyg_min_len": { - "type": "integer", - "default": 10 - }, - "fp_polyx_min_len": { - "type": "integer", - "default": 10 - }, - "fp_illumina_length_min": { - "type": "integer", - "default": 35 - }, - "fp_illumina_length_max": { - "type": "integer", - "default": 400 - }, - "fp_single_end_length_min": { - "type": "integer", 
- "default": 1000 - }, - "fp_dedup_reads": { - "type": "boolean" - }, - "fastp": { - "type": "object", - "properties": { - "html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "average_quality_e": { - "type": "integer", - "default": 25, - "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "qualified_quality_phred": { - "type": "integer", - "default": 15, - "description": "Phred score to be considered qualified. See FastP docs for more details.", - "hidden": true - }, - "unqualified_percent_limit": { - "type": "integer", - "default": 40, - "description": "Percent of bases in a read to be qualified for the read to be included. See FastP docs for more details.", - "hidden": true - }, - "fastq_ext": { - "type": "string", - "default": ".trimmed.fastq.gz", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "FastP", - "hidden": true - }, - "illumina_length_min": { - "type": "integer", - "default": 35, - "description": "Minimum length of a read to be included in later analysis for illumina data.", - "hidden": true - }, - "single_end_length_min": { - "type": "integer", - "default": 1000, - "description": "Minimum leng of a read to be used in later analysis (for Nanopore or Pacbio)", - "hidden": true - }, - "dedup_reads": { - "type": "boolean", - "description": "Option to turn on read de-duplication.", - "hidden": true - }, - "args": { - "type": "object", - "properties": { - "illumina": { - "type": "string", - "default": "--overrepresentation_analysis --trim_poly_g --poly_g_min_len 10 --trim_poly_x --poly_x_min_len 10 --cut_tail --cut_tail_window_size 4 --cut_tail_mean_quality 15 --low_complexity_filter --complexity_threshold 20 --average_qual 25 --qualified_quality_phred 15 --unqualified_percent_limit 40 --length_limit 400 --length_required 35 
--detect_adapter_for_pe", - "hidden": true - }, - "single_end": { - "type": "string", - "default": "--overrepresentation_analysis -Q --length_required 1000", - "hidden": true + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", + "title": "phac-nml/mikrokondo pipeline parameters", + "description": "Mikrokondo beta", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": [ + "input", + "outdir", + "platform" + ], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open", + "hidden": true + }, + "platform": { + "type": "string", + "default": "illumina", + "enum": [ + "illumina", + "nanopore", + "pacbio", + "hybrid" + ], + "description": "Sequencing platform used" + }, + "long_read_opt": { + "type": "string", + "default": "nanopore", + "description": "Specify which longread platform your data is from (nanopore or pacbio). 
This option must be specified if performing a hybrid assembly.", + "enum": [ + "nanopore", + "pacbio" + ] + }, + "nanopore_chemistry": { + "type": "string", + "description": "The guppy base calling model. See the docs for a link of valid options" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true } - } - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/fastp:0.23.2--hb7a2d85_2", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2", - "hidden": true - } - } - } - }, - "mash": { - "title": "Mash", - "type": "object", - "description": "", - "default": "", - "fa_icon": "fas fa-align-left", - "properties": { - "mh_min_kmer": { - "type": "integer", - "default": 10, - "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" - }, - "mash": { - "type": "object", - "properties": { - "mash_ext": { - "type": "string", - "default": ".screen", - "hidden": true - }, - "output_reads_ext": { - "type": "string", - "default": ".reads.screen", - "hidden": true - }, - "output_taxa_ext": { - "type": "string", - "default": ".taxa.screen", - "hidden": true - }, - "output_dir": { - "type": "string", - "default": "contamination", - "hidden": true - }, - "mash_sketch": { - "type": "string", - "hidden": true - }, - "sketch_ext": { - "type": "string", - "default": ".msh", - "hidden": true - }, - "sketch_kmer_size": { - "type": "integer", - "default": 21, - "hidden": true - }, - 
"final_sketch_name": { - "type": "string", - "default": "GTDB_sketch", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "min_kmer": { - "type": "integer", - "default": 10, - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "Mash", - "hidden": true - }, - "header_p": { - "type": "boolean", - "hidden": true - }, - "headers": { - "type": "string", - "default": "['identity', 'Shared Hashes', 'Median Multiplicity', 'P-Value', 'Query ID', 'Query Note']", - "hidden": true - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/mash:2.3--he348c14_1", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1", - "hidden": true - } - } - } - }, - "quast": { - "title": "QUAST", - "type": "object", - "description": "", - "default": "", - "fa_icon": "fas fa-check-circle", - "properties": { - "qt_min_contig_length": { - "type": "integer", - "default": 1000 - }, - "quast": { - "type": "object", - "properties": { - "suffix": { - "type": "string", - "default": "quast", - "hidden": true - }, - "report_base": { - "type": "string", - "default": "report", - "hidden": true - }, - "report_prefix": { - "type": "string", - "default": "transposed_", - "hidden": true - }, - "report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "QUAST", - "hidden": true - }, - "min_contig_length": { - "type": "integer", - "default": 1000, - "description": "Minimum contig length to be used my Quast.", - "hidden": true - }, - "args": { - "type": "string", - "default": "{ \"--min-contig ${params.quast.min_contig_length} --report-all-metrics\" }", - "hidden": true - }, - "contigs_field": { - "type": "string", - "default": "# contigs", - "hidden": true } - }, - "hidden": true, - "docker": { - "type": "string", - "default": 
"quay.io/biocontainers/quast:5.2.0--py39pl5321h4e691d4_3", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/quast%3A5.2.0--py39pl5321h4e691d4_3", - "hidden": true - } - } - } - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "required": [ - "platform" - ], - "properties": { - "target_depth": { - "type": "integer", - "default": 100, - "description": "Target depth to sub-sample reads to." - }, - "platform": { - "type": "string", - "enum": [ - "illumina", - "nanopore", - "pacbio", - "hybrid" - ], - "description": "Sequencing platform used.", - "default": "illumina" - }, - "nanopore_chemistry": { - "type": "string", - "description": "The guppy base calling model. See the docs for a link of valid options." - }, - "run_kraken": { - "type": "boolean", - "description": "Use Kraken2 instead of Mash for sample speciation (Useful if you have Eukaryotic data or Archae)" - }, - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle" - }, - "hybrid_unicycler": { - "type": "boolean", - "description": "Use unicycler for hybrid assembly." - }, - "long_read_opt": { - "type": "string", - "default": "nanopore", - "enum": [ - "nanopore", - "pacbio" - ], - "description": "Specify which longread platform your data is from (nanopore or pacbio). This option must be specified if performing a hybrid assembly." 
- }, - "min_reads": { - "type": "integer", - "default": 1000, - "description": "Minimum number of reads a sample requires to move forward for assembly." - }, - "output_idx_name": { - "type": "string", - "hidden": true - }, - "metagenomic_run": { - "type": "boolean", - "description": "Label all samples as metagenomic (Skip autodetection)" - }, - "flye_read_type": { - "type": "string", - "default": "hq", - "enum": [ - "hq", - "corr", - "raw" - ], - "description": "Read type for flye to use. hq corresponds to hifi for Pacbio data." - }, - "stage_in_mode": { - "type": "string", - "default": "symlink", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle" - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], - "hidden": true - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." 
- }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format" - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true - }, - "hook_url": { - "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true - }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "null/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "fa_icon": "fas fa-check-square", - "default": true, - "hidden": true - }, - "show_hidden_params": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "slurm_p": { - "type": "boolean", - "description": "Use slurm to execute your pipeline" - }, - "slurm_profile": { - "type": "string", - "description": "Slurm partition" - }, - "validationS3PathCheck": { - "type": "boolean", - "default": true, - "description": "Validate s3 sample sheet." - } - } - }, - "skip_options": { - "title": "Skip Options", - "type": "object", - "description": "Options to alter control flow of the pipeline", - "default": "", - "properties": { - "skip_depth_sampling": { - "type": "boolean", - "description": "Skip down sampling of data to a target depth. This is not supported for metagenomic samples or hybrid assemblies." 
- }, - "skip_subtyping": { - "type": "boolean", - "description": "Do not enter the subtyping workflow, e.g. ECTyper, SISTR etc will not be ran." - }, - "skip_polishing": { - "type": "boolean", - "description": "Skip polishing of assemblies, useful in case of errors or for metagenomic samples that fail." - }, - "skip_ont_header_cleaning": { - "type": "boolean", - "description": "Make nanopore headers unique. Only turn this on if you are worried about duplicate id's e.g. from errors in running sequencing.", - "default": true - }, - "skip_checkm": { - "type": "boolean", - "description": "Skip running CheckM" - }, - "skip_report": { - "type": "boolean", - "description": "Skip summary report generation" - }, - "skip_raw_read_metrics": { - "type": "boolean", - "description": "Skip generating raw-read metrics. e.g. when data first enters the pipeline" - }, - "skip_version_gathering": { - "type": "boolean", - "description": "Skip creating a report of the final versions of tools used in mikrokondo." - }, - "skip_metagenomic_detection": { - "type": "boolean", - "description": "For samples to be analyzed as isolates." - }, - "skip_abricate": { - "type": "boolean", - "description": "Skip running abricate for annotation" }, - "skip_bakta": { - "type": "boolean", - "description": "Skip annotation with Bakta" - }, - "skip_species_classification": { - "type": "boolean", - "description": "Skip determining what your species is (with Kraken2 or Mash)" - }, - "skip_mlst": { - "type": "boolean", - "description": "Skip classic 7gene MLST (Uses Torstein Tseemann's mlst)" - }, - "skip_mobrecon": { - "type": "boolean", - "description": "Skip running mob recon for plasmid identification." 
- }, - "skip_staramr": { - "type": "boolean", - "description": "Skip running StarAMR" - } - } - }, - "databases_and_pre_computed_files": { - "title": "Databases and Pre-Computed Files", - "type": "object", - "description": "", - "default": "", - "required": [ - "dehosting_idx", - "mash_sketch" - ], - "properties": { - "dehosting_idx": { - "type": "string", - "default": "databases/PhiPacHum_m2.idx", - "description": "Minimpa2 index for dehosting and kitome removal" - }, - "mash_sketch": { - "type": "string", - "default": "databases/GTDBSketch_20231003.msh", - "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)" - }, - "bakta_db": { - "type": "string", - "default": "databases/db-light", - "description": "Database use for bakta, this value is optional as bakta can be skipped" - }, - "kraken2_db": { - "type": "string", - "default": "databases/k2_standard_20220607/" - }, - "staramr_db": { - "type": "string", - "description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified" - } - } - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "2000.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "fa_icon": "fas fa-users-cog", - "hidden": true - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "fa_icon": "fas fa-users-cog", - "hidden": true - } - } - }, - "seqkit": { - "title": "SeqKit", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqkit": { - "type": "object", - "properties": { - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/seqkit:2.2.0--h9ee0642_0", - "hidden": true - }, - "report_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "fasta_ext": { - "type": 
"string", - "default": "_filtered.fasta.gz", - "hidden": true - }, - "filter_field": { - "type": "string", - "default": "max_len", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "Seqkit_stats", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - } - } - }, - "coveragecalculation": { - "title": "CoverageCalculation", - "type": "object", - "description": "", - "default": "", - "properties": { - "coverage_calc_fields": { - "type": "object", - "properties": { - "fixed_cov": { - "type": "string", - "default": "FixedGenomeSizeDepth", - "hidden": true - }, - "auto_cov": { - "type": "string", - "default": "DetectedGenomeSizeDepth", - "hidden": true - }, - "bp_field": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'total_bp']", - "hidden": true - } - }, - "hidden": true - } - } - }, - "qcreport": { - "title": "QCReport", - "type": "object", - "description": "", - "default": "", - "properties": { - "QCReport": { - "type": "object", - "properties": { - "escherichia": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Escherichia coli", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 50000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 4500000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "fixed_genome_size": { - 
"type": "integer", - "default": 5000000, - "hidden": true - } - } - }, - "salmonella": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Salmonella", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 90000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 200, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 4400000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 5000000, - "hidden": true - } - } - }, - "shigella": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Shigella", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 18000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 4300000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 5000000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - 
"default": 5000000, - "hidden": true - } - } - }, - "listeria": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Listeria", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 50000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 3200000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 200, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 2700000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 3200000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 3000000, - "hidden": true - } - } - }, - "klebsiella": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Klebsiella", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 500, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 4500000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 6000000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 6000000, - 
"hidden": true - } - } - }, - "staphylococcus": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Staphylococcus", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 550, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 3500000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 3500000, - "hidden": true - } - } - }, - "fallthrough": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "No organism specific QC data available.", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "hidden": true - }, - "max_n50": { - "type": "integer", - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "hidden": true - }, - "min_length": { - "type": "integer", - "hidden": true - }, - "max_length": { - "type": "integer", - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - }, - "fixed_genome_size": { - "type": "string", - "hidden": true - } - } - }, - "campylobacter_jejuni": { - "type": "object", - "properties": { - "search": { - "type": 
"string", - "default": "Campylobacter jejuni", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 1800000, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 1400000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true - } - } - }, - "campylobacter_coli": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Campylobacter coli", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 1800000, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 1400000, - "hidden": true - }, - "max_length": { - "type": "integer", - "default": 2000000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 30, - "hidden": true + "databases_and_pre_computed_files": { + "title": "Databases and Pre-Computed 
Files", + "type": "object", + "default": "", + "properties": { + "dehosting_idx": { + "type": "string", + "default": "databases/PhiPacHum_m2.idx", + "description": "Minimap2 index for dehosting and kitome removal", + "pattern": "^\\S+$", + "format": "file-path" + }, + "mash_sketch": { + "type": "string", + "default": "databases/GTDBSketch_20231003.msh", + "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", + "pattern": "^\\S+$", + "format": "file-path" + }, + "bakta_db": { + "type": "string", + "description": "Database used for bakta, this value is optional as bakta can be skipped", + "default": "databases/db-light", + "pattern": "^\\S+$", + "format": "directory-path" + }, + "kraken2_db": { + "type": "string", + "default": "databases/k2_standard_20220607/", + "description": "Kraken2 database", + "pattern": "^\\S+$", + "format": "directory-path" + }, + "staramr_db": { + "type": "string", + "description": "It is recommended to use the StarAMR database in the StarAMR container however, an external option can be specified", + "pattern": "^\\S+$", + "format": "directory-path", + "hidden": true } - } }, - "vibrio_cholerae": { - "type": "object", - "properties": { - "search": { - "type": "string", - "default": "Vibrio cholerae", - "hidden": true - }, - "raw_average_quality": { - "type": "integer", - "default": 30, - "hidden": true - }, - "min_n50": { - "type": "integer", - "default": 100000, - "hidden": true - }, - "max_n50": { - "type": "integer", - "default": 4300000, - "hidden": true - }, - "min_nr_contigs": { - "type": "integer", - "default": 1, - "hidden": true - }, - "max_nr_contigs": { - "type": "integer", - "default": 150, - "hidden": true - }, - "fixed_genome_size": { - "type": "integer", - "default": 4000000, - "hidden": true - }, - "min_length": { - "type": "integer", - "default": 3800000, - "hidden": true - }, - "max_length": { - "type": "integer", -
"default": 4300000, - "hidden": true - }, - "max_checkm_contamination": { - "type": "number", - "default": 3, - "hidden": true - }, - "min_average_coverage": { - "type": "integer", - "default": 40, - "hidden": true + "required": [ + "dehosting_idx", + "mash_sketch" + ] + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" } - } } - }, - "hidden": true }, - "QCReportFields": { - "type": "object", - "properties": { - "raw_average_quality": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['RawReadSummary', 'combined', 'qual_mean']", - "hidden": true - }, - "coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "compare_fields": { - "type": "string", - "default": "['raw_average_quality']", - "hidden": true - }, - "comp_type": { - "type": "string", - "default": "ge", - "hidden": true - }, - "on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "low_msg": { - "type": "string", - "default": "Base quality is poor, resequencing is recommended.", - "hidden": true - } - } - }, - "average_coverage": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['FixedGenomeSizeDepth']", - "hidden": true - }, - "coerce_type": { - "type": "string", - "default": "Float", - "hidden": true - }, - "compare_fields": { - "type": "string", - "default": "['min_average_coverage']", - "hidden": true - }, - "comp_type": { - "type": "string", - "default": "ge", - "hidden": true - }, - "on": { - "type": "boolean", - "default": 
true, - "hidden": true - }, - "low_msg": { - "type": "string", - "default": "Depth of coverage from assembly is lower than than expected. A top-up run is likely needed.", - "hidden": true + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "2000.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } - } - }, - "metagenomic": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['MashMeta']", - "hidden": true - }, - "coerce_type": { - "type": "string", - "default": "Bool", - "hidden": true - }, - "compare_fields": { - "type": "string", - "default": "[]", - "hidden": true - }, - "comp_type": { - "type": "string", - "default": "bool", - "hidden": true - }, - "on": { - "type": "boolean", - "hidden": true + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
+ }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "show_hidden_params": { + "type": "boolean", + "hidden": true + }, + "tracedir": { + "type": "string", + "default": "null/pipeline_info", + "hidden": true + }, + "stage_in_mode": { + "type": "string", + "default": "symlink", + "hidden": true + }, + "slurm_p": { + "type": "boolean", + "hidden": true + }, + "slurm_profile": { + "type": "string", + "hidden": true } - } - }, - "n50_value": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['QUAST', '0', 'N50']", - "hidden": true - }, - "coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "compare_fields": { - "type": "string", - "default": "['min_n50', 'max_n50']", - "hidden": true - }, - "comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "on": { - "type": "boolean", - "default": true, - "hidden": true - }, - "low_msg": { - "type": "string", - "default": "N50 value is low, this could be due to many reasons involving contamination, poor template quality or insufficient template quantity. Reisolation and reseqeuncing may be needed.", - "hidden": true - }, - "high_msg": { - "type": "string", - "default": "N50 value is high, this is likely a good thing if you have fewer contigs than expected.", - "hidden": true + } + }, + "control_flow_options": { + "title": "Control flow options", + "type": "object", + "description": "", + "default": "", + "properties": { + "run_kraken": { + "type": "boolean", + "description": "Use Kraken2 instead of Mash for sample speciation (Useful if you have Eukaryotic data or Archaea)" + }, + "hybrid_unicycler": { + "type": "boolean", + "description": "Use unicycler for hybrid assembly."
} - } - }, - "nr_contigs": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['QUAST', '0', '# contigs']", - "hidden": true - }, - "coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true - }, - "compare_fields": { - "type": "string", - "default": "['min_nr_contigs', 'max_nr_contigs']", - "hidden": true - }, - "comp_type": { - "type": "string", - "default": "range", - "hidden": true - }, - "on": { - "type": "boolean", - "default": true, - "hidden": true + } + }, + "skip_options": { + "title": "Skip Options", + "type": "object", + "description": "Options to alter control flow of the pipeline", + "default": "", + "properties": { + "skip_report": { + "type": "boolean" }, - "low_msg": { - "type": "string", - "default": "Fewer contigs than expected, if your genome length is of an expected size and you have a high N50 you likely just have a high quality assembly.", - "hidden": true + "skip_raw_read_metrics": { + "type": "boolean" }, - "high_msg": { - "type": "string", - "default": "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample.", - "hidden": true - } - } - }, - "length": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['QUAST', '0', 'Total length']", - "hidden": true + "skip_version_gathering": { + "type": "boolean" }, - "coerce_type": { - "type": "string", - "default": "Integer", - "hidden": true + "skip_subtyping": { + "type": "boolean" }, - "compare_fields": { - "type": "string", - "default": "['min_length', 'max_length']", - "hidden": true + "skip_bakta": { + "type": "boolean" }, - "comp_type": { - "type": "string", - "default": "range", - "hidden": true + "skip_abricate": { + "type": "boolean" }, - "on": { - "type": "boolean", - "default": true, - "hidden": true + "skip_checkm": { + "type": "boolean" }, - "low_msg": { - "type": 
"string", - "default": "Genome length lower than expected, you may need to resequence the sample.", - "hidden": true + "skip_depth_sampling": { + "type": "boolean" }, - "high_msg": { - "type": "string", - "default": "Genome length is higher than expected, contmination is potentially present.", - "hidden": true - } - } - }, - "checkm_contamination": { - "type": "object", - "properties": { - "path": { - "type": "string", - "default": "['CheckM', '0', 'Contamination']", - "hidden": true + "skip_ont_header_cleaning": { + "type": "boolean", + "default": true }, - "coerce_type": { - "type": "string", - "default": "Float", - "hidden": true + "skip_polishing": { + "type": "boolean" }, - "compare_fields": { - "type": "string", - "default": "['max_checkm_contamination']", - "hidden": true + "skip_species_classification": { + "type": "boolean" }, - "comp_type": { - "type": "string", - "default": "le", - "hidden": true + "skip_mlst": { + "type": "boolean" }, - "on": { - "type": "boolean", - "default": true, - "hidden": true + "skip_mobrecon": { + "type": "boolean" }, - "high_msg": { - "type": "string", - "default": "Potential contamination is present in your sample. 
You may need to reisolate and resequence your sample.", - "hidden": true + "skip_metagenomic_detection": { + "type": "boolean" } - } - } - }, - "hidden": true - } - } - }, - "seqtk_size": { - "title": "seqtk_size", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqtk_size": { - "type": "object", - "properties": { - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - }, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "SeqtkBaseCount", - "hidden": true - } - }, - "hidden": true - } - } - }, - "medaka": { - "title": "medaka", - "type": "object", - "description": "", - "default": "", - "properties": { - "medaka": { - "type": "object", - "properties": { - "model": { - "type": "string", - "hidden": true, - "description": "This is set to the base calling model specified in the nanopore_chemistry param" - }, - "fasta_ext": { - "type": "string", - "default": ".fa.gz", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "medaka", - "hidden": true - }, - "batch_size": { - "type": "integer", - "default": 5, - "description": "Batch size for medaka to use for processing.", - "hidden": true - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/medaka:1.8.0--py38hdaa7744_0", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/medaka%3A1.8.0--py38hdaa7744_0", - "hidden": true - } - } - } - }, - "staramr": { - "title": "StarAMR", - "type": "object", - "description": "", - "default": "", - "properties": { - "staramr": { - "type": "object", - "properties": { - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/staramr%3A0.9.1--pyhdfd78af_0", - "hidden": true - }, - "docker": { - 
"type": "string", - "default": "quay.io/biocontainers/staramr:0.9.1--pyhdfd78af_0", - "hidden": true - }, - "db": { - "type": "string", - "description": "Path to a StarAMR database, a database is included in the container." - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "xlsx_ext": { - "type": "string", - "default": ".xlsx", - "hidden": true - }, - "args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "point_finder_dbs": { - "type": "string", - "default": "['salmonella', 'campylobacter', 'enterococcus_faecalis', 'enterococcus_faecium', 'escherichia_coli', 'helicobacter_pylori']", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "StarAMR", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - } - } - }, - "mobrecon": { - "title": "mobrecon", - "type": "object", - "description": "", - "default": "", - "properties": { - "mobsuite_recon": { - "type": "object", - "properties": { - "args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "fasta_ext": { - "type": "string", - "default": ".fasta", - "hidden": true - }, - "results_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "mob_results_file": { - "type": "string", - "default": "mobtyper_results.txt", - "hidden": true - }, - "contig_report": { - "type": "string", - "default": "contig_report.txt", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "MobRecon", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - } - } - }, - "kat": { - "title": "Kat", - "type": "object", - "description": "", - "default": "", - "properties": { - "kat": { - "type": "object", - "properties": { - "hist_ext": { - "type": "string", - "default": ".hist", - "hidden": true - }, - 
"json_ext": { - "type": "string", - "default": ".hist.dist_analysis.json", - "hidden": true - }, - "png_ext": { - "type": "string", - "default": ".png", - "hidden": true - }, - "postscript_ext": { - "type": "string", - "default": ".ps", - "hidden": true - }, - "output_type": { - "type": "string", - "default": "png", - "hidden": true - }, - "pdf_ext": { - "type": "string", - "default": ".pdf", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "KatHist", - "hidden": true - }, - "jfhash_ext": { - "type": "string", - "default": ".jf", - "hidden": true - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2", - "hidden": true - } - } - } - }, - "platform_options": { - "title": "Platform Options", - "type": "object", - "description": "", - "default": "", - "properties": { - "opt_platforms": { - "type": "object", - "properties": { - "ont": { - "type": "string", - "default": "nanopore", - "hidden": true - }, - "pacbio": { - "type": "string", - "default": "pacbio", - "hidden": true - }, - "hybrid": { - "type": "string", - "default": "hybrid", - "hidden": true - }, - "illumina": { - "type": "string", - "default": "illumina", - "hidden": true - } - }, - "hidden": true - } - } - }, - "seqtk": { - "title": "Seqtk", - "type": "object", - "description": "", - "default": "", - "properties": { - "seqtk": { - "type": "object", - "properties": { - "seed": { - "type": "integer", - "default": 42, - "hidden": true - }, - "reads_ext": { - "type": "string", - "default": "_sampled.fastq.gz", - "hidden": true } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/seqtk:1.4--he4a0461_1", - "hidden": true - }, - "singularity": { - "type": "string", - "default": 
"https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1", - "hidden": true - } - } - } - }, - "flye": { - "title": "flye", - "type": "object", - "description": "", - "default": "", - "properties": { - "flye": { - "type": "object", - "properties": { - "nanopore": { - "type": "object", - "properties": { - "corr": { - "type": "string", - "default": "--nano-corr", - "hidden": true - }, - "hq": { - "type": "string", - "default": "--nano-hq", - "hidden": true - }, - "raw": { - "type": "string", - "default": "--nano-raw", - "hidden": true - } - } - }, - "pacbio": { - "type": "object", - "properties": { - "raw": { - "type": "string", - "default": "--pacbio-raw", - "hidden": true - }, - "corr": { - "type": "string", - "default": "--pacbio-corr", - "hidden": true - }, - "hq": { - "type": "string", - "default": "--pacbio-hifi", - "hidden": true + }, + "fastp_options": { + "title": "Fastp options", + "type": "object", + "description": "Options to fastp for read qa/qc", + "default": "", + "properties": { + "fp_average_quality": { + "type": "integer", + "default": 25 + }, + "fp_cut_tail_mean_quality": { + "type": "integer", + "default": 15 + }, + "fp_cut_tail_window_size": { + "type": "integer", + "default": 4 + }, + "fp_complexity_threshold": { + "type": "integer", + "default": 20 + }, + "fp_qualified_phred": { + "type": "integer", + "default": 15 + }, + "fp_unqualified_precent_limit": { + "type": "integer", + "default": 40 + }, + "fp_polyg_min_len": { + "type": "integer", + "default": 10 + }, + "fp_polyx_min_len": { + "type": "integer", + "default": 10 + }, + "fp_illumina_length_min": { + "type": "integer", + "default": 35 + }, + "fp_illumina_length_max": { + "type": "integer", + "default": 400 + }, + "fp_single_end_length_min": { + "type": "integer", + "default": 1000 + }, + "fp_dedup_reads": { + "type": "boolean" } - } - }, - "gfa_ext": { - "type": "string", - "default": ".gfa.gz", - "hidden": true - }, - "gv_ext": { - "type": "string", - "default": ".gv.gz", - 
"hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true - }, - "polishing_iterations": { - "type": "integer", - "default": 1, - "hidden": true - }, - "fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "args": { - "type": "string", - "default": "{ \"--iterations ${params.flye.polishing_iterations}\" }", - "hidden": true - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/flye:2.9.2--py39h6935b12_0", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1", - "hidden": true - } - } - } - }, - "spades": { - "title": "spades", - "type": "object", - "description": "", - "default": "", - "properties": { - "spades": { - "type": "object", - "properties": { - "scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fasta.gz", - "hidden": true - }, - "contigs_ext": { - "type": "string", - "default": ".contigs.fasta.gz", - "hidden": true - }, - "transcripts_ext": { - "type": "string", - "default": ".transcripts.fasta.gz", - "hidden": true - }, - "gene_clusters_ext": { - "type": "string", - "default": ".gene_clusters.fasta.gz", - "hidden": true - }, - "assembly_graphs_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true - }, - "log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "assembly", - "hidden": true - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/spades:3.15.5--h95f258a_1", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1", - "hidden": true - } - } - } - }, - "checkm": { 
- "title": "checkm", - "type": "object", - "description": "", - "default": "", - "properties": { - "checkm": { - "type": "object", - "properties": { - "alignment_ext": { - "type": "string", - "default": "-genes.aln", - "hidden": true - }, - "results_ext": { - "type": "string", - "default": "-results.txt", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "folder_name": { - "type": "string", - "default": "checkm", - "hidden": true - }, - "gzip_ext": { - "type": "string", - "default": ".gz", - "hidden": true - }, - "lineage_ms": { - "type": "string", - "default": "lineage.ms", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "CheckM", - "hidden": true - } - }, - "hidden": true, - "docker": { - "type": "string", - "default": "quay.io/biocontainers/checkm-genome:1.2.2--pyhdfd78af_1", - "hidden": true - }, - "singularity": { - "type": "string", - "default": "https://depot.galaxyproject.org/singularity/checkm-genome%3A1.2.2--pyhdfd78af_1", - "hidden": true - } - } - } - }, - "kraken2": { - "title": "kraken2", - "type": "object", - "description": "", - "default": "", - "properties": { - "kraken": { - "type": "object", - "properties": { - "db": { - "type": "string", - "description": "Path to Kraken2 database (do not use symlinks)" - }, - "classified_suffix": { - "type": "string", - "default": "classified", - "hidden": true - }, - "unclassified_suffix": { - "type": "string", - "default": "unclassified", - "hidden": true - }, - "report_suffix": { - "type": "string", - "default": "report", - "hidden": true - }, - "output_suffix": { - "type": "string", - "default": "output", - "hidden": true - }, - "save_output_fastqs": { - "type": "boolean", - "hidden": true - }, - "save_reads_assignments": { - "type": "boolean", - "default": true, - "hidden": true - }, - "run_kraken_quick": { - "type": "boolean", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "KrakenReport", - "hidden": true 
- }, - "tophit_level": { - "type": "string", - "default": "S", - "hidden": true } - }, - "hidden": true, - "header_p": { - "type": "boolean", - "hidden": true - }, - "headers": { - "type": "string", - "default": "['PercentID', 'FragmentsRecovered', 'FragmentsAssignmentTaxon', 'RankCode']", - "hidden": true - } }, - "kraken_bin": { - "type": "object", - "properties": { - "taxonomic_level": { - "type": "string", - "default": "G", - "description": "Taxonomic level to bin contigs at." + "other": { + "title": "Other", + "type": "object", + "description": "Other options", + "default": "", + "properties": { + "min_reads": { + "type": "integer", + "default": 1000 + }, + "metagenomic_run": { + "type": "boolean" + }, + "ba_min_conting_length": { + "type": "integer", + "default": 200 + }, + "qt_min_contig_length": { + "type": "integer", + "default": 1000 + }, + "mh_min_kmer": { + "type": "integer", + "default": 10 + } } - }, - "fasta_ext": { - "type": "string", - "default": "_binned.fasta.gz", - "hidden": true - } - } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/bakta" - }, - { - "$ref": "#/definitions/fastp" - }, - { - "$ref": "#/definitions/mash" - }, - { - "$ref": "#/definitions/quast" - }, - { - "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/skip_options" - }, - { - "$ref": "#/definitions/databases_and_pre_computed_files" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/seqkit" - }, - { - "$ref": "#/definitions/coveragecalculation" - }, - { - "$ref": "#/definitions/qcreport" - }, - { - "$ref": "#/definitions/seqtk_size" - }, - { - "$ref": "#/definitions/medaka" - }, - { - "$ref": "#/definitions/staramr" - }, - { - "$ref": "#/definitions/mobrecon" - }, - { - "$ref": "#/definitions/kat" - }, - { - "$ref": "#/definitions/platform_options" - }, - { - "$ref": 
"#/definitions/seqtk" - }, - { - "$ref": "#/definitions/flye" - }, - { - "$ref": "#/definitions/spades" - }, - { - "$ref": "#/definitions/checkm" - }, - { - "$ref": "#/definitions/kraken2" - } - ], - "properties": { - "validationFailUnrecognisedParams": { - "type": "boolean" - }, - "chopper": { - "type": "object", - "properties": { - "quality": { - "type": "integer", - "default": 0, - "hidden": true - }, - "minlength": { - "type": "integer", - "default": 100, - "hidden": true - }, - "fastq_ext": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - } - }, - "hidden": true - }, - "fastqc": { - "type": "object", - "properties": { - "html_ext": { - "type": "string", - "default": ".html", - "hidden": true - }, - "zip_ext": { - "type": "string", - "default": ".zip", - "hidden": true - } - }, - "hidden": true - }, - "r_contaminants": { - "type": "object", - "properties": { - "phix_fa": { - "type": "string", - "hidden": true - }, - "homo_sapiens_fa": { - "type": "string", - "hidden": true - }, - "pacbio_mg": { - "type": "string", - "hidden": true - }, - "output_ext": { - "type": "string", - "default": ".fastq.gz", - "hidden": true - }, - "mega_mm2_idx": { - "type": "string", - "hidden": true - }, - "mm2_illumina": { - "type": "string", - "default": "-x sr", - "hidden": true - }, - "mm2_pac": { - "type": "string", - "default": "-x map-pb", - "hidden": true - }, - "mm2_ont": { - "type": "string", - "default": "-x map-ont", - "hidden": true - }, - "mm2_output_ext": { - "type": "string", - "default": ".sam", - "hidden": true - }, - "samtools_output_ext": { - "type": "string", - "default": ".fastq", - "hidden": true - }, - "samtools_singletons_ext": { - "type": "string", - "default": ".singleton.fq", - "hidden": true - }, - "samtools_output_suffix": { - "type": "string", - "default": "deconned", - "hidden": true - }, - "output_dir": { - "type": "string", - "default": "contamination/deconned_reads", - "hidden": true - } - }, - "hidden": true - }, - "minimap2": { - 
"type": "object", - "properties": { - "index_outdir": { - "type": "string", - "default": "indices", - "hidden": true - }, - "index_ext": { - "type": "string", - "default": ".idx", - "hidden": true - }, - "mapped_paf_ext": { - "type": "string", - "default": ".paf", - "hidden": true - }, - "mapped_sam_ext": { - "type": "string", - "default": ".sam", - "hidden": true - }, - "mapped_outdir": { - "type": "string", - "default": "mapped", - "hidden": true - } - }, - "hidden": true - }, - "samtools": { - "type": "object", - "properties": { - "bam_ext": { - "type": "string", - "default": ".bam", - "hidden": true - }, - "bai_ext": { - "type": "string", - "default": ".bai", - "hidden": true - } - }, - "hidden": true - }, - "racon": { - "type": "object", - "properties": { - "consensus_suffix": { - "type": "string", - "default": "_assembly_consensus.fasta", - "hidden": true - }, - "consensus_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "outdir": { - "type": "string", - "default": "polished", - "hidden": true - } - }, - "hidden": true - }, - "pilon": { - "type": "object", - "properties": { - "outdir": { - "type": "string", - "default": "pilon", - "hidden": true - }, - "fasta_ext": { - "type": "string", - "default": ".fasta.gz", - "hidden": true - }, - "fasta_outdir": { - "type": "string", - "default": "fasta", - "hidden": true - }, - "vcf_ext": { - "type": "string", - "default": ".vcf", - "hidden": true - }, - "vcf_outdir": { - "type": "string", - "default": "vcf", - "hidden": true - }, - "changes_ext": { - "type": "string", - "default": ".changes", - "hidden": true - }, - "changes_outdir": { - "type": "string", - "default": "changes", - "hidden": true - }, - "max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - } - }, - "hidden": true - }, - "pilon_iterative": { - "type": "object", - "properties": { - "outdir": { - "type": "string", - "default": "pilon", - "hidden": true - }, - "fasta_ext": { - "type": "string", - 
"default": ".fasta.gz", - "hidden": true - }, - "fasta_outdir": { - "type": "string", - "default": "fasta", - "hidden": true - }, - "vcf_ext": { - "type": "string", - "default": ".vcf", - "hidden": true - }, - "vcf_outdir": { - "type": "string", - "default": "vcf", - "hidden": true - }, - "bam_ext": { - "type": "string", - "default": ".bam", - "hidden": true - }, - "bai_ext": { - "type": "string", - "default": ".bai", - "hidden": true - }, - "changes_ext": { - "type": "string", - "default": ".changes", - "hidden": true - }, - "changes_outdir": { - "type": "string", - "default": "changes", - "hidden": true - }, - "max_memory_multiplier": { - "type": "integer", - "default": 3, - "hidden": true - }, - "max_polishing_illumina": { - "type": "integer", - "default": 3, - "hidden": true - }, - "max_polishing_pacbio": { - "type": "integer", - "default": 4, - "hidden": true - }, - "max_polishing_nanopore": { - "type": "integer", - "default": 10, - "hidden": true } - }, - "hidden": true }, - "unicycler": { - "type": "object", - "properties": { - "scaffolds_ext": { - "type": "string", - "default": ".scaffolds.fa.gz", - "hidden": true + "allOf": [ + { + "$ref": "#/definitions/input_output_options" }, - "assembly_ext": { - "type": "string", - "default": ".assembly.gfa.gz", - "hidden": true + { + "$ref": "#/definitions/databases_and_pre_computed_files" }, - "log_ext": { - "type": "string", - "default": ".unicycler.log", - "hidden": true + { + "$ref": "#/definitions/institutional_config_options" }, - "outdir": { - "type": "string", - "default": "unicycler", - "hidden": true + { + "$ref": "#/definitions/max_job_request_options" }, - "mem_modifier": { - "type": "integer", - "default": 1000, - "hidden": true + { + "$ref": "#/definitions/generic_options" }, - "threads_increase_factor": { - "type": "integer", - "default": 1, - "hidden": true - } - }, - "hidden": true - }, - "bandage": { - "type": "object", - "properties": { - "svg_ext": { - "type": "string", - "default": ".svg", - 
"hidden": true - }, - "outdir": { - "type": "string", - "default": "bandage", - "hidden": true - } - }, - "hidden": true - }, - "ectyper": { - "type": "object", - "properties": { - "log_ext": { - "type": "string", - "default": ".log", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "txt_ext": { - "type": "string", - "default": ".txt", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "ECTyperSubtyping", - "hidden": true - }, - "args": { - "type": "string", - "default": "{ \"--verify\" }", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - }, - "sistr": { - "type": "object", - "properties": { - "tsv_ext": { - "type": "string", - "default": ".tab", - "hidden": true - }, - "allele_fasta_ext": { - "type": "string", - "default": "-allele.fasta", - "hidden": true - }, - "allele_json_ext": { - "type": "string", - "default": "-allele.json", - "hidden": true - }, - "cgmlst_ext": { - "type": "string", - "default": "-cgmlst.csv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "SISTRSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - }, - "lissero": { - "type": "object", - "properties": { - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "LISSEROSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - }, - "shigeifinder": { - "type": "object", - "properties": { - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - }, - "container_version": { - "type": "string", - "default": "1.3.2", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": 
"ShigeifinderSubtyping", - "hidden": true - } - }, - "hidden": true - }, - "raw_reads": { - "type": "object", - "properties": { - "high_precision": { - "type": "boolean", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "RawReadSummary", - "hidden": true - } - }, - "hidden": true - }, - "mlst": { - "type": "object", - "properties": { - "args": { - "type": "string", - "hidden": true - }, - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true + { + "$ref": "#/definitions/control_flow_options" }, - "json_ext": { - "type": "string", - "default": ".json", - "hidden": true + { + "$ref": "#/definitions/skip_options" }, - "report_tag": { - "type": "string", - "default": "SevenGeneMLSTReport", - "hidden": true - } - }, - "hidden": true - }, - "mash_meta": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "MashMeta", - "hidden": true - } - }, - "hidden": true - }, - "top_hit_species": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "SpeciesTopHit", - "hidden": true - } - }, - "hidden": true - }, - "kraken_species": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "Kraken2TopHit", - "hidden": true - } - }, - "hidden": true - }, - "subtyping_report": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "Subtyping", - "hidden": true - } - }, - "hidden": true - }, - "kleborate": { - "type": "object", - "properties": { - "txt_ext": { - "type": "string", - "default": ".results.txt", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "KleborateSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - }, - "spatyper": { - "type": "object", - "properties": { - "tsv_ext": { - "type": "string", - "default": ".tsv", - "hidden": true - }, - "report_tag": { - "type": "string", - "default": 
"SpaTyperSubtyping", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true + { + "$ref": "#/definitions/fastp_options" }, - "repeats": { - "type": "string", - "hidden": true - }, - "repeat_order": { - "type": "string", - "hidden": true - } - }, - "hidden": true - }, - "pointfinder_db_tag": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "PointfinderDB", - "hidden": true - } - }, - "hidden": true - }, - "abricate": { - "type": "object", - "properties": { - "args": { - "type": "string", - "hidden": true, - "default": "{ \"\" }" - }, - "report_tag": { - "type": "string", - "default": "Abricate", - "hidden": true - }, - "header_p": { - "type": "boolean", - "default": true, - "hidden": true - } - }, - "hidden": true - }, - "assembly_status": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "AssemblyCompleted", - "hidden": true - } - }, - "hidden": true - }, - "filtered_reads": { - "type": "object", - "properties": { - "threshold": { - "type": "integer", - "default": 1000, - "hidden": true - }, - "report_tag": { - "type": "string", - "default": "MeetsReadThreshold", - "hidden": true - } - }, - "hidden": true - }, - "contigs_too_short": { - "type": "object", - "properties": { - "report_tag": { - "type": "string", - "default": "MaxContigToShort" - } - } - }, - "report_aggregate": { - "type": "object", - "hidden": true, - "properties": { - "sample_flat_suffix": { - "type": "string", - "default": "_flat_sample.json", - "hidden": true + { + "$ref": "#/definitions/other" } - } - } - } -} + ] +} \ No newline at end of file From d84fcb6775815094c7c30441bf002d583d8be7c7 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 21:37:37 -0500 Subject: [PATCH 11/26] Changed default database locations to null --- nextflow.config | 8 ++++---- nextflow_schema.json | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/nextflow.config 
b/nextflow.config index 12af36e5..6606dc63 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,10 +69,10 @@ params { // Datasets - dehosting_idx = "${projectDir}/databases/PhiPacHum_m2.idx" // mm2 index - mash_sketch = "${projectDir}/databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings - bakta_db = "${projectDir}/databases/db-light" - kraken2_db = "${projectDir}/databases/k2_standard_20220607/" + dehosting_idx = null // mm2 index + mash_sketch = null // Make sure comments are formatted as taxonomic strings + bakta_db = null + kraken2_db = null staramr_db = null // Recommended usage is to use the default database in the container diff --git a/nextflow_schema.json b/nextflow_schema.json index f2f353ef..5aa7cd65 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -75,14 +75,12 @@ "properties": { "dehosting_idx": { "type": "string", - "default": "databases/PhiPacHum_m2.idx", "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", "pattern": "^\\S+$", "format": "file-path" }, "mash_sketch": { "type": "string", - "default": "databases/GTDBSketch_20231003.msh", "description": "Minimpa2 index for dehosting and kitome removal", "pattern": "^\\S+$", "format": "file-path" @@ -90,13 +88,11 @@ "bakta_db": { "type": "string", "description": "Database use for bakta, this value is optional as bakta can be skipped", - "default": "databases/db-light", "pattern": "^\\S+$", "format": "directory-path" }, "kraken2_db": { "type": "string", - "default": "databases/k2_standard_20220607/", "description": "Kraken2 database", "pattern": "^\\S+$", "format": "directory-path" From 6ba53d33e661c9971eaecaaee8fd1336e0ce14e2 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 21:50:51 -0500 Subject: [PATCH 12/26] Adjusted control flow options --- nextflow_schema.json | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 
deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5aa7cd65..804fdf99 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -300,10 +300,10 @@ } } }, - "control_flow_options": { - "title": "Control flow options", + "skip_options": { + "title": "Skip Options", "type": "object", - "description": "", + "description": "Options to alter control flow of the pipeline", "default": "", "properties": { "run_kraken": { @@ -313,15 +313,7 @@ "hybrid_unicycler": { "type": "boolean", "description": "Use unicycler for hybrid assembly." - } - } - }, - "skip_options": { - "title": "Skip Options", - "type": "object", - "description": "Options to alter control flow of the pipeline", - "default": "", - "properties": { + }, "skip_report": { "type": "boolean" }, @@ -466,9 +458,6 @@ { "$ref": "#/definitions/generic_options" }, - { - "$ref": "#/definitions/control_flow_options" - }, { "$ref": "#/definitions/skip_options" }, From 4565b284483284f06f79c29bfbfdd0924f5757ef Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 21:55:22 -0500 Subject: [PATCH 13/26] Additional description for parameters --- nextflow_schema.json | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 804fdf99..2df68401 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -54,6 +54,10 @@ "pacbio" ] }, + "metagenomic_run": { + "type": "boolean", + "description": "Label all samples as metagenomic (Skip autodetection)" + }, "nanopore_chemistry": { "type": "string", "description": "The guppy base calling model. 
See the docs for a link of valid options" @@ -414,30 +418,31 @@ } } }, - "other": { - "title": "Other", + "data_processing_thresholds": { + "title": "Data processing thresholds", "type": "object", - "description": "Other options", + "description": "Thresholds for processing or qa/qc of data", "default": "", "properties": { "min_reads": { "type": "integer", - "default": 1000 - }, - "metagenomic_run": { - "type": "boolean" + "default": 1000, + "description": "Minimum number of reads a sample requires to move forward for assembly" }, "ba_min_conting_length": { "type": "integer", - "default": 200 + "default": 200, + "description": "Minimum contig length for processing in Bakta" }, "qt_min_contig_length": { "type": "integer", - "default": 1000 + "default": 1000, + "description": "Minimum contig length for quast" }, "mh_min_kmer": { "type": "integer", - "default": 10 + "default": 10, + "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" } } } @@ -465,7 +470,7 @@ "$ref": "#/definitions/fastp_options" }, { - "$ref": "#/definitions/other" + "$ref": "#/definitions/data_processing_thresholds" } ] } \ No newline at end of file From 4edc35cf9af105eea19672fecd5c6896caa5f96c Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 22:07:38 -0500 Subject: [PATCH 14/26] Revert "Changed default database locations to null" This reverts commit d84fcb6775815094c7c30441bf002d583d8be7c7. 
--- nextflow.config | 8 ++++---- nextflow_schema.json | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 6606dc63..12af36e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,10 +69,10 @@ params { // Datasets - dehosting_idx = null // mm2 index - mash_sketch = null // Make sure comments are formatted as taxonomic strings - bakta_db = null - kraken2_db = null + dehosting_idx = "${projectDir}/databases/PhiPacHum_m2.idx" // mm2 index + mash_sketch = "${projectDir}/databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings + bakta_db = "${projectDir}/databases/db-light" + kraken2_db = "${projectDir}/databases/k2_standard_20220607/" staramr_db = null // Recommended usage is to use the default database in the container diff --git a/nextflow_schema.json b/nextflow_schema.json index 2df68401..78821f59 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -79,12 +79,14 @@ "properties": { "dehosting_idx": { "type": "string", + "default": "databases/PhiPacHum_m2.idx", "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", "pattern": "^\\S+$", "format": "file-path" }, "mash_sketch": { "type": "string", + "default": "databases/GTDBSketch_20231003.msh", "description": "Minimpa2 index for dehosting and kitome removal", "pattern": "^\\S+$", "format": "file-path" @@ -92,11 +94,13 @@ "bakta_db": { "type": "string", "description": "Database use for bakta, this value is optional as bakta can be skipped", + "default": "databases/db-light", "pattern": "^\\S+$", "format": "directory-path" }, "kraken2_db": { "type": "string", + "default": "databases/k2_standard_20220607/", "description": "Kraken2 database", "pattern": "^\\S+$", "format": "directory-path" From 9e2e23d9871ed4b670bbe710c140f5e18dd6484f Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 19 Mar 2024 22:13:45 -0500 
Subject: [PATCH 15/26] Fixed up linting and running pipeline --- nextflow.config | 8 ++++---- nextflow_schema.json | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 12af36e5..58388e70 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,10 +69,10 @@ params { // Datasets - dehosting_idx = "${projectDir}/databases/PhiPacHum_m2.idx" // mm2 index - mash_sketch = "${projectDir}/databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings - bakta_db = "${projectDir}/databases/db-light" - kraken2_db = "${projectDir}/databases/k2_standard_20220607/" + dehosting_idx = "./databases/PhiPacHum_m2.idx" // mm2 index + mash_sketch = "./databases/GTDBSketch_20231003.msh" // Make sure comments are formatted as taxonomic strings + bakta_db = "./databases/db-light" + kraken2_db = "./databases/k2_standard_20220607/" staramr_db = null // Recommended usage is to use the default database in the container diff --git a/nextflow_schema.json b/nextflow_schema.json index 78821f59..72bca4ea 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -79,14 +79,14 @@ "properties": { "dehosting_idx": { "type": "string", - "default": "databases/PhiPacHum_m2.idx", + "default": "./databases/PhiPacHum_m2.idx", "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", "pattern": "^\\S+$", "format": "file-path" }, "mash_sketch": { "type": "string", - "default": "databases/GTDBSketch_20231003.msh", + "default": "./databases/GTDBSketch_20231003.msh", "description": "Minimpa2 index for dehosting and kitome removal", "pattern": "^\\S+$", "format": "file-path" @@ -94,13 +94,13 @@ "bakta_db": { "type": "string", "description": "Database use for bakta, this value is optional as bakta can be skipped", - "default": "databases/db-light", + "default": "./databases/db-light", "pattern": "^\\S+$", "format": 
"directory-path" }, "kraken2_db": { "type": "string", - "default": "databases/k2_standard_20220607/", + "default": "./databases/k2_standard_20220607/", "description": "Kraken2 database", "pattern": "^\\S+$", "format": "directory-path" From e56de8f871e3ec1a19b1b35ae6fab0feb416b4e9 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 08:18:05 -0500 Subject: [PATCH 16/26] Removed ignore validation --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 58388e70..04014553 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,7 @@ params { show_hidden_params = false validationS3PathCheck = true validationShowHiddenParams = false - validationSchemaIgnoreParams = 'report_aggregate,seqkit,contigs_too_short,output_idx_name,filtered_reads,coverage_calc_fields,assembly_status,coverage_calc_fields.fixed_cov,coverage_calc_fields.auto_cov,assembly_status.report_tag,medaka.model,medaka,validation-S3Path-check,validationS3PathCheck,abricate,python3,pointfinder_db_tag,staramr,mobsuite_recon,skip_staramr,genomes,flye_read_type,shigeifinder,lissero,sistr,ectyper,bandage,bakta,unicycler,medaka,pilon_iterative,pilon,racon,samtools,minimap2,r_contaminants,mash,kraken,checkm,quast_filter,quast,fastqc,spades,flye,chopper,fastp,seqtk,seqtk_size,kat,coreutils,opt_platforms,QCReportFields,QCReport-fields,QCReport,kraken_bin,shigatyper,spatyper,kleborate,subtyping_report,kraken_species,top_hit_species,mash_meta,mlst,raw_reads,abricate_params,target_depth' + validationSchemaIgnoreParams = false validationFailUnrecognisedParams = false // for the qcreport fields // SKIP options From fd1d5c3b150bd26ae7b7e535ab8773f5cc9d3b01 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 08:28:18 -0500 Subject: [PATCH 17/26] Updated schema with additional parameters --- nextflow_schema.json | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git 
a/nextflow_schema.json b/nextflow_schema.json index 72bca4ea..673a11ab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -305,11 +305,19 @@ "slurm_profile": { "type": "string", "hidden": true + }, + "validationS3PathCheck": { + "type": "boolean", + "default": true, + "hidden": true + }, + "output_idx_name": { + "type": "string" } } }, - "skip_options": { - "title": "Skip Options", + "control_flow_options": { + "title": "Control flow options", "type": "object", "description": "Options to alter control flow of the pipeline", "default": "", @@ -364,6 +372,10 @@ }, "skip_metagenomic_detection": { "type": "boolean" + }, + "skip_staramr": { + "type": "boolean", + "description": "Skip running StarAMR" } } }, @@ -428,6 +440,11 @@ "description": "Thresholds for processing or qa/qc of data", "default": "", "properties": { + "target_depth": { + "type": "integer", + "default": 100, + "description": "Target depth to sub-sample reads to" + }, "min_reads": { "type": "integer", "default": 1000, @@ -449,6 +466,24 @@ "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" } } + }, + "other": { + "title": "Other", + "type": "object", + "description": "Other parameters", + "default": "", + "properties": { + "flye_read_type": { + "type": "string", + "default": "hq", + "description": "Read type for flye to use. 
hq corresponds to hifi for Pacbio data.", + "enum": [ + "hq", + "corr", + "raw" + ] + } + } } }, "allOf": [ @@ -468,13 +503,16 @@ "$ref": "#/definitions/generic_options" }, { - "$ref": "#/definitions/skip_options" + "$ref": "#/definitions/control_flow_options" }, { "$ref": "#/definitions/fastp_options" }, { "$ref": "#/definitions/data_processing_thresholds" + }, + { + "$ref": "#/definitions/other" } ] } \ No newline at end of file From e6a4e2d3f45b14c4e20a4ca560ac867efa012a2a Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 08:46:12 -0500 Subject: [PATCH 18/26] Added test to ensure validation of parameters --- tests/main.nf.test | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index f2fd671b..45ae5463 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -180,4 +180,39 @@ nextflow_pipeline { } + test("Test validation of platform parameter") { + tag "fail_validation" + + when { + params { + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" + + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + mh_min_kmer = 1 + + dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + + // parameter to fail validation + platform = "invalid" + } + } + + then { + assert workflow.failed + assert workflow.stderr.contains("* --platform: 'invalid' is not a valid choice (Available choices: illumina, nanopore, pacbio, hybrid)") + } + } } From a82f40b6fce63d4c9e201060cff002365bc2db7b Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 09:11:53 -0500 Subject: [PATCH 19/26] Additional 
changes and descriptions to schema --- nextflow_schema.json | 59 ++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 673a11ab..ca931515 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,7 +32,8 @@ "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open", - "hidden": true + "hidden": true, + "pattern": "^\\S+$" }, "platform": { "type": "string", @@ -58,10 +59,6 @@ "type": "boolean", "description": "Label all samples as metagenomic (Skip autodetection)" }, - "nanopore_chemistry": { - "type": "string", - "description": "The guppy base calling model. See the docs for a link of valid options" - }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -312,7 +309,8 @@ "hidden": true }, "output_idx_name": { - "type": "string" + "type": "string", + "hidden": true } } }, @@ -331,47 +329,61 @@ "description": "Use unicycler for hybrid assembly." }, "skip_report": { - "type": "boolean" + "type": "boolean", + "description": "Skip summary report generation" }, "skip_raw_read_metrics": { - "type": "boolean" + "type": "boolean", + "description": "Skip generating raw-read metrics. e.g. when data first enters the pipeline" }, "skip_version_gathering": { - "type": "boolean" + "type": "boolean", + "description": "Skip creating a report of the final versions of tools used in mikrokondo" }, "skip_subtyping": { - "type": "boolean" + "type": "boolean", + "description": "Do not enter the subtyping workflow, e.g. ECTyper, SISTR etc will not be ran." 
}, "skip_bakta": { - "type": "boolean" + "type": "boolean", + "description": "Skip annotation with Bakta" }, "skip_abricate": { - "type": "boolean" + "type": "boolean", + "description": "Skip running abricate for annotation" }, "skip_checkm": { - "type": "boolean" + "type": "boolean", + "description": "Skip running CheckM" }, "skip_depth_sampling": { - "type": "boolean" + "type": "boolean", + "description": "Skip down sampling of data to a target depth. This is not supported for metagenomic samples or hybrid assemblies." }, "skip_ont_header_cleaning": { "type": "boolean", - "default": true + "default": true, + "description": "Make nanopore headers unique. Only turn this on if you are worried about duplicate id's e.g. from errors in running sequencing" }, "skip_polishing": { - "type": "boolean" + "type": "boolean", + "description": "Skip polishing of assemblies, useful in case of errors or for metagenomic samples that fail." }, "skip_species_classification": { - "type": "boolean" + "type": "boolean", + "description": "Skip determining what your species is (with Kraken2 or Mash)" }, "skip_mlst": { - "type": "boolean" + "type": "boolean", + "description": "Skip classic 7gene MLST (Uses Torsten Seemann's mlst)" }, "skip_mobrecon": { - "type": "boolean" + "type": "boolean", + "description": "Skip running mob recon for plasmid identification." }, "skip_metagenomic_detection": { - "type": "boolean" + "type": "boolean", + "description": "For samples to be analyzed as isolates" }, "skip_staramr": { "type": "boolean", @@ -387,7 +399,8 @@ "properties": { "fp_average_quality": { "type": "integer", - "default": 25 + "default": 25, + "description": "Average quality of a read to be included (read pair is discarded if it is below this value)" }, "fp_cut_tail_mean_quality": { "type": "integer", @@ -473,6 +486,10 @@ "description": "Other parameters", "default": "", "properties": { + "nanopore_chemistry": { + "type": "string", + "description": "The guppy base calling model. 
See the docs for a link of valid options" + }, "flye_read_type": { "type": "string", "default": "hq", From f480967beae798b5aefe19562533a638fe9c7fa0 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 09:35:05 -0500 Subject: [PATCH 20/26] Adding more descriptions and ranges --- nextflow_schema.json | 68 +++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index ca931515..aedbf183 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,7 +113,8 @@ "required": [ "dehosting_idx", "mash_sketch" - ] + ], + "description": "The location of databases used by mikrokondo" }, "institutional_config_options": { "title": "Institutional config options", @@ -176,7 +177,8 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. 
`--max_cpus 1`", + "minimum": 1 }, "max_memory": { "type": "string", @@ -400,50 +402,73 @@ "fp_average_quality": { "type": "integer", "default": 25, - "description": "Average quality of a read to be included (read pair is discarded if it is below this value)" + "description": "Average quality of a read to be included (read pair is discarded if it is below this value)", + "minimum": 0 }, "fp_cut_tail_mean_quality": { "type": "integer", - "default": 15 + "default": 15, + "minimum": 1, + "description": "the mean quality requirement option shared by cut_front, cut_tail or cut_sliding", + "maximum": 36 }, "fp_cut_tail_window_size": { "type": "integer", - "default": 4 + "default": 4, + "minimum": 1, + "description": "the window size option shared by cut_front, cut_tail or cut_sliding.", + "maximum": 1000 }, "fp_complexity_threshold": { "type": "integer", - "default": 20 + "default": 20, + "minimum": 0, + "description": "the threshold for low complexity filter", + "maximum": 100 }, "fp_qualified_phred": { "type": "integer", - "default": 15 + "default": 15, + "minimum": 0, + "description": "the quality value that a base is qualified." 
}, "fp_unqualified_precent_limit": { "type": "integer", - "default": 40 + "default": 40, + "minimum": 0, + "maximum": 100, + "description": "how many percents of bases are allowed to be unqualified" }, "fp_polyg_min_len": { "type": "integer", - "default": 10 + "default": 10, + "minimum": 1, + "description": "the minimum length to detect polyG in the read tail" }, "fp_polyx_min_len": { "type": "integer", - "default": 10 + "default": 10, + "description": "the minimum length to detect polyX in the read tail", + "minimum": 1 }, "fp_illumina_length_min": { "type": "integer", - "default": 35 + "default": 35, + "minimum": 1 }, "fp_illumina_length_max": { "type": "integer", - "default": 400 + "default": 400, + "minimum": 1 }, "fp_single_end_length_min": { "type": "integer", - "default": 1000 + "default": 1000, + "minimum": 1 }, "fp_dedup_reads": { - "type": "boolean" + "type": "boolean", + "description": "enable deduplication to drop the duplicated reads/pairs" } } }, @@ -456,27 +481,32 @@ "target_depth": { "type": "integer", "default": 100, - "description": "Target depth to sub-sample reads to" + "description": "Target depth to sub-sample reads to", + "minimum": 1 }, "min_reads": { "type": "integer", "default": 1000, - "description": "Minimum number of reads a sample requires to move forward for assembly" + "description": "Minimum number of reads a sample requires to move forward for assembly", + "minimum": 1 }, "ba_min_conting_length": { "type": "integer", "default": 200, - "description": "Minimum contig length for processing in Bakta" + "description": "Minimum contig length for processing in Bakta", + "minimum": 1 }, "qt_min_contig_length": { "type": "integer", "default": 1000, - "description": "Minimum contig length for quast" + "description": "Minimum contig length for quast", + "minimum": 1 }, "mh_min_kmer": { "type": "integer", "default": 10, - "description": "Minimum Kmer count needed for a unique kmer to be used in genome size estimation" + "description": "Minimum 
Kmer count needed for a unique kmer to be used in genome size estimation", + "minimum": 1 } } }, From ebc7596940d82ca2ed1bb7536df878a779a18f96 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 09:58:02 -0500 Subject: [PATCH 21/26] Adding test case for min length and database path validation --- tests/main.nf.test | 75 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 45ae5463..099d3274 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -215,4 +215,79 @@ nextflow_pipeline { assert workflow.stderr.contains("* --platform: 'invalid' is not a valid choice (Available choices: illumina, nanopore, pacbio, hybrid)") } } + + test("Test validation of contig minimum length parameter") { + tag "fail_validation" + + when { + params { + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" + + platform = "illumina" + + mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + mh_min_kmer = 1 + + dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + + // parameter to fail validation + qt_min_contig_length = -1 + } + } + + then { + assert workflow.failed + assert workflow.stderr.contains("* --qt_min_contig_length: -1 is not greater or equal to 1 (-1)") + } + } + + test("Test validation of databases parameters") { + tag "fail_validation" + + when { + params { + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv" + outdir = "results" + + platform = "illumina" + + mh_min_kmer = 1 + + dehosting_idx = 
"https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + + min_reads = 100 + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + + // parameter to fail validation + mash_sketch = "invalid path" + } + } + + then { + assert workflow.failed + assert workflow.stderr.contains("* --mash_sketch: string [invalid path] does not match pattern ^\\S+\$ (invalid path)") + } + } } From a8c143e8d0aaba3e93571bc065174f68d23e766f Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 10:43:48 -0500 Subject: [PATCH 22/26] Remove format --- utils/format.py | 257 ------------------------------------------------ 1 file changed, 257 deletions(-) delete mode 100755 utils/format.py diff --git a/utils/format.py b/utils/format.py deleted file mode 100755 index da7b6028..00000000 --- a/utils/format.py +++ /dev/null @@ -1,257 +0,0 @@ -""" -Reformat a mikrokondo json to un-nest dotter parameters - -2024-03-14: Matthew Wells -""" - -from __future__ import annotations - -import json -import logging -import os -import sys -import argparse -from dataclasses import dataclass - -logger = logging.getLogger(__name__) - - -@dataclass(frozen=True) -class Constants: - delimiter: str = "." 
- extraction_field: str = "properties" - properties_type: str = "object" - type_field: str = "type" - nesting_field: str = "definitions" - allof_field: str = "allOf" - ref_key: str = "$ref" - hidden: str = "hidden" - - -def drop_all_of_fields(schema_all_of: list, fields: set): - """ - Drop the fields in allOf of the schema that are erased - - schema_all_of list: The allOf list from the schema.json file id'd in the ref keys - fields set: the field values to delete - """ - defs_delete = frozenset([create_all_of_ref(i) for i in fields]) - - return list(filter(lambda x: x[Constants.ref_key] not in defs_delete, schema_all_of)) - -def create_all_of_ref(field): - return f"#/{Constants.nesting_field}/{field}" - - -def denested_information(keys: list[str], last_value: dict) -> dict: - """ - Recursively append new dictionaries with sub information being propagated throughout the dictionary - chain - - keys List[str]: list of keys to recursively implement as dictionaries are chained together - last_value dict: Last data entry to be appended to the chained dictionaries - """ - - if len(keys) == 1: - return last_value - - new_chain: dict = {} - temp = new_chain - for i in keys[0:-1]: - temp[i] = {} - temp[i][Constants.type_field] = Constants.properties_type - temp[i][Constants.extraction_field] = {} - temp = temp[i][Constants.extraction_field] - - temp[Constants.type_field] = Constants.properties_type - temp[Constants.extraction_field] = {keys[-1]: last_value} - - return new_chain - - -def nest_schema(properties: dict) -> dict: - """Convert a 'dotted' schema into a nested json - e.g. 
- properties: { - "seqkit.singularity": { - type: "string", - } - } - - into - "properties" : { - "singularity" : { - "type": string - } - } - - properties (dict): an existing list of json properties - """ - - new_dict: dict = {} - poisoned_keys = [] - for key, values in properties.items(): - if Constants.delimiter not in key: - continue - split_key = key.split(Constants.delimiter) - - if new_dict.get(split_key[0]) is None: - new_dict[split_key[0]] = {} - new_dict[split_key[0]][Constants.type_field] = Constants.properties_type - new_dict[split_key[0]][Constants.extraction_field] = {} - - denested_data = denested_information(split_key[1:], values) - if denested_data != values: - # multiple fields to be set, update the properties instead of overwriting it - - temp = denested_data - - nd_temp = new_dict[split_key[0]][Constants.extraction_field] - for i in split_key[1:-1]: - if nd_temp.get(i) is None: - nd_temp[i] = {} - nd_temp[i][Constants.type_field] = temp[i][Constants.type_field] - nd_temp[i][Constants.extraction_field] = {} - - nd_temp = nd_temp[i][Constants.extraction_field] - temp = temp[i][Constants.extraction_field] - - nd_temp[split_key[-1]] = temp[Constants.extraction_field][split_key[-1]] - if hidden := nd_temp[split_key[-1]].get(Constants.hidden): - new_dict[split_key[0]][Constants.hidden] = hidden - - else: - new_dict[split_key[0]][Constants.extraction_field][split_key[1]] = denested_data - if hidden := denested_data.get(Constants.hidden): - new_dict[split_key[0]][Constants.hidden] = hidden - - poisoned_keys.append(key) - - for i in poisoned_keys: - del properties[i] - properties.update(new_dict) - return properties - - -def read_json(fp: str) -> json: - """ - Read and return json file. 
- - input - """ - if not os.path.isfile(fp): - logger.critical("File not found: %s, Bailing.", fp) - sys.exit(1) - with open(fp, encoding="utf8") as in_file: - return json.load(in_file) - - -def nest_properties(schema: dict) -> dict: - """ - Extract all - """ - type_field = schema.get(Constants.type_field) - properties = None - if type_field and type_field == Constants.properties_type: - properties = schema.get(Constants.extraction_field) - if properties is None: - raise KeyError("No properties field in json schema.") - - for k, props in schema[Constants.nesting_field].items(): - new_properties = nest_schema(properties=props[Constants.extraction_field]) - del schema[Constants.nesting_field][k][Constants.extraction_field] - schema[Constants.nesting_field][k][Constants.extraction_field] = new_properties - - - new_properties = nest_schema(properties=properties) - del schema[Constants.extraction_field] - schema[Constants.extraction_field] = new_properties - drop_keys = reorganize_schema(schema) - schema[Constants.allof_field] = drop_all_of_fields(schema[Constants.allof_field], drop_keys) - - return schema - -def drop_keys_repeated(schema): - """ - Keys in the properties section need may be repeated in the definitions, those in the definitions - need to be removed from the properties field - """ - common_keys = schema[Constants.nesting_field].keys() & schema[Constants.extraction_field].keys() - for key in common_keys: - del schema[Constants.extraction_field][key] - return schema - -def reorganize_schema(schema) -> set: - """Take a newly nested schema and merge paramter definitions together to prevent errors - TODO break this function up into smaller sections - - definitions dict: Updated definitions field in a json schema - return drop_keys set: Additional fields to delete from the schema after processing - """ - - - definitions = schema[Constants.nesting_field] - top_lvl_keys = frozenset(definitions.keys()) - properties_keys = 
frozenset(schema[Constants.extraction_field].keys()) - drop_keys = set() - for k, v in definitions.items(): - tpl_keys = [i for i in v[Constants.extraction_field].keys() if i in top_lvl_keys] - if not tpl_keys or len(tpl_keys) == 1: - continue - for i in tpl_keys: - definitions[i][Constants.extraction_field][i].update(v[Constants.extraction_field][i][Constants.extraction_field]) - del v[Constants.extraction_field][i] - if schema[Constants.extraction_field].get(i): - del schema[Constants.extraction_field][i] - drop_keys.add(k) - - for k in drop_keys: - del definitions[k] - - for k, v in definitions.items(): - common_keys = v[Constants.extraction_field].keys() & properties_keys - if not common_keys: - continue - for i in common_keys: - props = schema[Constants.extraction_field].get(i) - if props: - v[Constants.extraction_field][i].update(props[Constants.extraction_field]) - del schema[Constants.extraction_field][i] - - return drop_keys - -def dump_schema(schema: dict, output_fp: str): - """Dump the updated schema - - schema dict: The updated json schema - output_fp: the location for the new json schema - """ - with open(output_fp, 'w', encoding='utf8') as output_file: - json.dump(schema, output_file, indent=2) - - -def reformat_schema(input_json, output): - """Resolve issues with nested paramters in a nextflow schema.json - - input os.Path: file path to input file - output os.Path: file path to output file - """ - schema_in = read_json(input_json) - updated_schema = nest_properties(schema_in) - dump_schema(updated_schema, output) - -def main(argv=None): - parser = argparse.ArgumentParser(prog=__file__, description="Fromat a nextflow") - parser.add_argument("-i", "--input-file", - type=str, - help="input file", - default=None, - required=True - ) - parser.add_argument("-o", "--output", - required=True) - args = parser.parse_args(argv) - reformat_schema(args.input_file, args.output) - -if __name__ == "__main__": - sys.exit(main()) From 
7cc886d91f0d049ef4684825f6d7092e8bb985b9 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 11:21:12 -0500 Subject: [PATCH 23/26] Added final descriptions in schema --- nextflow_schema.json | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index aedbf183..5b13025e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -285,34 +285,41 @@ }, "show_hidden_params": { "type": "boolean", - "hidden": true + "hidden": true, + "description": "Show hidden parameters." }, "tracedir": { "type": "string", "default": "null/pipeline_info", - "hidden": true + "hidden": true, + "description": "Trace directory." }, "stage_in_mode": { "type": "string", "default": "symlink", - "hidden": true + "hidden": true, + "description": "Mode for staging files." }, "slurm_p": { "type": "boolean", - "hidden": true + "hidden": true, + "description": "Enable running with slurm." }, "slurm_profile": { "type": "string", - "hidden": true + "hidden": true, + "description": "Profile used for slurm." }, "validationS3PathCheck": { "type": "boolean", "default": true, - "hidden": true + "hidden": true, + "description": "Whether or not to validate S3 paths." }, "output_idx_name": { "type": "string", - "hidden": true + "hidden": true, + "description": "Index creation output file name" } } }, @@ -454,17 +461,20 @@ "fp_illumina_length_min": { "type": "integer", "default": 35, - "minimum": 1 + "minimum": 0, + "description": "reads shorter than length_required will be discarded" }, "fp_illumina_length_max": { "type": "integer", "default": 400, - "minimum": 1 + "minimum": 0, + "description": "reads longer than length_limit will be discarded, 0 means no limitation." }, "fp_single_end_length_min": { "type": "integer", "default": 1000, - "minimum": 1 + "minimum": 0, + "description": "same as fp_illumina_length_min but for single-end data. 
reads shorter than length_required will be discarded" }, "fp_dedup_reads": { "type": "boolean", From 0f95025702b562a99a9cc0c64f80d9b8441c0d0c Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 11:43:13 -0500 Subject: [PATCH 24/26] Added ignore parameters to avoid warnings in Nextflow --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 04014553..b8311ce5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,7 @@ params { show_hidden_params = false validationS3PathCheck = true validationShowHiddenParams = false - validationSchemaIgnoreParams = false + validationSchemaIgnoreParams = 'abricate,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler' validationFailUnrecognisedParams = false // for the qcreport fields // SKIP options From 215d0122e42a555f014db95625e2b7d7033cb403 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 11:50:00 -0500 Subject: [PATCH 25/26] Added tests for species top hit --- tests/pipelines/main.from_assemblies.nf.test | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/pipelines/main.from_assemblies.nf.test b/tests/pipelines/main.from_assemblies.nf.test index 99d2bbcc..6bd4686a 100644 --- a/tests/pipelines/main.from_assemblies.nf.test +++ b/tests/pipelines/main.from_assemblies.nf.test @@ -88,6 +88,7 @@ nextflow_pipeline { // output metadata def ecoli_metadata = iridanext_metadata.ecoli_GCA_000947975 + assert ecoli_metadata."SpeciesTopHit" == "Escherichia coli" 
assert ecoli_metadata."QUAST.0.Total length" == "5333525" assert ecoli_metadata."QUAST.0.Largest contig" == "300823" assert ecoli_metadata."QUAST.0.# contigs" == "187" @@ -176,6 +177,10 @@ nextflow_pipeline { def salmonella_json = final_report.salmonella_GCA_000008105.salmonella_GCA_000008105 // Tests + assert salmonella_json.SpeciesTopHit == "Salmonella enterica" + assert iridanext_metadata.salmonella_GCA_000008105."SpeciesTopHit" == "Salmonella enterica" + assert final_report_tmap.SpeciesTopHit == "Salmonella enterica" + assert salmonella_json.QUAST."0"."Total length" == "4944000" assert iridanext_metadata.salmonella_GCA_000008105."QUAST.0.Total length" == "4944000" assert final_report_tmap."QUAST.0.Total length" == "4944000" @@ -293,6 +298,10 @@ nextflow_pipeline { def listeria_json = final_report.listeria_GCF_000196035.listeria_GCF_000196035 // Tests + assert listeria_json.SpeciesTopHit == "Listeria monocytogenes" + assert iridanext_metadata.listeria_GCF_000196035."SpeciesTopHit" == "Listeria monocytogenes" + assert final_report_tmap.SpeciesTopHit == "Listeria monocytogenes" + assert listeria_json.QUAST."0"."Total length" == "2944528" assert iridanext_metadata.listeria_GCF_000196035."QUAST.0.Total length" == "2944528" assert final_report_tmap."QUAST.0.Total length" == "2944528" From d944eb8aa07d6876f39e3010389ed01242a4be98 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Wed, 20 Mar 2024 12:02:10 -0500 Subject: [PATCH 26/26] Fixed up tests --- nextflow_schema.json | 2 +- tests/pipelines/main.from_assemblies.nf.test | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5b13025e..b0492c28 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/phac-nml/mikrokondo/master/nextflow_schema.json", + "$id": 
"https://raw.githubusercontent.com/phac-nml/mikrokondo/main/nextflow_schema.json", "title": "phac-nml/mikrokondo pipeline parameters", "description": "Mikrokondo beta", "type": "object", diff --git a/tests/pipelines/main.from_assemblies.nf.test b/tests/pipelines/main.from_assemblies.nf.test index 6bd4686a..c4cf6b8e 100644 --- a/tests/pipelines/main.from_assemblies.nf.test +++ b/tests/pipelines/main.from_assemblies.nf.test @@ -88,7 +88,7 @@ nextflow_pipeline { // output metadata def ecoli_metadata = iridanext_metadata.ecoli_GCA_000947975 - assert ecoli_metadata."SpeciesTopHit" == "Escherichia coli" + assert ecoli_metadata."SpeciesTopHit" == "s__Escherichia coli" assert ecoli_metadata."QUAST.0.Total length" == "5333525" assert ecoli_metadata."QUAST.0.Largest contig" == "300823" assert ecoli_metadata."QUAST.0.# contigs" == "187" @@ -177,9 +177,9 @@ nextflow_pipeline { def salmonella_json = final_report.salmonella_GCA_000008105.salmonella_GCA_000008105 // Tests - assert salmonella_json.SpeciesTopHit == "Salmonella enterica" - assert iridanext_metadata.salmonella_GCA_000008105."SpeciesTopHit" == "Salmonella enterica" - assert final_report_tmap.SpeciesTopHit == "Salmonella enterica" + assert salmonella_json.SpeciesTopHit == "s__Salmonella enterica" + assert iridanext_metadata.salmonella_GCA_000008105."SpeciesTopHit" == "s__Salmonella enterica" + assert final_report_tmap.SpeciesTopHit == "s__Salmonella enterica" assert salmonella_json.QUAST."0"."Total length" == "4944000" assert iridanext_metadata.salmonella_GCA_000008105."QUAST.0.Total length" == "4944000" @@ -298,9 +298,9 @@ nextflow_pipeline { def listeria_json = final_report.listeria_GCF_000196035.listeria_GCF_000196035 // Tests - assert listeria_json.SpeciesTopHit == "Listeria monocytogenes" - assert iridanext_metadata.listeria_GCF_000196035."SpeciesTopHit" == "Listeria monocytogenes" - assert final_report_tmap.SpeciesTopHit == "Listeria monocytogenes" + assert listeria_json.SpeciesTopHit == "s__Listeria 
monocytogenes" + assert iridanext_metadata.listeria_GCF_000196035."SpeciesTopHit" == "s__Listeria monocytogenes" + assert final_report_tmap.SpeciesTopHit == "s__Listeria monocytogenes" assert listeria_json.QUAST."0"."Total length" == "2944528" assert iridanext_metadata.listeria_GCF_000196035."QUAST.0.Total length" == "2944528"