From 71477bd25b9df825933076195b602377fc02b9c0 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 28 Nov 2019 22:53:51 +0100 Subject: [PATCH 01/27] First draft of a parameters.settings.json file for the template --- .../parameters.settings.json | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json new file mode 100644 index 0000000000..cebcc73798 --- /dev/null +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json @@ -0,0 +1,143 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/{{ cookiecutter.name }}/master/parameters.schema.json", + "title": "{{ cookiecutter.name }} pipeline parameters", + "description": "{{ cookiecutter.description }}", + "type": "object", + "properties": { + "input": { + "title": "Pipeline Input", + "type": "object", + "properties": { + "genome": { + "type": "string", + "description": "genome", + }, + "reads": { + "type": "string", + "description": "reads", + "default": "data/*{1,2}.fastq.gz", + }, + "single_end": { + "type": "boolean", + "description": "single_end", + }, + "outdir": { + "type": "string", + "description": "outdir", + "default": "./results", + }, + "name": { + "type": "string", + "description": "Workflow name", + }, + "multiqc_config": { + "type": "string", + "description": "multiqc_config", + "default": "$baseDir/assets/multiqc_config.yaml", + }, + "email": { + "type": "string", + "description": "email", + }, + "email_on_fail": { + "type": "string", + "description": "email_on_fail", + }, + "max_multiqc_email_size": { + "type": "string", + "description": "max_multiqc_email_size", + "default": "25 MB", + }, + "plaintext_email": { + "type": "boolean", + "description": 
"plaintext_email", + }, + "monochrome_logs": { + "type": "boolean", + "description": "monochrome_logs", + }, + "help": { + "type": "boolean", + "description": "help", + }, + "igenomes_base": { + "type": "string", + "description": "igenomes_base", + "default": "s3://ngi-igenomes/igenomes/", + }, + "tracedir": { + "type": "string", + "description": "tracedir", + "default": "./results/pipeline_info", + }, + "igenomes_ignore": { + "type": "boolean", + "description": "igenomes_ignore", + }, + "custom_config_version": { + "type": "string", + "description": "custom_config_version", + "default": "master", + }, + "custom_config_base": { + "type": "string", + "description": "custom_config_base", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + }, + "hostnames": { + "type": "string", + "description": "hostnames", + "default": "[crick:['.thecrick.org'], genotoul:['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest:['.genouest.org'], uppmax:['.uppmax.uu.se']]", + }, + "config_profile_description": { + "type": "string", + "description": "config_profile_description", + }, + "config_profile_contact": { + "type": "string", + "description": "config_profile_contact", + }, + "config_profile_url": { + "type": "string", + "description": "config_profile_url", + }, + "max_memory": { + "type": "string", + "description": "max_memory", + "default": "128 GB", + }, + "max_cpus": { + "type": "integer", + "description": "max_cpus", + "default": 16, + }, + "max_time": { + "type": "string", + "description": "max_time", + "default": "10d", + }, + "genomes": { + "type": "string", + "description": "genomes", + }, + "fasta": { + "type": "string", + "description": "fasta", + }, + "awsqueue": { + "type": "string", + "description": "awsqueue", + }, + "readPaths": { + "type": "string", + "description": "readPaths", + }, + "awsregion": { + "type": "string", + "description": "awsregion", + } + } + } + ] +} From fd758a096a056351ad528ebbf06e73db296b81d3 Mon Sep 
17 00:00:00 2001 From: Phil Ewels Date: Fri, 6 Mar 2020 20:37:39 +0000 Subject: [PATCH 02/27] Fix JSON for template JSONschema file --- .../parameters.settings.json | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json index cebcc73798..d217571184 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json @@ -11,133 +11,133 @@ "properties": { "genome": { "type": "string", - "description": "genome", + "description": "genome" }, "reads": { "type": "string", "description": "reads", - "default": "data/*{1,2}.fastq.gz", + "default": "data/*{1,2}.fastq.gz" }, "single_end": { "type": "boolean", - "description": "single_end", + "description": "single_end" }, "outdir": { "type": "string", "description": "outdir", - "default": "./results", + "default": "./results" }, "name": { "type": "string", - "description": "Workflow name", + "description": "Workflow name" }, "multiqc_config": { "type": "string", "description": "multiqc_config", - "default": "$baseDir/assets/multiqc_config.yaml", + "default": "$baseDir/assets/multiqc_config.yaml" }, "email": { "type": "string", - "description": "email", + "description": "email" }, "email_on_fail": { "type": "string", - "description": "email_on_fail", + "description": "email_on_fail" }, "max_multiqc_email_size": { "type": "string", "description": "max_multiqc_email_size", - "default": "25 MB", + "default": "25 MB" }, "plaintext_email": { "type": "boolean", - "description": "plaintext_email", + "description": "plaintext_email" }, "monochrome_logs": { "type": "boolean", - "description": "monochrome_logs", + "description": "monochrome_logs" }, "help": { "type": "boolean", - "description": "help", + "description": "help" }, 
"igenomes_base": { "type": "string", "description": "igenomes_base", - "default": "s3://ngi-igenomes/igenomes/", + "default": "s3://ngi-igenomes/igenomes/" }, "tracedir": { "type": "string", "description": "tracedir", - "default": "./results/pipeline_info", + "default": "./results/pipeline_info" }, "igenomes_ignore": { "type": "boolean", - "description": "igenomes_ignore", + "description": "igenomes_ignore" }, "custom_config_version": { "type": "string", "description": "custom_config_version", - "default": "master", + "default": "master" }, "custom_config_base": { "type": "string", "description": "custom_config_base", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "default": "https://raw.githubusercontent.com/nf-core/configs/master" }, "hostnames": { "type": "string", "description": "hostnames", - "default": "[crick:['.thecrick.org'], genotoul:['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest:['.genouest.org'], uppmax:['.uppmax.uu.se']]", + "default": "[crick:['.thecrick.org'], genotoul:['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest:['.genouest.org'], uppmax:['.uppmax.uu.se']]" }, "config_profile_description": { "type": "string", - "description": "config_profile_description", + "description": "config_profile_description" }, "config_profile_contact": { "type": "string", - "description": "config_profile_contact", + "description": "config_profile_contact" }, "config_profile_url": { "type": "string", - "description": "config_profile_url", + "description": "config_profile_url" }, "max_memory": { "type": "string", "description": "max_memory", - "default": "128 GB", + "default": "128 GB" }, "max_cpus": { "type": "integer", "description": "max_cpus", - "default": 16, + "default": 16 }, "max_time": { "type": "string", "description": "max_time", - "default": "10d", + "default": "10d" }, "genomes": { "type": "string", - "description": "genomes", + "description": "genomes" }, "fasta": { "type": 
"string", - "description": "fasta", + "description": "fasta" }, "awsqueue": { "type": "string", - "description": "awsqueue", + "description": "awsqueue" }, "readPaths": { "type": "string", - "description": "readPaths", + "description": "readPaths" }, "awsregion": { "type": "string", - "description": "awsregion", + "description": "awsregion" } } } - ] + } } From d0f20f8a75e87391034b414e3c4afce96a3aadb9 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Fri, 6 Mar 2020 21:37:43 +0000 Subject: [PATCH 03/27] Start writing new nf-core schema commands. New nf-core schema lint now validates JSON Schema documents --- nf_core/schema.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++ scripts/nf-core | 30 +++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 nf_core/schema.py diff --git a/nf_core/schema.py b/nf_core/schema.py new file mode 100644 index 0000000000..f19b15332d --- /dev/null +++ b/nf_core/schema.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +""" Code to deal with pipeline JSON Schema """ + +from __future__ import print_function + +import click +import json +import jsonschema +import logging +import os +import re +import subprocess +import sys + + +class PipelineSchema (object): + """ Class to generate a schema object with + functions to handle pipeline JSON Schema """ + + def __init__(self): + """ Initialise the object """ + + self.schema = None + + def lint_schema(self, schema_path): + """ Lint a given schema to see if it looks valid """ + try: + self.load_schema(schema_path) + except AssertionError: + sys.exit(1) + else: + logging.info("JSON Schema looks valid!") + + def load_schema(self, schema_path): + """ Load a JSON Schema from a file """ + try: + with open(schema_path, 'r') as fh: + self.schema = json.load(fh) + except json.decoder.JSONDecodeError as e: + logging.error("Could not parse JSON:\n {}".format(e)) + raise AssertionError + logging.debug("JSON file loaded: {}".format(schema_path)) + + # Check that the Schema is valid + 
try: + jsonschema.Draft7Validator.check_schema(self.schema) + except jsonschema.exceptions.SchemaError as e: + logging.error("Schema does not validate as Draft 7 JSONSchema:\n {}".format(e)) + raise AssertionError diff --git a/scripts/nf-core b/scripts/nf-core index 65e0311114..31c267de51 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -16,6 +16,7 @@ import nf_core.launch import nf_core.licences import nf_core.lint import nf_core.list +import nf_core.schema import nf_core.sync import logging @@ -241,6 +242,35 @@ def lint(pipeline_dir, release): sys.exit(1) +## nf-core schema subcommands +@nf_core_cli.group(cls=CustomHelpOrder) +def schema(): + """ Manage pipeline JSON Schema """ + pass + +@schema.command(help_priority=1) +@click.argument( + 'schema_path', + type = click.Path(exists=True), + required = True, + metavar = "" +) +def lint(schema_path): + """ Check that a given JSON Schema is valid """ + schema_obj = nf_core.schema.PipelineSchema() + schema_obj.lint_schema(schema_path) + +@schema.command(help_priority=2) +def build(): + """ Interactively build a schema from Nextflow params """ + raise NotImplementedError('This function has not yet been written') + +@schema.command(help_priority=3) +def validate(): + """ Validate supplied parameters against a schema """ + raise NotImplementedError('This function has not yet been written') + + @nf_core_cli.command('bump-version', help_priority=7) @click.argument( 'pipeline_dir', From 7280782a83bbee5914dcb1a5696fce51200af65d Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sat, 7 Mar 2020 00:17:57 +0100 Subject: [PATCH 04/27] Start building 'nf-core schema build' functionality --- nf_core/schema.py | 124 +++++++++++++++++++++++++++++++++++++++++----- scripts/nf-core | 16 +++++- 2 files changed, 126 insertions(+), 14 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index f19b15332d..4ed4a1439a 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -12,6 +12,8 @@ import subprocess import sys +import 
nf_core.utils + class PipelineSchema (object): """ Class to generate a schema object with @@ -21,29 +23,127 @@ def __init__(self): """ Initialise the object """ self.schema = None + self.pipeline_params = {} + self.quiet = False def lint_schema(self, schema_path): """ Lint a given schema to see if it looks valid """ try: self.load_schema(schema_path) - except AssertionError: + except json.decoder.JSONDecodeError as e: + logging.error("Could not parse JSON:\n {}".format(e)) + sys.exit(1) + except AssertionError as e: + logging.info("JSON Schema does not follow nf-core specs:\n {}".format(e)) + sys.exit(1) + except jsonschema.exceptions.SchemaError as e: + logging.error("Schema does not validate as Draft 7 JSONSchema:\n {}".format(e)) sys.exit(1) else: logging.info("JSON Schema looks valid!") def load_schema(self, schema_path): """ Load a JSON Schema from a file """ - try: - with open(schema_path, 'r') as fh: - self.schema = json.load(fh) - except json.decoder.JSONDecodeError as e: - logging.error("Could not parse JSON:\n {}".format(e)) - raise AssertionError + with open(schema_path, 'r') as fh: + self.schema = json.load(fh) logging.debug("JSON file loaded: {}".format(schema_path)) # Check that the Schema is valid - try: - jsonschema.Draft7Validator.check_schema(self.schema) - except jsonschema.exceptions.SchemaError as e: - logging.error("Schema does not validate as Draft 7 JSONSchema:\n {}".format(e)) - raise AssertionError + jsonschema.Draft7Validator.check_schema(self.schema) + logging.debug("JSON Schema Draft7 validated") + + # Check for nf-core schema keys + assert 'properties' in self.schema, "Schema should have 'properties' section" + assert 'input' in self.schema['properties'], "properties should have section 'input'" + assert 'properties' in self.schema['properties']['input'], "properties.input should have section 'properties'" + + def build_schema(self, pipeline_dir, quiet): + """ Interactively build a new JSON Schema for a pipeline """ + + if quiet: + 
self.quiet = True + + # Load a JSON Schema file if we find one + pipeline_schema_file = os.path.join(pipeline_dir, 'parameters.settings.json') + if(os.path.exists(pipeline_schema_file)): + logging.debug("Parsing existing JSON Schema: {}".format(pipeline_schema_file)) + try: + self.load_schema(pipeline_schema_file) + except Exception as e: + logging.error("Existing JSON Schema found, but it is invalid:\n {}".format(click.style(str(e), fg='red'))) + logging.info( + "Please fix or delete this file, then try again.\n" \ + "For more details, run the following command:\n " + \ + click.style("nf-core schema lint {}".format(pipeline_schema_file), fg='blue') + ) + sys.exit(1) + logging.info("Loaded existing JSON schema with {} params: {}".format(len(self.schema['properties']['input']), pipeline_schema_file)) + else: + logging.debug("Existing JSON Schema not found: {}".format(pipeline_schema_file)) + + self.get_wf_params(pipeline_dir) + self.remove_schema_notfound_config() + self.add_schema_found_config() + + # Write results to a JSON file + logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['input']), pipeline_schema_file)) + with open(pipeline_schema_file, 'w') as fh: + json.dump(self.schema, fh, indent=4) + + def get_wf_params(self, pipeline_dir): + """ + Load the pipeline parameter defaults using `nextflow config` + Strip out only the params. values and ignore anything that is not a flat variable + """ + logging.debug("Collecting pipeline parameter defaults\n") + config = nf_core.utils.fetch_wf_config(pipeline_dir) + # Pull out just the params. values + for ckey, cval in config.items(): + if ckey.startswith('params.'): + # skip anything that's not a flat variable + if '.' 
in ckey[7:]: + logging.debug("Skipping pipeline param '{}' because it has nested parameter values".format(ckey)) + continue + self.pipeline_params[ckey[7:]] = cval + + def remove_schema_notfound_config(self): + """ + Strip out anything from the existing JSON Schema that's not in the nextflow params + """ + # Use iterator so that we can delete the key whilst iterating + for p_key in [k for k in self.schema['properties']['input'].keys()]: + if p_key not in self.pipeline_params.keys(): + if self.quiet or click.confirm("Parameter '{}' found in schema but not in Nextflow config. Remove it?".format(p_key), True): + del self.schema['properties']['input'][p_key] + logging.debug("Removing '{}' from JSON Schema".format(p_key)) + + def add_schema_found_config(self): + """ + Add anything that's found in the Nextflow params that's missing in the JSON Schema + """ + for p_key, p_val in self.pipeline_params.items(): + if p_key not in self.schema['properties']['input'].keys(): + if self.quiet or click.confirm("Parameter '{}' found in Nextflow config but not in JSON Schema. 
Add it?".format(p_key), True): + self.schema['properties']['input'][p_key] = self.prompt_config_input(p_key, p_val) + logging.debug("Adding '{}' to JSON Schema".format(p_key)) + + def prompt_config_input(self, p_key, p_val, p_schema = None): + """ + Build a JSON Schema dictionary for an input interactively + """ + if p_schema is None: + p_type = "string" + if isinstance(p_val, bool): + p_type = 'boolean' + if isinstance(p_val, int): + p_type = 'integer' + + p_schema = { + "type": p_type, + "default": p_val + } + if self.quiet: + return p_schema + else: + logging.warn("prompt_config_input not finished") + return p_schema diff --git a/scripts/nf-core b/scripts/nf-core index 31c267de51..e233c6219a 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -261,9 +261,21 @@ def lint(schema_path): schema_obj.lint_schema(schema_path) @schema.command(help_priority=2) -def build(): +@click.argument( + 'pipeline_dir', + type = click.Path(exists=True), + required = True, + metavar = "" +) +@click.option( + '--quiet', + is_flag = True, + help = "Do not build interactively, just use Nextflow defaults" +) +def build(pipeline_dir, quiet): """ Interactively build a schema from Nextflow params """ - raise NotImplementedError('This function has not yet been written') + schema_obj = nf_core.schema.PipelineSchema() + schema_obj.build_schema(pipeline_dir, quiet) @schema.command(help_priority=3) def validate(): From 1dce31dc2f4bf2ede61f85aec8affff2c796ac1f Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 12 Mar 2020 08:13:49 +0100 Subject: [PATCH 05/27] Testing and refining code for building JSON Schema --- nf_core/schema.py | 44 ++++++++++++++++++++++++++------------------ scripts/nf-core | 6 +++--- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index 4ed4a1439a..0045b78659 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -24,7 +24,7 @@ def __init__(self): self.schema = None self.pipeline_params = {} - self.quiet = False + 
self.use_defaults = False def lint_schema(self, schema_path): """ Lint a given schema to see if it looks valid """ @@ -57,11 +57,11 @@ def load_schema(self, schema_path): assert 'input' in self.schema['properties'], "properties should have section 'input'" assert 'properties' in self.schema['properties']['input'], "properties.input should have section 'properties'" - def build_schema(self, pipeline_dir, quiet): + def build_schema(self, pipeline_dir, use_defaults): """ Interactively build a new JSON Schema for a pipeline """ - if quiet: - self.quiet = True + if use_defaults: + self.use_defaults = True # Load a JSON Schema file if we find one pipeline_schema_file = os.path.join(pipeline_dir, 'parameters.settings.json') @@ -77,7 +77,7 @@ def build_schema(self, pipeline_dir, quiet): click.style("nf-core schema lint {}".format(pipeline_schema_file), fg='blue') ) sys.exit(1) - logging.info("Loaded existing JSON schema with {} params: {}".format(len(self.schema['properties']['input']), pipeline_schema_file)) + logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']['input']['properties']), pipeline_schema_file)) else: logging.debug("Existing JSON Schema not found: {}".format(pipeline_schema_file)) @@ -86,7 +86,7 @@ def build_schema(self, pipeline_dir, quiet): self.add_schema_found_config() # Write results to a JSON file - logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['input']), pipeline_schema_file)) + logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['input']['properties']), pipeline_schema_file)) with open(pipeline_schema_file, 'w') as fh: json.dump(self.schema, fh, indent=4) @@ -110,24 +110,36 @@ def remove_schema_notfound_config(self): """ Strip out anything from the existing JSON Schema that's not in the nextflow params """ + params_removed = [] # Use iterator so that we can delete the key whilst iterating - for p_key in [k for k in 
self.schema['properties']['input'].keys()]: + for p_key in [k for k in self.schema['properties']['input']['properties'].keys()]: if p_key not in self.pipeline_params.keys(): - if self.quiet or click.confirm("Parameter '{}' found in schema but not in Nextflow config. Remove it?".format(p_key), True): - del self.schema['properties']['input'][p_key] + p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) + remove_it_nice = click.style('Remove it?', fg='yellow') + if self.use_defaults or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): + del self.schema['properties']['input']['properties'][p_key] logging.debug("Removing '{}' from JSON Schema".format(p_key)) + params_removed.append(click.style(p_key, fg='white', bold=True)) + if len(params_removed) > 0: + logging.info("Removed {} inputs from existing JSON Schema that were not found with `nextflow config`:\n {}\n".format(len(params_removed), ', '.join(params_removed))) def add_schema_found_config(self): """ Add anything that's found in the Nextflow params that's missing in the JSON Schema """ + params_added = [] for p_key, p_val in self.pipeline_params.items(): - if p_key not in self.schema['properties']['input'].keys(): - if self.quiet or click.confirm("Parameter '{}' found in Nextflow config but not in JSON Schema. Add it?".format(p_key), True): - self.schema['properties']['input'][p_key] = self.prompt_config_input(p_key, p_val) + if p_key not in self.schema['properties']['input']['properties'].keys(): + p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) + add_it_nice = click.style('Add to JSON Schema?', fg='cyan') + if self.use_defaults or click.confirm("Found '{}' in Nextflow config. 
{}".format(p_key_nice, add_it_nice), True): + self.schema['properties']['input'][p_key] = self.build_schema_input(p_key, p_val) logging.debug("Adding '{}' to JSON Schema".format(p_key)) + params_added.append(click.style(p_key, fg='white', bold=True)) + if len(params_added) > 0: + logging.info("Added {} inputs to JSON Schema that were found with `nextflow config`:\n {}".format(len(params_added), ', '.join(params_added))) - def prompt_config_input(self, p_key, p_val, p_schema = None): + def build_schema_input(self, p_key, p_val, p_schema = None): """ Build a JSON Schema dictionary for an input interactively """ @@ -142,8 +154,4 @@ def prompt_config_input(self, p_key, p_val, p_schema = None): "type": p_type, "default": p_val } - if self.quiet: - return p_schema - else: - logging.warn("prompt_config_input not finished") - return p_schema + return p_schema diff --git a/scripts/nf-core b/scripts/nf-core index e233c6219a..15bdc91b01 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -268,14 +268,14 @@ def lint(schema_path): metavar = "" ) @click.option( - '--quiet', + '--use_defaults', is_flag = True, help = "Do not build interactively, just use Nextflow defaults" ) -def build(pipeline_dir, quiet): +def build(pipeline_dir, use_defaults): """ Interactively build a schema from Nextflow params """ schema_obj = nf_core.schema.PipelineSchema() - schema_obj.build_schema(pipeline_dir, quiet) + schema_obj.build_schema(pipeline_dir, use_defaults) @schema.command(help_priority=3) def validate(): From 80bb97346a00d78fc2560c2bed9459a42fde7a45 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 12 Mar 2020 14:25:47 +0100 Subject: [PATCH 06/27] Wrote code to handle interaction with nf-core website schema builder --- nf_core/schema.py | 150 +++++++++++++++++++++++++++++++++++++++------- scripts/nf-core | 12 +++- 2 files changed, 136 insertions(+), 26 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index 0045b78659..c790a60c1a 100644 --- a/nf_core/schema.py +++ 
b/nf_core/schema.py @@ -9,8 +9,12 @@ import logging import os import re +import requests +import requests_cache import subprocess import sys +import time +import webbrowser import nf_core.utils @@ -23,72 +27,91 @@ def __init__(self): """ Initialise the object """ self.schema = None + self.schema_filename = None self.pipeline_params = {} self.use_defaults = False + self.web_schema_build_url = 'https://nf-co.re/json_schema_build' + self.web_schema_build_web_url = None + self.web_schema_build_api_url = None - def lint_schema(self, schema_path): + def lint_schema(self, schema_filename=None): """ Lint a given schema to see if it looks valid """ + + if schema_filename is not None: + self.schema_filename = schema_filename + try: - self.load_schema(schema_path) + self.load_schema() + self.validate_schema() except json.decoder.JSONDecodeError as e: logging.error("Could not parse JSON:\n {}".format(e)) sys.exit(1) except AssertionError as e: logging.info("JSON Schema does not follow nf-core specs:\n {}".format(e)) sys.exit(1) - except jsonschema.exceptions.SchemaError as e: - logging.error("Schema does not validate as Draft 7 JSONSchema:\n {}".format(e)) - sys.exit(1) else: logging.info("JSON Schema looks valid!") - def load_schema(self, schema_path): + def load_schema(self): """ Load a JSON Schema from a file """ - with open(schema_path, 'r') as fh: + with open(self.schema_filename, 'r') as fh: self.schema = json.load(fh) - logging.debug("JSON file loaded: {}".format(schema_path)) + logging.debug("JSON file loaded: {}".format(self.schema_filename)) + def save_schema(self): + """ Load a JSON Schema from a file """ + # Write results to a JSON file + logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['input']['properties']), self.schema_filename)) + with open(self.schema_filename, 'w') as fh: + json.dump(self.schema, fh, indent=4) + + def validate_schema(self): # Check that the Schema is valid - 
jsonschema.Draft7Validator.check_schema(self.schema) - logging.debug("JSON Schema Draft7 validated") + try: + jsonschema.Draft7Validator.check_schema(self.schema) + logging.debug("JSON Schema Draft7 validated") + except jsonschema.exceptions.SchemaError as e: + raise AssertionError("Schema does not validate as Draft 7 JSON Schema:\n {}".format(e)) # Check for nf-core schema keys assert 'properties' in self.schema, "Schema should have 'properties' section" assert 'input' in self.schema['properties'], "properties should have section 'input'" assert 'properties' in self.schema['properties']['input'], "properties.input should have section 'properties'" - def build_schema(self, pipeline_dir, use_defaults): + def build_schema(self, pipeline_dir, use_defaults, url): """ Interactively build a new JSON Schema for a pipeline """ if use_defaults: self.use_defaults = True + if url: + self.web_schema_build_url = url # Load a JSON Schema file if we find one - pipeline_schema_file = os.path.join(pipeline_dir, 'parameters.settings.json') - if(os.path.exists(pipeline_schema_file)): - logging.debug("Parsing existing JSON Schema: {}".format(pipeline_schema_file)) + self.schema_filename = os.path.join(pipeline_dir, 'parameters.settings.json') + if(os.path.exists(self.schema_filename)): + logging.debug("Parsing existing JSON Schema: {}".format(self.schema_filename)) try: - self.load_schema(pipeline_schema_file) + self.load_schema() except Exception as e: logging.error("Existing JSON Schema found, but it is invalid:\n {}".format(click.style(str(e), fg='red'))) logging.info( "Please fix or delete this file, then try again.\n" \ "For more details, run the following command:\n " + \ - click.style("nf-core schema lint {}".format(pipeline_schema_file), fg='blue') + click.style("nf-core schema lint {}".format(self.schema_filename), fg='blue') ) sys.exit(1) - logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']['input']['properties']), 
pipeline_schema_file)) + logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']['input']['properties']), self.schema_filename)) else: - logging.debug("Existing JSON Schema not found: {}".format(pipeline_schema_file)) + logging.debug("Existing JSON Schema not found: {}".format(self.schema_filename)) self.get_wf_params(pipeline_dir) self.remove_schema_notfound_config() self.add_schema_found_config() + self.save_schema() - # Write results to a JSON file - logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['input']['properties']), pipeline_schema_file)) - with open(pipeline_schema_file, 'w') as fh: - json.dump(self.schema, fh, indent=4) + # If running interactively, send to the web for customisation + if not self.use_defaults or click.confirm("Launch web builder for customisation and editing?", True): + self.launch_web_builder() def get_wf_params(self, pipeline_dir): """ @@ -133,7 +156,7 @@ def add_schema_found_config(self): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) add_it_nice = click.style('Add to JSON Schema?', fg='cyan') if self.use_defaults or click.confirm("Found '{}' in Nextflow config. 
{}".format(p_key_nice, add_it_nice), True): - self.schema['properties']['input'][p_key] = self.build_schema_input(p_key, p_val) + self.schema['properties']['input']['properties'][p_key] = self.build_schema_input(p_key, p_val) logging.debug("Adding '{}' to JSON Schema".format(p_key)) params_added.append(click.style(p_key, fg='white', bold=True)) if len(params_added) > 0: @@ -155,3 +178,84 @@ def build_schema_input(self, p_key, p_val, p_schema = None): "default": p_val } return p_schema + + def launch_web_builder(self): + """ + Send JSON Schema to web builder and wait for response + """ + content = { + 'post_content': 'json_schema', + 'api': 'true', + 'version': nf_core.__version__, + 'schema': self.schema + } + try: + response = requests.post(url=self.web_schema_build_url, data=content) + except (requests.exceptions.Timeout): + logging.error("Schema builder URL timed out: {}".format(self.web_schema_build_url)) + except (requests.exceptions.ConnectionError): + logging.error("Could not connect to schema builder URL: {}".format(self.web_schema_build_url)) + else: + if response.status_code != 200: + logging.error("Could not access remote JSON Schema builder: {} (HTML {} Error)".format(self.web_schema_build_url, response.status_code)) + logging.debug("Response content:\n{}".format(response.content)) + else: + try: + web_response = json.loads(response.content) + assert 'status' in web_response + assert 'api_url' in web_response + assert 'web_url' in web_response + assert web_response['status'] == 'recieved' + except (json.decoder.JSONDecodeError, AssertionError) as e: + logging.error("JSON Schema builder response not recognised: {}\n See verbose log for full response (nf-core -v schema)".format(self.web_schema_build_url)) + logging.debug("Response content:\n{}".format(response.content)) + else: + self.web_schema_build_web_url = web_response['web_url'] + self.web_schema_build_api_url = web_response['api_url'] + logging.info("Opening URL: 
{}".format(web_response['web_url'])) + webbrowser.open(web_response['web_url']) + logging.info("Waiting for form to be completed in the browser. Use ctrl+c to stop waiting and force exit.") + self.get_web_builder_response() + + def get_web_builder_response(self): + """ + Given a URL for a Schema build response, recursively query it until results are ready. + Once ready, validate Schema and write to disk. + """ + # Clear requests_cache so that we get the updated statuses + requests_cache.clear() + try: + response = requests.get(self.web_schema_build_api_url, headers={'Cache-Control': 'no-cache'}) + except (requests.exceptions.Timeout): + logging.error("Schema builder URL timed out: {}".format(self.web_schema_build_api_url)) + except (requests.exceptions.ConnectionError): + logging.error("Could not connect to schema builder URL: {}".format(self.web_schema_build_api_url)) + else: + if response.status_code != 200: + logging.error("Could not access remote JSON Schema builder results: {} (HTML {} Error)".format(self.web_schema_build_api_url, response.status_code)) + logging.debug("Response content:\n{}".format(response.content)) + else: + try: + web_response = json.loads(response.content) + assert 'status' in web_response + except (json.decoder.JSONDecodeError, AssertionError) as e: + logging.error("JSON Schema builder results response not recognised: {}\n See verbose log for full response".format(self.web_schema_build_api_url)) + logging.debug("Response content:\n{}".format(response.content)) + else: + if web_response['status'] == 'error': + logging.error("Got error from JSON Schema builder ( {} )".format(click.style(web_response.get('message'), fg='red'))) + elif web_response['status'] == 'waiting_for_user': + time.sleep(5) # wait 5 seconds before trying again + sys.stdout.write('.') + sys.stdout.flush() + self.get_web_builder_response() + else: + logging.info("Found saved status from JSON Schema builder") + self.schema = web_response['schema'] + try: + 
self.validate_schema() + except AssertionError as e: + logging.info("Response from JSON Builder did not pass validation:\n {}".format(e)) + sys.exit(1) + else: + self.save_schema() diff --git a/scripts/nf-core b/scripts/nf-core index 15bdc91b01..000e2fc0d8 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -270,12 +270,18 @@ def lint(schema_path): @click.option( '--use_defaults', is_flag = True, - help = "Do not build interactively, just use Nextflow defaults" + help = "Do not build interactively, just use Nextflow defaults and exit" ) -def build(pipeline_dir, use_defaults): +@click.option( + '--url', + type = str, + default = 'https://nf-co.re/json_schema_build', + help = 'URL for the web-based Schema builder' +) +def build(pipeline_dir, use_defaults, url): """ Interactively build a schema from Nextflow params """ schema_obj = nf_core.schema.PipelineSchema() - schema_obj.build_schema(pipeline_dir, use_defaults) + schema_obj.build_schema(pipeline_dir, use_defaults, url) @schema.command(help_priority=3) def validate(): From a2bbd9f7694927e38956c185d8dfc80cbe99f6fe Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 12 Mar 2020 14:49:30 +0100 Subject: [PATCH 07/27] Encode JSON schema in POST request --- nf_core/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index c790a60c1a..d625f91832 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -187,7 +187,7 @@ def launch_web_builder(self): 'post_content': 'json_schema', 'api': 'true', 'version': nf_core.__version__, - 'schema': self.schema + 'schema': json.dumps(self.schema) } try: response = requests.post(url=self.web_schema_build_url, data=content) From 9857ff3a633599199fe73a4bc68cf57295d1dc0f Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Fri, 13 Mar 2020 15:37:35 +0100 Subject: [PATCH 08/27] Final testing and tweaks for schema builder command --- nf_core/schema.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff 
--git a/nf_core/schema.py b/nf_core/schema.py index d625f91832..70c02e5d98 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -110,8 +110,9 @@ def build_schema(self, pipeline_dir, use_defaults, url): self.save_schema() # If running interactively, send to the web for customisation - if not self.use_defaults or click.confirm("Launch web builder for customisation and editing?", True): - self.launch_web_builder() + if not self.use_defaults: + if click.confirm(click.style("\nLaunch web builder for customisation and editing?", fg='magenta'), True): + self.launch_web_builder() def get_wf_params(self, pipeline_dir): """ @@ -187,6 +188,7 @@ def launch_web_builder(self): 'post_content': 'json_schema', 'api': 'true', 'version': nf_core.__version__, + 'status': 'waiting_for_user', 'schema': json.dumps(self.schema) } try: @@ -249,13 +251,19 @@ def get_web_builder_response(self): sys.stdout.write('.') sys.stdout.flush() self.get_web_builder_response() - else: - logging.info("Found saved status from JSON Schema builder") - self.schema = web_response['schema'] + elif web_response['status'] == 'web_builder_edited': + logging.info("Found saved status from nf-core JSON Schema builder") try: + self.schema = json.loads(web_response['schema']) self.validate_schema() + except json.decoder.JSONDecodeError as e: + logging.error("Could not parse returned JSON:\n {}".format(e)) + sys.exit(1) except AssertionError as e: logging.info("Response from JSON Builder did not pass validation:\n {}".format(e)) sys.exit(1) else: self.save_schema() + else: + logging.error("JSON Schema builder returned unexpected status ({}): {}\n See verbose log for full response".format(web_response['status'], self.web_schema_build_api_url)) + logging.debug("Response content:\n{}".format(response.content)) From 030b07b44f3e16f630b6769636eefa8fd3611588 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sat, 14 Mar 2020 09:53:44 +0100 Subject: [PATCH 09/27] Rename parameters.settings.json > nextflow_schema.json --- 
.../{parameters.settings.json => nextflow_schema.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nf_core/pipeline-template/{{cookiecutter.name_noslash}}/{parameters.settings.json => nextflow_schema.json} (100%) diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json similarity index 100% rename from nf_core/pipeline-template/{{cookiecutter.name_noslash}}/parameters.settings.json rename to nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json From 4953503974fd451cac83e50f77fb7f052bfa3eda Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sat, 14 Mar 2020 09:58:00 +0100 Subject: [PATCH 10/27] Schema: Rename Input to params --- .../nextflow_schema.json | 5 ++-- nf_core/schema.py | 26 +++++++++---------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json index d217571184..7fc7674e3d 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json @@ -5,8 +5,9 @@ "description": "{{ cookiecutter.description }}", "type": "object", "properties": { - "input": { - "title": "Pipeline Input", + "params": { + "title": "Pipeline parameters", + "description": "Nextflow params config options", "type": "object", "properties": { "genome": { diff --git a/nf_core/schema.py b/nf_core/schema.py index 70c02e5d98..8ec72ef5f6 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -61,7 +61,7 @@ def load_schema(self): def save_schema(self): """ Load a JSON Schema from a file """ # Write results to a JSON file - logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['input']['properties']), self.schema_filename)) + 
logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']['params']['properties']), self.schema_filename)) with open(self.schema_filename, 'w') as fh: json.dump(self.schema, fh, indent=4) @@ -75,8 +75,8 @@ def validate_schema(self): # Check for nf-core schema keys assert 'properties' in self.schema, "Schema should have 'properties' section" - assert 'input' in self.schema['properties'], "properties should have section 'input'" - assert 'properties' in self.schema['properties']['input'], "properties.input should have section 'properties'" + assert 'params' in self.schema['properties'], "top-level properties should have object 'params'" + assert 'properties' in self.schema['properties']['params'], "properties.params should have section 'properties'" def build_schema(self, pipeline_dir, use_defaults, url): """ Interactively build a new JSON Schema for a pipeline """ @@ -87,7 +87,7 @@ def build_schema(self, pipeline_dir, use_defaults, url): self.web_schema_build_url = url # Load a JSON Schema file if we find one - self.schema_filename = os.path.join(pipeline_dir, 'parameters.settings.json') + self.schema_filename = os.path.join(pipeline_dir, 'nextflow_schema.json') if(os.path.exists(self.schema_filename)): logging.debug("Parsing existing JSON Schema: {}".format(self.schema_filename)) try: @@ -100,7 +100,7 @@ def build_schema(self, pipeline_dir, use_defaults, url): click.style("nf-core schema lint {}".format(self.schema_filename), fg='blue') ) sys.exit(1) - logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']['input']['properties']), self.schema_filename)) + logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']['params']['properties']), self.schema_filename)) else: logging.debug("Existing JSON Schema not found: {}".format(self.schema_filename)) @@ -136,16 +136,16 @@ def remove_schema_notfound_config(self): """ params_removed = [] # Use iterator 
so that we can delete the key whilst iterating - for p_key in [k for k in self.schema['properties']['input']['properties'].keys()]: + for p_key in [k for k in self.schema['properties']['params']['properties'].keys()]: if p_key not in self.pipeline_params.keys(): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) remove_it_nice = click.style('Remove it?', fg='yellow') if self.use_defaults or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): - del self.schema['properties']['input']['properties'][p_key] + del self.schema['properties']['params']['properties'][p_key] logging.debug("Removing '{}' from JSON Schema".format(p_key)) params_removed.append(click.style(p_key, fg='white', bold=True)) if len(params_removed) > 0: - logging.info("Removed {} inputs from existing JSON Schema that were not found with `nextflow config`:\n {}\n".format(len(params_removed), ', '.join(params_removed))) + logging.info("Removed {} params from existing JSON Schema that were not found with `nextflow config`:\n {}\n".format(len(params_removed), ', '.join(params_removed))) def add_schema_found_config(self): """ @@ -153,19 +153,19 @@ def add_schema_found_config(self): """ params_added = [] for p_key, p_val in self.pipeline_params.items(): - if p_key not in self.schema['properties']['input']['properties'].keys(): + if p_key not in self.schema['properties']['params']['properties'].keys(): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) add_it_nice = click.style('Add to JSON Schema?', fg='cyan') if self.use_defaults or click.confirm("Found '{}' in Nextflow config. 
{}".format(p_key_nice, add_it_nice), True): - self.schema['properties']['input']['properties'][p_key] = self.build_schema_input(p_key, p_val) + self.schema['properties']['params']['properties'][p_key] = self.build_schema_param(p_key, p_val) logging.debug("Adding '{}' to JSON Schema".format(p_key)) params_added.append(click.style(p_key, fg='white', bold=True)) if len(params_added) > 0: - logging.info("Added {} inputs to JSON Schema that were found with `nextflow config`:\n {}".format(len(params_added), ', '.join(params_added))) + logging.info("Added {} params to JSON Schema that were found with `nextflow config`:\n {}".format(len(params_added), ', '.join(params_added))) - def build_schema_input(self, p_key, p_val, p_schema = None): + def build_schema_param(self, p_key, p_val, p_schema = None): """ - Build a JSON Schema dictionary for an input interactively + Build a JSON Schema dictionary for an param interactively """ if p_schema is None: p_type = "string" From 28a8e5b098805165b200cffe0a67530c24bbd1ed Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sun, 15 Mar 2020 08:06:31 +0100 Subject: [PATCH 11/27] Add option --web_only for nf-core schema build --- nf_core/schema.py | 14 +++++++++----- scripts/nf-core | 9 +++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index 8ec72ef5f6..5135503f4a 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -30,6 +30,7 @@ def __init__(self): self.schema_filename = None self.pipeline_params = {} self.use_defaults = False + self.web_only = False self.web_schema_build_url = 'https://nf-co.re/json_schema_build' self.web_schema_build_web_url = None self.web_schema_build_api_url = None @@ -78,11 +79,13 @@ def validate_schema(self): assert 'params' in self.schema['properties'], "top-level properties should have object 'params'" assert 'properties' in self.schema['properties']['params'], "properties.params should have section 'properties'" - def build_schema(self, 
pipeline_dir, use_defaults, url): + def build_schema(self, pipeline_dir, use_defaults, web_only, url): """ Interactively build a new JSON Schema for a pipeline """ if use_defaults: self.use_defaults = True + if web_only: + self.web_only = True if url: self.web_schema_build_url = url @@ -104,10 +107,11 @@ def build_schema(self, pipeline_dir, use_defaults, url): else: logging.debug("Existing JSON Schema not found: {}".format(self.schema_filename)) - self.get_wf_params(pipeline_dir) - self.remove_schema_notfound_config() - self.add_schema_found_config() - self.save_schema() + if not self.web_only: + self.get_wf_params(pipeline_dir) + self.remove_schema_notfound_config() + self.add_schema_found_config() + self.save_schema() # If running interactively, send to the web for customisation if not self.use_defaults: diff --git a/scripts/nf-core b/scripts/nf-core index 000e2fc0d8..a0b1441f20 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -272,16 +272,21 @@ def lint(schema_path): is_flag = True, help = "Do not build interactively, just use Nextflow defaults and exit" ) +@click.option( + '--web_only', + is_flag = True, + help = "Skip building using Nextflow config, just launch the web tool" +) @click.option( '--url', type = str, default = 'https://nf-co.re/json_schema_build', help = 'URL for the web-based Schema builder' ) -def build(pipeline_dir, use_defaults, url): +def build(pipeline_dir, use_defaults, web_only, url): """ Interactively build a schema from Nextflow params """ schema_obj = nf_core.schema.PipelineSchema() - schema_obj.build_schema(pipeline_dir, use_defaults, url) + schema_obj.build_schema(pipeline_dir, use_defaults, web_only, url) @schema.command(help_priority=3) def validate(): From 336e4542914ed088e8129d6f5638e15d421f5221 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sun, 15 Mar 2020 08:08:04 +0100 Subject: [PATCH 12/27] Hyphens for cli flags, not underscores --- scripts/nf-core | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/scripts/nf-core b/scripts/nf-core index a0b1441f20..8e64f11561 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -268,12 +268,12 @@ def lint(schema_path): metavar = "" ) @click.option( - '--use_defaults', + '--use-defaults', is_flag = True, help = "Do not build interactively, just use Nextflow defaults and exit" ) @click.option( - '--web_only', + '--web-only', is_flag = True, help = "Skip building using Nextflow config, just launch the web tool" ) From 3a06c2b94811365304ffe447b50ced5a3fdf7bdc Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sun, 15 Mar 2020 18:27:53 +0100 Subject: [PATCH 13/27] Update pipeline template schema --- .../nextflow_schema.json | 229 ++++++++++-------- 1 file changed, 131 insertions(+), 98 deletions(-) diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json index 7fc7674e3d..58040dd30a 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json @@ -10,135 +10,168 @@ "description": "Nextflow params config options", "type": "object", "properties": { - "genome": { - "type": "string", - "description": "genome" - }, "reads": { "type": "string", - "description": "reads", - "default": "data/*{1,2}.fastq.gz" - }, - "single_end": { - "type": "boolean", - "description": "single_end" + "description": "Input FastQ files", + "default": "data/*{1,2}.fastq.gz", + "fa_icon": "" }, "outdir": { "type": "string", - "description": "outdir", - "default": "./results" + "description": "Output directory for results", + "default": "./results", + "fa_icon": "" }, - "name": { + "genome": { "type": "string", - "description": "Workflow name" + "description": "Reference genome ID", + "fa_icon": "" }, - "multiqc_config": { + "single_end": { + "type": "boolean", + "description": "Single-end sequencing data", + "fa_icon": "", + "default": 
"False" + }, + "name": { "type": "string", - "description": "multiqc_config", - "default": "$baseDir/assets/multiqc_config.yaml" + "description": "Workflow name", + "fa_icon": "" }, "email": { "type": "string", - "description": "email" + "description": "Email address for completion summary", + "fa_icon": "" }, "email_on_fail": { "type": "string", - "description": "email_on_fail" - }, - "max_multiqc_email_size": { - "type": "string", - "description": "max_multiqc_email_size", - "default": "25 MB" + "description": "Email address for completion summary, only when pipeline fails", + "fa_icon": "" }, "plaintext_email": { "type": "boolean", - "description": "plaintext_email" - }, - "monochrome_logs": { - "type": "boolean", - "description": "monochrome_logs" - }, - "help": { - "type": "boolean", - "description": "help" + "description": "Send plain-text email instead of HTML", + "fa_icon": "", + "hidden": true }, - "igenomes_base": { + "multiqc_config": { "type": "string", - "description": "igenomes_base", - "default": "s3://ngi-igenomes/igenomes/" + "description": "Custom config file to supply to MultiQC", + "default": "", + "fa_icon": "", + "hidden": true }, - "tracedir": { + "max_multiqc_email_size": { "type": "string", - "description": "tracedir", - "default": "./results/pipeline_info" + "description": "File size limit when attaching MultiQC reports to summary emails", + "default": "25 MB", + "fa_icon": "", + "hidden": true }, - "igenomes_ignore": { + "monochrome_logs": { "type": "boolean", - "description": "igenomes_ignore" + "description": "Do not use coloured log outputs", + "fa_icon": "", + "hidden": true }, - "custom_config_version": { - "type": "string", - "description": "custom_config_version", - "default": "master" - }, - "custom_config_base": { - "type": "string", - "description": "custom_config_base", - "default": "https://raw.githubusercontent.com/nf-core/configs/master" - }, - "hostnames": { - "type": "string", - "description": "hostnames", - "default": 
"[crick:['.thecrick.org'], genotoul:['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest:['.genouest.org'], uppmax:['.uppmax.uu.se']]" - }, - "config_profile_description": { - "type": "string", - "description": "config_profile_description" - }, - "config_profile_contact": { - "type": "string", - "description": "config_profile_contact" - }, - "config_profile_url": { - "type": "string", - "description": "config_profile_url" - }, - "max_memory": { - "type": "string", - "description": "max_memory", - "default": "128 GB" - }, - "max_cpus": { - "type": "integer", - "description": "max_cpus", - "default": 16 - }, - "max_time": { - "type": "string", - "description": "max_time", - "default": "10d" - }, - "genomes": { - "type": "string", - "description": "genomes" - }, - "fasta": { + "tracedir": { "type": "string", - "description": "fasta" + "description": "Directory to keep pipeline Nextflow logs and reports", + "default": "./results/pipeline_info", + "fa_icon": "", + "hidden": true }, - "awsqueue": { + "igenomes_base": { "type": "string", - "description": "awsqueue" + "description": "Directory / URL base for iGenomes references", + "default": "s3://ngi-igenomes/igenomes/", + "fa_icon": "", + "hidden": true }, - "readPaths": { - "type": "string", - "description": "readPaths" + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config", + "fa_icon": "", + "hidden": true + }, + "Maximum job request limits": { + "type": "object", + "description": "Limit the maximum computational requirements that a single job can request", + "default": "", + "properties": { + "max_cpus": { + "type": "integer", + "description": "max_cpus", + "default": 16, + "fa_icon": "", + "hidden": true + }, + "max_memory": { + "type": "string", + "description": "max_memory", + "default": "128 GB", + "fa_icon": "", + "hidden": true + }, + "max_time": { + "type": "string", + "description": "max_time", + "default": "10d", + "fa_icon": "", + 
"hidden": true + } + } + }, + "Institutional config params": { + "type": "object", + "description": "Params used by nf-core/configs", + "default": "", + "properties": { + "custom_config_version": { + "type": "string", + "description": "nf-core/configs parameter", + "default": "master", + "hidden": true + }, + "custom_config_base": { + "type": "string", + "description": "nf-core/configs parameter", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true + }, + "hostnames": { + "type": "string", + "description": "nf-core/configs parameter", + "default": "", + "hidden": true + }, + "config_profile_description": { + "type": "string", + "description": "nf-core/configs parameter", + "hidden": true + }, + "config_profile_contact": { + "type": "string", + "description": "nf-core/configs parameter", + "hidden": true + }, + "config_profile_url": { + "type": "string", + "description": "nf-core/configs parameter", + "hidden": true + } + } }, - "awsregion": { - "type": "string", - "description": "awsregion" + "help": { + "type": "boolean", + "description": "Display help text", + "hidden": true, + "fa_icon": "" } - } + }, + "required": [ + "reads" + ] } } } From 064621f45102867c2ba9a32711f002dfb62c3935 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Sun, 15 Mar 2020 19:25:05 +0100 Subject: [PATCH 14/27] Schema: Handle groups when checking for missing or incorrect params --- nf_core/schema.py | 55 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index 5135503f4a..f2c347bbf9 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -109,8 +109,8 @@ def build_schema(self, pipeline_dir, use_defaults, web_only, url): if not self.web_only: self.get_wf_params(pipeline_dir) - self.remove_schema_notfound_config() - self.add_schema_found_config() + self.remove_schema_notfound_configs() + self.add_schema_found_configs() self.save_schema() # If running 
interactively, send to the web for customisation @@ -134,36 +134,61 @@ def get_wf_params(self, pipeline_dir): continue self.pipeline_params[ckey[7:]] = cval - def remove_schema_notfound_config(self): + def remove_schema_notfound_configs(self): """ Strip out anything from the existing JSON Schema that's not in the nextflow params """ params_removed = [] # Use iterator so that we can delete the key whilst iterating for p_key in [k for k in self.schema['properties']['params']['properties'].keys()]: - if p_key not in self.pipeline_params.keys(): - p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) - remove_it_nice = click.style('Remove it?', fg='yellow') - if self.use_defaults or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): + # Groups - we assume only one-deep + if self.schema['properties']['params']['properties'][p_key]['type'] == 'object': + for p_child_key in [k for k in self.schema['properties']['params']['properties'][p_key].get('properties', {}).keys()]: + if self.prompt_remove_schema_notfound_config(p_child_key): + del self.schema['properties']['params']['properties'][p_key]['properties'][p_child_key] + logging.debug("Removing '{}' from JSON Schema".format(p_child_key)) + params_removed.append(click.style(p_child_key, fg='white', bold=True)) + + # Top-level params + else: + if self.prompt_remove_schema_notfound_config(p_key): del self.schema['properties']['params']['properties'][p_key] logging.debug("Removing '{}' from JSON Schema".format(p_key)) params_removed.append(click.style(p_key, fg='white', bold=True)) + + if len(params_removed) > 0: logging.info("Removed {} params from existing JSON Schema that were not found with `nextflow config`:\n {}\n".format(len(params_removed), ', '.join(params_removed))) - def add_schema_found_config(self): + def prompt_remove_schema_notfound_config(self, p_key): + """ + Check if a given key is found in the nextflow config 
params and prompt to remove it if note + + Returns True if it should be removed, False if not. + """ + if p_key not in self.pipeline_params.keys(): + p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) + remove_it_nice = click.style('Remove it?', fg='yellow') + if self.use_defaults or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): + return True + return False + + def add_schema_found_configs(self): """ Add anything that's found in the Nextflow params that's missing in the JSON Schema """ params_added = [] for p_key, p_val in self.pipeline_params.items(): - if p_key not in self.schema['properties']['params']['properties'].keys(): - p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) - add_it_nice = click.style('Add to JSON Schema?', fg='cyan') - if self.use_defaults or click.confirm("Found '{}' in Nextflow config. {}".format(p_key_nice, add_it_nice), True): - self.schema['properties']['params']['properties'][p_key] = self.build_schema_param(p_key, p_val) - logging.debug("Adding '{}' to JSON Schema".format(p_key)) - params_added.append(click.style(p_key, fg='white', bold=True)) + # Check if key is in top-level params + if not p_key in self.schema['properties']['params']['properties'].keys(): + # Check if key is in group-level params + if not any( [ p_key in param.get('properties', {}) for k, param in self.schema['properties']['params']['properties'].items() ] ): + p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) + add_it_nice = click.style('Add to JSON Schema?', fg='cyan') + if self.use_defaults or click.confirm("Found '{}' in Nextflow config. 
{}".format(p_key_nice, add_it_nice), True): + self.schema['properties']['params']['properties'][p_key] = self.build_schema_param(p_key, p_val) + logging.debug("Adding '{}' to JSON Schema".format(p_key)) + params_added.append(click.style(p_key, fg='white', bold=True)) if len(params_added) > 0: logging.info("Added {} params to JSON Schema that were found with `nextflow config`:\n {}".format(len(params_added), ', '.join(params_added))) From 60ff7e23aed387b7b27f7a2b44be564c9296a30f Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 08:26:48 +0100 Subject: [PATCH 15/27] Template schema: Add some help text. --- .../nextflow_schema.json | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json index 58040dd30a..dab3e6dc95 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json @@ -14,7 +14,8 @@ "type": "string", "description": "Input FastQ files", "default": "data/*{1,2}.fastq.gz", - "fa_icon": "" + "fa_icon": "", + "help_text": "A glob pattern for input FastQ files. Should include at least one asterisk (*). For paired-end data, should contain curly brackets with two patterns differentiating the paired reads. For example: `*_R{1,2}.fastq.gz`" }, "outdir": { "type": "string", @@ -25,28 +26,35 @@ "genome": { "type": "string", "description": "Reference genome ID", - "fa_icon": "" + "fa_icon": "", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files. 
For example: `--genome GRCh38`" }, "single_end": { "type": "boolean", "description": "Single-end sequencing data", "fa_icon": "", - "default": "False" + "default": "False", + "help_text": "If using single-end FastQ files as an input, specify this flag to run the pipeline in single-end mode." }, "name": { "type": "string", "description": "Workflow name", - "fa_icon": "" + "fa_icon": "", + "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." }, "email": { "type": "string", "description": "Email address for completion summary", - "fa_icon": "" + "fa_icon": "", + "help_text": "An email address to send a summary email to when the pipeline is completed.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails", - "fa_icon": "" + "fa_icon": "", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." 
}, "plaintext_email": { "type": "boolean", @@ -101,21 +109,21 @@ "properties": { "max_cpus": { "type": "integer", - "description": "max_cpus", + "description": "Maximum number of CPUs that can be requested for any single job", "default": 16, "fa_icon": "", "hidden": true }, "max_memory": { "type": "string", - "description": "max_memory", + "description": "Maximum amount of memory that can be requested for any single job", "default": "128 GB", "fa_icon": "", "hidden": true }, "max_time": { "type": "string", - "description": "max_time", + "description": "Maximum amount of time that can be requested for any single job", "default": "10d", "fa_icon": "", "hidden": true From 44df2c6187c4b0c9c50d148f03fd21eb9d3661cd Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 09:50:45 +0100 Subject: [PATCH 16/27] Remove top-level params object --- .../nextflow_schema.json | 297 +++++++++--------- 1 file changed, 145 insertions(+), 152 deletions(-) diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json index dab3e6dc95..7937b1db28 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json @@ -5,181 +5,174 @@ "description": "{{ cookiecutter.description }}", "type": "object", "properties": { - "params": { - "title": "Pipeline parameters", - "description": "Nextflow params config options", + "reads": { + "type": "string", + "description": "Input FastQ files", + "default": "data/*{1,2}.fastq.gz", + "fa_icon": "", + "help_text": "A glob pattern for input FastQ files. Should include at least one asterisk (*). For paired-end data, should contain curly brackets with two patterns differentiating the paired reads. 
For example: `*_R{1,2}.fastq.gz`" + }, + "outdir": { + "type": "string", + "description": "Output directory for results", + "default": "./results", + "fa_icon": "" + }, + "genome": { + "type": "string", + "description": "Reference genome ID", + "fa_icon": "", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files. For example: `--genome GRCh38`" + }, + "single_end": { + "type": "boolean", + "description": "Single-end sequencing data", + "fa_icon": "", + "default": "False", + "help_text": "If using single-end FastQ files as an input, specify this flag to run the pipeline in single-end mode." + }, + "name": { + "type": "string", + "description": "Workflow name", + "fa_icon": "", + "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." + }, + "email": { + "type": "string", + "description": "Email address for completion summary", + "fa_icon": "", + "help_text": "An email address to send a summary email to when the pipeline is completed.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails", + "fa_icon": "", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." 
+ }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML", + "fa_icon": "", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "description": "Custom config file to supply to MultiQC", + "default": "", + "fa_icon": "", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails", + "default": "25 MB", + "fa_icon": "", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs", + "fa_icon": "", + "hidden": true + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports", + "default": "./results/pipeline_info", + "fa_icon": "", + "hidden": true + }, + "igenomes_base": { + "type": "string", + "description": "Directory / URL base for iGenomes references", + "default": "s3://ngi-igenomes/igenomes/", + "fa_icon": "", + "hidden": true + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config", + "fa_icon": "", + "hidden": true + }, + "Maximum job request limits": { "type": "object", + "description": "Limit the maximum computational requirements that a single job can request", + "default": "", "properties": { - "reads": { - "type": "string", - "description": "Input FastQ files", - "default": "data/*{1,2}.fastq.gz", - "fa_icon": "", - "help_text": "A glob pattern for input FastQ files. Should include at least one asterisk (*). For paired-end data, should contain curly brackets with two patterns differentiating the paired reads. 
For example: `*_R{1,2}.fastq.gz`" - }, - "outdir": { - "type": "string", - "description": "Output directory for results", - "default": "./results", - "fa_icon": "" + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job", + "default": 16, + "fa_icon": "", + "hidden": true }, - "genome": { + "max_memory": { "type": "string", - "description": "Reference genome ID", - "fa_icon": "", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files. For example: `--genome GRCh38`" - }, - "single_end": { - "type": "boolean", - "description": "Single-end sequencing data", - "fa_icon": "", - "default": "False", - "help_text": "If using single-end FastQ files as an input, specify this flag to run the pipeline in single-end mode." + "description": "Maximum amount of memory that can be requested for any single job", + "default": "128 GB", + "fa_icon": "", + "hidden": true }, - "name": { + "max_time": { "type": "string", - "description": "Workflow name", - "fa_icon": "", - "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." 
- }, - "email": { + "description": "Maximum amount of time that can be requested for any single job", + "default": "10d", + "fa_icon": "", + "hidden": true + } + } + }, + "Institutional config params": { + "type": "object", + "description": "Params used by nf-core/configs", + "default": "", + "properties": { + "custom_config_version": { "type": "string", - "description": "Email address for completion summary", - "fa_icon": "", - "help_text": "An email address to send a summary email to when the pipeline is completed.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + "description": "nf-core/configs parameter", + "default": "master", + "hidden": true }, - "email_on_fail": { + "custom_config_base": { "type": "string", - "description": "Email address for completion summary, only when pipeline fails", - "fa_icon": "", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully." 
- }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML", - "fa_icon": "", + "description": "nf-core/configs parameter", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true }, - "multiqc_config": { + "hostnames": { "type": "string", - "description": "Custom config file to supply to MultiQC", + "description": "nf-core/configs parameter", "default": "", - "fa_icon": "", "hidden": true }, - "max_multiqc_email_size": { + "config_profile_description": { "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails", - "default": "25 MB", - "fa_icon": "", - "hidden": true - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs", - "fa_icon": "", + "description": "nf-core/configs parameter", "hidden": true }, - "tracedir": { + "config_profile_contact": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports", - "default": "./results/pipeline_info", - "fa_icon": "", + "description": "nf-core/configs parameter", "hidden": true }, - "igenomes_base": { + "config_profile_url": { "type": "string", - "description": "Directory / URL base for iGenomes references", - "default": "s3://ngi-igenomes/igenomes/", - "fa_icon": "", - "hidden": true - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config", - "fa_icon": "", + "description": "nf-core/configs parameter", "hidden": true - }, - "Maximum job request limits": { - "type": "object", - "description": "Limit the maximum computational requirements that a single job can request", - "default": "", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job", - "default": 16, - "fa_icon": "", - "hidden": true - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be 
requested for any single job", - "default": "128 GB", - "fa_icon": "", - "hidden": true - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job", - "default": "10d", - "fa_icon": "", - "hidden": true - } - } - }, - "Institutional config params": { - "type": "object", - "description": "Params used by nf-core/configs", - "default": "", - "properties": { - "custom_config_version": { - "type": "string", - "description": "nf-core/configs parameter", - "default": "master", - "hidden": true - }, - "custom_config_base": { - "type": "string", - "description": "nf-core/configs parameter", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true - }, - "hostnames": { - "type": "string", - "description": "nf-core/configs parameter", - "default": "", - "hidden": true - }, - "config_profile_description": { - "type": "string", - "description": "nf-core/configs parameter", - "hidden": true - }, - "config_profile_contact": { - "type": "string", - "description": "nf-core/configs parameter", - "hidden": true - }, - "config_profile_url": { - "type": "string", - "description": "nf-core/configs parameter", - "hidden": true - } - } - }, - "help": { - "type": "boolean", - "description": "Display help text", - "hidden": true, - "fa_icon": "" } - }, - "required": [ - "reads" - ] + } + }, + "help": { + "type": "boolean", + "description": "Display help text", + "hidden": true, + "fa_icon": "" } - } + }, + "required": [ + "reads" + ] } From b80fdf189a3b1680098d20023402b53e225ed633 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 09:51:22 +0100 Subject: [PATCH 17/27] Added nf-core schema validate functionality Also removed top-level params object from schema linting --- nf_core/schema.py | 88 +++++++++++++++++++++++++++++++++++++++-------- scripts/nf-core | 36 ++++++++++++++++--- 2 files changed, 104 insertions(+), 20 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py 
index f2c347bbf9..b256a25464 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -15,8 +15,10 @@ import sys import time import webbrowser +import yaml import nf_core.utils +import nf_core.launch class PipelineSchema (object): @@ -28,6 +30,7 @@ def __init__(self): self.schema = None self.schema_filename = None + self.input_params = {} self.pipeline_params = {} self.use_defaults = False self.web_only = False @@ -48,10 +51,34 @@ def lint_schema(self, schema_filename=None): logging.error("Could not parse JSON:\n {}".format(e)) sys.exit(1) except AssertionError as e: - logging.info("JSON Schema does not follow nf-core specs:\n {}".format(e)) + logging.info(click.style("[✗] JSON Schema does not follow nf-core specs:\n {}", fg='red').format(e)) sys.exit(1) else: - logging.info("JSON Schema looks valid!") + logging.info(click.style("[✓] Pipeline schema looks valid", fg='green')) + + def get_schema_from_name(self, pipeline): + """ Given a pipeline name, try to get the JSON Schema """ + + # Supplied path exists - assume a local pipeline directory or schema + if os.path.exists(pipeline): + if os.path.basename(pipeline) == 'nextflow_schema.json': + self.schema_filename = pipeline + else: + self.schema_filename = os.path.join(pipeline, 'nextflow_schema.json') + + # Path does not exist - assume a name of a remote workflow + else: + wf = nf_core.launch.Launch(pipeline) + wf.get_local_wf() + self.schema_filename = os.path.join(wf.local_wf.local_path, 'nextflow_schema.json') + + # Check that the schema file exists + if not os.path.exists(self.schema_filename): + logging.error("Could not find pipeline schema for '{}': {}".format(pipeline, self.schema_filename)) + sys.exit(1) + + # Load and check schema + self.lint_schema() def load_schema(self): """ Load a JSON Schema from a file """ @@ -62,12 +89,45 @@ def load_schema(self): def save_schema(self): """ Load a JSON Schema from a file """ # Write results to a JSON file - logging.info("Writing JSON schema with {} params: 
{}".format(len(self.schema['properties']['params']['properties']), self.schema_filename)) + logging.info("Writing JSON schema with {} params: {}".format(len(self.schema['properties']), self.schema_filename)) with open(self.schema_filename, 'w') as fh: json.dump(self.schema, fh, indent=4) + def load_input_params(self, params_path): + """ Load a given a path to a parameters file (JSON/YAML) + + These should be input parameters used to run a pipeline with + the Nextflow -params-file option. + """ + # First, try to load as JSON + try: + with open(params_path, 'r') as fh: + self.input_params = json.load(fh) + logging.debug("Loaded JSON input params: {}".format(params_path)) + except Exception as json_e: + logging.debug("Could not load input params as JSON: {}".format(json_e)) + # This failed, try to load as YAML + try: + with open(params_path, 'r') as fh: + self.input_params = yaml.safe_load(fh) + logging.debug("Loaded YAML input params: {}".format(params_path)) + except Exception as yaml_e: + logging.error("Could not load params file as either JSON or YAML:\n JSON: {}\n YAML: {}".format(json_e, yaml_e)) + sys.exit(1) + + def validate_params(self): + """ Check given parameters against a schema and validate """ + try: + jsonschema.validate(self.input_params, self.schema) + except jsonschema.exceptions.ValidationError as e: + logging.error(click.style("[✗] Input parameters are invalid: {}".format(e.message), fg='red')) + return False + logging.info(click.style("[✓] Input parameters look valid", fg='green')) + return True + + def validate_schema(self): - # Check that the Schema is valid + """ Check that the Schema is valid """ try: jsonschema.Draft7Validator.check_schema(self.schema) logging.debug("JSON Schema Draft7 validated") @@ -76,8 +136,6 @@ def validate_schema(self): # Check for nf-core schema keys assert 'properties' in self.schema, "Schema should have 'properties' section" - assert 'params' in self.schema['properties'], "top-level properties should have object 
'params'" - assert 'properties' in self.schema['properties']['params'], "properties.params should have section 'properties'" def build_schema(self, pipeline_dir, use_defaults, web_only, url): """ Interactively build a new JSON Schema for a pipeline """ @@ -103,7 +161,7 @@ def build_schema(self, pipeline_dir, use_defaults, web_only, url): click.style("nf-core schema lint {}".format(self.schema_filename), fg='blue') ) sys.exit(1) - logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']['params']['properties']), self.schema_filename)) + logging.info("Loaded existing JSON schema with {} params: {}\n".format(len(self.schema['properties']), self.schema_filename)) else: logging.debug("Existing JSON Schema not found: {}".format(self.schema_filename)) @@ -140,19 +198,19 @@ def remove_schema_notfound_configs(self): """ params_removed = [] # Use iterator so that we can delete the key whilst iterating - for p_key in [k for k in self.schema['properties']['params']['properties'].keys()]: + for p_key in [k for k in self.schema['properties'].keys()]: # Groups - we assume only one-deep - if self.schema['properties']['params']['properties'][p_key]['type'] == 'object': - for p_child_key in [k for k in self.schema['properties']['params']['properties'][p_key].get('properties', {}).keys()]: + if self.schema['properties'][p_key]['type'] == 'object': + for p_child_key in [k for k in self.schema['properties'][p_key].get('properties', {}).keys()]: if self.prompt_remove_schema_notfound_config(p_child_key): - del self.schema['properties']['params']['properties'][p_key]['properties'][p_child_key] + del self.schema['properties'][p_key]['properties'][p_child_key] logging.debug("Removing '{}' from JSON Schema".format(p_child_key)) params_removed.append(click.style(p_child_key, fg='white', bold=True)) # Top-level params else: if self.prompt_remove_schema_notfound_config(p_key): - del self.schema['properties']['params']['properties'][p_key] + del 
self.schema['properties'][p_key] logging.debug("Removing '{}' from JSON Schema".format(p_key)) params_removed.append(click.style(p_key, fg='white', bold=True)) @@ -180,13 +238,13 @@ def add_schema_found_configs(self): params_added = [] for p_key, p_val in self.pipeline_params.items(): # Check if key is in top-level params - if not p_key in self.schema['properties']['params']['properties'].keys(): + if not p_key in self.schema['properties'].keys(): # Check if key is in group-level params - if not any( [ p_key in param.get('properties', {}) for k, param in self.schema['properties']['params']['properties'].items() ] ): + if not any( [ p_key in param.get('properties', {}) for k, param in self.schema['properties'].items() ] ): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) add_it_nice = click.style('Add to JSON Schema?', fg='cyan') if self.use_defaults or click.confirm("Found '{}' in Nextflow config. {}".format(p_key_nice, add_it_nice), True): - self.schema['properties']['params']['properties'][p_key] = self.build_schema_param(p_key, p_val) + self.schema['properties'][p_key] = self.build_schema_param(p_key, p_val) logging.debug("Adding '{}' to JSON Schema".format(p_key)) params_added.append(click.style(p_key, fg='white', bold=True)) if len(params_added) > 0: diff --git a/scripts/nf-core b/scripts/nf-core index 8e64f11561..885d0ebe79 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -256,7 +256,11 @@ def schema(): metavar = "" ) def lint(schema_path): - """ Check that a given JSON Schema is valid """ + """ Check that a given JSON Schema is valid. + + Runs as part of the nf-core lint command, this is a convenience + command that does just the schema linting nice and quickly. 
+ """ schema_obj = nf_core.schema.PipelineSchema() schema_obj.lint_schema(schema_path) @@ -284,14 +288,36 @@ def lint(schema_path): help = 'URL for the web-based Schema builder' ) def build(pipeline_dir, use_defaults, web_only, url): - """ Interactively build a schema from Nextflow params """ + """ Interactively build a schema from Nextflow params. """ schema_obj = nf_core.schema.PipelineSchema() schema_obj.build_schema(pipeline_dir, use_defaults, web_only, url) @schema.command(help_priority=3) -def validate(): - """ Validate supplied parameters against a schema """ - raise NotImplementedError('This function has not yet been written') +@click.argument( + 'pipeline', + required = True, + metavar = "" +) +@click.option( + '--params', + type = click.Path(exists=True), + required = True, + help = 'JSON parameter file' +) +def validate(pipeline, params): + """ Validate supplied parameters against a schema. + + Nextflow can be run using the -params-file flag, which loads + script parameters from a JSON/YAML file. + + This command takes such a file and validates it against the + schema for the given pipeline. 
+ """ + schema_obj = nf_core.schema.PipelineSchema() + schema_obj.get_schema_from_name(pipeline) + schema_obj.load_input_params(params) + if not schema_obj.validate_params(): + sys.exit(1) @nf_core_cli.command('bump-version', help_priority=7) From 7340d2ce1a73b1ec5fa94f408c74aba8d9a0e7cb Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 10:31:49 +0100 Subject: [PATCH 18/27] Raise exceptions instead of sys.exit(1) --- nf_core/schema.py | 35 +++++++++++++++++++++++++---------- scripts/nf-core | 9 +++++++-- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index b256a25464..1fbc5ad183 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -42,17 +42,30 @@ def lint_schema(self, schema_filename=None): """ Lint a given schema to see if it looks valid """ if schema_filename is not None: - self.schema_filename = schema_filename + if os.path.isdir(schema_filename): + self.schema_filename = os.path.join(schema_filename, 'nextflow_schema.json') + else: + self.schema_filename = schema_filename + + try: + assert os.path.exists(self.schema_filename) + assert os.path.isfile(self.schema_filename) + except AssertionError as e: + error_msg = "Schema filename not found: {}".format(self.schema_filename) + logging.error(click.style(error_msg, fg='red')) + raise AssertionError(error_msg) try: self.load_schema() self.validate_schema() except json.decoder.JSONDecodeError as e: - logging.error("Could not parse JSON:\n {}".format(e)) - sys.exit(1) + error_msg = "Could not parse JSON:\n {}".format(e) + logging.error(click.style(error_msg, fg='red')) + raise AssertionError(error_msg) except AssertionError as e: - logging.info(click.style("[✗] JSON Schema does not follow nf-core specs:\n {}", fg='red').format(e)) - sys.exit(1) + error_msg = "[✗] JSON Schema does not follow nf-core specs:\n {}".format(e) + logging.error(click.style(error_msg, fg='red')) + raise AssertionError(error_msg) else: logging.info(click.style("[✓] 
Pipeline schema looks valid", fg='green')) @@ -74,11 +87,12 @@ def get_schema_from_name(self, pipeline): # Check that the schema file exists if not os.path.exists(self.schema_filename): - logging.error("Could not find pipeline schema for '{}': {}".format(pipeline, self.schema_filename)) - sys.exit(1) + error = "Could not find pipeline schema for '{}': {}".format(pipeline, self.schema_filename) + logging.error(error) + raise AssertionError(error) # Load and check schema - self.lint_schema() + return self.lint_schema() def load_schema(self): """ Load a JSON Schema from a file """ @@ -112,8 +126,9 @@ def load_input_params(self, params_path): self.input_params = yaml.safe_load(fh) logging.debug("Loaded YAML input params: {}".format(params_path)) except Exception as yaml_e: - logging.error("Could not load params file as either JSON or YAML:\n JSON: {}\n YAML: {}".format(json_e, yaml_e)) - sys.exit(1) + error_msg = "Could not load params file as either JSON or YAML:\n JSON: {}\n YAML: {}".format(json_e, yaml_e) + logging.error(error_msg) + raise AssertionError(error_msg) def validate_params(self): """ Check given parameters against a schema and validate """ diff --git a/scripts/nf-core b/scripts/nf-core index 885d0ebe79..b6c07e35c5 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -262,7 +262,10 @@ def lint(schema_path): command that does just the schema linting nice and quickly. 
""" schema_obj = nf_core.schema.PipelineSchema() - schema_obj.lint_schema(schema_path) + try: + schema_obj.lint_schema(schema_path) + except AssertionError as e: + sys.exit(1) @schema.command(help_priority=2) @click.argument( @@ -316,7 +319,9 @@ def validate(pipeline, params): schema_obj = nf_core.schema.PipelineSchema() schema_obj.get_schema_from_name(pipeline) schema_obj.load_input_params(params) - if not schema_obj.validate_params(): + try: + schema_obj.validate_params() + except AssertionError as e: sys.exit(1) From f3f0a310ba3c475d3d7cde5c597828074e9f288c Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 10:48:45 +0100 Subject: [PATCH 19/27] Add schema linting to main lint call Also change pipeline name lint test to allow numbers. Fixes nf-core/tools#588 --- docs/lint_errors.md | 17 +++++++++++++---- nf_core/lint.py | 29 +++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/docs/lint_errors.md b/docs/lint_errors.md index 743a404e33..55c1b89477 100644 --- a/docs/lint_errors.md +++ b/docs/lint_errors.md @@ -30,9 +30,15 @@ The following files are suggested but not a hard requirement. If they are missin * `conf/base.config` * A `conf` directory with at least one config called `base.config` -Additionally, the following files must not be present: +The following files will cause a failure if the _are_ present (to fix, delete them): * `Singularity` + * As we are relying on [Docker Hub](https://https://hub.docker.com/) instead of Singularity + and all containers are automatically pulled from there, repositories should not + have a `Singularity` file present. +* `parameters.settings.json` + * The syntax for pipeline schema has changed - old `parameters.settings.json` should be + deleted and new `nextflow_schema.json` files created instead. 
## Error #2 - Docker file check failed ## {#2} @@ -306,13 +312,16 @@ The nf-core workflow template contains a number of comment lines with the follow This lint test runs through all files in the pipeline and searches for these lines. -## Error #11 - Singularity file found ##{#11} +## Error #11 - Pipeline schema syntax ## {#11} -As we are relying on [Docker Hub](https://hub.docker.com/) instead of Singularity and all containers are automatically pulled from there, repositories should not have a `Singularity` file present. +Pipelines should have a `nextflow_schema.json` file that describes the different pipeline parameters (eg. `params.something`, `--something`). + +Schema should be valid JSON files and adhere to [JSONSchema](https://json-schema.org/), Draft 7. +The top-level schema should be an `object`, where each of the `properties` corresponds to a pipeline parameter. ## Error #12 - Pipeline name ## {#12} -In order to ensure consistent naming, pipeline names should contain only lower case, alphabetical characters. Otherwise a warning is displayed. +In order to ensure consistent naming, pipeline names should contain only lower case, alphanumeric characters. Otherwise a warning is displayed. ## Error #13 - Pipeline name ## {#13} diff --git a/nf_core/lint.py b/nf_core/lint.py index e095ae24d3..a084695771 100755 --- a/nf_core/lint.py +++ b/nf_core/lint.py @@ -16,6 +16,7 @@ import yaml import nf_core.utils +import nf_core.schema # Set up local caching for requests to speed up remote queries nf_core.utils.setup_requests_cachedir() @@ -173,6 +174,7 @@ def lint_pipeline(self, release_mode=False): 'check_conda_env_yaml', 'check_conda_dockerfile', 'check_pipeline_todos', + 'check_schema_lint' 'check_pipeline_name', 'check_cookiecutter_strings' ] @@ -248,7 +250,8 @@ def check_files_exist(self): # List of strings. Dails / warns if any of the strings exist. 
files_fail_ifexists = [ - 'Singularity' + 'Singularity', + 'parameters.settings.json' ] files_warn_ifexists = [ '.travis.yml' @@ -905,15 +908,33 @@ def check_pipeline_todos(self): l = '{}..'.format(l[:50-len(fname)]) self.warned.append((10, "TODO string found in '{}': {}".format(fname,l))) + def check_schema_lint(self): + """ Lint the pipeline JSON schema file """ + # Suppress log messages + logger = logging.getLogger() + logger.disabled = True + + # Lint the schema + schema_obj = nf_core.schema.PipelineSchema() + schema_path = os.path.join(self.path, 'nextflow_schema.json') + try: + schema_obj.lint_schema(schema_path) + self.passed.append((100, "Schema lint passed")) + except AssertionError as e: + self.failed.append((100, "Schema lint failed: {}".format(e))) + + # Reset logger + logger.disabled = False + def check_pipeline_name(self): """Check whether pipeline name adheres to lower case/no hyphen naming convention""" - if self.pipeline_name.islower() and self.pipeline_name.isalpha(): + if self.pipeline_name.islower() and self.pipeline_name.isalnum(): self.passed.append((12, "Name adheres to nf-core convention")) if not self.pipeline_name.islower(): self.warned.append((12, "Naming does not adhere to nf-core conventions: Contains uppercase letters")) - if not self.pipeline_name.isalpha(): - self.warned.append((12, "Naming does not adhere to nf-core conventions: Contains non alphabetical characters")) + if not self.pipeline_name.isalnum(): + self.warned.append((12, "Naming does not adhere to nf-core conventions: Contains non alphanumeric characters")) def check_cookiecutter_strings(self): """ From bfa4ea39cd059ea92fdf8048eced29582c446d39 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:40:27 +0100 Subject: [PATCH 20/27] Added new params from the template update --- .../nextflow_schema.json | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git 
a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json index 7937b1db28..e2bf002a48 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/{{ cookiecutter.name }}/master/parameters.schema.json", + "$id": "https://raw.githubusercontent.com/{{ cookiecutter.name }}/master/nextflow_schema.json", "title": "{{ cookiecutter.name }} pipeline parameters", "description": "{{ cookiecutter.description }}", "type": "object", @@ -71,6 +71,22 @@ "fa_icon": "", "hidden": true }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "hidden": true, + "description": "Method used to save pipeline results to output directory", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See https://www.nextflow.io/docs/latest/process.html#publishdir for details.", + "fa_icon": "", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "mov" + ] + }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs", From 6dc837e9c4b9997040ed4b3ed78b251b2a827fed Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:40:53 +0100 Subject: [PATCH 21/27] Schema: If no existing schema found, create a new one from scratch --- nf_core/schema.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/nf_core/schema.py b/nf_core/schema.py index 1fbc5ad183..49130a5686 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -32,6 +32,7 @@ def __init__(self): self.schema_filename = None self.input_params = {} self.pipeline_params = {} + self.schema_from_scratch = False self.use_defaults = False self.web_only = False self.web_schema_build_url = 'https://nf-co.re/json_schema_build' @@ -180,6 +181,20 @@ def build_schema(self, pipeline_dir, use_defaults, web_only, url): else: logging.debug("Existing JSON Schema not found: {}".format(self.schema_filename)) + # Build a skeleton schema if none already existed + if not self.schema: + logging.info("No existing schema found - creating a new one from scratch") + self.schema_from_scratch = True + config = nf_core.utils.fetch_wf_config(pipeline_dir) + self.schema = { + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/{}/master/nextflow_schema.json".format(config['manifest.name']), + "title": "{} pipeline parameters".format(config['manifest.name']), + "description": config['manifest.description'], + "type": "object", + "properties": {} + } + if not self.web_only: self.get_wf_params(pipeline_dir) self.remove_schema_notfound_configs() @@ -233,6 +248,8 @@ def remove_schema_notfound_configs(self): if len(params_removed) > 0: logging.info("Removed {} params from existing JSON Schema that 
were not found with `nextflow config`:\n {}\n".format(len(params_removed), ', '.join(params_removed))) + return params_removed + def prompt_remove_schema_notfound_config(self, p_key): """ Check if a given key is found in the nextflow config params and prompt to remove it if note @@ -242,7 +259,7 @@ def prompt_remove_schema_notfound_config(self, p_key): if p_key not in self.pipeline_params.keys(): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) remove_it_nice = click.style('Remove it?', fg='yellow') - if self.use_defaults or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): + if self.use_defaults or self.schema_from_scratch or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): return True return False @@ -258,13 +275,15 @@ def add_schema_found_configs(self): if not any( [ p_key in param.get('properties', {}) for k, param in self.schema['properties'].items() ] ): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) add_it_nice = click.style('Add to JSON Schema?', fg='cyan') - if self.use_defaults or click.confirm("Found '{}' in Nextflow config. {}".format(p_key_nice, add_it_nice), True): + if self.use_defaults or self.schema_from_scratch or click.confirm("Found '{}' in Nextflow config. 
{}".format(p_key_nice, add_it_nice), True): self.schema['properties'][p_key] = self.build_schema_param(p_key, p_val) logging.debug("Adding '{}' to JSON Schema".format(p_key)) params_added.append(click.style(p_key, fg='white', bold=True)) if len(params_added) > 0: logging.info("Added {} params to JSON Schema that were found with `nextflow config`:\n {}".format(len(params_added), ', '.join(params_added))) + return params_added + def build_schema_param(self, p_key, p_val, p_schema = None): """ Build a JSON Schema dictionary for an param interactively From 5813ad8442d1fb325433ef4829df13acf33b8b88 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:41:38 +0100 Subject: [PATCH 22/27] Added schema lint test to look for missing or unexpected params in schema --- docs/lint_errors.md | 21 ++++++++++---- nf_core/lint.py | 68 ++++++++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/docs/lint_errors.md b/docs/lint_errors.md index 55c1b89477..b5f539c5b9 100644 --- a/docs/lint_errors.md +++ b/docs/lint_errors.md @@ -312,12 +312,9 @@ The nf-core workflow template contains a number of comment lines with the follow This lint test runs through all files in the pipeline and searches for these lines. -## Error #11 - Pipeline schema syntax ## {#11} +## Error #11 - Pipeline name ## {#11} -Pipelines should have a `nextflow_schema.json` file that describes the different pipeline parameters (eg. `params.something`, `--something`). - -Schema should be valid JSON files and adhere to [JSONSchema](https://json-schema.org/), Draft 7. -The top-level schema should be an `object`, where each of the `properties` corresponds to a pipeline parameter. +_..removed.._ ## Error #12 - Pipeline name ## {#12} @@ -328,3 +325,17 @@ In order to ensure consistent naming, pipeline names should contain only lower c The `nf-core create` pipeline template uses [cookiecutter](https://github.com/cookiecutter/cookiecutter) behind the scenes. 
This check fails if any cookiecutter template variables such as `{{ cookiecutter.pipeline_name }}` are fouund in your pipeline code. Finding a placeholder like this means that something was probably copied and pasted from the template without being properly rendered for your pipeline. + +## Error #14 - Pipeline schema syntax ## {#14} + +Pipelines should have a `nextflow_schema.json` file that describes the different pipeline parameters (eg. `params.something`, `--something`). + +Schema should be valid JSON files and adhere to [JSONSchema](https://json-schema.org/), Draft 7. +The top-level schema should be an `object`, where each of the `properties` corresponds to a pipeline parameter. + +## Error #15 - Schema config check ## {#15} + +The `nextflow_schema.json` pipeline schema should describe every flat parameter returned from the `nextflow config` command (params that are objects or more complex structures are ignored). +Missing parameters result in a lint failure. + +If any parameters are found in the schema that were not returned from `nextflow config` a warning is given. 
diff --git a/nf_core/lint.py b/nf_core/lint.py index a084695771..167da280ab 100755 --- a/nf_core/lint.py +++ b/nf_core/lint.py @@ -130,6 +130,7 @@ def __init__(self, path): self.dockerfile = [] self.conda_config = {} self.conda_package_info = {} + self.schema_obj = None self.passed = [] self.warned = [] self.failed = [] @@ -174,9 +175,10 @@ def lint_pipeline(self, release_mode=False): 'check_conda_env_yaml', 'check_conda_dockerfile', 'check_pipeline_todos', - 'check_schema_lint' 'check_pipeline_name', - 'check_cookiecutter_strings' + 'check_cookiecutter_strings', + 'check_schema_lint', + 'check_schema_params' ] if release_mode: self.release_mode = True @@ -908,24 +910,6 @@ def check_pipeline_todos(self): l = '{}..'.format(l[:50-len(fname)]) self.warned.append((10, "TODO string found in '{}': {}".format(fname,l))) - def check_schema_lint(self): - """ Lint the pipeline JSON schema file """ - # Suppress log messages - logger = logging.getLogger() - logger.disabled = True - - # Lint the schema - schema_obj = nf_core.schema.PipelineSchema() - schema_path = os.path.join(self.path, 'nextflow_schema.json') - try: - schema_obj.lint_schema(schema_path) - self.passed.append((100, "Schema lint passed")) - except AssertionError as e: - self.failed.append((100, "Schema lint failed: {}".format(e))) - - # Reset logger - logger.disabled = False - def check_pipeline_name(self): """Check whether pipeline name adheres to lower case/no hyphen naming convention""" @@ -971,6 +955,50 @@ def check_cookiecutter_strings(self): self.passed.append((13, "Did not find any cookiecutter template strings ({} files)".format(num_files))) + def check_schema_lint(self): + """ Lint the pipeline JSON schema file """ + # Suppress log messages + logger = logging.getLogger() + logger.disabled = True + + # Lint the schema + self.schema_obj = nf_core.schema.PipelineSchema() + schema_path = os.path.join(self.path, 'nextflow_schema.json') + try: + self.schema_obj.lint_schema(schema_path) + 
self.passed.append((14, "Schema lint passed")) + except AssertionError as e: + self.failed.append((14, "Schema lint failed: {}".format(e))) + + # Reset logger + logger.disabled = False + + def check_schema_params(self): + """ Check that the schema describes all flat params in the pipeline """ + + # First, get the top-level config options for the pipeline + # Schema object already created in the previous test + self.schema_obj.get_wf_params(self.path) + self.schema_obj.use_defaults = True + + # Remove any schema params not found in the config + removed_params = self.schema_obj.remove_schema_notfound_configs() + + # Add schema params found in the config but not the schema + added_params = self.schema_obj.add_schema_found_configs() + + if len(removed_params) > 0: + for param in removed_params: + self.warned.append((15, "Schema param '{}' not found from nextflow config".format(param))) + + if len(added_params) > 0: + for param in added_params: + self.failed.append((15, "Param '{}' from `nextflow config` not found in nextflow_schema.json".format(param))) + + if len(removed_params) == 0 and len(added_params) == 0: + self.passed.append((15, "Schema matched params returned from nextflow config")) + + def print_results(self): # Print results rl = "\n Using --release mode linting tests" if self.release_mode else '' From fa76bdf8e72e1f4bd8a35c8f2d6f8b803c66d8bc Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:42:02 +0100 Subject: [PATCH 23/27] Reordered nf-core schema subcommands in help --- scripts/nf-core | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/scripts/nf-core b/scripts/nf-core index b6c07e35c5..c25a6ca515 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -250,20 +250,30 @@ def schema(): @schema.command(help_priority=1) @click.argument( - 'schema_path', + 'pipeline', + required = True, + metavar = "" +) +@click.option( + '--params', type = click.Path(exists=True), required = 
True, - metavar = "" + help = 'JSON parameter file' ) -def lint(schema_path): - """ Check that a given JSON Schema is valid. +def validate(pipeline, params): + """ Validate supplied parameters against a schema. - Runs as part of the nf-core lint command, this is a convenience - command that does just the schema linting nice and quickly. + Nextflow can be run using the -params-file flag, which loads + script parameters from a JSON/YAML file. + + This command takes such a file and validates it against the + schema for the given pipeline. """ schema_obj = nf_core.schema.PipelineSchema() + schema_obj.get_schema_from_name(pipeline) + schema_obj.load_input_params(params) try: - schema_obj.lint_schema(schema_path) + schema_obj.validate_params() except AssertionError as e: sys.exit(1) @@ -297,34 +307,23 @@ def build(pipeline_dir, use_defaults, web_only, url): @schema.command(help_priority=3) @click.argument( - 'pipeline', - required = True, - metavar = "" -) -@click.option( - '--params', + 'schema_path', type = click.Path(exists=True), required = True, - help = 'JSON parameter file' + metavar = "" ) -def validate(pipeline, params): - """ Validate supplied parameters against a schema. - - Nextflow can be run using the -params-file flag, which loads - script parameters from a JSON/YAML file. +def lint(schema_path): + """ Check that a given JSON Schema is valid. - This command takes such a file and validates it against the - schema for the given pipeline. + Runs as part of the nf-core lint command, this is a convenience + command that does just the schema linting nice and quickly. 
""" schema_obj = nf_core.schema.PipelineSchema() - schema_obj.get_schema_from_name(pipeline) - schema_obj.load_input_params(params) try: - schema_obj.validate_params() + schema_obj.lint_schema(schema_path) except AssertionError as e: sys.exit(1) - @nf_core_cli.command('bump-version', help_priority=7) @click.argument( 'pipeline_dir', From 778fee1f1805237c7c62cffb4f7c3341020a6cb3 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:52:34 +0100 Subject: [PATCH 24/27] Wrote documentation for nf-core schema --- README.md | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/README.md b/README.md index f4f462fa18..e6535dc45a 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ A python package with helper tools for the nf-core community. * [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) * [`nf-core create` - Create a new workflow from the nf-core template](#creating-a-new-workflow) * [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) +* [`nf-core schema` - Work with pipeline schema files](#working-with-pipeline-schema) * [`nf-core bump-version` - Update nf-core pipeline version number](#bumping-a-pipeline-version-number) * [`nf-core sync` - Synchronise pipeline TEMPLATE branches](#sync-a-pipeline-with-the-template) * [Citation](#citation) @@ -439,6 +440,118 @@ WARNING: Test Warnings: You can find extensive documentation about each of the lint tests in the [lint errors documentation](https://nf-co.re/errors). +## Working with pipeline schema + +nf-core pipelines have a `nextflow_schema.json` file in their root which describes the different parameters used by the workflow. +These files allow automated validation of inputs when running the pipeline, are used to generate command line help and can be used to build interfaces to launch pipelines. 
+Pipeline schema files are built according to the [JSONSchema specification](https://json-schema.org/) (Draft 7). + +To help developers working with pipeline schema, nf-core tools has three `schema` sub-commands: + +* `nf-core schema validate` +* `nf-core schema build` +* `nf-core schema lint` + +### nf-core schema validate + +Nextflow can take input parameters in a JSON or YAML file when running a pipeline using the `-params-file` option. +This command validates such a file against the pipeline schema. + +Usage is `nf-core schema validate <pipeline> --params <files>`, eg: + +```console +$ nf-core schema validate my_pipeline --params my_inputs.json + + ,--./,-. + ___ __ __ __ ___ /,-._.--~\ + |\ | |__ __ / ` / \ |__) |__ } { + | \| | \__, \__/ | \ |___ \`-._,-`-, + `._,._,' + + +INFO: [✓] Pipeline schema looks valid + +ERROR: [✗] Input parameters are invalid: 'reads' is a required property +``` + +The `pipeline` option can be a directory containing a pipeline, a path to a schema file or the name of an nf-core pipeline (which will be downloaded using `nextflow pull`). + +### nf-core schema build + +Manually building JSONSchema documents is not trivial and can be very error prone. +Instead, the `nf-core schema build` command collects your pipeline parameters and gives interactive prompts about any missing or unexpected params. +If no existing schema is found it will create one for you. + + +Once built, the tool can send the schema to the nf-core website so that you can use a graphical interface to organise and fill in the schema. +The tool checks the status of your schema on the website and once complete, saves your changes locally. + +Usage is `nf-core schema build <pipeline_directory>`, eg: + +```console +$ nf-core schema build nf-core-testpipeline + + ,--./,-. 
+ ___ __ __ __ ___ /,-._.--~\ + |\ | |__ __ / ` / \ |__) |__ } { + | \| | \__, \__/ | \ |___ \`-._,-`-, + `._,._,' + + +INFO: Loaded existing JSON schema with 18 params: nf-core-testpipeline/nextflow_schema.json + +Unrecognised 'params.old_param' found in schema but not in Nextflow config. Remove it? [Y/n]: +Unrecognised 'params.we_removed_this_too' found in schema but not in Nextflow config. Remove it? [Y/n]: + +INFO: Removed 2 params from existing JSON Schema that were not found with `nextflow config`: + old_param, we_removed_this_too + +Found 'params.reads' in Nextflow config. Add to JSON Schema? [Y/n]: +Found 'params.outdir' in Nextflow config. Add to JSON Schema? [Y/n]: + +INFO: Added 2 params to JSON Schema that were found with `nextflow config`: + reads, outdir + +INFO: Writing JSON schema with 18 params: nf-core-testpipeline/nextflow_schema.json + +Launch web builder for customisation and editing? [Y/n]: + +INFO: Opening URL: http://localhost:8888/json_schema_build?id=1584441828_b990ac785cd6 + +INFO: Waiting for form to be completed in the browser. Use ctrl+c to stop waiting and force exit. +.......... +INFO: Found saved status from nf-core JSON Schema builder + +INFO: Writing JSON schema with 18 params: nf-core-testpipeline/nextflow_schema.json +``` + +There are three flags that you can use with this command: + +* `--no-prompts`: Make changes without prompting for confirmation each time. Does not launch web tool. +* `--web-only`: Skips comparison of the schema against the pipeline parameters and only launches the web tool. +* `--url <web_address>`: Supply a custom URL for the online tool. Useful when testing locally. + +### nf-core schema lint + +The pipeline schema is linted as part of the main `nf-core lint` command, +however sometimes it can be useful to quickly check the syntax of the JSONSchema without running a full lint run. + +Usage is `nf-core schema lint <schema>`, eg: + +```console +$ nf-core schema lint nextflow_schema.json + + ,--./,-. 
+ ___ __ __ __ ___ /,-._.--~\ + |\ | |__ __ / ` / \ |__) |__ } { + | \| | \__, \__/ | \ |___ \`-._,-`-, + `._,._,' + + +ERROR: [✗] JSON Schema does not follow nf-core specs: + Schema should have 'properties' section +``` + ## Bumping a pipeline version number When releasing a new version of a nf-core pipeline, version numbers have to be updated in several different places. The helper command `nf-core bump-version` automates this for you to avoid manual errors (and frustration!). From a2acd38f0be421e315ed182adee98c6ffcb8c5ba Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:52:51 +0100 Subject: [PATCH 25/27] Rename schema use_defaults to no_prompts --- nf_core/launch.py | 4 ++-- nf_core/lint.py | 2 +- nf_core/schema.py | 14 +++++++------- scripts/nf-core | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/nf_core/launch.py b/nf_core/launch.py index ff297c7b02..8a0726cf14 100644 --- a/nf_core/launch.py +++ b/nf_core/launch.py @@ -279,10 +279,10 @@ def prompt_param_flags(self): click.style('Parameter group: ', bold=True, underline=True), click.style(group_label, bold=True, underline=True, fg='red') )) - use_defaults = click.confirm( + no_prompts = click.confirm( "Do you want to change the group's defaults? 
"+click.style('[y/N]', fg='green'), default=False, show_default=False) - if not use_defaults: + if not no_prompts: continue for parameter in params: # Skip this option if the render mode is none diff --git a/nf_core/lint.py b/nf_core/lint.py index 167da280ab..4fcd1a5498 100755 --- a/nf_core/lint.py +++ b/nf_core/lint.py @@ -979,7 +979,7 @@ def check_schema_params(self): # First, get the top-level config options for the pipeline # Schema object already created in the previous test self.schema_obj.get_wf_params(self.path) - self.schema_obj.use_defaults = True + self.schema_obj.no_prompts = True # Remove any schema params not found in the config removed_params = self.schema_obj.remove_schema_notfound_configs() diff --git a/nf_core/schema.py b/nf_core/schema.py index 49130a5686..aee96dbbde 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -33,7 +33,7 @@ def __init__(self): self.input_params = {} self.pipeline_params = {} self.schema_from_scratch = False - self.use_defaults = False + self.no_prompts = False self.web_only = False self.web_schema_build_url = 'https://nf-co.re/json_schema_build' self.web_schema_build_web_url = None @@ -153,11 +153,11 @@ def validate_schema(self): # Check for nf-core schema keys assert 'properties' in self.schema, "Schema should have 'properties' section" - def build_schema(self, pipeline_dir, use_defaults, web_only, url): + def build_schema(self, pipeline_dir, no_prompts, web_only, url): """ Interactively build a new JSON Schema for a pipeline """ - if use_defaults: - self.use_defaults = True + if no_prompts: + self.no_prompts = True if web_only: self.web_only = True if url: @@ -202,7 +202,7 @@ def build_schema(self, pipeline_dir, use_defaults, web_only, url): self.save_schema() # If running interactively, send to the web for customisation - if not self.use_defaults: + if not self.no_prompts: if click.confirm(click.style("\nLaunch web builder for customisation and editing?", fg='magenta'), True): self.launch_web_builder() @@ -259,7 
+259,7 @@ def prompt_remove_schema_notfound_config(self, p_key): if p_key not in self.pipeline_params.keys(): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) remove_it_nice = click.style('Remove it?', fg='yellow') - if self.use_defaults or self.schema_from_scratch or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): + if self.no_prompts or self.schema_from_scratch or click.confirm("Unrecognised '{}' found in schema but not in Nextflow config. {}".format(p_key_nice, remove_it_nice), True): return True return False @@ -275,7 +275,7 @@ def add_schema_found_configs(self): if not any( [ p_key in param.get('properties', {}) for k, param in self.schema['properties'].items() ] ): p_key_nice = click.style('params.{}'.format(p_key), fg='white', bold=True) add_it_nice = click.style('Add to JSON Schema?', fg='cyan') - if self.use_defaults or self.schema_from_scratch or click.confirm("Found '{}' in Nextflow config. {}".format(p_key_nice, add_it_nice), True): + if self.no_prompts or self.schema_from_scratch or click.confirm("Found '{}' in Nextflow config. 
{}".format(p_key_nice, add_it_nice), True): self.schema['properties'][p_key] = self.build_schema_param(p_key, p_val) logging.debug("Adding '{}' to JSON Schema".format(p_key)) params_added.append(click.style(p_key, fg='white', bold=True)) diff --git a/scripts/nf-core b/scripts/nf-core index c25a6ca515..7fb974ca55 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -285,9 +285,9 @@ def validate(pipeline, params): metavar = "" ) @click.option( - '--use-defaults', + '--no-prompts', is_flag = True, - help = "Do not build interactively, just use Nextflow defaults and exit" + help = "Do not confirm changes, just update parameters and exit" ) @click.option( '--web-only', @@ -300,10 +300,10 @@ def validate(pipeline, params): default = 'https://nf-co.re/json_schema_build', help = 'URL for the web-based Schema builder' ) -def build(pipeline_dir, use_defaults, web_only, url): +def build(pipeline_dir, no_prompts, web_only, url): """ Interactively build a schema from Nextflow params. """ schema_obj = nf_core.schema.PipelineSchema() - schema_obj.build_schema(pipeline_dir, use_defaults, web_only, url) + schema_obj.build_schema(pipeline_dir, no_prompts, web_only, url) @schema.command(help_priority=3) @click.argument( From f4df69c5550091261e92b0b90dca396bf2e246c0 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:57:29 +0100 Subject: [PATCH 26/27] Fix lint pytests --- .../nextflow_schema.json | 29 +++++++++++++++++++ tests/test_lint.py | 6 ++-- 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 tests/lint_examples/minimalworkingexample/nextflow_schema.json diff --git a/tests/lint_examples/minimalworkingexample/nextflow_schema.json b/tests/lint_examples/minimalworkingexample/nextflow_schema.json new file mode 100644 index 0000000000..8b1d9f5615 --- /dev/null +++ b/tests/lint_examples/minimalworkingexample/nextflow_schema.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": 
"https://raw.githubusercontent.com/'nf-core/tools'/master/nextflow_schema.json", + "title": "'nf-core/tools' pipeline parameters", + "description": "'Minimal working example pipeline'", + "type": "object", + "properties": { + "outdir": { + "type": "string", + "default": "'./results'" + }, + "reads": { + "type": "string", + "default": "'data/*.fastq'" + }, + "single_end": { + "type": "string", + "default": "false" + }, + "custom_config_version": { + "type": "string", + "default": "'master'" + }, + "custom_config_base": { + "type": "string", + "default": "'https://raw.githubusercontent.com/nf-core/configs/master'" + } + } +} \ No newline at end of file diff --git a/tests/test_lint.py b/tests/test_lint.py index e99ebcdb98..b1d0eb2389 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -38,7 +38,7 @@ def pf(wd, path): pf(WD, 'lint_examples/license_incomplete_example')] # The maximum sum of passed tests currently possible -MAX_PASS_CHECKS = 72 +MAX_PASS_CHECKS = 75 # The additional tests passed for releases ADD_PASS_RELEASE = 1 @@ -95,7 +95,7 @@ def test_failing_missingfiles_example(self): """Tests for missing files like Dockerfile or LICENSE""" lint_obj = nf_core.lint.PipelineLint(PATH_FAILING_EXAMPLE) lint_obj.check_files_exist() - expectations = {"failed": 5, "warned": 2, "passed": 9} + expectations = {"failed": 5, "warned": 2, "passed": 10} self.assess_lint_status(lint_obj, **expectations) def test_mit_licence_example_pass(self): @@ -472,5 +472,5 @@ def test_pipeline_name_critical(self): critical_lint_obj = nf_core.lint.PipelineLint(PATH_WORKING_EXAMPLE) critical_lint_obj.pipeline_name = 'Tools123' critical_lint_obj.check_pipeline_name() - expectations = {"failed": 0, "warned": 2, "passed": 0} + expectations = {"failed": 0, "warned": 1, "passed": 0} self.assess_lint_status(critical_lint_obj, **expectations) From cb5fafb55c3e668de428c2175a3a15af81a5efe0 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 17 Mar 2020 11:59:38 +0100 Subject: [PATCH 27/27] 
Remove dup blank line in readme --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index e6535dc45a..3cdc74ea21 100644 --- a/README.md +++ b/README.md @@ -482,7 +482,6 @@ Manually building JSONSchema documents is not trivial and can be very error pron Instead, the `nf-core schema build` command collects your pipeline parameters and gives interactive prompts about any missing or unexpected params. If no existing schema is found it will create one for you. - Once built, the tool can send the schema to the nf-core website so that you can use a graphical interface to organise and fill in the schema. The tool checks the status of your schema on the website and once complete, saves your changes locally.