diff --git a/.gitattributes b/.gitattributes index 63841ad7..2bf72986 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,5 @@ * linguist-vendored *.nf linguist-vendored=false *.config linguist-vendored=false +*.py linguist-vendored=false +*.R linguist-vendored=false \ No newline at end of file diff --git a/.gitignore b/.gitignore index df1e0bf1..fd675dfe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .nextflow* -docs/_build \ No newline at end of file +docs/_build +testing \ No newline at end of file diff --git a/.zenodo.json b/.zenodo.json index aac9afd6..5f61c67b 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -2,7 +2,7 @@ "description": "

MpGAP is built using Nextflow, a workflow tool that runs tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers, making installation trivial and results highly reproducible. It is an easy-to-use pipeline that adopts well-known software for _de novo_ genome assembly of Illumina, PacBio and Oxford Nanopore sequencing data through Illumina-only, long-reads-only or hybrid modes.

", "license": "other-open", "title": "fmalmeida/MpGAP: A generic multi-platform genome assembly pipeline", - "version": "v3.1", + "version": "v3.1.2", "upload_type": "software", "creators": [ { diff --git a/README.md b/README.md index 7e23156e..f036111b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - + [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3997375.svg)](https://doi.org/10.5281/zenodo.3445485) [![Releases](https://img.shields.io/github/v/release/fmalmeida/mpgap)](https://github.com/fmalmeida/mpgap/releases) @@ -78,6 +78,9 @@ Therefore, feedbacks are very well welcomed. If you believe that your use case i ```bash # for docker docker pull fmalmeida/mpgap:v3.1 + + # run + nextflow run fmalmeida/mpgap -profile docker [options] ``` * for singularity @@ -89,6 +92,9 @@ Therefore, feedbacks are very well welcomed. If you believe that your use case i export NXF_SINGULARITY_LIBRARYDIR=MY_SINGULARITY_IMAGES # your singularity storage dir export NXF_SINGULARITY_CACHEDIR=MY_SINGULARITY_CACHE # your singularity cache dir singularity pull --dir $NXF_SINGULARITY_LIBRARYDIR fmalmeida-mpgap-v3.1.img docker://fmalmeida/mpgap:v3.1 + + # run + nextflow run fmalmeida/mpgap -profile singularity [options] ``` * for conda @@ -98,6 +104,10 @@ Therefore, feedbacks are very well welcomed. If you believe that your use case i # it is better to create envs with mamba for faster solving wget https://github.com/fmalmeida/mpgap/raw/master/environment.yml conda env create -f environment.yml # advice: use mamba + + # must be executed from the base environment + # This tells nextflow to load the available mpgap environment when required + nextflow run fmalmeida/mpgap -profile conda [options] ``` :dart: Please make sure to also download its busco databases. [See the explanation](#note-on-conda) @@ -121,11 +131,13 @@ Nextflow profiles are a set of "sensible defaults" for the resource requirements The pipeline have "standard profiles" set to run the workflows with either conda, docker or singularity using the [local executor](https://www.nextflow.io/docs/latest/executor.html), which is nextflow's default and basically runs the pipeline processes in the computer where Nextflow is launched. If you need to run the pipeline using another executor such as sge, lsf, slurm, etc. you can take a look at [nextflow's manual page](https://www.nextflow.io/docs/latest/executor.html) to proper configure one in a new custom profile set in your personal copy of [MpGAP config file](https://github.com/fmalmeida/mpgap/blob/master/nextflow.config) and take advantage that nextflow allows multiple profiles to be used at once, e.g. `-profile conda,sge`. -By default, if no profile is chosen, the pipeline will "load the docker profile". Available pre-set profiles for this pipeline are: docker, conda, singularity, you can choose between them as follows: +By default, if no profile is chosen, the pipeline will try to load tools from the local machine $PATH. 
Available pre-set profiles for this pipeline are `docker`, `conda` and `singularity`; you can choose among them as follows: * conda ```bash + # must be executed from the base environment + # This tells nextflow to load the available mpgap environment when required nextflow run fmalmeida/mpgap -profile conda [options] ``` @@ -199,7 +211,7 @@ It produces a long reads only assembly and polishes (correct errors) it with sho # run the pipeline setting the desired hybrid strategy globally (for all samples) nextflow run fmalmeida/mpgap \ --output output \ - --threads 5 \ + --max_cpus 5 \ --input "samplesheet.yml" \ --hybrid_strategy "both" ``` @@ -245,11 +257,11 @@ nf-core launch fmalmeida/mpgap It will result in the following:

[screenshots: nf-core launch interface (image references updated from images/ to assets/)]


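Whichever way the pipeline is launched, the new `--max_cpus`, `--max_memory` and `--max_time` parameters cap the resources any single job may request. A minimal sketch of combining them with a container profile (the values are illustrative only; adjust them to your machine):

```bash
# illustrative resource caps combined with the docker profile
nextflow run fmalmeida/mpgap \
    -profile docker \
    --input "samplesheet.yml" \
    --output output \
    --max_cpus 8 \
    --max_memory '16.GB' \
    --max_time '24.h'
```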
## Known issues @@ -263,6 +275,14 @@ It will result in the following: ## Citation -To cite this pipeline users can use our Zenodo tag or directly via the github url. Users are encouraged to cite the programs used in this pipeline whenever they are used. +To cite this tool please refer to our [Zenodo tag](https://doi.org/10.5281/zenodo.3445485). + +This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [GPLv3](https://github.com/fmalmeida/ngs-preprocess/blob/master/LICENSE). + +> The nf-core framework for community-curated bioinformatics pipelines. +> +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> +> Nat Biotechnol. 2020 Feb 13. doi: 10.1038/s41587-020-0439-x. -Please, do not forget to cite the software that were used whenever you use its outputs. See [the list of tools](markdown/list_of_tools.md). +In addition, users are encouraged to cite the programs used in this pipeline whenever they are used. Links to resources of tools and data used in this pipeline are in [the list of tools](markdown/list_of_tools.md). diff --git a/assets/hybrid_test.yml b/assets/hybrid_test.yml new file mode 100644 index 00000000..d9c51c12 --- /dev/null +++ b/assets/hybrid_test.yml @@ -0,0 +1,17 @@ +samplesheet: + + - id: ont_hybrid + nanopore: https://github.com/fmalmeida/test_datasets/raw/main/ecoli_ont_15X.fastq.gz + genome_size: 0.5m + illumina: + - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_1.fastq.gz + - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_2.fastq.gz + hybrid_strategy: both + + - id: pacbio_hybrid + pacbio: https://github.com/fmalmeida/test_datasets/raw/main/ecoli_pacbio_15X.fastq.gz + genome_size: 0.5m + illumina: + - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_1.fastq.gz + - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_2.fastq.gz + hybrid_strategy: both \ No newline at end of file diff --git a/assets/illumina_test.yml b/assets/illumina_test.yml new file mode 100644 index 00000000..25dc81e2 --- /dev/null +++ b/assets/illumina_test.yml @@ -0,0 +1,5 @@ +samplesheet: + - id: illumina_only + illumina: + - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_1.fastq.gz + - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_2.fastq.gz \ No newline at end of file diff --git a/images/lOGO_3.png b/assets/lOGO_3.png similarity index 100% rename from images/lOGO_3.png rename to assets/lOGO_3.png diff --git a/images/lOGO_3_transparente.png b/assets/lOGO_3_transparente.png similarity index 100% rename from images/lOGO_3_transparente.png rename to assets/lOGO_3_transparente.png diff --git a/assets/lreads_test.yml b/assets/lreads_test.yml new file mode 100644 index 00000000..58ad98fa --- /dev/null +++ b/assets/lreads_test.yml @@ -0,0 +1,7 @@ +samplesheet: + - id: ont_only + nanopore: https://github.com/fmalmeida/test_datasets/raw/main/ecoli_ont_15X.fastq.gz + genome_size: 0.5m + - id: pacbio_only + pacbio: https://github.com/fmalmeida/test_datasets/raw/main/ecoli_pacbio_15X.fastq.gz + genome_size: 0.5m \ No newline at end of file diff --git a/images/nf-core-asking.png b/assets/nf-core-asking.png similarity index 100% rename from images/nf-core-asking.png rename to assets/nf-core-asking.png diff 
--git a/images/nf-core-gui.png b/assets/nf-core-gui.png similarity index 100% rename from images/nf-core-gui.png rename to assets/nf-core-gui.png diff --git a/conf/base.config b/conf/base.config new file mode 100644 index 00000000..bd3075db --- /dev/null +++ b/conf/base.config @@ -0,0 +1,72 @@ +process { + + // The defaults for all processes + cpus = { params.max_cpus } + memory = { params.max_memory } + time = { params.max_time } + + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // labels + withLabel:process_ultralow { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 2.GB * task.attempt, 'memory' ) } + time = { check_max( 1.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 1.h * task.attempt, 'time' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + + // Assemblies will first try to adjust themselves to a parallel execution + // If it is not possible, then it waits to use all the resources allowed + withLabel:process_assembly { + cpus = { if (task.attempt == 1) { check_max( 6 * task.attempt, 'cpus' ) } else { params.max_cpus } } + memory = { if (task.attempt == 1) { check_max( 14.GB * task.attempt, 'memory' ) } else { params.max_memory } } + time = { if (task.attempt == 1) { check_max( 16.h * task.attempt, 'time' ) } else { params.max_time } } + } + +} + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if(type == 'memory'){ + try { + if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if(type == 'time'){ + try { + if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if(type == 'cpus'){ + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} \ No newline at end of file diff --git a/conf/conda.config b/conf/conda.config new file mode 100644 index 00000000..0fdb2d9a --- /dev/null +++ b/conf/conda.config @@ -0,0 +1,5 @@ +// conda profile +params.selected_profile = "conda" +singularity.enabled = false +docker.enabled = false +process.conda = "$CONDA_PREFIX/envs/mpgap-3.1" \ No newline at end of file diff --git a/conf/defaults.config b/conf/defaults.config new file mode 100644 index 00000000..48f31b33 --- /dev/null +++ b/conf/defaults.config @@ -0,0 +1,130 @@ +/* + * Configuration File to run fmalmeida/mpgap pipeline. + */ + +params { + + /* + * Input parameter + */ + + +// Path to YAML samplesheet file. +// Please read the documentation https://mpgap.readthedocs.io/en/latest/samplesheet.html to know how to create a samplesheet file. + input = null + + /* + * Output parameters + */ + + +// Output folder name + output = "output" + + + /* + * Resources parameters + */ + +// Memory allocation for pilon polish. 
+// Values in GB. Default is 50, which has proven to be enough in most cases. +// This step is crucial because, without enough memory, Pilon will crash and fail to correct your assembly. + pilon_memory_limit = 50 + + /* + * General parameters + * + * These parameters will set the default for all samples. + * However, they can also be set inside the YAML; if this happens + * the pipeline will use the value inside the YAML to overwrite + * the parameter for that specific sample. + * + * Please read the documentation https://mpgap.readthedocs.io/en/latest/samplesheet.html to know more about the samplesheet file. + */ + + +// This parameter only needs to be set if the software chosen is Canu, wtdbg2 or Haslr. It is optional for Flye. +// It is an estimate of the size of the genome. Common suffixes are allowed, for example, 3.7m or 2.8g + genome_size = null + +// Select the appropriate value to pass to wtdbg2 to assemble the input. +// Options are: "ont" for Nanopore reads, "rs" for PacBio RSII, "sq" for PacBio Sequel, "ccs" for PacBio CCS reads. +// By default, if not given, the pipeline will use the value "ont" if nanopore reads are used and "sq" if pacbio reads are used. + wtdbg2_technology = null + +// Select the appropriate shasta config to use for assembly +// Since shasta v0.8 (Oct/2021) this parameter is now mandatory. + shasta_config = "Nanopore-Oct2021" + +// Tells the pipeline to interpret the long reads as "corrected" long reads. +// This will activate (if available) the options for corrected reads in the +// assemblers: -corrected (in canu), --pacbio-corr|--nano-corr (in flye), etc. +// Be cautious when using this parameter. If your reads are not corrected, and +// you use this parameter, you will probably not generate any contigs. + corrected_long_reads = false + +// The parameter below (hybrid_strategy) selects the hybrid strategy adopted by the pipeline. +// Read the documentation https://mpgap.readthedocs.io/en/latest/manual.html to know more about the hybrid strategies. +// +// Whenever using this parameter, it is also possible to polish the longreads-only assemblies with Nanopolish, +// Medaka or VariantCaller (Arrow) before the polishing with shortreads (using Pilon). For that it is necessary to set +// the right parameters: pacbio_bam and nanopolish_fast5 (files given only inside YAML) or medaka_model. + hybrid_strategy = 1 + +// Default medaka model used for polishing nanopore long-read assemblies. +// Please read their manual https://github.com/nanoporetech/medaka to know more about the available models. + medaka_model = "r941_min_high_g360" + +// This parameter sets the max number of haplotypes for nanopolish to consider. +// Sometimes the pipeline may crash because too much variation was found, exceeding the limit. + nanopolish_max_haplotypes = 1000 + + + /* + * Advanced parameters + * + * Controlling the execution of assemblers. + * Set a skip parameter to true to skip a tool and to false to use it. + * It is also possible to pass additional parameters to each tool. + * Additional parameters must be in quotes and separated by spaces. + */ + + + quast_additional_parameters = null // Give additional parameters to Quast while assessing assembly metrics. + // Must be given as shown in Quast manual. E.g. " --large --eukaryote ". + + skip_spades = false // Hybrid and shortreads only assemblies + spades_additional_parameters = null // Must be given as shown in Spades manual. E.g.
" --meta --plasmids " + + skip_shovill = false // Paired shortreads only assemblies + shovill_additional_parameters = null // Must be given as shown in Shovill manual. E.g. " --depth 15 " + // The pipeline already executes shovill with spades, skesa and megahit, so please, do not use it with shovill's ``--assembler`` parameter. + + skip_unicycler = false // Hybrid and shortreads only assemblies + unicycler_additional_parameters = null // Must be given as shown in Unicycler manual. E.g. " --mode conservative --no_correct " + + skip_haslr = false // Hybrid assemblies + haslr_additional_parameters = null // Must be given as shown in Haslr manual. E.g. " --cov-lr 30 " + + skip_canu = false // Longreads only assemblies + canu_additional_parameters = null // Must be given as shown in Canu manual. E.g. " correctedErrorRate=0.075 corOutCoverage=200 " + + skip_flye = false // Longreads only assemblies + flye_additional_parameters = null // Must be given as shown in Flye manual. E.g. " --meta --iterations 4 " + + skip_raven = false // Longreads only assemblies + raven_additional_parameters = null // Must be given as shown in Raven manual. E.g. " --polishing-rounds 4 " + + skip_wtdbg2 = false // Longreads only assemblies + wtdbg2_additional_parameters = null // Must be given as shown in wtdbg2 manual. E.g. " --tidy-reads 5000 " + + skip_shasta = false // Nanopore longreads only assemblies + shasta_additional_parameters = null // Must be given as shown in shasta manual. E.g. " --Reads.minReadLength 5000 " + +// Max resource options +// Defaults only, expecting to be overwritten + max_memory = '14.GB' + max_cpus = 6 + max_time = '40.h' + +} \ No newline at end of file diff --git a/conf/docker.config b/conf/docker.config new file mode 100644 index 00000000..865323ae --- /dev/null +++ b/conf/docker.config @@ -0,0 +1,7 @@ +// docker profile +params.selected_profile = "docker" +singularity.enabled = false +docker.enabled = true +docker.runOptions = '-u \$(id -u):\$(id -g)' +fixOwnership = true +process.container = "fmalmeida/mpgap:v3.1" \ No newline at end of file diff --git a/conf/singularity.config b/conf/singularity.config new file mode 100644 index 00000000..366ff31c --- /dev/null +++ b/conf/singularity.config @@ -0,0 +1,7 @@ +// singularity profile +params.selected_profile = "singularity" +docker.enabled = false +singularity.enabled = true +singularity.autoMounts = true +process.container = "docker://fmalmeida/mpgap:v3.1" +singularity.autoMounts = true \ No newline at end of file diff --git a/conf/standard.config b/conf/standard.config new file mode 100644 index 00000000..2744b423 --- /dev/null +++ b/conf/standard.config @@ -0,0 +1,6 @@ +// standard local profile -- default +// does not use any pre-configuration from profiles +// using docker as default +params.selected_profile = "none" +singularity.enabled = false +docker.enabled = false \ No newline at end of file diff --git a/docs/manual.rst b/docs/manual.rst index 9a0b5b34..ba47e024 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -87,8 +87,8 @@ Please note that, through the command line, the parameters that are boolean (tru All parameters described can be configured through a configuration file. We encourage users to use it since it will keep your execution cleaner and more readable. See a :ref:`config` example. -General parameters -"""""""""""""""""" +Input/Output parameters +""""""""""""""""""""""" .. 
list-table:: :widths: 25 15 60 @@ -98,33 +98,40 @@ General parameters - Default value - Description + * - ``--input`` + - NA + - Path to input samplesheet in YAML format. It is required. Please read the :ref:`samplesheet` reference page to understand how to properly create one. + * - ``--output`` - output - Name of directory to store assemblers results. The sample ids will be used to create sub-folder under this directory. - * - ``--threads`` - - 3 - - Number of threads to use per process. - - * - ``--parallel_jobs`` - - NA - - Number of processes to run in parallel. Each job can consume up to N threads (``--threads``). If not given, let's nextflow automatically handle it. - -Input files -""""""""""" +Max job request +"""""""""""""""" .. list-table:: - :widths: 25 15 60 + :widths: 20 10 20 50 :header-rows: 1 * - Arguments + - Required - Default value - Description - * - ``--input`` - - NA - - Path to input samplesheet in YAML format. It is required. Please read the :ref:`samplesheet` reference page to understand how to properly create one. - + * - ``--max_cpus`` + - N + - 4 + - Max number of threads to use in parallel + + * - ``--max_memory`` + - N + - 6.GB + - Max amount of memory to be used by pipeline + + * - ``--max_time`` + - N + - 40.h + - Max time for a job to run Assemblies configuration """""""""""""""""""""""" diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 29781100..1165393b 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -67,7 +67,7 @@ Running the pipeline # Run the pipeline nextflow run fmalmeida/mpgap \ --output _ASSEMBLY \ - --threads 5 \ + --max_cpus 5 \ --skip_spades \ --input "samplesheet.yml" \ --unicycler_additional_parameters '--mode conservative' \ diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy new file mode 100755 index 00000000..8183863c --- /dev/null +++ b/lib/NfcoreSchema.groovy @@ -0,0 +1,529 @@ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
+// + +import org.everit.json.schema.Schema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +class NfcoreSchema { + + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // + /* groovylint-disable-next-line UnusedPrivateMethodParameter */ + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { + def has_error = false + //=====================================================================// + // Check for nextflow core params and unexpected params + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text + def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') + def nf_params = [ + // Options for base `nextflow` command + 'bg', + 'c', + 'C', + 'config', + 'd', + 'D', + 'dockerize', + 'h', + 'log', + 'q', + 'quiet', + 'syslog', + 'v', + 'version', + + // Options for `nextflow run` command + 'ansi', + 'ansi-log', + 'bg', + 'bucket-dir', + 'c', + 'cache', + 'config', + 'dsl2', + 'dump-channels', + 'dump-hashes', + 'E', + 'entry', + 'latest', + 'lib', + 'main-script', + 'N', + 'name', + 'offline', + 'params-file', + 'pi', + 'plugins', + 'poll-interval', + 'pool-size', + 'profile', + 'ps', + 'qs', + 'queue-size', + 'r', + 'resume', + 'revision', + 'stdin', + 'stub', + 'stub-run', + 'test', + 'w', + 'with-charliecloud', + 'with-conda', + 'with-dag', + 'with-docker', + 'with-mpi', + 'with-notification', + 'with-podman', + 'with-report', + 'with-singularity', + 'with-timeline', + 'with-tower', + 'with-trace', + 'with-weblog', + 'without-docker', + 'without-podman', + 'work-dir' + ] + def unexpectedParams = [] + + // Collect expected parameters from the schema + def expectedParams = [] + def enums = [:] + for (group in schemaParams) { + for (p in group.value['properties']) { + expectedParams.push(p.key) + if (group.value['properties'][p.key].containsKey('enum')) { + enums[p.key] = group.value['properties'][p.key]['enum'] + } + } + } + + for (specifiedParam in params.keySet()) { + // nextflow params + if (nf_params.contains(specifiedParam)) { + log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" + has_error = true + } + // unexpected params + def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' + def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } + def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { + // Temporarily remove camelCase/camel-case params #1035 + def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} + if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ + unexpectedParams.push(specifiedParam) + } + } + } + + //=====================================================================// + // Validate parameters against the schema + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) + + // Remove anything that's in params.schema_ignore_params + raw_schema = removeIgnoredParams(raw_schema, params) + + Schema schema = SchemaLoader.load(raw_schema) + + // Clean the parameters + def cleanedParams = cleanParameters(params) + + // Convert to JSONObject + def jsonParams = new JsonBuilder(cleanedParams) + JSONObject params_json = new JSONObject(jsonParams.toString()) + + // Validate + try { + schema.validate(params_json) + } catch (ValidationException e) { + println '' + log.error 'ERROR: Validation of pipeline parameters failed!' + JSONObject exceptionJSON = e.toJSON() + printExceptions(exceptionJSON, params_json, log, enums) + println '' + has_error = true + } + + // Check for unexpected parameters + if (unexpectedParams.size() > 0) { + Map colors = NfcoreTemplate.logColours(false) + println '' + def warn_msg = 'Found unexpected parameters:' + for (unexpectedParam in unexpectedParams) { + warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" + } + log.warn warn_msg + log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" + println '' + } + + if (has_error) { + System.exit(1) + } + } + + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(false) + Integer num_hidden = 0 + String output = '' + output += 'Typical pipeline command:\n\n' + output += " ${colors.cyan}${command}${colors.reset}\n\n" + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 + Integer desc_indent = max_chars + 14 + Integer dec_linewidth = 160 - desc_indent + for (group in params_map.keySet()) { + Integer num_params = 0 + String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (group_params.get(param).hidden && !params.show_hidden_params) { + num_hidden += 1 + continue; + } + def type = '[' + group_params.get(param).type + ']' + def description = group_params.get(param).description + def defaultValue = group_params.get(param).default != null ? 
" [default: " + group_params.get(param).default.toString() + "]" : '' + def description_default = description + colors.dim + defaultValue + colors.reset + // Wrap long description texts + // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap + if (description_default.length() > dec_linewidth){ + List olines = [] + String oline = "" // " " * indent + description_default.split(" ").each() { wrd -> + if ((oline.size() + wrd.size()) <= dec_linewidth) { + oline += wrd + " " + } else { + olines += oline + oline = wrd + " " + } + } + olines += oline + description_default = olines.join("\n" + " " * desc_indent) + } + group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' + num_params += 1 + } + group_output += '\n' + if (num_params > 0){ + output += group_output + } + } + if (num_hidden > 0){ + output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset + } + output += NfcoreTemplate.dashedLine(false) + return output + } + + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = workflow.containerEngine + } + if (workflow.container) { + workflow_summary['container'] = workflow.container + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value != null) { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir', '') + sub_string = sub_string.replace('\${projectDir}', '') + if (params_value.contains(sub_string)) { + schema_value = params_value + } + } + if (schema_value.contains('$params.output') || schema_value.contains('${params.output}')) { + def sub_string = schema_value.replace('\$params.output', '') + sub_string = sub_string.replace('\${params.output}', '') + if ("${params.output}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + // We have a default in the schema, and this isn't it + if (schema_value != null && params_value != schema_value) { + sub_params.put(param, params_value) + } + // No default in the schema, and this isn't empty + else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { + 
sub_params.put(param, params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(false) + String output = '' + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += colors.bold + group + colors.reset + '\n' + for (param in group_params.keySet()) { + output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' + } + output += '\n' + } + } + output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(false) + return output + } + + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log, enums, limit=5) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + if (enums.containsKey(param)) { + def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" + if (enums[param].size() > limit) { + log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" + } else { + log.error "${error_msg}: ${enums[param].join(', ')})" + } + } else { + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log, enums) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i<len;i++){ + list.add(json_array.get(i)) + } + list.remove(element) + JSONArray jsArray = new JSONArray(list) + return jsArray + } + + // + // Remove ignored parameters + // + private static JSONObject removeIgnoredParams(raw_schema, params) { + // Remove anything that's in params.schema_ignore_params + params.schema_ignore_params.split(',').each{ ignore_param -> + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } + // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // ....
+ // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } +} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 00000000..30e0985b --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,230 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.output.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + } + } + + // + // Warn if a -profile or Nextflow config has not been provided to run the pipeline + // + public static void checkConfigProvided(workflow, log) { + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity` or `-profile conda`\n" ++ + " (2) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + } + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir" ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(false) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info 
"-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.output}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(false) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? 
'' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // pipeline logo + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100755 index 00000000..1b88aec0 --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,40 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." 
+ return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "=============================================================================\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "===================================================================================" + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 00000000..328a689e --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,102 @@ +// +// This file holds several functions specific to the main.nf workflow in the fmalmeida/mpgap pipeline +// + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.3445485\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/fmalmeida/mpgap#citation" + } + + // + // Print help to screen if required + // + public static String help(workflow, params, log) { + def command = "nextflow run ${workflow.manifest.name} --output _ASSEMBLY --max_cpus 5 --input samplesheet.yml -profile docker [ OPTIONS ]" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, false) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + citation(workflow) + '\n' + help_string += NfcoreTemplate.dashedLine(false) + return help_string + } + + // + // Print parameter summary log to screen + // + public static String paramsSummaryLog(workflow, params, log) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, false) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += NfcoreTemplate.dashedLine(false) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params, log) + System.exit(0) + } + + // Download template config + if (params.get_config) { + new File("MPGAP.config").write(new URL ("https://github.com/fmalmeida/mpgap/raw/master/conf/defaults.config").getText()) + log.info """ + + MPGAP.config file saved in working directory + After configuration, run: + nextflow run fmalmeida/mpgap -c ./MPGAP.config + Nice code! + + """.stripIndent() + System.exit(0) + } + + // Download template samplesheet + if (params.get_samplesheet) { + new File("MPGAP_samplesheet.yml").write(new URL ("https://github.com/fmalmeida/mpgap/raw/master/example_samplesheet.yml").getText()) + log.info """ + + Samplesheet (MPGAP_samplesheet.yml) file saved in working directory + Nice code!
+ + """.stripIndent() + System.exit(0) + } + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) + + // Check that a -profile or Nextflow config has been provided to run the pipeline + NfcoreTemplate.checkConfigProvided(workflow, log) + + // Check that conda channels are set-up correctly + // if (params.enable_conda) { + // Utils.checkCondaChannels(log) + // } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + } + +} diff --git a/lib/WorkflowMpGAP.groovy b/lib/WorkflowMpGAP.groovy new file mode 100755 index 00000000..67d825d8 --- /dev/null +++ b/lib/WorkflowMpGAP.groovy @@ -0,0 +1,20 @@ +// +// This file holds several functions specific to the the fmalmeida/mpgap pipeline +// + +class WorkflowMpGAP { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + params.hybrid_strategy = params.hybrid_strategy.toString() + if (!params.get_config && !params.get_samplesheet && !params.help) { + if (!params.input) { + log.error "ERROR!\nA major error has occurred!\n\t==> A samplesheet has not been provided. Please, provide a samplesheet to run the analysis.\n\t Online documentation is available at: https://mpgap.readthedocs.io/en/latest/\nPlease, read the docs.\nCheers." + System.exit(1) + } + } + } + +} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 00000000..805c8bb5 Binary files /dev/null and b/lib/nfcore_external_java_deps.jar differ diff --git a/main.nf b/main.nf index 9a3c971c..ae9eeeaa 100644 --- a/main.nf +++ b/main.nf @@ -7,122 +7,29 @@ import org.yaml.snakeyaml.Yaml */ /* - * Include functions - */ -include { helpMessage } from './nf_functions/help.nf' -include { logMessage } from './nf_functions/logMessages.nf' - - /* - * Help message - */ -params.help = false -if (params.help) { - helpMessage() - exit 0 -} - - /* - Download configuration file, if necessary. - */ - params.get_config = false - if (params.get_config) { - new File("MPGAP.config").write(new URL ("https://github.com/fmalmeida/mpgap/raw/master/nextflow.config").getText()) - println "" - println "MPGAP.config file saved in working directory" - println "After configuration, run:" - println "nextflow run fmalmeida/mpgap -c ./MPGAP.config" - println "Nice code!\n" - - exit 0 - } - - /* - Download samplesheet, if necessary. - */ - params.get_samplesheet = false - if (params.get_samplesheet) { - new File("MPGAP_samplesheet.yml").write(new URL ("https://github.com/fmalmeida/mpgap/raw/master/example_samplesheet.yml").getText()) - println "" - println "Samplesheet (MPGAP_samplesheet.yml) file saved in working directory" - println "Nice code!\n" - - exit 0 - } - - /* - * Load general parameters and establish defaults - */ - -// General -params.output = 'output' -params.threads = 3 -params.input = '' - -// Assemblers? 
-params.skip_flye = false -params.skip_spades = false -params.skip_shovill = false -params.skip_canu = false -params.skip_unicycler = false -params.skip_haslr = false -params.skip_raven = false -params.skip_wtdbg2 = false -params.skip_shasta = false - -// shasta default configuration -params.shasta_config = 'Nanopore-Oct2021' - -// medaka model default configuration -params.medaka_model = 'r941_min_high_g360' - -// genome size for canu and wtdbg2 and haslr -params.genome_size = '' - -// Additional parameters for assemblers and quast -params.quast_additional_parameters = '' -params.canu_additional_parameters = '' -params.unicycler_additional_parameters = '' -params.flye_additional_parameters = '' -params.spades_additional_parameters = '' -params.shovill_additional_parameters = '' -params.haslr_additional_parameters = '' -params.raven_additional_parameters = '' -params.wtdbg2_additional_parameters = '' -params.shasta_additional_parameters = '' - -// Long reads -params.corrected_long_reads = false -params.nanopolish_max_haplotypes = 1000 - -// Hybrid strategies default configuration -params.hybrid_strategy = 1 -params.pilon_memory_limit = 50 +======================================================================================== + VALIDATE & PRINT PARAMETER SUMMARY +======================================================================================== +*/ +WorkflowMain.initialise(workflow, params, log) /* - * Define log message - */ -logMessage() - -/* - * Define custom workflows - */ +======================================================================================== + LOAD WORKFLOWS +======================================================================================== +*/ -// misc -include { parse_samplesheet } from './workflows/parse_samples.nf' -include { ASSEMBLY_QC } from './workflows/assembly_qc.nf' +include { PARSE_SAMPLESHEET } from './workflows/parse_samples.nf' +include { ASSEMBLY_QC } from './workflows/assembly_qc.nf' +include { SHORTREADS_ONLY } from './workflows/short-reads-only.nf' +include { LONGREADS_ONLY } from './workflows/long-reads-only.nf' +include { HYBRID } from './workflows/hybrid.nf' -// Short reads only -include { SHORTREADS_ONLY } from './workflows/short-reads-only.nf' - -// Long reads only -include { LONGREADS_ONLY } from './workflows/long-reads-only.nf' - -// Hybrid -include { HYBRID } from './workflows/hybrid.nf' - - /* - * DEFINE (RUN) MAIN WORKFLOW - */ +/* +======================================================================================== + DEFINE MAIN WORKFLOW +======================================================================================== +*/ workflow { @@ -134,45 +41,29 @@ workflow { You can see that processes that were not launched have an empty [- ]. """) - // with samplesheet? 
- if (params.input) {
+ // Load YAML
+ samplesheet_yaml = file(params.input)
+ parameter_yaml = samplesheet_yaml.readLines().join("\n")
+ new Yaml().load(parameter_yaml).each { k, v -> params[k] = v }
- // Load YAML
- samplesheet_yaml = file(params.input)
- parameter_yaml = samplesheet_yaml.readLines().join("\n")
- new Yaml().load(parameter_yaml).each { k, v -> params[k] = v }
+ // Copy YAML samplesheet to output directory so user has a copy of it
+ file(params.output).mkdir()
+ samplesheet_yaml.copyTo(params.output + "/" + "${samplesheet_yaml.getName()}")
- // Copy YAML samplesheet to output directory so user has a copy of it
- file(params.output).mkdir()
- samplesheet_yaml.copyTo(params.output + "/" + "${samplesheet_yaml.getName()}")
+ // Parse YAML file
+ PARSE_SAMPLESHEET( params.samplesheet )
- // Parse YAML file
- parse_samplesheet(params.samplesheet)
-
- // short reads only samples
- SHORTREADS_ONLY(parse_samplesheet.out[0])
+ // short reads only samples
+ SHORTREADS_ONLY( PARSE_SAMPLESHEET.out.shortreads )
- // long reads only samples
- LONGREADS_ONLY(parse_samplesheet.out[1])
-
- // hybrid samples
- HYBRID(parse_samplesheet.out[2])
-
- // QC
- ASSEMBLY_QC(SHORTREADS_ONLY.out.mix(LONGREADS_ONLY.out, HYBRID.out))
+ // long reads only samples
+ LONGREADS_ONLY( PARSE_SAMPLESHEET.out.longreads )
- } else {
+ // hybrid samples
+ HYBRID( PARSE_SAMPLESHEET.out.hybrid )
- // Message to user
- println("""
- ERROR!
- A major error has occurred!
- ==> A samplesheet has not been provided. Please, provide a samplesheet to run the analysis. Online documentation is available at: https://mpgap.readthedocs.io/en/latest/
- Please, read the docs.
- Cheers.
- """)
-
- }
+ // QC
+ ASSEMBLY_QC( SHORTREADS_ONLY.out.mix( LONGREADS_ONLY.out, HYBRID.out ) )
}
diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md
index 09041f69..073a301b 100644
--- a/markdown/CHANGELOG.md
+++ b/markdown/CHANGELOG.md
@@ -2,6 +2,17 @@
The tracking for changes started in v2.
+## v3.1.2 -- [2022-Feb-28]
+
+This version addresses the changes discussed in [issue #33](https://github.com/fmalmeida/MpGAP/issues/33). It has three main changes:
+
+1. Added standard Nextflow resource allocation rules, as done by the nf-core community
+ * It also uses the nf-core community templates for the CLI help and logging messages.
+2. Re-organized config files to keep the structure cleaner
+3. Changed the standard profile, which no longer loads docker by default. As is common practice for Nextflow pipelines, users must explicitly select between the docker/conda/singularity profiles.
+
+> Nothing has changed in terms of how tools are called and used, thus the docker image is still the same. In fact, patch/fix releases (x.x.x) will always use the docker image from their breaking/feature release (x.x).
+
## v3.1.1

This is a super small release that is basically a hotfix.
It solved the following: diff --git a/modules/Hybrid/haslr_hybrid.nf b/modules/Hybrid/haslr_hybrid.nf index 92ef3e73..e6c542ff 100644 --- a/modules/Hybrid/haslr_hybrid.nf +++ b/modules/Hybrid/haslr_hybrid.nf @@ -1,7 +1,7 @@ process haslr_hybrid { publishDir "${params.output}/${prefix}", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -17,15 +17,16 @@ process haslr_hybrid { // Check reads paired_reads = (!(sread1 =~ /input.*/) && !(sread2 =~ /input.*/)) ? "$sread1 $sread2" : "" single_reads = !(single =~ /input.*/) ? "$single" : "" + additional_params = (params.haslr_additional_parameters) ? params.haslr_additional_parameters : "" """ # run haslr haslr.py \\ - -t ${params.threads} \\ + -t $task.cpus \\ -o haslr \\ -g ${genome_size} \\ -l $lreads \\ -x ${lr_type} \\ - ${params.haslr_additional_parameters} \\ + $additional_params \\ -s ${paired_reads} ${single_reads} # rename results diff --git a/modules/Hybrid/spades_hybrid.nf b/modules/Hybrid/spades_hybrid.nf index ff452ae7..afa7c17f 100644 --- a/modules/Hybrid/spades_hybrid.nf +++ b/modules/Hybrid/spades_hybrid.nf @@ -1,7 +1,7 @@ process spades_hybrid { publishDir "${params.output}/${prefix}", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -18,12 +18,13 @@ process spades_hybrid { lr = (lr_type == 'nanopore') ? '--nanopore' : '--pacbio' paired_reads = (!(sread1 =~ /input.*/) && !(sread2 =~ /input.*/)) ? "-1 $sread1 -2 $sread2" : "" single_reads = !(single =~ /input.*/) ? "-s $single" : "" + additional_params = (params.spades_additional_parameters) ? params.spades_additional_parameters : "" """ # run spades spades.py \\ -o spades \\ - -t ${params.threads} \\ - ${params.spades_additional_parameters} \\ + -t $task.cpus \\ + $additional_params \\ ${paired_reads} \\ ${single_reads} \\ ${lr} ${lreads} diff --git a/modules/Hybrid/unicycler_hybrid.nf b/modules/Hybrid/unicycler_hybrid.nf index 3d70958e..609d2581 100644 --- a/modules/Hybrid/unicycler_hybrid.nf +++ b/modules/Hybrid/unicycler_hybrid.nf @@ -1,7 +1,7 @@ process unicycler_hybrid { publishDir "${params.output}/${prefix}", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -17,6 +17,7 @@ process unicycler_hybrid { // Check reads paired_reads = (!(sread1 =~ /input.*/) && !(sread2 =~ /input.*/)) ? "-1 $sread1 -2 $sread2" : "" single_reads = !(single =~ /input.*/) ? "-s $single" : "" + additional_params = (params.unicycler_additional_parameters) ? 
params.unicycler_additional_parameters : "" """ # copy spades 3.13 to dir src_dir=\$(which shasta | sed 's/shasta//g') @@ -30,8 +31,8 @@ process unicycler_hybrid { ${single_reads} \\ -l ${lreads} \\ -o unicycler \\ - -t ${params.threads} \\ - ${params.unicycler_additional_parameters} \\ + -t $task.cpus \\ + $additional_params \\ --spades_path SPAdes-3.13.0-Linux/bin/spades.py # rename results diff --git a/modules/Hybrid/unicycler_polish.nf b/modules/Hybrid/unicycler_polish.nf index acb03f9f..1a6ffeb0 100644 --- a/modules/Hybrid/unicycler_polish.nf +++ b/modules/Hybrid/unicycler_polish.nf @@ -1,7 +1,7 @@ process pilon_polish { publishDir "${params.output}/${prefix}/pilon_polished_contigs", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), file(draft), val(assembler), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -31,7 +31,7 @@ process pilon_polish { -a $draft \\ -1 ${sread1} \\ -2 ${sread2} \\ - --threads ${params.threads} &> polish.log ; + --threads $task.cpus &> polish.log ; # Save files in the desired directory mv 0* polish.log ${assembler}; @@ -49,7 +49,7 @@ process pilon_polish { # Index and align reads with bwa bwa index ${draft} ; - bwa mem -M -t ${params.threads} ${draft} ${single} > ${fixed_id}_${assembler}_aln.sam ; + bwa mem -M -t $task.cpus ${draft} ${single} > ${fixed_id}_${assembler}_aln.sam ; samtools view -bS ${fixed_id}_${assembler}_aln.sam | samtools sort > ${fixed_id}_${assembler}_aln.bam ; samtools index ${fixed_id}_${assembler}_aln.bam ; @@ -77,7 +77,7 @@ process pilon_polish { # Index and align reads with bwa bwa index ${draft} ; - bwa mem -M -t ${params.threads} ${draft} ${single} > ${fixed_id}_${assembler}_aln.sam ; + bwa mem -M -t $task.cpus ${draft} ${single} > ${fixed_id}_${assembler}_aln.sam ; samtools view -bS ${fixed_id}_${assembler}_aln.sam | samtools sort > ${fixed_id}_${assembler}_aln.bam ; samtools index ${fixed_id}_${assembler}_aln.bam ; @@ -97,7 +97,7 @@ process pilon_polish { -a first_polish.fasta \\ -1 ${sread1} \\ -2 ${sread2} \\ - --threads ${params.threads} &> polish.log ; + --threads $task.cpus &> polish.log ; # Save files in the desired directory mv 0* polish.log ${assembler}; diff --git a/modules/LongReads/canu.nf b/modules/LongReads/canu.nf index d48e3daa..8e1611c1 100644 --- a/modules/LongReads/canu.nf +++ b/modules/LongReads/canu.nf @@ -1,7 +1,8 @@ process canu { publishDir "${params.output}/${prefix}", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' + input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -16,14 +17,15 @@ process canu { lr = (lr_type == 'nanopore') ? '-nanopore' : '-pacbio' corrected = (corrected_long_reads == 'true') ? '-corrected' : '' fixed_id = id - ":strategy_2" + additional_params = (params.canu_additional_parameters) ? 
params.canu_additional_parameters : "" """ # run canu canu \\ -p ${fixed_id} \\ -d canu \\ - maxThreads=${params.threads} \\ + maxThreads=$task.cpus \\ genomeSize=${genome_size} \\ - ${params.canu_additional_parameters} \\ + $additional_params \\ $corrected \\ $lr $lreads diff --git a/modules/LongReads/flye.nf b/modules/LongReads/flye.nf index 77bd4a54..8f436f5a 100644 --- a/modules/LongReads/flye.nf +++ b/modules/LongReads/flye.nf @@ -1,7 +1,7 @@ process flye { publishDir "${params.output}/${prefix}", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -18,14 +18,15 @@ process flye { corrected = (corrected_long_reads == 'true') ? '-corr' : '-raw' lrparam = lr + corrected gsize = (genome_size) ? "--genome-size ${genome_size}" : "" + additional_params = (params.flye_additional_parameters) ? params.flye_additional_parameters : "" """ # run flye flye \\ ${lrparam} $lreads \\ ${gsize} \\ --out-dir flye \\ - ${params.flye_additional_parameters} \\ - --threads ${params.threads} &> flye.log ; + $additional_params \\ + --threads $task.cpus &> flye.log ; # rename results mv flye/assembly.fasta flye/flye_assembly.fasta diff --git a/modules/LongReads/gcpp.nf b/modules/LongReads/gcpp.nf index 1175eeab..3d812dc8 100644 --- a/modules/LongReads/gcpp.nf +++ b/modules/LongReads/gcpp.nf @@ -1,7 +1,7 @@ process gcpp { publishDir "${params.output}/${prefix}/gcpp_polished_contigs", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), file(draft), val(assembler), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -18,14 +18,14 @@ process gcpp { # generate genome index pbmm2 \\ index \\ - -j ${params.threads} \\ + -j $task.cpus \\ ${draft} \\ draft.mmi ; # align bam pbmm2 \\ align \\ - -j ${params.threads} \\ + -j $task.cpus \\ --sort \\ draft.mmi \\ ${bams} \\ @@ -39,7 +39,7 @@ process gcpp { gcpp \\ -r ${draft} \\ -o ${assembler}_gcpp_consensus.fasta,${assembler}_gcpp_variants.gff \\ - -j ${params.threads} \\ + -j $task.cpus \\ final_pbaligned.bam ; """ } diff --git a/modules/LongReads/medaka.nf b/modules/LongReads/medaka.nf index 3e8e6f97..7a3aa936 100644 --- a/modules/LongReads/medaka.nf +++ b/modules/LongReads/medaka.nf @@ -1,6 +1,7 @@ process medaka { publishDir "${params.output}/${prefix}/medaka_polished_contigs", mode: 'copy' tag "${id}" + label 'process_assembly' input: tuple val(id), file(draft), val(assembler), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -24,7 +25,7 @@ process medaka { # as in medaka manual racon \\ -m 8 -x -6 -g -8 -w 500 \\ - -t ${params.threads} \\ + -t $task.cpus \\ ${lreads} \\ reads_mapped.paf \\ ${draft} > racon_consensus.fasta ; @@ -34,7 +35,7 @@ process medaka { -i ${lreads} \\ -d racon_consensus.fasta \\ -o ${assembler} \\ - -t ${params.threads} \\ + -t $task.cpus \\ -m ${medaka_model} ; # rename 
results diff --git a/modules/LongReads/nanopolish.nf b/modules/LongReads/nanopolish.nf index e79aaae2..1479fa19 100644 --- a/modules/LongReads/nanopolish.nf +++ b/modules/LongReads/nanopolish.nf @@ -1,7 +1,7 @@ process nanopolish { publishDir "${params.output}/${prefix}/nanopolished_contigs/${assembler}", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), file(draft), val(assembler), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -35,7 +35,7 @@ process nanopolish { # map reads to assembly minimap2 \\ -ax map-ont \\ - -t ${params.threads} \\ + -t $task.cpus \\ ${draft} \\ reads.fa | \\ samtools \\ @@ -55,7 +55,7 @@ process nanopolish { -r reads.fa \\ -b reads.sorted.bam \\ -g ${draft} \\ - -t ${params.threads} \\ + -t $task.cpus \\ --max-haplotypes ${nanopolish_max_haplotypes} ; # call polished fasta from vcf diff --git a/modules/LongReads/raven.nf b/modules/LongReads/raven.nf index 420903b4..c2c1680c 100644 --- a/modules/LongReads/raven.nf +++ b/modules/LongReads/raven.nf @@ -1,7 +1,7 @@ process raven { publishDir "${params.output}/${prefix}/raven", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -15,12 +15,13 @@ process raven { script: corrected = (corrected_long_reads == 'true') ? '--weaken' : '' + additional_params = (params.raven_additional_parameters) ? params.raven_additional_parameters : "" """ # run raven raven \\ - --threads ${params.threads} \\ + --threads $task.cpus \\ --graphical-fragment-assembly raven_assembly.gfa \\ - ${params.raven_additional_parameters} \\ + $additional_params \\ $corrected \\ $lreads > raven_assembly.fasta ; """ diff --git a/modules/LongReads/shasta.nf b/modules/LongReads/shasta.nf index 81047375..1b4d24d3 100644 --- a/modules/LongReads/shasta.nf +++ b/modules/LongReads/shasta.nf @@ -1,7 +1,7 @@ process shasta { publishDir "${params.output}/${prefix}", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -16,7 +16,7 @@ process shasta { script: lr = (lr_type == 'nanopore') ? '-nanopore' : '-pacbio' in_reads = (lreads.getName() - ".gz") - + additional_params = (params.shasta_additional_parameters) ? 
params.shasta_additional_parameters : "" """ # unzip reads gunzip -dcf $lreads > uncompressed_${in_reads} ; @@ -24,8 +24,8 @@ process shasta { # assemble shasta \\ --assemblyDirectory shasta \\ - --threads ${params.threads} \\ - ${params.shasta_additional_parameters} \\ + --threads $task.cpus \\ + $additional_params \\ --input uncompressed_${in_reads} \\ --config ${shasta_config} ; diff --git a/modules/LongReads/unicycler.nf b/modules/LongReads/unicycler.nf index 1de51ffb..9e8c39b0 100644 --- a/modules/LongReads/unicycler.nf +++ b/modules/LongReads/unicycler.nf @@ -1,7 +1,7 @@ process unicycler { publishDir "${params.output}/${prefix}", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -14,6 +14,7 @@ process unicycler { (entrypoint == 'longreads_only' || entrypoint == 'hybrid_strategy_2') script: + additional_params = (params.unicycler_additional_parameters) ? params.unicycler_additional_parameters : "" """ # copy spades 3.13 to dir src_dir=\$(which shasta | sed 's/shasta//g') @@ -25,8 +26,8 @@ process unicycler { unicycler \\ -l ${lreads} \\ -o unicycler \\ - -t ${params.threads} \\ - ${params.unicycler_additional_parameters} \\ + -t $task.cpus \\ + $additional_params \\ --spades_path SPAdes-3.13.0-Linux/bin/spades.py # rename results diff --git a/modules/LongReads/wtdbg2.nf b/modules/LongReads/wtdbg2.nf index 32c42bd1..0e8a8bf7 100644 --- a/modules/LongReads/wtdbg2.nf +++ b/modules/LongReads/wtdbg2.nf @@ -1,7 +1,7 @@ process wtdbg2 { publishDir "${params.output}/${prefix}/wtdbg2", mode: 'copy' - cpus params.threads tag "${id}" + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -15,14 +15,15 @@ process wtdbg2 { script: fixed_id = id - ":strategy_2" + additional_params = (params.wtdbg2_additional_parameters) ? 
params.wtdbg2_additional_parameters : "" """ # run wtdbg2 wtdbg2.pl \\ - -t ${params.threads} \\ + -t $task.cpus \\ -x ${wtdbg2_technology} \\ -g ${genome_size} \\ -o ${fixed_id} \\ - ${params.wtdbg2_additional_parameters} \\ + $additional_params \\ $lreads # rename results diff --git a/modules/QualityAssessment/multiqc.nf b/modules/QualityAssessment/multiqc.nf index 9b360aba..147ced3c 100644 --- a/modules/QualityAssessment/multiqc.nf +++ b/modules/QualityAssessment/multiqc.nf @@ -1,6 +1,7 @@ process multiqc { publishDir "${params.output}/${prefix}/00_quality_assessment", mode: 'copy' tag "${id}" + label 'process_ultralow' input: tuple val(id), val(entrypoint), val(prefix), file(quast_dirs) diff --git a/modules/QualityAssessment/quast.nf b/modules/QualityAssessment/quast.nf index 85d80564..816c8d6d 100644 --- a/modules/QualityAssessment/quast.nf +++ b/modules/QualityAssessment/quast.nf @@ -1,6 +1,7 @@ process quast { publishDir "${params.output}/${prefix}/00_quality_assessment", mode: 'copy' tag "${id}" + label 'process_low' input: tuple val(id), file(contigs), val(assembler), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -13,20 +14,21 @@ process quast { paired_param = !(sread1 =~ /input.*/ || sread2 =~ /input.*/) ? "--pe1 ${sread1} --pe2 ${sread2}" : "" single_param = !(single =~ /input.?/) ? "--single ${single}" : "" lreads_param = !(lreads =~ /input.?/) ? "--${lr_type} ${lreads}" : "" + additional_params = (params.quast_additional_parameters) ? params.quast_additional_parameters : "" if (params.selected_profile == "docker" || params.selected_profile == "conda") """ # run quast quast.py \\ -o ${assembler} \\ - -t ${params.threads} \\ + -t $task.cpus \\ ${lreads_param} \\ ${paired_param} \\ ${single_param} \\ --conserved-genes-finding \\ --rna-finding \\ --min-contig 100 \\ - ${params.quast_additional_parameters} \\ + $additional_params \\ ${contigs} """ @@ -39,14 +41,14 @@ process quast { # run quast quast.py \\ -o ${assembler} \\ - -t ${params.threads} \\ + -t $task.cpus \\ ${lreads_param} \\ ${paired_param} \\ ${single_param} \\ --conserved-genes-finding \\ --rna-finding \\ --min-contig 100 \\ - ${params.quast_additional_parameters} \\ + $additional_params \\ ${contigs} """ } diff --git a/modules/ShortReads/shovill_sreads.nf b/modules/ShortReads/shovill_sreads.nf index d1098c71..9e9e4ad2 100644 --- a/modules/ShortReads/shovill_sreads.nf +++ b/modules/ShortReads/shovill_sreads.nf @@ -2,7 +2,7 @@ process shovill { publishDir "${params.output}/${prefix}/shovill", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix), val(assembler) @@ -15,6 +15,7 @@ process shovill { !(sread1 =~ /input.*/ || sread2 =~ /input.*/) && (single =~ /input.*/) && (entrypoint == 'shortreads_only') script: + additional_params = (params.shovill_additional_parameters) ? 
params.shovill_additional_parameters : "" """ # run shovill shovill \\ @@ -22,8 +23,8 @@ process shovill { --assembler ${assembler} \\ --R1 $sread1 \\ --R2 $sread2 \\ - --cpus ${params.threads} \\ - ${params.shovill_additional_parameters} \\ + --cpus $task.cpus \\ + $additional_params \\ --trim ; # rename results diff --git a/modules/ShortReads/spades_sreads.nf b/modules/ShortReads/spades_sreads.nf index 15992a80..e5276f45 100644 --- a/modules/ShortReads/spades_sreads.nf +++ b/modules/ShortReads/spades_sreads.nf @@ -2,7 +2,7 @@ process spades { publishDir "${params.output}/${prefix}", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -17,12 +17,13 @@ process spades { script: param_paired = !(sread1 =~ /input.*/ || sread2 =~ /input.*/) ? "-1 $sread1 -2 $sread2" : "" param_single = !(single =~ /input.*/) ? "-s $single" : "" + additional_params = (params.spades_additional_parameters) ? params.spades_additional_parameters : "" """ # run spades spades.py \\ -o spades \\ - -t ${params.threads} \\ - ${params.spades_additional_parameters} \\ + -t $task.cpus \\ + $additional_params \\ $param_paired \\ $param_single diff --git a/modules/ShortReads/unicycler_sreads.nf b/modules/ShortReads/unicycler_sreads.nf index 5ad8244e..a1f07da0 100644 --- a/modules/ShortReads/unicycler_sreads.nf +++ b/modules/ShortReads/unicycler_sreads.nf @@ -2,7 +2,7 @@ process unicycler { publishDir "${params.output}/${prefix}", mode: 'copy' tag "${id}" - cpus params.threads + label 'process_assembly' input: tuple val(id), val(entrypoint), file(sread1), file(sread2), file(single), file(lreads), val(lr_type), val(wtdbg2_technology), val(genome_size), val(corrected_long_reads), val(medaka_model), file(fast5), val(nanopolish_max_haplotypes), val(shasta_config), file(bams), val(prefix) @@ -17,6 +17,7 @@ process unicycler { script: param_paired = !(sread1 =~ /input.*/ || sread2 =~ /input.*/) ? "-1 $sread1 -2 $sread2" : "" param_single = !(single =~ /input.*/) ? "-s $single" : "" + additional_params = (params.unicycler_additional_parameters) ? params.unicycler_additional_parameters : "" """ # copy spades 3.13 to dir src_dir=\$(which shasta | sed 's/shasta//g') @@ -29,8 +30,8 @@ process unicycler { ${param_paired} \\ ${param_single} \\ -o unicycler \\ - -t ${params.threads} \\ - ${params.unicycler_additional_parameters} \\ + -t $task.cpus \\ + $additional_params \\ --spades_path SPAdes-3.13.0-Linux/bin/spades.py # rename results diff --git a/nextflow.config b/nextflow.config index e74e8987..4ff23dff 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,164 +1,74 @@ /* - * Configuration File to run fmalmeida/mpgap pipeline. - */ + fmalmeida/mpgap pipeline configuration file -/* - * Pipeline parameters + Maintained by Felipe Marques de Almeida + Contact: almeidafmarques@outlook.com */ -params { - - /* - * Input parameter - */ - - -// Path to YAML samplesheet file. -// Please read the documentation https://mpgap.readthedocs.io/en/latest/samplesheet.html to know how to create a samplesheet file. - input = "" - - /* - * Output parameters - */ - - -// Output folder name - output = "output" - - - /* - * Resources parameters - */ - - -// Number of threads to be used by each software. 
- threads = 3 - -// Number of jobs to run in parallel. Be aware that each job (in parallel) can consume -// N threads (set above). Be sure to carefully check your resources before augmenting -// this parameter. For example: parallel_jobs = 2 + threads = 5 can consume until 10 -// threads at once. -// If not given, let's nextflow automatically handle it. - parallel_jobs = - -// Memory allocation for pilon polish. -// Values in Gb. Default 50G. 50G has been proved to be enough in most cases. -// This step is crucial because with not enough memory will crash and not correct your assembly. - pilon_memory_limit = 50 - - /* - * General parameters - * - * These parameters will set the default for all samples. - * However, they can also be set inside the YAML, if this happens - * the pipeline will use the value inside the YAML to overwrite - * the parameter for that specific sample. - * - * Please read the documentation https://mpgap.readthedocs.io/en/latest/samplesheet.html to know more about the samplesheet file. - */ - - -// This parameter only needs to be set if the software chosen is Canu, wtdbg2 or Haslr. Is optional for Flye. -// It is an estimate of the size of the genome. Common suffices are allowed, for example, 3.7m or 2.8g - genome_size = "" +// Load base.config (contains some label resources configuration) +includeConfig 'conf/base.config' -// Select the appropriate value to pass to wtdbg2 to assemble input. -// Options are: "ont" for Nanopore reads, "rs" for PacBio RSII, "sq" for PacBio Sequel, "ccs" for PacBio CCS reads. -// By default, if not given, the pipeline will use the value "ont" if nanopore reads are used and "sq" if pacbio reads are used - wtdbg2_technology = "" +// loading required / default pipeline parameters +includeConfig 'conf/defaults.config' +// fix type of variable expected +params.hybrid_strategy = params.hybrid_strategy.toString() -// Select the appropriate shasta config to use for assembly -// Since shasta v0.8 (Oct/2021) this parameter is now mandatory. - shasta_config = "Nanopore-Oct2021" -// Tells the pipeline to interpret the long reads as "corrected" long reads. -// This will activate (if available) the options for corrected reads in the -// assemblers: -corrected (in canu), --pacbio-corr|--nano-corr (in flye), etc. -// Be cautious when using this parameter. If your reads are not corrected, and -// you use this parameter, you will probably do not generate any contig. - corrected_long_reads = false - -// This parameter below (hybrid_strategy) is to select the hybrid strategies adopted by the pipeline. -// Read the documentation https://mpgap.readthedocs.io/en/latest/manual.html to know more about the hybrid strategies. -// -// Whenever using this parameter, it is also possible to polish the longreads-only assemblies with Nanopolish, -// Medaka or VarianCaller (Arrow) before the polishing with shortreads (using Pilon). For that it is necessary to set -// the right parameters: pacbio_bam and nanopolish_fast5 (files given only inside YAML) or medaka_model. - hybrid_strategy = 1 - -// Default medaka model used for polishing nanopore long reads assemblies. -// Please read their manual https://github.com/nanoporetech/medaka to know more about the available models. - medaka_model = "r941_min_high_g360" - -// This parameter sets to nanopolish the max number of haplotypes to be considered. 
-// Sometimes the pipeline may crash because to much variation was found exceeding the limit - nanopolish_max_haplotypes = 1000 - - - /* - * Advanced parameters - * - * Controlling the execution of assemblers - * It must be set as true to skip the software and false to use it. - * Also adding the possibility to pass additional parameters to them - * Additional parameters must be in quotes and separated by spaces. - */ - - - quast_additional_parameters = "" // Give additional parameters to Quast while assessing assembly metrics. - // Must be given as shown in Quast manual. E.g. " --large --eukaryote ". - - skip_spades = false // Hybrid and shortreads only assemblies - spades_additional_parameters = "" // Must be given as shown in Spades manual. E.g. " --meta --plasmids " +// Nextflow management +params { - skip_shovill = false // Paired shortreads only assemblies - shovill_additional_parameters = "" // Must be given as shown in Shovill manual. E.g. " --depth 15 " - // The pipeline already executes shovill with spades, skesa and megahit, so please, do not use it with shovill's ``--assembler`` parameter. + // Boilerplate options + tracedir = "${params.output}/pipeline_info" + help = false + get_config = false + get_samplesheet = false + validate_params = true + schema_ignore_params = 'selected_profile' + show_hidden_params = false + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null - skip_unicycler = false // Hybrid and shortreads only assemblies - unicycler_additional_parameters = "" // Must be given as shown in Unicycler manual. E.g. " --mode conservative --no_correct " +} - skip_haslr = false // Hybrid assemblies - haslr_additional_parameters = "" // Must be given as shown in Haslr manual. E.g. " --cov-lr 30 " +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} - skip_canu = false // Longreads only assemblies - canu_additional_parameters = "" // Must be given as shown in Canu manual. E.g. " correctedErrorRate=0.075 corOutCoverage=200 " +/* - skip_flye = false // Longreads only assemblies - flye_additional_parameters = "" // Must be given as shown in Flye manual. E.g. " --meta --iterations 4 " - - skip_raven = false // Longreads only assemblies - raven_additional_parameters = "" // Must be given as shown in Raven manual. E.g. " --polishing-rounds 4 " - skip_wtdbg2 = false // Longreads only assemblies - wtdbg2_additional_parameters = "" // Must be given as shown in wtdbg2 manual. E.g. " --tidy-reads 5000 " - - skip_shasta = false // Nanopore longreads only assemblies - shasta_additional_parameters = "" // Must be given as shown in shasta manual. E.g. " --Reads.minReadLength 5000 " + Configuring Nextflow Scopes. -} + Enable or not the production of Nextflow Reports -/* - * Configuring Nextflow reports */ - +def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') // Trace Report trace { - enabled = false - file = "${params.output}" + "/mpgap_trace.txt" - fields = "task_id,name,status,exit,realtime,cpus,%cpu,memory,%mem,rss" + enabled = true // Enable? 
true or false + file = "${params.tracedir}" + "/mpgap_tracing_${trace_timestamp}.txt" + fields = 'task_id,name,status,exit,realtime,cpus,%cpu,memory,%mem,rss' } // Timeline Report timeline { - enabled = false - file = "${params.output}" + "/mpgap_timeline.html" + enabled = true // Enable? true or false + file = "${params.tracedir}" + "/mpgap_timeline_${trace_timestamp}.html" } // Complete Report report { - enabled = true - file = "${params.output}" + "/mpgap_nextflow_report.html" + enabled = true // Enable? true or false + file = "${params.tracedir}" + "/mpgap_report_${trace_timestamp}.html" } /* @@ -168,57 +78,11 @@ report { */ profiles { - // standard local profile - // uses docker by default - standard { - // using docker as default - params.selected_profile = "docker" - singularity.enabled = false - docker.enabled = true - docker.runOptions = '-u \$(id -u):\$(id -g)' - fixOwnership = true - process.container = "fmalmeida/mpgap:v3.1" - // QueueSize limit - if (params.parallel_jobs) { - executor.local.queueSize = params.parallel_jobs - } - } - - // local executor - local { - // QueueSize limit - if (params.parallel_jobs) { - executor.local.queueSize = params.parallel_jobs - } - } - - // conda profile - conda { - params.selected_profile = "conda" - singularity.enabled = false - docker.enabled = false - process.conda = "$CONDA_PREFIX/envs/mpgap-3.1" - } - - // docker profile - docker { - params.selected_profile = "docker" - singularity.enabled = false - docker.enabled = true - docker.runOptions = '-u \$(id -u):\$(id -g)' - fixOwnership = true - process.container = "fmalmeida/mpgap:v3.1" - } - - // singularity profile - singularity { - params.selected_profile = "singularity" - docker.enabled = false - singularity.enabled = true - singularity.autoMounts = true - process.container = "docker://fmalmeida/mpgap:v3.1" - singularity.autoMounts = true - } + // load profiles + standard { includeConfig 'conf/standard.config' } + conda { includeConfig 'conf/conda.config' } + docker { includeConfig 'conf/docker.config' } + singularity { includeConfig 'conf/singularity.config' } } @@ -226,14 +90,11 @@ profiles { Adding manifest */ manifest { - name = "fmalmeida/mpgap" - author = "Felipe Almeida" - description = "Nextflow pipeline for de novo genome assembly" - homePage = "https://github.com/fmalmeida/mpgap" - mainScript = "main.nf" + name = "fmalmeida/mpgap" + author = "Felipe Marques de Almeida" + description = "Nextflow pipeline for de novo genome assembly" + homePage = "https://github.com/fmalmeida/mpgap" + mainScript = "main.nf" nextflowVersion = ">=20.10.0" - version = "3.1" -} - -// specifying number of threads wanted -process.cpus = params.threads + version = "3.1.2" +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index c84966de..54d18b5d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,7 +14,12 @@ "input": { "type": "string", "help_text": "Set path to the YAML samplesheet containing input data for assembly.\n\nPlease read the manual for more information: https://mpgap.readthedocs.io/en/latest/samplesheet.html", - "description": "Samplesheet YAML" + "description": "Samplesheet YAML. See https://mpgap.readthedocs.io/en/latest/samplesheet.html" + }, + "genome_size": { + "type": "string", + "description": "Set the expected genome size (E.g. 5.6m; 1.2g). Required by Canu and Haslr.", + "help_text": "It sets the expected genome size of the assembly. It is required by Canu and Haslr assemblers. 
It is optional for Flye, but highly recommended. E.g. 5.6m; 1.2g." }, "output": { "type": "string", @@ -29,22 +34,27 @@ "output" ] }, - "max_job_request_options": { - "title": "Max job request options", + "computational_options": { + "title": "Computational options", "type": "object", "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", + "description": "Set the top limit of resources for pipeline", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { - "threads": { + "max_cpus": { "type": "integer", - "description": "Number of threads to use for each process", - "default": 3, - "fa_icon": "fas fa-cogs" + "default": 6, + "description": "Max amount of threads to use" }, - "parallel_jobs": { - "type": "integer", - "description": "Number of jobs to run in parallel. Each job can consume up to N threads (--threads).\nIf not given, let's nextflow automatically handle it.", - "fa_icon": "fas fa-cogs" + "max_memory": { + "type": "string", + "default": "14.GB", + "description": "Max amount of memory to use" + }, + "max_time": { + "type": "string", + "default": "40.h", + "description": "Max amount of time for a job" } } }, @@ -56,7 +66,7 @@ "properties": { "hybrid_strategy": { "type": "string", - "description": "Which hybrid strategy to run?", + "description": "Which hybrid strategy to run? Options: 1, 2, both.", "help_text": "Selects which hybrid assembly strategy to run. Please read the manual for more information: https://mpgap.readthedocs.io/en/latest/manual.html", "enum": [ "1", @@ -75,17 +85,12 @@ } } }, - "general_parameters": { - "title": "General parameters", + "long_reads_assemblers_parameters": { + "title": "Long reads assemblers parameters", "type": "object", "description": "These parameters will set the default for all samples. However, they can also be set inside the samplesheet, if that happens, it will overwrite the parameter for that specific sample", "default": "", "properties": { - "genome_size": { - "type": "string", - "description": "Set the expected genome size (E.g. 5.6m; 1.2g)", - "help_text": "It sets the expected genome size of the assembly. It is required by Canu and Haslr assemblers. It is optional for Flye, but highly recommended. E.g. 5.6m; 1.2g." - }, "wtdbg2_technology": { "type": "string", "description": "Set correct long reads technology specification for wtdbg2", @@ -96,33 +101,39 @@ "rs", "sq", "ccs" - ] + ], + "hidden": true }, "shasta_config": { "type": "string", "description": "Shasta pre-set config", "fa_icon": "fas fa-question", "help_text": "Since shasta v0.8 (Oct/2021) it now requires to select a pre-set configuration for assemblies. It defaults to Nanopore-Oct2021. 
Please read their manual to check for available options: https://chanzuckerberg.github.io/shasta/Configurations.html", - "default": "Nanopore-Oct2021" + "default": "Nanopore-Oct2021", + "hidden": true }, "corrected_long_reads": { "type": "boolean", "description": "By default, long reads are corrected?", "help_text": "This will activate (if available) the options for corrected reads in the assemblers: -corrected (in canu), --pacbio-corr|--nano-corr (in flye), etc. Be cautious when using this parameter. If your reads are not corrected, and you use this parameter, you will probably do not generate any contig.", - "fa_icon": "fas fa-question" + "fa_icon": "fas fa-question", + "hidden": true }, "medaka_model": { "type": "string", "help_text": "Useful for longreads-only or hybrid assemblies using strategy 2. Used to polish a nanopore longreads-only assembly with Medaka. It selects a Medaka ONT sequencing model for polishing. Please read the medaka manual https://nanoporetech.github.io/medaka/ for more instructions.", "description": "Polish with Medaka? Which model?", "default": "r941_min_high_g360", - "fa_icon": "fas fa-question" + "fa_icon": "fas fa-question", + "hidden": true }, "nanopolish_max_haplotypes": { "type": "number", "description": "Max. number of haplotypes to be considered by Nanopolish", "help_text": "Useful when polishing the genome with Nanopolish. It sets the max number of haplotypes to be considered by Nanopolish. Sometimes the pipeline may crash because to much variation was found exceeding the limit.", - "default": 1000 + "default": 1000, + "fa_icon": "fas fa-question", + "hidden": true } } }, @@ -136,47 +147,65 @@ "skip_spades": { "type": "boolean", "description": "Skip SPAdes assembler", - "help_text": "SPAdes is a short reads only and hybrid assembler." + "help_text": "SPAdes is a short reads only and hybrid assembler.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_shovill": { "type": "boolean", "description": "Skip Shovill assembler", - "help_text": "Shovill is a paired short reads only assembler." + "help_text": "Shovill is a paired short reads only assembler.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_unicycler": { "type": "boolean", "description": "Skip Unicycler assembler", - "help_text": "Unicycler is a short reads only, long reads only and hybrid assembler." + "help_text": "Unicycler is a short reads only, long reads only and hybrid assembler.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_haslr": { "type": "boolean", "description": "Skip Haslr assembler", - "help_text": "Haslr is a hybrid assembler." + "help_text": "Haslr is a hybrid assembler.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_canu": { "type": "boolean", "description": "Skip Canu assembler", - "help_text": "Canu is a long reads only assembler. Can be use for hybrid assemblies in strategy 2." + "help_text": "Canu is a long reads only assembler. Can be use for hybrid assemblies in strategy 2.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_flye": { "type": "boolean", "description": "Skip Flye assembler", - "help_text": "Flye is a long reads only assembler. Can be use for hybrid assemblies in strategy 2." + "help_text": "Flye is a long reads only assembler. Can be use for hybrid assemblies in strategy 2.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_raven": { "type": "boolean", "description": "Skip Raven assembler", - "help_text": "Raven is a long reads only assembler. Can be use for hybrid assemblies in strategy 2." 
+ "help_text": "Raven is a long reads only assembler. Can be use for hybrid assemblies in strategy 2.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_wtdbg2": { "type": "boolean", "description": "Skip wtdbg2 assembler", - "help_text": "wtdbg2 is a long reads only assembler. Can be use for hybrid assemblies in strategy 2." + "help_text": "wtdbg2 is a long reads only assembler. Can be use for hybrid assemblies in strategy 2.", + "hidden": true, + "fa_icon": "fas fa-ban" }, "skip_shasta": { "type": "boolean", "description": "Skip Shasta assembler", - "help_text": "Shasta is a long reads only assembler. Can be use for hybrid assemblies in strategy 2." + "help_text": "Shasta is a long reads only assembler. Can be use for hybrid assemblies in strategy 2.", + "hidden": true, + "fa_icon": "fas fa-ban" } } }, @@ -189,55 +218,168 @@ "quast_additional_parameters": { "type": "string", "description": "QUAST additional parameters", - "help_text": "Give additional parameters to Quast while assessing assembly metrics. \nMust be in quotes and separated by spaces.\nMust be given as shown in Quast manual. E.g. \" --large --eukaryote \"." + "help_text": "Give additional parameters to Quast while assessing assembly metrics. \nMust be in quotes and separated by spaces.\nMust be given as shown in Quast manual. E.g. \" --large --eukaryote \".", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "shovill_additional_parameters": { "type": "string", "description": "Shovill additional parameters", - "help_text": "Passes additional parameters for Shovill assembler. E.g. '--depth 15 --assembler skesa'. Must be given as shown in Shovill\u2019 manual." + "help_text": "Passes additional parameters for Shovill assembler. E.g. '--depth 15 --assembler skesa'. Must be given as shown in Shovill\u2019 manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "spades_additional_parameters": { "type": "string", "description": "SPAdes additional parameters", - "help_text": "Passes additional parameters for SPAdes assembler. E.g. '\u2013meta \u2013plasmids'. Must be given as shown in Spades\u2019 manual." + "help_text": "Passes additional parameters for SPAdes assembler. E.g. '\u2013meta \u2013plasmids'. Must be given as shown in Spades\u2019 manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "unicycler_additional_parameters": { "type": "string", "description": "Unicycler additional parameters", - "help_text": "Passes additional parameters for Unicycler assembler. E.g. '\u2013mode conservative \u2013no_correct'. Must be given as shown in Unicycler\u2019s manual." + "help_text": "Passes additional parameters for Unicycler assembler. E.g. '\u2013mode conservative \u2013no_correct'. Must be given as shown in Unicycler\u2019s manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "haslr_additional_parameters": { "type": "string", "description": "Haslr additional parameters", - "help_text": "Passes additional parameters for Haslr assembler. E.g. '--cov-lr 30'. Must be given as shown in Haslr\u2019 manual." + "help_text": "Passes additional parameters for Haslr assembler. E.g. '--cov-lr 30'. Must be given as shown in Haslr\u2019 manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "canu_additional_parameters": { "type": "string", "description": "Canu additional parameters", - "help_text": "Passes additional parameters for Canu assembler. E.g. 'correctedErrorRate=0.075 corOutCoverage=200'. Must be given as shown in Canu\u2019s manual." 
+ "help_text": "Passes additional parameters for Canu assembler. E.g. 'correctedErrorRate=0.075 corOutCoverage=200'. Must be given as shown in Canu\u2019s manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "flye_additional_parameters": { "type": "string", "description": "Flye additional parameters", - "help_text": "Passes additional parameters for Flye assembler. E.g. '\u2013meta \u2013iterations 4'. Must be given as shown in Flye\u2019s manual." + "help_text": "Passes additional parameters for Flye assembler. E.g. '\u2013meta \u2013iterations 4'. Must be given as shown in Flye\u2019s manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "raven_additional_parameters": { "type": "string", "description": "Raven additional parameters", - "help_text": "Passes additional parameters for Raven assembler. E.g. '--polishing-rounds 4'. Must be given as shown in Raven\u2019 manual." + "help_text": "Passes additional parameters for Raven assembler. E.g. '--polishing-rounds 4'. Must be given as shown in Raven\u2019 manual.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "wtdbg2_additional_parameters": { "type": "string", "description": "wtdbg2 additional parameters", - "help_text": "Must be given as shown in wtdbg2 manual. E.g. \" --tidy-reads 5000 \", inside quotes and separated by spaces." + "help_text": "Must be given as shown in wtdbg2 manual. E.g. \" --tidy-reads 5000 \", inside quotes and separated by spaces.", + "hidden": true, + "fa_icon": "fas fa-quote-left" }, "shasta_additional_parameters": { "type": "string", "description": "Shasta additional parameters", - "help_text": "Must be given as shown in shasta manual. E.g. \" --Reads.minReadLength 5000 \", inside quotes and separated by spaces" + "help_text": "Must be given as shown in shasta manual. E.g. 
\" --Reads.minReadLength 5000 \", inside quotes and separated by spaces", + "hidden": true, + "fa_icon": "fas fa-quote-left" } }, "fa_icon": "fas fa-list-ul" + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle" + }, + "get_config": { + "type": "boolean", + "description": "Download template config for parameters", + "fa_icon": "fas fa-question-circle" + }, + "get_samplesheet": { + "type": "boolean", + "fa_icon": "fas fa-question-circle", + "description": "Download template samplesheet" + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.output}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } } }, "allOf": [ @@ -245,19 +387,25 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/definitions/computational_options" }, { "$ref": "#/definitions/hybrid_assembly_strategy_parameters" }, { - "$ref": "#/definitions/general_parameters" + "$ref": "#/definitions/long_reads_assemblers_parameters" }, { "$ref": "#/definitions/turn_assemblers_on_off" }, { "$ref": "#/definitions/software_additional_parameters" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/institutional_config_options" } ] } \ No newline at end of file diff --git a/nf_functions/help.nf b/nf_functions/help.nf deleted file mode 100644 index 03a61ad2..00000000 --- a/nf_functions/help.nf +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Define help message - */ - def helpMessage() { - log.info """ - Usage: - - nextflow run fmalmeida/mpgap [--help] [ -c nextflow.config ] [-with-report] [-with-trace] [-with-timeline] [OPTIONS] - - OPTIONS: - - The command line help message is not supported anymore because it has become too big and, therefore, very clumsy and confusing to read. - Please, use the nextflow.config configuration file that has comments and help messages inside it or refer to the online manual: - - https://mpgap.readthedocs.io/en/latest/manual.html - - Comments: - - This pipeline contains a massive amount of configuration variables and its usage as CLI parameters could cause the command line to be huge. Therefore, it is recommended to use the nextflow.config file in order to make parameterization easier and more readable. 
- - To create a configuration or samplesheet file, use: - - nextflow run fmalmeida/mpgap [--get_config] [--get_samplesheet] - """.stripIndent() - } diff --git a/nf_functions/logMessages.nf b/nf_functions/logMessages.nf deleted file mode 100644 index 0de56874..00000000 --- a/nf_functions/logMessages.nf +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Define log message - */ -def logMessage() { - log.info "====================================================================" - log.info " Container-based, fmalmeida/mpgap, generic genome assembly pipeline " - log.info "====================================================================" - def summary = [:] - // Generic parameters - summary['Output directory '] = params.output - summary['Number of threads '] = params.threads - // Workflow information - if(workflow.revision) summary['Pipeline Release'] = workflow.revision - summary['Current home'] = "$HOME" - //summary['Current user'] = "$USER" - summary['Current path'] = "$PWD" - summary['Command used'] = "$workflow.commandLine" - log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") - log.info "=========================================" -} diff --git a/nf_functions/writeCSV.nf b/nf_functions/writeCSV.nf index a985e1a9..ba5beeeb 100644 --- a/nf_functions/writeCSV.nf +++ b/nf_functions/writeCSV.nf @@ -175,7 +175,7 @@ def write_csv(in_list) { println """ ERROR! A major error has occurred! - ==> In the YAML, the 'hybrid_strategy:' key, or the --hybrid_strategy command line, must be either 1, 2 or both. + ==> The 'hybrid_strategy:' key in the YAML, or the --hybrid_strategy command line, must be either '1', '2' or 'both'. Please the re-check the parameters. Problem in sample: ${it.id}. Cheers. """.stripIndent() diff --git a/workflows/parse_samples.nf b/workflows/parse_samples.nf index dd6597fe..10a1bc85 100644 --- a/workflows/parse_samples.nf +++ b/workflows/parse_samples.nf @@ -1,5 +1,5 @@ include { write_csv } from '../nf_functions/writeCSV.nf' -workflow parse_samplesheet { +workflow PARSE_SAMPLESHEET { take: data @@ -55,9 +55,9 @@ workflow parse_samplesheet { }.set { results } emit: - results.sronly - results.lronly - results.hybrid + shortreads = results.sronly + longreads = results.lronly + hybrid = results.hybrid }
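
A note on the new resource handling: the refactored `nextflow.config` now loads `conf/base.config`, and the modules request resources through labels (`process_ultralow`, `process_low`, `process_assembly`) instead of `cpus params.threads`. The contents of `conf/base.config` are not shown in this diff, so the sketch below is only an illustration of what such a label-based configuration typically looks like in nf-core-style pipelines; its values simply mirror the `max_cpus`/`max_memory`/`max_time` defaults declared in the schema and may differ from what the pipeline actually ships.

```groovy
// Illustrative sketch of a label-based conf/base.config -- not part of this patch.
// Values mirror the schema defaults above (max_cpus = 6, max_memory = 14.GB, max_time = 40.h).
process {

    // modest defaults for any process without an explicit label
    cpus   = 1
    memory = 4.GB
    time   = 1.h

    // labels referenced by the modules in this diff
    withLabel: process_ultralow {   // e.g. multiqc
        cpus   = 1
        memory = 2.GB
        time   = 1.h
    }
    withLabel: process_low {        // e.g. quast
        cpus   = 2
        memory = 6.GB
        time   = 4.h
    }
    withLabel: process_assembly {   // assemblers and polishing steps
        cpus   = 6
        memory = 14.GB
        time   = 40.h
    }
}
```

Because every assembler now shares the `process_assembly` label, the ceiling for all of them can be raised or lowered at once by overriding that single label in a custom config passed with `-c`, instead of editing each module file.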
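
On the `additional_params` change repeated across the modules: each process now resolves its optional `*_additional_parameters` value into a local variable before interpolating it into the command, which guards against an unset (null) parameter being rendered as the literal string `null` on the command line. The snippet below restates that pattern next to its more compact Groovy equivalent; it is shown for illustration only and uses the Flye parameter as an arbitrary example.

```groovy
// Ternary guard as written in the modules:
additional_params = (params.flye_additional_parameters) ? params.flye_additional_parameters : ""

// Equivalent Groovy shorthand (Elvis operator):
additional_params = params.flye_additional_parameters ?: ""
```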