Merge pull request #49 from nf-core/initial-release-review-changes
Apply the first set of reviewer recommendations
scwatts authored May 29, 2024
2 parents b0cf4ce + be33799 commit c843c3b
Showing 17 changed files with 75 additions and 71 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.1
current_version = 1.0.0
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>[a-z0-9+]+)
1 change: 0 additions & 1 deletion .nf-core.yml
@@ -3,7 +3,6 @@ lint:
actions_ci: False
multiqc_config: False
files_exist:
- lib/NfcoreTemplate.groovy
- lib/Utils.groovy
- lib/WorkflowMain.groovy
- lib/WorkflowOncoanalyser.groovy
2 changes: 1 addition & 1 deletion README.md
@@ -74,7 +74,7 @@ Launch oncoanalyser:
```bash
nextflow run nf-core/oncoanalyser \
-profile docker \
-revision 0.4.5 \
-revision 1.0.0 \
--mode wgts \
--genome GRCh38_hmf \
--input samplesheet.csv \
2 changes: 1 addition & 1 deletion conf/test_stub.config
@@ -5,7 +5,7 @@
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/oncoanalyser -profile test,<docker/singularity> --outdir <OUTDIR>
nextflow run nf-core/oncoanalyser -profile test_stub -stub --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
2 changes: 1 addition & 1 deletion docs/output.md
@@ -145,7 +145,7 @@ _Picard MarkDuplicates is only run on RNA alignments_
- `<group_id>/sage/append/`

- `<tumor_dna_id>.sage.append.vcf.gz`: Tumor DNA sample small variant VCF with RNA data appended.
- `<normal_dna_id>.sage.append.vcf.gz`: Normal DNA sample small variant VCF with RNA data appended..
- `<normal_dna_id>.sage.append.vcf.gz`: Normal DNA sample small variant VCF with RNA data appended.

- `<group_id>/sage/somatic/`

16 changes: 8 additions & 8 deletions docs/usage.md
@@ -55,7 +55,7 @@ details and columns are [described below](#column-descriptions).

Several different input filetypes beyond FASTQ and BAM are recognised, including intermediate output files generated
during execution such as the PURPLE output directory. The full list of recognised input filetypes is available
[here](https://github.com/nf-core/oncoanalyser/blob/0.4.5/lib/Constants.groovy#L58-L90).
[here](https://github.com/nf-core/oncoanalyser/blob/1.0.0/lib/Constants.groovy#L58-L90).

### Simple example

@@ -80,7 +80,7 @@ P1__wgts,P1,SC,tumor,rna,fastq,library_id:SC_library;lane:001,/path/to/P1.SC.tum

Inputs with the `bam` filetype will be processed by MarkDups as required by hmftools. Where an input BAM has already
been processed specifically by [HMF
MarkDups](https://github.com/hartwigmedical/hmftools/blob/master/mark-dups/README.md), you can avoid needless
MarkDups](https://github.com/hartwigmedical/hmftools/blob/mark-dups-v1.1.5/mark-dups/README.md), you can avoid needless
reprocessing by setting `bam_markdups` as the filetype instead. It is important to understand that duplicate marking by
other tools (e.g. GATK) cannot be used as a substitute since HMF MarkDups performs key operations beyond just duplicate
marking.
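
As a rough illustration, a samplesheet row declaring such a pre-processed BAM via the `bam_markdups` filetype might look like the following sketch (the IDs and path are placeholders; the column names follow the column descriptions referenced above):

```csv
group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath
P1__wgts,P1,SA,tumor,dna,bam_markdups,/path/to/P1.SA.tumor.dna.wgs.markdups.bam
```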
@@ -146,7 +146,7 @@ The typical command for running the pipeline is as follows:
```bash
nextflow run nf-core/oncoanalyser \
-profile docker \
-revision 0.4.5 \
-revision 1.0.0 \
--mode <wgts|targeted> \
--genome <GRCh37_hmf|GRCh38_hmf> \
--input samplesheet.csv \
@@ -229,7 +229,7 @@ If you wish to share such profile (such as upload as supplementary material for
### Selecting processes

Most of the major components in oncoanalyser can be skipped using `--processes_exclude` (the full list of available
processes can be view [here](https://github.com/nf-core/oncoanalyser/blob/0.4.5/lib/Constants.groovy#L36-L56)).
processes can be view [here](https://github.com/nf-core/oncoanalyser/blob/1.0.0/lib/Constants.groovy#L36-L56)).
Multiple processes can be given as a comma-separated list. While there are some use-cases for this feature (e.g.
skipping resource intensive processes such as VIRUSBreakend), it becomes more powerful when combined with existing
inputs as described in the following section.
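
As a quick sketch, excluding a single resource-intensive process looks roughly like this (the lowercase `virusbreakend` token is assumed to match the process list linked above):

```bash
nextflow run nf-core/oncoanalyser \
    -profile docker \
    -revision 1.0.0 \
    --mode wgts \
    --processes_exclude virusbreakend \
    --genome GRCh38_hmf \
    --input samplesheet.csv \
    --outdir output/
```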
@@ -271,7 +271,7 @@ And now run and skip variant calling:
```bash
nextflow run nf-core/oncoanalyser \
-profile docker \
-revision 0.4.5 \
-revision 1.0.0 \
--mode wgts \
--processes_exclude markdups,amber,cobalt,gridss,gripss,sage,pave \
--genome GRCh38_hmf \
@@ -341,7 +341,7 @@ P1__wgts,P1,SC,tumor,rna,bam,/path/to/P1.SC.tumor.rna.wts.bam
```bash
nextflow run nf-core/oncoanalyser \
-profile docker \
-revision 0.4.5 \
-revision 1.0.0 \
--mode wgts \
--genome GRCh38_hmf \
--prepare_reference_only \
@@ -350,7 +350,7 @@ nextflow run nf-core/oncoanalyser \
```

Executing the above command will download and unpack default reference data without running any analysis, and once
complete the prepared reference files can found in `./prepare_reference/reference_data/0.4.5/<datetimestamp>/`. It is
complete the prepared reference files can found in `./prepare_reference/reference_data/1.0.0/<datetimestamp>/`. It is
recommended to remove the Nextflow work directory after staging data to free disk space.

For oncoanalyser to use locally staged reference data a custom config can be used:
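
A minimal sketch of such a config is shown below; the parameter name here is an assumption, so check the pipeline's reference data documentation for the exact option:

```groovy
// refdata.local.config -- hypothetical parameter name; point it at the data
// staged by the --prepare_reference_only run described above
params {
    ref_data_hmf_data_path = '/path/to/prepare_reference/reference_data/1.0.0/<datetimestamp>/hmf_data/'
}
```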
@@ -442,7 +442,7 @@ config file. This avoids having to regenerate indexes for each new analysis.
```bash
nextflow run nf-core/oncoanalyser \
-profile docker \
-revision 0.4.5 \
-revision 1.0.0 \
-config genome.custom.config \
--mode wgts \
\
10 changes: 6 additions & 4 deletions lib/Processes.groovy
@@ -1,3 +1,5 @@
import nextflow.Nextflow

import Constants
import Utils

@@ -29,8 +31,8 @@ class Processes {
return Constants.Process.valueOf(name.toUpperCase())
} catch(java.lang.IllegalArgumentException e) {
def processes_str = Processes.getProcessNames().join('\n - ')
log.error "\nERROR: recieved invalid process: '${name}'. Valid options are:\n - ${processes_str}"
System.exit(1)
log.error "recieved invalid process: '${name}'. Valid options are:\n - ${processes_str}"
Nextflow.exit(1)
}
}
.unique()
@@ -45,8 +47,8 @@ class Processes {
if (processes_shared) {
def processes_shared_str = processes_shared.join('\n - ')
def message_base = 'the following processes was found in the include and the exclude list'
log.error "\nERROR: ${message_base}:\n - ${processes_shared_str}"
System.exit(1)
log.error "${message_base}:\n - ${processes_shared_str}"
Nextflow.exit(1)
}
}

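
The Groovy changes in this commit follow one recurring pattern: drop the manual `\nERROR: ` prefix from `log.error` messages and replace `System.exit(1)` with `nextflow.Nextflow.exit(1)` so the run is terminated through Nextflow itself. A minimal sketch of that pattern (the class and method names here are purely illustrative):

```groovy
import nextflow.Nextflow

class ValidationExample {

    // Report the problem with log.error (no hand-rolled "\nERROR: " prefix),
    // then stop the run with Nextflow.exit(1) rather than System.exit(1).
    static void requireKnownProcess(String name, List<String> valid_names, log) {
        if (!valid_names.contains(name)) {
            def options_str = valid_names.join('\n - ')
            log.error "received invalid process: '${name}'. Valid options are:\n - ${options_str}"
            Nextflow.exit(1)
        }
    }
}
```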
51 changes: 25 additions & 26 deletions lib/Utils.groovy
@@ -26,7 +26,7 @@ class Utils {
// Add subject id if absent or check if current matches existing
if (meta.containsKey('subject_id') && meta.subject_id != it.subject_id) {
log.error "got unexpected subject name for ${group_id} ${meta.subject_id}: ${it.subject_id}"
System.exit(1)
Nextflow.exit(1)
} else {
meta.subject_id = it.subject_id
}
@@ -36,36 +36,36 @@
if (!sample_type_enum) {
def sample_type_str = Utils.getEnumNames(Constants.SampleType).join('\n - ')
log.error "received invalid sample type: '${it.sample_type}'. Valid options are:\n - ${sample_type_str}"
System.exit(1)
Nextflow.exit(1)
}

// Sequence type
def sequence_type_enum = Utils.getEnumFromString(it.sequence_type, Constants.SequenceType)
if (!sequence_type_enum) {
def sequence_type_str = Utils.getEnumNames(Constants.SequenceType).join('\n - ')
log.error "received invalid sequence type: '${it.sequence_type}'. Valid options are:\n - ${sequence_type_str}"
System.exit(1)
Nextflow.exit(1)
}

// Filetype
def filetype_enum = Utils.getEnumFromString(it.filetype, Constants.FileType)
if (!filetype_enum) {
def filetype_str = Utils.getEnumNames(Constants.FileType).join('\n - ')
log.error "received invalid file type: '${it.filetype}'. Valid options are:\n - ${filetype_str}"
System.exit(1)
Nextflow.exit(1)
}

def sample_key = [sample_type_enum, sequence_type_enum]
def meta_sample = meta.get(sample_key, [sample_id: it.sample_id])

if (meta_sample.sample_id != it.sample_id) {
log.error "got unexpected sample name for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}"
System.exit(1)
Nextflow.exit(1)
}

if (meta_sample.containsKey(filetype_enum) & filetype_enum != Constants.FileType.FASTQ) {
log.error "got duplicate file for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${filetype_enum}"
System.exit(1)
Nextflow.exit(1)
}

// Info data
@@ -81,12 +81,12 @@
if (!info_field_enum) {
def info_field_str = Utils.getEnumNames(Constants.InfoField).join('\n - ')
log.error "received invalid info field: '${k}'. Valid options are:\n - ${info_field_str}"
System.exit(1)
Nextflow.exit(1)
}

if (info_data.containsKey(info_field_enum)) {
log.error "got duplicate info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${info_field_enum}"
System.exit(1)
Nextflow.exit(1)
}

info_data[info_field_enum] = v
@@ -105,20 +105,20 @@

if (!info_data.containsKey(Constants.InfoField.LIBRARY_ID)) {
log.error "missing 'library_id' info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}"
System.exit(1)
Nextflow.exit(1)
}

if (!info_data.containsKey(Constants.InfoField.LANE)) {
log.error "missing 'lane' info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}"
System.exit(1)
Nextflow.exit(1)
}

def (fwd, rev) = it.filepath.tokenize(';')
def fastq_key = [info_data[Constants.InfoField.LIBRARY_ID], info_data[Constants.InfoField.LANE]]

if (meta_sample.containsKey(fastq_key)) {
log.error "got duplicate lane + library_id data for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${fastq_key}"
System.exit(1)
Nextflow.exit(1)
}

if (!meta_sample.containsKey(filetype_enum)) {
@@ -179,7 +179,7 @@ class Utils {
if (!index_fp.exists() && !stub_run) {
def (sample_type, sequence_type) = sample_key
log.error "no index provided or found for ${meta.group_id} ${sample_type}/${sequence_type}: ${key}: ${fp}"
System.exit(1)
Nextflow.exit(1)
}

meta[sample_key][index_enum] = index_fp
@@ -262,7 +262,7 @@ class Utils {

log.error "no BAMs nor BAM_MARKDUPs nor FASTQs provided for ${meta.group_id} ${sample_type}/${sequence_type}\n\n" +
"NB: BAMs or BAM_MARKDUPs or FASTQs are always required as they are the basis to determine input sample type."
System.exit(1)
Nextflow.exit(1)
}

}
@@ -274,30 +274,30 @@
if (Utils.hasNormalDna(meta)) {
log.error "targeted mode is not compatible with the normal DNA BAM provided for ${meta.group_id}\n\n" +
"The targeted workflow supports only tumor DNA BAMs (and tumor RNA BAMs for TSO500)"
System.exit(1)
Nextflow.exit(1)
}

// Do not allow only tumor RNA
if (Utils.hasTumorRnaBam(meta) && !Utils.hasTumorDna(meta)) {
log.error "targeted mode is not compatible with only tumor RNA provided for ${meta.group_id}\n\n" +
"The targeted workflow requires tumor DNA and can optionally take tumor RNA, depending on " +
"the configured panel."
System.exit(1)
Nextflow.exit(1)
}

// Restrict tumor RNA inputs to the TSO500 panel
if (Utils.hasTumorRnaBam(meta) && run_config.panel != 'tso500') {
def panel = run_config.panel.toUpperCase()
"Only the TSO500 panel supports tumor RNA analysis"
System.exit(1)
log.error "only the TSO500 panel supports tumor RNA analysis but got: ${panel}"
Nextflow.exit(1)
}

}

// Do not allow normal DNA only
if (Utils.hasNormalDna(meta) && !Utils.hasTumorDna(meta)) {
log.error "germline only mode not supported, found only a normal DNA BAM for ${meta.group_id}\n"
System.exit(1)
log.error "germline only mode not supported, found only a normal DNA BAM for ${meta.group_id}"
Nextflow.exit(1)
}

// Enforce unique samples names within groups
@@ -310,9 +310,8 @@
def key_strs = keys.collect { sample_type, sequence_type -> "${sample_type}/${sequence_type}" }
return " * ${sample_id}: ${key_strs.join(", ")}"
}
log.error "duplicate sample names found for ${meta.group_id}:\n\n" +
"${duplicate_message_strs.join("\n")}"
System.exit(1)
log.error "duplicate sample names found for ${meta.group_id}:\n\n${duplicate_message_strs.join("\n")}"
Nextflow.exit(1)
}

}
Expand All @@ -333,7 +332,7 @@ class Utils {
" The genome .alt file is required when building bwa-mem2 or GRIDSS indexes\n" +
" for reference genomes containing ALT contigs\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
Nextflow.exit(1)
}

// Refuse to create STAR index for reference genome containing ALTs, refer to Slack channel
@@ -344,7 +343,7 @@
" Refusing to create the STAR index for a reference genome with ALT contigs.\n" +
" Please review https://github.com/alexdobin/STAR docs or contact us on Slack.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
Nextflow.exit(1)
}

// Require that an input GTF file is provided when creating STAR index
@@ -353,7 +352,7 @@
" Creating a STAR index requires the appropriate genome transcript annotations\n" +
" as a GTF file. Please contact us on Slack for further information."
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
Nextflow.exit(1)
}

}
@@ -383,7 +382,7 @@ class Utils {
if (!run_mode_enum) {
def run_modes_str = Utils.getEnumNames(Constants.RunMode).join('\n - ')
log.error "recieved an invalid run mode: '${run_mode}'. Valid options are:\n - ${run_modes_str}"
System.exit(1)
Nextflow.exit(1)
}
return run_mode_enum
}