From c758a88b96735488c369470a1cac0e81f1a4e64b Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 22 Jul 2020 10:26:00 +0200
Subject: [PATCH 1/6] Use Nextflow 20.07.0-RC1, add skip_qc to MarkDuplicates

---
 main.nf                          | 76 ++++++++------------------------
 modules/local/mark_duplicates.nf |  2 +-
 2 files changed, 19 insertions(+), 59 deletions(-)

diff --git a/main.nf b/main.nf
index e0dc8286ff..f480cfda72 100644
--- a/main.nf
+++ b/main.nf
@@ -258,8 +258,9 @@ include { BWAMEM2_MEM }           from './modules/local/bwamem2_mem.nf'
 include { GET_SOFTWARE_VERSIONS } from './modules/local/get_software_versions'
 include { OUTPUT_DOCUMENTATION }  from './modules/local/output_documentation'
 include { TRIM_GALORE }           from './modules/local/trim_galore.nf'
-include { MERGE_BAM_MAPPED }      from './modules/local/merge_mapped_bam' addParams(params)
-include { MARK_DUPLICATES }       from './modules/local/mark_duplicates' params(params)
+include { MERGE_BAM_MAPPED }      from './modules/local/merge_mapped_bam' 
+include { MARK_DUPLICATES }       from './modules/local/mark_duplicates' addParams(skip_qc: skip_qc)
+//include { BASE_RECALIBRATION }    from './modules/local/base_recalibration' params(params)
 
 /*
 ================================================================================
@@ -346,8 +347,6 @@ workflow {
     pon_tbi = params.pon ? params.pon_index ?: BUILD_INDICES.out.pon_tbi : Channel.empty()
 
     // PREPROCESSING
-    intervals_bed.dump(tag:'bedintervals')
-
     if(!('fastqc' in skip_qc))
         result_fastqc = FASTQC(input_sample)
     else
@@ -380,8 +379,20 @@ workflow {
 
     bam_mapped.view()
 
-    mark_duplicates_report =  !(params.skip_markduplicates) ? MARK_DUPLICATES(bam_mapped).duplicates_marked_report : Channel.empty()
+    if(!(params.skip_markduplicates)){
+        MARK_DUPLICATES(bam_mapped)
+        mark_duplicates_report = MARK_DUPLICATES.out.duplicates_marked_report
+        bam_duplicates_marked =  MARK_DUPLICATES.out.bam_duplicates_marked 
+    }
+    else {
+        mark_duplicates_report = Channel.empty()
+        bam_duplicates_marked = Channel.empty()
+    }
 
+    bamBaseRecalibrator = bam_duplicates_marked.combine(BUILD_INDICES.out.intervals_bed)
+    
+    // TODO: call BASE_RECALIBRATION(bamBaseRecalibrator, dbsnp, dbsnp_index, fasta, ...) once modules/local/base_recalibration.nf is enabled
+    
     OUTPUT_DOCUMENTATION(
         output_docs,
         output_docs_images)
@@ -546,6 +557,7 @@ workflow.onComplete {
 
 // (bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2)
 
+// @Maxime: you included this process in merge_mapped_bam.nf, right?
 // process IndexBamFile {
 //     label 'cpus_8'
 
@@ -597,56 +609,6 @@ workflow.onComplete {
 // }
 // // STEP 2: MARKING DUPLICATES
 
-// process MarkDuplicates {
-//     label 'cpus_16'
-
-//     tag "${idPatient}-${idSample}"
-
-//     publishDir params.outdir, mode: params.publish_dir_mode,
-//         saveAs: {
-//             if (it == "${idSample}.bam.metrics") "Reports/${idSample}/MarkDuplicates/${it}"
-//             else "Preprocessing/${idSample}/DuplicatesMarked/${it}"
-//         }
-
-//     input:
-//         set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged
-
-//     output:
-//         set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into bam_duplicates_marked
-//         set idPatient, idSample into tsv_bam_duplicates_marked
-//         file ("${idSample}.bam.metrics") optional true into duplicates_marked_report
-
-//     when: !(params.skip_markduplicates)
-
-//     script:
-//     markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" +  (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
-//     metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics"
-//     if (params.no_gatk_spark)
-//     """
-//     gatk --java-options ${markdup_java_options} \
-//         MarkDuplicates \
-//         --MAX_RECORDS_IN_RAM 50000 \
-//         --INPUT ${idSample}.bam \
-//         --METRICS_FILE ${idSample}.bam.metrics \
-//         --TMP_DIR . \
-//         --ASSUME_SORT_ORDER coordinate \
-//         --CREATE_INDEX true \
-//         --OUTPUT ${idSample}.md.bam
-
-//     mv ${idSample}.md.bai ${idSample}.md.bam.bai
-//     """
-//     else
-//     """
-//     gatk --java-options ${markdup_java_options} \
-//         MarkDuplicatesSpark \
-//         -I ${idSample}.bam \
-//         -O ${idSample}.md.bam \
-//         ${metrics} \
-//         --tmp-dir . \
-//         --create-output-bam-index true \
-//         --spark-master local[${task.cpus}]
-//     """
-// }
 
 // (tsv_bam_duplicates_marked, tsv_bam_duplicates_marked_sample) = tsv_bam_duplicates_marked.into(2)
 
@@ -681,9 +643,7 @@ workflow.onComplete {
 
 // (bamMD, bamMDToJoin, bam_duplicates_marked) = bam_duplicates_marked.into(3)
 
-// bamBaseRecalibrator = bamMD.combine(intBaseRecalibrator)
-
-// bamBaseRecalibrator = bamBaseRecalibrator.dump(tag:'BAM FOR BASERECALIBRATOR')
+// 
 
 // // STEP 2': SENTIEON DEDUP
 
diff --git a/modules/local/mark_duplicates.nf b/modules/local/mark_duplicates.nf
index fe1f9e8c2f..40a6b4fad6 100644
--- a/modules/local/mark_duplicates.nf
+++ b/modules/local/mark_duplicates.nf
@@ -11,7 +11,7 @@ process MARK_DUPLICATES {
      output:
          tuple idPatient, idSample, path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit:  bam_duplicates_marked
          tuple idPatient, idSample, emit: tsv_bam_duplicates_marked
-         path "${idSample}.bam.metrics", emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet)
+         path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet)
           
      script:
      markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" +  (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""

From b6552f2520fc64da26cd3a1cc1df7749e19d4691 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 22 Jul 2020 10:33:57 +0200
Subject: [PATCH 2/6] Add blank line for linting

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bac12f4210..4d03680099 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
 
 - [#238](https://github.com/nf-core/sarek/pull/238) -Add subworkflow for building all the indices
 - [#241](https://github.com/nf-core/sarek/pull/241) -Add modules and workflows parts for preprocessing steps
+
 ## [dev](https://github.com/nf-core/sarek/tree/dev)
 
 - [#234](https://github.com/nf-core/sarek/pull/234) -Switching to DSL2

From f34fc2fb56cfdb184df9e66168d99e4bf9242319 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 22 Jul 2020 10:39:46 +0200
Subject: [PATCH 3/6] Bump minimal version to 20.07

---
 .github/workflows/ci.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 477455964d..3fa1d1f8c3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         # Nextflow versions: check pipeline minimum and current latest
-        nxf_ver: ['20.04.1', '']
+        nxf_ver: ['20.07.0', '']
     steps:
       - uses: actions/checkout@v2
       - name: Install Nextflow
@@ -42,7 +42,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.04.1'
+          NXF_VER: '20.07.0'
       - name: Pull docker image
         run: |
           docker pull nfcore/sarek:dev
@@ -65,7 +65,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '19.10.0'
+          NXF_VER: '20.07.0'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Get test data
@@ -114,7 +114,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '19.10.0'
+          NXF_VER: '20.07.0'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run ${{ matrix.profile }} test
@@ -145,7 +145,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '19.10.0'
+          NXF_VER: '20.07.0'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run ${{ matrix.tool }} test

From b151dd566b5cb7fbdcecf70cd1e24d438a8ed440 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 22 Jul 2020 10:44:40 +0200
Subject: [PATCH 4/6] Fix version naming

---
 .github/workflows/ci.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3fa1d1f8c3..ecaf2237ae 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         # Nextflow versions: check pipeline minimum and current latest
-        nxf_ver: ['20.07.0', '']
+        nxf_ver: ['20.07.0-RC1', '']
     steps:
       - uses: actions/checkout@v2
       - name: Install Nextflow
@@ -42,7 +42,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0'
+          NXF_VER: '20.07.0-RC1'
       - name: Pull docker image
         run: |
           docker pull nfcore/sarek:dev
@@ -65,7 +65,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0'
+          NXF_VER: '20.07.0-RC1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Get test data
@@ -93,7 +93,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '19.10.0'
+          NXF_VER: '20.07.0-RC1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run test for minimal genomes
@@ -114,7 +114,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0'
+          NXF_VER: '20.07.0-RC1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run ${{ matrix.profile }} test
@@ -145,7 +145,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0'
+          NXF_VER: '20.07.0-RC1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run ${{ matrix.tool }} test

From 7c78d31ffa50192c9d05b790402569d1b9c8d005 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 22 Jul 2020 10:53:46 +0200
Subject: [PATCH 5/6] Use nextflow.enable.dsl=2 and add explicit val()
 qualifiers to tuple declarations

---
 main.nf                             |  2 +-
 modules/local/base_recalibration.nf | 40 +++++++++++++++++++++++++++++
 modules/local/mark_duplicates.nf    | 12 ++++-----
 modules/local/merge_mapped_bam.nf   |  4 +--
 modules/local/trim_galore.nf        |  2 +-
 5 files changed, 50 insertions(+), 10 deletions(-)
 create mode 100644 modules/local/base_recalibration.nf

diff --git a/main.nf b/main.nf
index f480cfda72..e742ec8568 100644
--- a/main.nf
+++ b/main.nf
@@ -19,7 +19,7 @@ nf-core/sarek:
 --------------------------------------------------------------------------------
 */
 
-nextflow.preview.dsl = 2
+nextflow.enable.dsl=2
 
 // Print help message if required
 
diff --git a/modules/local/base_recalibration.nf b/modules/local/base_recalibration.nf
new file mode 100644
index 0000000000..9eca053656
--- /dev/null
+++ b/modules/local/base_recalibration.nf
@@ -0,0 +1,40 @@
+// process BASE_RECALIBRATION {
+//     label 'cpus_1'
+
+//     tag "${idPatient}-${idSample}-${intervalBed.baseName}"
+
+//     input:
+//         tuple val(idPatient), val(idSample), path(bam), path(bai), path(intervalBed) //from bamBaseRecalibrator
+//         path dbsnp //from dbsnp
+//         path dbsnpIndex// from dbsnp_tbi
+//         path fasta //from fasta
+//         path dict // from dict
+//         path fastaFai // from fai
+//         path knownIndels // from known_indels
+//         path knownIndelsIndex // from known_indels_tbi
+
+//     output:
+//         tuple val(idPatient), val(idSample), path("${prefix}${idSample}.recal.table"), emit: tableGatherBQSRReports
+//         tuple val(idPatient), val(idSample), emit: recalTableTSVnoInt
+
+//     //when: params.known_indels
+
+//     script:
+//     dbsnpOptions = params.dbsnp ? "--known-sites ${dbsnp}" : ""
+//     knownOptions = params.known_indels ? knownIndels.collect{"--known-sites ${it}"}.join(' ') : ""
+//     prefix = params.no_intervals ? "" : "${intervalBed.baseName}_"
+//     intervalsOptions = params.no_intervals ? "" : "-L ${intervalBed}"
+//     // TODO: --use-original-qualities ???
+//     """
+//     gatk --java-options -Xmx${task.memory.toGiga()}g \
+//         BaseRecalibrator \
+//         -I ${bam} \
+//         -O ${prefix}${idSample}.recal.table \
+//         --tmp-dir . \
+//         -R ${fasta} \
+//         ${intervalsOptions} \
+//         ${dbsnpOptions} \
+//         ${knownOptions} \
+//         --verbosity INFO
+//     """
+// }
\ No newline at end of file
diff --git a/modules/local/mark_duplicates.nf b/modules/local/mark_duplicates.nf
index 40a6b4fad6..c2cae3d722 100644
--- a/modules/local/mark_duplicates.nf
+++ b/modules/local/mark_duplicates.nf
@@ -7,16 +7,16 @@ process MARK_DUPLICATES {
              else "Preprocessing/${idSample}/DuplicatesMarked/${it}"
          }
      input:
-         tuple idPatient, idSample, path("${idSample}.bam") 
+         tuple val(idPatient), val(idSample), path("${idSample}.bam") 
      output:
-         tuple idPatient, idSample, path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit:  bam_duplicates_marked
-         tuple idPatient, idSample, emit: tsv_bam_duplicates_marked
-         path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet)
+         tuple val(idPatient), val(idSample), path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit:  bam_duplicates_marked
+         tuple val(idPatient), val(idSample), emit: tsv_bam_duplicates_marked
+         path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report 
           
      script:
      markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" +  (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
-     //metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics"
-     metrics = "-M ${idSample}.bam.metrics"
+     metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${idSample}.bam.metrics"
+
      if (params.no_gatk_spark)
      """
      gatk --java-options ${markdup_java_options} \
diff --git a/modules/local/merge_mapped_bam.nf b/modules/local/merge_mapped_bam.nf
index 8e70342bbd..66758c0d11 100644
--- a/modules/local/merge_mapped_bam.nf
+++ b/modules/local/merge_mapped_bam.nf
@@ -4,10 +4,10 @@ process MERGE_BAM_MAPPED {
     tag "${patient}-${sample}"
 
     input:
-        tuple patient, sample, run, path(bam), path(bai)
+        tuple val(patient), val(sample), val(run), path(bam), path(bai)
 
     output:
-        tuple patient, sample, path("${sample}.bam"), path("${sample}.bam.bai")
+        tuple val(patient), val(sample), path("${sample}.bam"), path("${sample}.bam.bai")
 
     script:
     """
diff --git a/modules/local/trim_galore.nf b/modules/local/trim_galore.nf
index d0bf00ff88..a49ccda2d7 100644
--- a/modules/local/trim_galore.nf
+++ b/modules/local/trim_galore.nf
@@ -16,7 +16,7 @@ process TRIM_GALORE {
 
     output:
         path "*.{html,zip,txt}", emit: report
-        tuple idPatient, idSample, idRun, path("${idSample}_${idRun}_R1_val_1.fq.gz"), path("${idSample}_${idRun}_R2_val_2.fq.gz"), emit: trimmed_reads
+        tuple val(idPatient), val(idSample), val(idRun), path("${idSample}_${idRun}_R1_val_1.fq.gz"), path("${idSample}_${idRun}_R2_val_2.fq.gz"), emit: trimmed_reads
 
     script:
     // Calculate number of --cores for TrimGalore based on value of task.cpus

From 9bb93fc6b3811ace3773bb13716fd6c1e084c875 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <friederike.hanssen@qbic.uni-tuebingen.de>
Date: Wed, 22 Jul 2020 11:20:29 +0200
Subject: [PATCH 6/6] Bump nf version in badge

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 353d9e3194..29fab9a95d 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 > **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing**
 
-[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.06.0--edge-brightgreen.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.0--RC1-brightgreen.svg)](https://www.nextflow.io/)
 [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/)
 [![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291)