From c758a88b96735488c369470a1cac0e81f1a4e64b Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 22 Jul 2020 10:26:00 +0200 Subject: [PATCH 1/6] Use 20.07-RC1, add skip_qc to MD --- main.nf | 76 ++++++++------------------------ modules/local/mark_duplicates.nf | 2 +- 2 files changed, 19 insertions(+), 59 deletions(-) diff --git a/main.nf b/main.nf index e0dc8286ff..f480cfda72 100644 --- a/main.nf +++ b/main.nf @@ -258,8 +258,9 @@ include { BWAMEM2_MEM } from './modules/local/bwamem2_mem.nf' include { GET_SOFTWARE_VERSIONS } from './modules/local/get_software_versions' include { OUTPUT_DOCUMENTATION } from './modules/local/output_documentation' include { TRIM_GALORE } from './modules/local/trim_galore.nf' -include { MERGE_BAM_MAPPED } from './modules/local/merge_mapped_bam' addParams(params) -include { MARK_DUPLICATES } from './modules/local/mark_duplicates' params(params) +include { MERGE_BAM_MAPPED } from './modules/local/merge_mapped_bam' +include { MARK_DUPLICATES } from './modules/local/mark_duplicates' addParams(skip_qc: skip_qc) +//include { BASE_RECALIBRATION } from './modules/local/base_recalibration' params(params) /* ================================================================================ @@ -346,8 +347,6 @@ workflow { pon_tbi = params.pon ? params.pon_index ?: BUILD_INDICES.out.pon_tbi : Channel.empty() // PREPROCESSING - intervals_bed.dump(tag:'bedintervals') - if(!('fastqc' in skip_qc)) result_fastqc = FASTQC(input_sample) else @@ -380,8 +379,20 @@ workflow { bam_mapped.view() - mark_duplicates_report = !(params.skip_markduplicates) ? MARK_DUPLICATES(bam_mapped).duplicates_marked_report : Channel.empty() + if(!(params.skip_markduplicates)){ + MARK_DUPLICATES(bam_mapped) + mark_duplicates_report = MARK_DUPLICATES.out.duplicates_marked_report + bam_duplicates_marked = MARK_DUPLICATES.out.bam_duplicates_marked + } + else { + mark_duplicates_report = Channel.empty() + bam_duplicates_marked = Channel.empty() + } + bamBaseRecalibrator = bam_duplicates_marked.combine(BUILD_INDICES.out.intervals_bed) + + //BASE_RECALIBRATION(bamBaseRecalibrator,dbsnp, dbsnp_index,fasta,) + OUTPUT_DOCUMENTATION( output_docs, output_docs_images) @@ -546,6 +557,7 @@ workflow.onComplete { // (bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2) +//@Maxime: You included this process in merged_bam.nf, right? // process IndexBamFile { // label 'cpus_8' @@ -597,56 +609,6 @@ workflow.onComplete { // } // // STEP 2: MARKING DUPLICATES -// process MarkDuplicates { -// label 'cpus_16' - -// tag "${idPatient}-${idSample}" - -// publishDir params.outdir, mode: params.publish_dir_mode, -// saveAs: { -// if (it == "${idSample}.bam.metrics") "Reports/${idSample}/MarkDuplicates/${it}" -// else "Preprocessing/${idSample}/DuplicatesMarked/${it}" -// } - -// input: -// set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged - -// output: -// set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into bam_duplicates_marked -// set idPatient, idSample into tsv_bam_duplicates_marked -// file ("${idSample}.bam.metrics") optional true into duplicates_marked_report - -// when: !(params.skip_markduplicates) - -// script: -// markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" -// metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics" -// if (params.no_gatk_spark) -// """ -// gatk --java-options ${markdup_java_options} \ -// MarkDuplicates \ -// --MAX_RECORDS_IN_RAM 50000 \ -// --INPUT ${idSample}.bam \ -// --METRICS_FILE ${idSample}.bam.metrics \ -// --TMP_DIR . \ -// --ASSUME_SORT_ORDER coordinate \ -// --CREATE_INDEX true \ -// --OUTPUT ${idSample}.md.bam - -// mv ${idSample}.md.bai ${idSample}.md.bam.bai -// """ -// else -// """ -// gatk --java-options ${markdup_java_options} \ -// MarkDuplicatesSpark \ -// -I ${idSample}.bam \ -// -O ${idSample}.md.bam \ -// ${metrics} \ -// --tmp-dir . \ -// --create-output-bam-index true \ -// --spark-master local[${task.cpus}] -// """ -// } // (tsv_bam_duplicates_marked, tsv_bam_duplicates_marked_sample) = tsv_bam_duplicates_marked.into(2) @@ -681,9 +643,7 @@ workflow.onComplete { // (bamMD, bamMDToJoin, bam_duplicates_marked) = bam_duplicates_marked.into(3) -// bamBaseRecalibrator = bamMD.combine(intBaseRecalibrator) - -// bamBaseRecalibrator = bamBaseRecalibrator.dump(tag:'BAM FOR BASERECALIBRATOR') +// // // STEP 2': SENTIEON DEDUP diff --git a/modules/local/mark_duplicates.nf b/modules/local/mark_duplicates.nf index fe1f9e8c2f..40a6b4fad6 100644 --- a/modules/local/mark_duplicates.nf +++ b/modules/local/mark_duplicates.nf @@ -11,7 +11,7 @@ process MARK_DUPLICATES { output: tuple idPatient, idSample, path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit: bam_duplicates_marked tuple idPatient, idSample, emit: tsv_bam_duplicates_marked - path "${idSample}.bam.metrics", emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet) + path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet) script: markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" From b6552f2520fc64da26cd3a1cc1df7749e19d4691 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 22 Jul 2020 10:33:57 +0200 Subject: [PATCH 2/6] Add blank line for linting --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bac12f4210..4d03680099 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#238](https://github.com/nf-core/sarek/pull/238) -Add subworkflow for building all the indices - [#241](https://github.com/nf-core/sarek/pull/241) -Add modules and workflows parts for preprocessing steps + ## [dev](https://github.com/nf-core/sarek/tree/dev) - [#234](https://github.com/nf-core/sarek/pull/234) -Switching to DSL2 From f34fc2fb56cfdb184df9e66168d99e4bf9242319 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 22 Jul 2020 10:39:46 +0200 Subject: [PATCH 3/6] Bump minimal version to 20.07 --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 477455964d..3fa1d1f8c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['20.04.1', ''] + nxf_ver: ['20.07.0', ''] steps: - uses: actions/checkout@v2 - name: Install Nextflow @@ -42,7 +42,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '20.04.1' + NXF_VER: '20.07.0' - name: Pull docker image run: | docker pull nfcore/sarek:dev @@ -65,7 +65,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '19.10.0' + NXF_VER: '20.07.0' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Get test data @@ -114,7 +114,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '19.10.0' + NXF_VER: '20.07.0' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Run ${{ matrix.profile }} test @@ -145,7 +145,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '19.10.0' + NXF_VER: '20.07.0' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Run ${{ matrix.tool }} test From b151dd566b5cb7fbdcecf70cd1e24d438a8ed440 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 22 Jul 2020 10:44:40 +0200 Subject: [PATCH 4/6] Fix version naming --- .github/workflows/ci.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3fa1d1f8c3..ecaf2237ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['20.07.0', ''] + nxf_ver: ['20.07.0-RC1', ''] steps: - uses: actions/checkout@v2 - name: Install Nextflow @@ -42,7 +42,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '20.07.0' + NXF_VER: '20.07.0-RC1' - name: Pull docker image run: | docker pull nfcore/sarek:dev @@ -65,7 +65,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '20.07.0' + NXF_VER: '20.07.0-RC1' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Get test data @@ -93,7 +93,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '19.10.0' + NXF_VER: '20.07.0-RC1' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Run test for minimal genomes @@ -114,7 +114,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '20.07.0' + NXF_VER: '20.07.0-RC1' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Run ${{ matrix.profile }} test @@ -145,7 +145,7 @@ jobs: sudo mv nextflow /usr/local/bin/ env: # Only check Nextflow pipeline minimum version - NXF_VER: '20.07.0' + NXF_VER: '20.07.0-RC1' - name: Pull docker image run: docker pull nfcore/sarek:dev - name: Run ${{ matrix.tool }} test From 7c78d31ffa50192c9d05b790402569d1b9c8d005 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 22 Jul 2020 10:53:46 +0200 Subject: [PATCH 5/6] use nextflow.enable.dsl and fix some minor things for that --- main.nf | 2 +- modules/local/base_recalibration.nf | 40 +++++++++++++++++++++++++++++ modules/local/mark_duplicates.nf | 12 ++++----- modules/local/merge_mapped_bam.nf | 4 +-- modules/local/trim_galore.nf | 2 +- 5 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 modules/local/base_recalibration.nf diff --git a/main.nf b/main.nf index f480cfda72..e742ec8568 100644 --- a/main.nf +++ b/main.nf @@ -19,7 +19,7 @@ nf-core/sarek: -------------------------------------------------------------------------------- */ -nextflow.preview.dsl = 2 +nextflow.enable.dsl=2 // Print help message if required diff --git a/modules/local/base_recalibration.nf b/modules/local/base_recalibration.nf new file mode 100644 index 0000000000..9eca053656 --- /dev/null +++ b/modules/local/base_recalibration.nf @@ -0,0 +1,40 @@ +// process BASE_RECALIBRATION { +// label 'cpus_1' + +// tag "${idPatient}-${idSample}-${intervalBed.baseName}" + +// input: +// tuple idPatient, idSample, file(bam), file(bai), file(intervalBed) //from bamBaseRecalibrator +// path dbsnp //from dbsnp +// path dbsnpIndex// from dbsnp_tbi +// path fasta //from fasta +// path dict // from dict +// path fastaFai // from fai +// path knownIndels // from known_indels +// path knownIndelsIndex // from known_indels_tbi + +// output: +// tuple idPatient, idSample, file "${prefix}${idSample}.recal.table", emit: tableGatherBQSRReports +// tuple idPatient, idSample, emit: recalTableTSVnoInt + +// //when: params.known_indels + +// script: +// dbsnpOptions = params.dbsnp ? "--known-sites ${dbsnp}" : "" +// knownOptions = params.known_indels ? knownIndels.collect{"--known-sites ${it}"}.join(' ') : "" +// prefix = params.no_intervals ? "" : "${intervalBed.baseName}_" +// intervalsOptions = params.no_intervals ? "" : "-L ${intervalBed}" +// // TODO: --use-original-qualities ??? +// """ +// gatk --java-options -Xmx${task.memory.toGiga()}g \ +// BaseRecalibrator \ +// -I ${bam} \ +// -O ${prefix}${idSample}.recal.table \ +// --tmp-dir . \ +// -R ${fasta} \ +// ${intervalsOptions} \ +// ${dbsnpOptions} \ +// ${knownOptions} \ +// --verbosity INFO +// """ +// } \ No newline at end of file diff --git a/modules/local/mark_duplicates.nf b/modules/local/mark_duplicates.nf index 40a6b4fad6..c2cae3d722 100644 --- a/modules/local/mark_duplicates.nf +++ b/modules/local/mark_duplicates.nf @@ -7,16 +7,16 @@ process MARK_DUPLICATES { else "Preprocessing/${idSample}/DuplicatesMarked/${it}" } input: - tuple idPatient, idSample, path("${idSample}.bam") + tuple val(idPatient), val(idSample), path("${idSample}.bam") output: - tuple idPatient, idSample, path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit: bam_duplicates_marked - tuple idPatient, idSample, emit: tsv_bam_duplicates_marked - path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet) + tuple val(idPatient), val(idSample), path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit: bam_duplicates_marked + tuple val(idPatient), val(idSample), emit: tsv_bam_duplicates_marked + path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report script: markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" - //metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics" - metrics = "-M ${idSample}.bam.metrics" + metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${idSample}.bam.metrics" + if (params.no_gatk_spark) """ gatk --java-options ${markdup_java_options} \ diff --git a/modules/local/merge_mapped_bam.nf b/modules/local/merge_mapped_bam.nf index 8e70342bbd..66758c0d11 100644 --- a/modules/local/merge_mapped_bam.nf +++ b/modules/local/merge_mapped_bam.nf @@ -4,10 +4,10 @@ process MERGE_BAM_MAPPED { tag "${patient}-${sample}" input: - tuple patient, sample, run, path(bam), path(bai) + tuple val(patient), val(sample), val(run), path(bam), path(bai) output: - tuple patient, sample, path("${sample}.bam"), path("${sample}.bam.bai") + tuple val(patient), val(sample), path("${sample}.bam"), path("${sample}.bam.bai") script: """ diff --git a/modules/local/trim_galore.nf b/modules/local/trim_galore.nf index d0bf00ff88..a49ccda2d7 100644 --- a/modules/local/trim_galore.nf +++ b/modules/local/trim_galore.nf @@ -16,7 +16,7 @@ process TRIM_GALORE { output: path "*.{html,zip,txt}", emit: report - tuple idPatient, idSample, idRun, path("${idSample}_${idRun}_R1_val_1.fq.gz"), path("${idSample}_${idRun}_R2_val_2.fq.gz"), emit: trimmed_reads + tuple val(idPatient), val(idSample), val(idRun), path("${idSample}_${idRun}_R1_val_1.fq.gz"), path("${idSample}_${idRun}_R2_val_2.fq.gz"), emit: trimmed_reads script: // Calculate number of --cores for TrimGalore based on value of task.cpus From 9bb93fc6b3811ace3773bb13716fd6c1e084c875 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 22 Jul 2020 11:20:29 +0200 Subject: [PATCH 6/6] Bump nf version in badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 353d9e3194..29fab9a95d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ > **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing** -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.06.0--edge-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.0--RC1-brightgreen.svg)](https://www.nextflow.io/) [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/) [![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291)