Skip to content

Commit

Permalink
Merge pull request #252 from FriederikeHanssen/add_index
Browse files Browse the repository at this point in the history
Use 20.07-RC1, add skip_qc to MD
  • Loading branch information
maxulysse authored Jul 22, 2020
2 parents 55e1679 + 9bb93fc commit 5bceacf
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 75 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
matrix:
# Nextflow versions: check pipeline minimum and current latest
nxf_ver: ['20.04.1', '']
nxf_ver: ['20.07.0-RC1', '']
steps:
- uses: actions/checkout@v2
- name: Install Nextflow
Expand Down Expand Up @@ -42,7 +42,7 @@ jobs:
sudo mv nextflow /usr/local/bin/
env:
# Only check Nextflow pipeline minimum version
NXF_VER: '20.04.1'
NXF_VER: '20.07.0-RC1'
- name: Pull docker image
run: |
docker pull nfcore/sarek:dev
Expand All @@ -65,7 +65,7 @@ jobs:
sudo mv nextflow /usr/local/bin/
env:
# Only check Nextflow pipeline minimum version
NXF_VER: '19.10.0'
NXF_VER: '20.07.0-RC1'
- name: Pull docker image
run: docker pull nfcore/sarek:dev
- name: Get test data
Expand Down Expand Up @@ -93,7 +93,7 @@ jobs:
sudo mv nextflow /usr/local/bin/
env:
# Only check Nextflow pipeline minimum version
NXF_VER: '19.10.0'
NXF_VER: '20.07.0-RC1'
- name: Pull docker image
run: docker pull nfcore/sarek:dev
- name: Run test for minimal genomes
Expand All @@ -114,7 +114,7 @@ jobs:
sudo mv nextflow /usr/local/bin/
env:
# Only check Nextflow pipeline minimum version
NXF_VER: '19.10.0'
NXF_VER: '20.07.0-RC1'
- name: Pull docker image
run: docker pull nfcore/sarek:dev
- name: Run ${{ matrix.profile }} test
Expand Down Expand Up @@ -145,7 +145,7 @@ jobs:
sudo mv nextflow /usr/local/bin/
env:
# Only check Nextflow pipeline minimum version
NXF_VER: '19.10.0'
NXF_VER: '20.07.0-RC1'
- name: Pull docker image
run: docker pull nfcore/sarek:dev
- name: Run ${{ matrix.tool }} test
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a

- [#238](https://github.com/nf-core/sarek/pull/238) - Add subworkflow for building all the indices
- [#241](https://github.com/nf-core/sarek/pull/241) - Add modules and workflows parts for preprocessing steps

## [dev](https://github.com/nf-core/sarek/tree/dev)

- [#234](https://github.com/nf-core/sarek/pull/234) - Switching to DSL2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

> **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing**
[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.06.0--edge-brightgreen.svg)](https://www.nextflow.io/)
[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.0--RC1-brightgreen.svg)](https://www.nextflow.io/)
[![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/)
[![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291)

Expand Down
78 changes: 19 additions & 59 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ nf-core/sarek:
--------------------------------------------------------------------------------
*/

nextflow.preview.dsl = 2
nextflow.enable.dsl=2

// Print help message if required

Expand Down Expand Up @@ -258,8 +258,9 @@ include { BWAMEM2_MEM } from './modules/local/bwamem2_mem.nf'
include { GET_SOFTWARE_VERSIONS } from './modules/local/get_software_versions'
include { OUTPUT_DOCUMENTATION } from './modules/local/output_documentation'
include { TRIM_GALORE } from './modules/local/trim_galore.nf'
include { MERGE_BAM_MAPPED } from './modules/local/merge_mapped_bam' addParams(params)
include { MARK_DUPLICATES } from './modules/local/mark_duplicates' params(params)
include { MERGE_BAM_MAPPED } from './modules/local/merge_mapped_bam'
include { MARK_DUPLICATES } from './modules/local/mark_duplicates' addParams(skip_qc: skip_qc)
//include { BASE_RECALIBRATION } from './modules/local/base_recalibration' params(params)

/*
================================================================================
Expand Down Expand Up @@ -346,8 +347,6 @@ workflow {
pon_tbi = params.pon ? params.pon_index ?: BUILD_INDICES.out.pon_tbi : Channel.empty()

// PREPROCESSING
intervals_bed.dump(tag:'bedintervals')

if(!('fastqc' in skip_qc))
result_fastqc = FASTQC(input_sample)
else
Expand Down Expand Up @@ -380,8 +379,20 @@ workflow {

bam_mapped.view()

mark_duplicates_report = !(params.skip_markduplicates) ? MARK_DUPLICATES(bam_mapped).duplicates_marked_report : Channel.empty()
if(!(params.skip_markduplicates)){
MARK_DUPLICATES(bam_mapped)
mark_duplicates_report = MARK_DUPLICATES.out.duplicates_marked_report
bam_duplicates_marked = MARK_DUPLICATES.out.bam_duplicates_marked
}
else {
mark_duplicates_report = Channel.empty()
bam_duplicates_marked = Channel.empty()
}

bamBaseRecalibrator = bam_duplicates_marked.combine(BUILD_INDICES.out.intervals_bed)

//BASE_RECALIBRATION(bamBaseRecalibrator,dbsnp, dbsnp_index,fasta,)

OUTPUT_DOCUMENTATION(
output_docs,
output_docs_images)
Expand Down Expand Up @@ -546,6 +557,7 @@ workflow.onComplete {

// (bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2)

// @Maxime: You included this process in merge_mapped_bam.nf, right?
// process IndexBamFile {
// label 'cpus_8'

Expand Down Expand Up @@ -597,56 +609,6 @@ workflow.onComplete {
// }
// // STEP 2: MARKING DUPLICATES

// process MarkDuplicates {
// label 'cpus_16'

// tag "${idPatient}-${idSample}"

// publishDir params.outdir, mode: params.publish_dir_mode,
// saveAs: {
// if (it == "${idSample}.bam.metrics") "Reports/${idSample}/MarkDuplicates/${it}"
// else "Preprocessing/${idSample}/DuplicatesMarked/${it}"
// }

// input:
// set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged

// output:
// set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into bam_duplicates_marked
// set idPatient, idSample into tsv_bam_duplicates_marked
// file ("${idSample}.bam.metrics") optional true into duplicates_marked_report

// when: !(params.skip_markduplicates)

// script:
// markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
// metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics"
// if (params.no_gatk_spark)
// """
// gatk --java-options ${markdup_java_options} \
// MarkDuplicates \
// --MAX_RECORDS_IN_RAM 50000 \
// --INPUT ${idSample}.bam \
// --METRICS_FILE ${idSample}.bam.metrics \
// --TMP_DIR . \
// --ASSUME_SORT_ORDER coordinate \
// --CREATE_INDEX true \
// --OUTPUT ${idSample}.md.bam

// mv ${idSample}.md.bai ${idSample}.md.bam.bai
// """
// else
// """
// gatk --java-options ${markdup_java_options} \
// MarkDuplicatesSpark \
// -I ${idSample}.bam \
// -O ${idSample}.md.bam \
// ${metrics} \
// --tmp-dir . \
// --create-output-bam-index true \
// --spark-master local[${task.cpus}]
// """
// }

// (tsv_bam_duplicates_marked, tsv_bam_duplicates_marked_sample) = tsv_bam_duplicates_marked.into(2)

Expand Down Expand Up @@ -681,9 +643,7 @@ workflow.onComplete {

// (bamMD, bamMDToJoin, bam_duplicates_marked) = bam_duplicates_marked.into(3)

// bamBaseRecalibrator = bamMD.combine(intBaseRecalibrator)

// bamBaseRecalibrator = bamBaseRecalibrator.dump(tag:'BAM FOR BASERECALIBRATOR')
//

// // STEP 2': SENTIEON DEDUP

Expand Down
40 changes: 40 additions & 0 deletions modules/local/base_recalibration.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// process BASE_RECALIBRATION {
// label 'cpus_1'

// tag "${idPatient}-${idSample}-${intervalBed.baseName}"

// input:
// tuple idPatient, idSample, file(bam), file(bai), file(intervalBed) //from bamBaseRecalibrator
// path dbsnp //from dbsnp
// path dbsnpIndex// from dbsnp_tbi
// path fasta //from fasta
// path dict // from dict
// path fastaFai // from fai
// path knownIndels // from known_indels
// path knownIndelsIndex // from known_indels_tbi

// output:
// tuple idPatient, idSample, file "${prefix}${idSample}.recal.table", emit: tableGatherBQSRReports
// tuple idPatient, idSample, emit: recalTableTSVnoInt

// //when: params.known_indels

// script:
// dbsnpOptions = params.dbsnp ? "--known-sites ${dbsnp}" : ""
// knownOptions = params.known_indels ? knownIndels.collect{"--known-sites ${it}"}.join(' ') : ""
// prefix = params.no_intervals ? "" : "${intervalBed.baseName}_"
// intervalsOptions = params.no_intervals ? "" : "-L ${intervalBed}"
// // TODO: --use-original-qualities ???
// """
// gatk --java-options -Xmx${task.memory.toGiga()}g \
// BaseRecalibrator \
// -I ${bam} \
// -O ${prefix}${idSample}.recal.table \
// --tmp-dir . \
// -R ${fasta} \
// ${intervalsOptions} \
// ${dbsnpOptions} \
// ${knownOptions} \
// --verbosity INFO
// """
// }
12 changes: 6 additions & 6 deletions modules/local/mark_duplicates.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ process MARK_DUPLICATES {
else "Preprocessing/${idSample}/DuplicatesMarked/${it}"
}
input:
tuple idPatient, idSample, path("${idSample}.bam")
tuple val(idPatient), val(idSample), path("${idSample}.bam")
output:
tuple idPatient, idSample, path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit: bam_duplicates_marked
tuple idPatient, idSample, emit: tsv_bam_duplicates_marked
path "${idSample}.bam.metrics", emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet)
tuple val(idPatient), val(idSample), path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit: bam_duplicates_marked
tuple val(idPatient), val(idSample), emit: tsv_bam_duplicates_marked
path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report

script:
markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
//metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics"
metrics = "-M ${idSample}.bam.metrics"
metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${idSample}.bam.metrics"

if (params.no_gatk_spark)
"""
gatk --java-options ${markdup_java_options} \
Expand Down
4 changes: 2 additions & 2 deletions modules/local/merge_mapped_bam.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ process MERGE_BAM_MAPPED {
tag "${patient}-${sample}"

input:
tuple patient, sample, run, path(bam), path(bai)
tuple val(patient), val(sample), val(run), path(bam), path(bai)

output:
tuple patient, sample, path("${sample}.bam"), path("${sample}.bam.bai")
tuple val(patient), val(sample), path("${sample}.bam"), path("${sample}.bam.bai")

script:
"""
Expand Down
2 changes: 1 addition & 1 deletion modules/local/trim_galore.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ process TRIM_GALORE {

output:
path "*.{html,zip,txt}", emit: report
tuple idPatient, idSample, idRun, path("${idSample}_${idRun}_R1_val_1.fq.gz"), path("${idSample}_${idRun}_R2_val_2.fq.gz"), emit: trimmed_reads
tuple val(idPatient), val(idSample), val(idRun), path("${idSample}_${idRun}_R1_val_1.fq.gz"), path("${idSample}_${idRun}_R2_val_2.fq.gz"), emit: trimmed_reads

script:
// Calculate number of --cores for TrimGalore based on value of task.cpus
Expand Down

0 comments on commit 5bceacf

Please sign in to comment.