Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 20.07-RC1, add skip_qc to MD #252

Merged
merged 6 commits into from
Jul 22, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a

- [#238](https://github.com/nf-core/sarek/pull/238) - Add subworkflow for building all the indices
- [#241](https://github.com/nf-core/sarek/pull/241) - Add modules and workflows parts for preprocessing steps

## [dev](https://github.com/nf-core/sarek/tree/dev)

- [#234](https://github.com/nf-core/sarek/pull/234) - Switching to DSL2
Expand Down
76 changes: 18 additions & 58 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,9 @@ include { BWAMEM2_MEM } from './modules/local/bwamem2_mem.nf'
include { GET_SOFTWARE_VERSIONS } from './modules/local/get_software_versions'
include { OUTPUT_DOCUMENTATION } from './modules/local/output_documentation'
include { TRIM_GALORE } from './modules/local/trim_galore.nf'
include { MERGE_BAM_MAPPED } from './modules/local/merge_mapped_bam' addParams(params)
include { MARK_DUPLICATES } from './modules/local/mark_duplicates' params(params)
include { MERGE_BAM_MAPPED } from './modules/local/merge_mapped_bam'
include { MARK_DUPLICATES } from './modules/local/mark_duplicates' addParams(skip_qc: skip_qc)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, now I understand better

//include { BASE_RECALIBRATION } from './modules/local/base_recalibration' params(params)

/*
================================================================================
Expand Down Expand Up @@ -346,8 +347,6 @@ workflow {
pon_tbi = params.pon ? params.pon_index ?: BUILD_INDICES.out.pon_tbi : Channel.empty()

// PREPROCESSING
intervals_bed.dump(tag:'bedintervals')

if(!('fastqc' in skip_qc))
result_fastqc = FASTQC(input_sample)
else
Expand Down Expand Up @@ -380,8 +379,20 @@ workflow {

bam_mapped.view()

mark_duplicates_report = !(params.skip_markduplicates) ? MARK_DUPLICATES(bam_mapped).duplicates_marked_report : Channel.empty()
if(!(params.skip_markduplicates)){
MARK_DUPLICATES(bam_mapped)
mark_duplicates_report = MARK_DUPLICATES.out.duplicates_marked_report
bam_duplicates_marked = MARK_DUPLICATES.out.bam_duplicates_marked
}
else {
mark_duplicates_report = Channel.empty()
bam_duplicates_marked = Channel.empty()
}

bamBaseRecalibrator = bam_duplicates_marked.combine(BUILD_INDICES.out.intervals_bed)

//BASE_RECALIBRATION(bamBaseRecalibrator,dbsnp, dbsnp_index,fasta,)

OUTPUT_DOCUMENTATION(
output_docs,
output_docs_images)
Expand Down Expand Up @@ -546,6 +557,7 @@ workflow.onComplete {

// (bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2)

//@Maxime: You included this process in merged_bam.nf, right?
// process IndexBamFile {
// label 'cpus_8'

Expand Down Expand Up @@ -597,56 +609,6 @@ workflow.onComplete {
// }
// // STEP 2: MARKING DUPLICATES

// process MarkDuplicates {
// label 'cpus_16'

// tag "${idPatient}-${idSample}"

// publishDir params.outdir, mode: params.publish_dir_mode,
// saveAs: {
// if (it == "${idSample}.bam.metrics") "Reports/${idSample}/MarkDuplicates/${it}"
// else "Preprocessing/${idSample}/DuplicatesMarked/${it}"
// }

// input:
// set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged

// output:
// set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into bam_duplicates_marked
// set idPatient, idSample into tsv_bam_duplicates_marked
// file ("${idSample}.bam.metrics") optional true into duplicates_marked_report

// when: !(params.skip_markduplicates)

// script:
// markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
// metrics = 'markduplicates' in skip_qc ? '' : "-M ${idSample}.bam.metrics"
// if (params.no_gatk_spark)
// """
// gatk --java-options ${markdup_java_options} \
// MarkDuplicates \
// --MAX_RECORDS_IN_RAM 50000 \
// --INPUT ${idSample}.bam \
// --METRICS_FILE ${idSample}.bam.metrics \
// --TMP_DIR . \
// --ASSUME_SORT_ORDER coordinate \
// --CREATE_INDEX true \
// --OUTPUT ${idSample}.md.bam

// mv ${idSample}.md.bai ${idSample}.md.bam.bai
// """
// else
// """
// gatk --java-options ${markdup_java_options} \
// MarkDuplicatesSpark \
// -I ${idSample}.bam \
// -O ${idSample}.md.bam \
// ${metrics} \
// --tmp-dir . \
// --create-output-bam-index true \
// --spark-master local[${task.cpus}]
// """
// }

// (tsv_bam_duplicates_marked, tsv_bam_duplicates_marked_sample) = tsv_bam_duplicates_marked.into(2)

Expand Down Expand Up @@ -681,9 +643,7 @@ workflow.onComplete {

// (bamMD, bamMDToJoin, bam_duplicates_marked) = bam_duplicates_marked.into(3)

// bamBaseRecalibrator = bamMD.combine(intBaseRecalibrator)

// bamBaseRecalibrator = bamBaseRecalibrator.dump(tag:'BAM FOR BASERECALIBRATOR')
//

// // STEP 2': SENTIEON DEDUP

Expand Down
2 changes: 1 addition & 1 deletion modules/local/mark_duplicates.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process MARK_DUPLICATES {
output:
tuple idPatient, idSample, path("${idSample}.md.bam"), path("${idSample}.md.bam.bai"), emit: bam_duplicates_marked
tuple idPatient, idSample, emit: tsv_bam_duplicates_marked
path "${idSample}.bam.metrics", emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet)
path "${idSample}.bam.metrics", optional : true, emit: duplicates_marked_report //is optional , applies when skip_qc is used(not implemented yet)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we will need to rename this option.

You can't actually skip MarkDuplicates with skip_qc; listing it there only means the metrics report is not kept.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. So maybe we should name it skip_markduplicates_report and make it a separate parameter, rather than an entry in the skip_qc list?


script:
markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
Expand Down