Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Enhance Qualimap BAMQC #693

Merged
merged 6 commits into from
Nov 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- [#671](https://github.com/SciLifeLab/Sarek/pull/671) - publishDir modes are now params
- [#677](https://github.com/SciLifeLab/Sarek/pull/677) - Update docs
- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Update old awsbatch configuration
- [#693](https://github.com/SciLifeLab/Sarek/pull/693) - Qualimap bamQC is now ran after mapping and after recalibration for better QC

### `Fixed`

Expand Down
2 changes: 1 addition & 1 deletion conf/aws-batch.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/

params {
genome_base = params.genome == 'GRCh37' ? "s3://sarek-references/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://sarek-references/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
genome_base = params.genome == 'iGRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
publishDirMode = 'copy'
}

Expand Down
5 changes: 4 additions & 1 deletion conf/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ process {
withName:RunAscat {
container = "${params.repository}/r-base:${params.tag}"
}
withName:RunBamQC {
withName:RunBamQCmapped {
container = "${params.repository}/sarek:${params.tag}"
}
withName:RunBamQCrecalibrated {
container = "${params.repository}/sarek:${params.tag}"
}
withName:RunBcftoolsStats {
Expand Down
17 changes: 15 additions & 2 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ params {
//AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf"
//AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx"
}
'iGRCh37' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf"
dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx"
genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
snpeffDb = "GRCh37.75"
}
'iGRCh38' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
Expand All @@ -51,8 +64,8 @@ params {
genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,Homo_sapiens_assembly38.known_indels}.vcf.gz"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
snpeffDb = "GRCh38.86"
}
'smallGRCh37' {
Expand Down
5 changes: 4 additions & 1 deletion conf/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ process {
withName:RunAscat {
container = "${params.containerPath}/r-base-${params.tag}.simg"
}
withName:RunBamQC {
withName:RunBamQCmapped {
container = "${params.containerPath}/sarek-${params.tag}.simg"
}
withName:RunBamQCrecalibrated {
container = "${params.containerPath}/sarek-${params.tag}.simg"
}
withName:RunBcftoolsStats {
Expand Down
6 changes: 5 additions & 1 deletion conf/uppmax-localhost.config
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ process {
withName:RunAscat {
memory = {params.singleCPUMem * 2 * task.attempt}
}
withName:RunBamQC {
withName:RunBamQCmapped {
cpus = 16
memory = {params.totalMemory}
}
withName:RunBamQCrecalibrated {
cpus = 16
memory = {params.totalMemory}
}
Expand Down
6 changes: 5 additions & 1 deletion conf/uppmax-slurm.config
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ process {
memory = {params.singleCPUMem * 2 * task.attempt}
queue = 'core'
}
withName:RunBamQC {
withName:RunBamQCmapped {
cpus = 16
}
withName:RunBamQCrecalibrated {
cpus = 16
}
withName:RunBcftoolsStats {
cpus = 1
Expand Down
46 changes: 1 addition & 45 deletions germlineVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ kate: syntax groovy; space-indent on; indent-width 2;
https://github.com/SciLifeLab/Sarek/README.md
--------------------------------------------------------------------------------
Processes overview
- RunSamtoolsStats - Run Samtools stats on recalibrated BAM files
- RunBamQC - Run qualimap BamQC on recalibrated BAM files
- CreateIntervalBeds - Create and sort intervals into bed files
- RunHaplotypecaller - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes)
- RunGenotypeGVCFs - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes)
Expand Down Expand Up @@ -89,7 +87,7 @@ if (params.verbose) bamFiles = bamFiles.view {
}

// assume input is recalibrated, ignore explicitBqsrNeeded
(bamForBamQC, bamForSamToolsStats, recalibratedBam, recalTables) = bamFiles.into(4)
(recalibratedBam, recalTables) = bamFiles.into(2)

recalTables = recalTables.map{ it + [null] } // null recalibration table means: do not use --BQSR

Expand All @@ -101,48 +99,6 @@ if (params.verbose) recalibratedBam = recalibratedBam.view {
Files : [${it[3].fileName}, ${it[4].fileName}]"
}

process RunSamtoolsStats {
tag {idPatient + "-" + idSample}

publishDir directoryMap.samtoolsStats, mode: params.publishDirMode

input:
set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats

output:
file ("${bam}.samtools.stats.out") into samtoolsStatsReport

when: !params.noReports

script: QC.samtoolsStats(bam)
}

if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
"SAMTools stats report:\n\
File : [${it.fileName}]"
}

process RunBamQC {
tag {idPatient + "-" + idSample}

publishDir directoryMap.bamQC, mode: params.publishDirMode

input:
set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC

output:
file(idSample) into bamQCreport

when: !params.noReports && !params.noBAMQC

script: QC.bamQC(bam,idSample,task.memory)
}

if (params.verbose) bamQCreport = bamQCreport.view {
"BamQC report:\n\
Dir : [${it.fileName}]"
}

// Here we have a recalibrated bam set, but we need to separate the bam files based on patient status.
// The sample tsv config file which is formatted like: "subject status sample lane fastq1 fastq2"
// cf fastqFiles channel, I decided just to add _status to the sample name to have less changes to do.
Expand Down
11 changes: 0 additions & 11 deletions lib/QC.groovy
Original file line number Diff line number Diff line change
@@ -1,15 +1,4 @@
class QC {
// Run bamQC on vcf file
static def bamQC(bam, idSample, mem) {
"""
qualimap --java-mem-size=${mem.toGiga()}G \
bamqc \
-bam ${bam} \
-outdir ${idSample} \
-outformat HTML
"""
}

// Run bcftools on vcf file
static def bcftools(vcf) {
"""
Expand Down
60 changes: 53 additions & 7 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ kate: syntax groovy; space-indent on; indent-width 2;
- CreateRecalibrationTable - Create Recalibration Table with BaseRecalibrator
- RecalibrateBam - Recalibrate Bam with PrintReads
- RunSamtoolsStats - Run Samtools stats on recalibrated BAM files
- RunBamQC - Run qualimap BamQC on recalibrated BAM files
- RunBamQCmapped - Run qualimap BamQC on mapped BAM files
- RunBamQCrecalibrated - Run qualimap BamQC on recalibrated BAM files
================================================================================
= C O N F I G U R A T I O N =
================================================================================
Expand Down Expand Up @@ -73,7 +74,6 @@ if (!params.sample && !params.sampleDir) {
}

// Set up the fastqFiles and bamFiles channels. One of them remains empty
// Except for step annotate, in which both stay empty
fastqFiles = Channel.empty()
bamFiles = Channel.empty()
if (tsvPath) {
Expand Down Expand Up @@ -152,7 +152,7 @@ process MapReads {
set file(genomeFile), file(bwaIndex) from Channel.value([referenceMap.genomeFile, referenceMap.bwaIndex])

output:
set idPatient, status, idSample, idRun, file("${idRun}.bam") into mappedBam
set idPatient, status, idSample, idRun, file("${idRun}.bam") into (mappedBam, mappedBamForQC)

when: step == 'mapping' && !params.onlyQC

Expand All @@ -174,6 +174,40 @@ if (params.verbose) mappedBam = mappedBam.view {
File : [${it[4].fileName}]"
}

process RunBamQCmapped {
tag {idPatient + "-" + idSample}

publishDir directoryMap.bamQC, mode: params.publishDirMode

input:
set idPatient, status, idSample, idRun, file(bam) from mappedBamForQC

output:
file(idSample) into bamQCmappedReport

when: !params.noReports && !params.noBAMQC

script:
"""
qualimap --java-mem-size=${task.memory.toGiga()}G \
bamqc \
-bam ${bam} \
--paint-chromosome-limits \
--genome-gc-distr HUMAN \
-nt ${task.cpus} \
-skip-duplicated \
--skip-dup-mode 0 \
-outdir ${idSample} \
-outformat HTML
"""
}

if (params.verbose) bamQCmappedReport = bamQCmappedReport.view {
"BamQC report:\n\
Dir : [${it.fileName}]"
}


// Sort bam whether they are standalone or should be merged
// Borrowed code from https://github.com/guigolab/chip-nf

Expand Down Expand Up @@ -416,7 +450,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
File : [${it.fileName}]"
}

process RunBamQC {
process RunBamQCrecalibrated {
tag {idPatient + "-" + idSample}

publishDir directoryMap.bamQC, mode: params.publishDirMode
Expand All @@ -425,14 +459,26 @@ process RunBamQC {
set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC

output:
file(idSample) into bamQCreport
file(idSample) into bamQCrecalibratedReport

when: !params.noReports && !params.noBAMQC

script: QC.bamQC(bam,idSample,task.memory)
script:
"""
qualimap --java-mem-size=${task.memory.toGiga()}G \
bamqc \
-bam ${bam} \
--paint-chromosome-limits \
--genome-gc-distr HUMAN \
-nt ${task.cpus} \
-skip-duplicated \
--skip-dup-mode 0 \
-outdir ${idSample} \
-outformat HTML
"""
}

if (params.verbose) bamQCreport = bamQCreport.view {
if (params.verbose) bamQCrecalibratedReport = bamQCrecalibratedReport.view {
"BamQC report:\n\
Dir : [${it.fileName}]"
}
Expand Down
46 changes: 1 addition & 45 deletions somaticVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ kate: syntax groovy; space-indent on; indent-width 2;
https://github.com/SciLifeLab/Sarek/README.md
--------------------------------------------------------------------------------
Processes overview
- RunSamtoolsStats - Run Samtools stats on recalibrated BAM files
- RunBamQC - Run qualimap BamQC on recalibrated BAM files
- CreateIntervalBeds - Create and sort intervals into bed files
- RunMutect2 - Run MuTect2 for Variant Calling (Parallelized processes)
- RunFreeBayes - Run FreeBayes for Variant Calling (Parallelized processes)
Expand Down Expand Up @@ -98,7 +96,7 @@ if (params.verbose) bamFiles = bamFiles.view {
}

// assume input is recalibrated, ignore explicitBqsrNeeded
(bamForBamQC, bamForSamToolsStats, recalibratedBam, recalTables) = bamFiles.into(4)
(recalibratedBam, recalTables) = bamFiles.into(2)

recalTables = recalTables.map{ it + [null] } // null recalibration table means: do not use --BQSR

Expand All @@ -110,48 +108,6 @@ if (params.verbose) recalibratedBam = recalibratedBam.view {
Files : [${it[3].fileName}, ${it[4].fileName}]"
}

process RunSamtoolsStats {
tag {idPatient + "-" + idSample}

publishDir directoryMap.samtoolsStats, mode: params.publishDirMode

input:
set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats

output:
file ("${bam}.samtools.stats.out") into samtoolsStatsReport

when: !params.noReports

script: QC.samtoolsStats(bam)
}

if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
"SAMTools stats report:\n\
File : [${it.fileName}]"
}

process RunBamQC {
tag {idPatient + "-" + idSample}

publishDir directoryMap.bamQC, mode: params.publishDirMode

input:
set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC

output:
file(idSample) into bamQCreport

when: !params.noReports && !params.noBAMQC

script: QC.bamQC(bam,idSample,task.memory)
}

if (params.verbose) bamQCreport = bamQCreport.view {
"BamQC report:\n\
Dir : [${it.fileName}]"
}

// Here we have a recalibrated bam set, but we need to separate the bam files based on patient status.
// The sample tsv config file which is formatted like: "subject status sample lane fastq1 fastq2"
// cf fastqFiles channel, I decided just to add _status to the sample name to have less changes to do.
Expand Down