Skip to content

Commit

Permalink
add: MSIsensor to Pair Somatic Variant Calling
Browse files Browse the repository at this point in the history
  • Loading branch information
maxulysse committed Oct 22, 2020
1 parent b60be04 commit 3517511
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 72 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,13 @@ jobs:
strategy:
matrix:
# tool: [Haplotypecaller, Freebayes, Manta, mpileup, MSIsensor, Strelka, TIDDIT]
tool: [Haplotypecaller, Manta, Strelka]
tool: [Haplotypecaller, Manta, MSIsensor, Strelka]
intervals: [--no_intervals, '']
exclude:
- tool: Manta
intervals: --no_intervals
# - tool: MSIsensor
# intervals: --no_intervals
- tool: MSIsensor
intervals: --no_intervals
- tool: Strelka
intervals: --no_intervals
# - tool: TIDDIT
Expand Down
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ params {
publish_dir = "reference"
publish_files = "false"
}
'msisensor_scan' {
publish_dir = "reference"
publish_files = "false"
}
'samtools_faidx' {
publish_dir = "reference"
publish_files = "false"
Expand Down Expand Up @@ -152,6 +156,10 @@ params {
publish_dir = "variant_calling"
publish_files = ['vcf.gz':'manta', 'vcf.gz.tbi':'manta']
}
'msisensor_msi' {
publish_by_id = "true"
publish_dir = "variant_calling"
}
'strelka_somatic' {
publish_by_id = "true"
publish_dir = "variant_calling"
Expand Down
59 changes: 4 additions & 55 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ if (params.save_reference) modules['bwamem2_index'].publish_files
if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals']
if (params.save_reference) modules['dict'].publish_files = ['dict':'dict']
if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai']
if (params.save_reference) modules['msisensor_scan'].publish_files = ['list':'msi']
if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp']
if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource']
if (params.save_reference) modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels']
Expand Down Expand Up @@ -288,6 +289,7 @@ include { BUILD_INDICES } from './modules/local/subworkflow/build_indices' addPa
bwamem2_index_options: modules['bwamem2_index'],
create_intervals_bed_options: modules['create_intervals_bed'],
gatk_dict_options: modules['dict'],
msisensor_scan_options: modules['msisensor_scan'],
samtools_faidx_options: modules['samtools_faidx'],
tabix_dbsnp_options: modules['tabix_dbsnp'],
tabix_germline_resource_options: modules['tabix_germline_resource'],
Expand Down Expand Up @@ -423,6 +425,7 @@ workflow {
known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file")
pon_tbi = params.pon ? params.pon_index ? file(params.pon_index) : BUILD_INDICES.out.pon_tbi : file("${params.outdir}/no_file")

msisensor_scan = BUILD_INDICES.out.msisensor_scan
/*
================================================================================
PREPROCESSING
Expand Down Expand Up @@ -565,6 +568,7 @@ workflow {
fai,
fasta,
intervals,
msisensor_scan,
target_bed,
tools)

Expand Down Expand Up @@ -1272,61 +1276,6 @@ workflow.onComplete {
// """
// }

// // STEP MSISENSOR.1 - SCAN

// // Scan reference genome for microsatellites
// process MSIsensor_scan {
// label 'cpus_1'
// label 'memory_max'

// tag "${fasta}"

// input:
// file(fasta) from fasta
// file(fastaFai) from fai

// output:
// file "microsatellites.list" into msi_scan_ch

// when: 'msisensor' in tools

// script:
// """
// msisensor scan -d ${fasta} -o microsatellites.list
// """
// }

// // STEP MSISENSOR.2 - SCORE

// // Score the normal vs somatic pair of bams

// process MSIsensor_msi {
// label 'cpus_4'
// label 'memory_max'

// tag "${idSampleTumor}_vs_${idSampleNormal}"

// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/MSIsensor", mode: params.publish_dir_mode

// input:
// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamMsisensor
// file msiSites from msi_scan_ch

// output:
// set val("Msisensor"), idPatient, file("${idSampleTumor}_vs_${idSampleNormal}_msisensor"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_dis"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_germline"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_somatic") into msisensor_out_ch

// when: 'msisensor' in tools

// script:
// """
// msisensor msi -d ${msiSites} \
// -b 4 \
// -n ${bamNormal} \
// -t ${bamTumor} \
// -o ${idSampleTumor}_vs_${idSampleNormal}_msisensor
// """
// }

// // STEP ASCAT.1 - ALLELECOUNTER

// // Run commands and code from Malin Larsson
Expand Down
7 changes: 7 additions & 0 deletions modules/local/subworkflow/build_indices.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ params.bwa_index_options = [:]
params.bwamem2_index_options = [:]
params.create_intervals_bed_options = [:]
params.gatk_dict_options = [:]
params.msisensor_scan_options = [:]
params.samtools_faidx_options = [:]
params.tabix_dbsnp_options = [:]
params.tabix_germline_resource_options = [:]
Expand All @@ -26,6 +27,7 @@ include { HTSLIB_TABIX as TABIX_DBSNP } from '../../nf-core/softw
include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options)
include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options)
include { HTSLIB_TABIX as TABIX_PON } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options)
include { MSISENSOR_SCAN } from '../../nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options)
include { SAMTOOLS_FAIDX } from '../../nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options)

workflow BUILD_INDICES{
Expand Down Expand Up @@ -66,6 +68,10 @@ workflow BUILD_INDICES{
if (!(params.known_indels_index) && params.known_indels && ('mapping' in step || 'preparerecalibration' in step))
result_known_indels_tbi = TABIX_KNOWN_INDELS(known_indels)

result_msisensor_scan = Channel.empty()
if ('msisensor' in tools)
result_msisensor_scan = MSISENSOR_SCAN(fasta, result_fai)

result_pon_tbi = Channel.empty()
if (!(params.pon_index) && params.pon && ('tnscope' in tools || 'mutect2' in tools))
result_pon_tbi = TABIX_PON(pon)
Expand Down Expand Up @@ -107,5 +113,6 @@ workflow BUILD_INDICES{
germline_resource_tbi = result_germline_resource_tbi
intervals = result_intervals
known_indels_tbi = result_known_indels_tbi
msisensor_scan = result_msisensor_scan
pon_tbi = result_pon_tbi
}
31 changes: 20 additions & 11 deletions modules/local/subworkflow/pair_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,26 @@
================================================================================
*/

params.strelka_options = [:]
params.manta_options = [:]
params.msisensor_msi_options = [:]
params.strelka_options = [:]

include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options)
include { MANTA_SOMATIC as MANTA } from '../../nf-core/software/manta/somatic' addParams(options: params.manta_options)
include { MSISENSOR_MSI } from '../../nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options)
include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options)

workflow PAIR_VARIANT_CALLING {
take:
bam // channel: [mandatory] bam
dbsnp // channel: [mandatory] dbsnp
dbsnp_tbi // channel: [mandatory] dbsnp_tbi
dict // channel: [mandatory] dict
fai // channel: [mandatory] fai
fasta // channel: [mandatory] fasta
intervals // channel: [mandatory] intervals
target_bed // channel: [optional] target_bed
tools // list: [mandatory] list of tools
bam // channel: [mandatory] bam
dbsnp // channel: [mandatory] dbsnp
dbsnp_tbi // channel: [mandatory] dbsnp_tbi
dict // channel: [mandatory] dict
fai // channel: [mandatory] fai
fasta // channel: [mandatory] fasta
intervals // channel: [mandatory] intervals
msisensor_scan // channel: [optional] msisensor_scan
target_bed // channel: [optional] target_bed
tools // list: [mandatory] list of tools

main:

Expand Down Expand Up @@ -64,6 +67,12 @@ workflow PAIR_VARIANT_CALLING {
manta_vcf = manta_candidate_small_indels_vcf.mix(manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf)
}

if ('msisensor' in tools) {
MSISENSOR_MSI(
bam_pair,
msisensor_scan)
}

if ('strelka' in tools) {
STRELKA(
bam_pair,
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/software/manta/somatic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ process MANTA_SOMATIC {
container container

input:
tuple val(meta), path(bam_normal), path (bai_normal), path(bam_tumor), path (bai_tumor)
tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
path fasta
path fai
path target_bed
Expand Down
48 changes: 48 additions & 0 deletions modules/nf-core/software/msisensor/msi.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
include { initOptions; saveFiles; getSoftwareName } from './../functions'

// Module-level options map; empty by default and supplied by the including
// workflow through `addParams(options: ...)`.
params.options = [:]
// NOTE(review): initOptions comes from ./../functions and is not visible here;
// presumably it fills in defaults for missing keys — confirm against functions.nf.
def options = initOptions(params.options)

// Conda package spec, only set when params.enable_conda is truthy (null otherwise).
environment = params.enable_conda ? "bioconda::msisensor=0.5" : null
// Default container image (quay.io biocontainers build of msisensor 0.5).
container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
// Under Singularity, switch to the Galaxy depot image unless params.pull_docker_container is set.
if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"

/*
 * Run `msisensor msi` on one tumor/normal BAM pair.
 *
 * Inputs:
 *   - tuple of meta map, normal BAM + index, tumor BAM + index
 *   - msisensor_scan: microsatellite site list (output of `msisensor scan`)
 * Output:
 *   - tuple of meta map and every produced "*.list" file
 *     (the four msisensor result files, renamed with a ".list" extension)
 */
process MSISENSOR_MSI {
    tag "${meta.id}"

    label 'CPUS_1'
    label 'MEMORY_MAX'

    publishDir params.outdir, mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }

    conda environment
    container container

    input:
        tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
        path msisensor_scan

    output:
        tuple val(meta), path("*.list")

    script:
    def ioptions = initOptions(options)
    def prefix = ioptions.suffix ? "msisensor_${meta.id}${ioptions.suffix}" : "msisensor_${meta.id}"
    // Extra command-line arguments supplied through the module options; fall back
    // to an empty string so a missing value never renders as "null" in the command.
    def extra_args = ioptions.args ?: ''
    // The thread count follows the CPUs actually allocated to the task instead of a
    // hard-coded 4, so the tool does not oversubscribe its reservation.
    // msisensor writes <prefix>, <prefix>_dis, <prefix>_germline and <prefix>_somatic;
    // they are renamed to *.list so the output glob above captures them.
    """
    msisensor msi -d ${msisensor_scan} \
    -b ${task.cpus} \
    -t ${bam_tumor} \
    -n ${bam_normal} \
    ${extra_args} \
    -o ${prefix}
    mv ${prefix} ${prefix}.list
    mv ${prefix}_dis ${prefix}_dis.list
    mv ${prefix}_germline ${prefix}_germline.list
    mv ${prefix}_somatic ${prefix}_somatic.list
    """
}
39 changes: 39 additions & 0 deletions modules/nf-core/software/msisensor/scan.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
include { initOptions; saveFiles; getSoftwareName } from './../functions'

// Module-level options map; empty by default and supplied by the including
// workflow through `addParams(options: ...)`.
params.options = [:]
// NOTE(review): initOptions comes from ./../functions and is not visible here;
// presumably it fills in defaults for missing keys — confirm against functions.nf.
def options = initOptions(params.options)

// Conda package spec, only set when params.enable_conda is truthy (null otherwise).
environment = params.enable_conda ? "bioconda::msisensor=0.5" : null
// Default container image (quay.io biocontainers build of msisensor 0.5).
container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
// Under Singularity, switch to the Galaxy depot image unless params.pull_docker_container is set.
if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"

/*
 * Run `msisensor scan` on the reference genome.
 *
 * Inputs:
 *   - fasta: reference FASTA passed to `-d`
 *   - fai:   FASTA index, staged alongside the FASTA (not referenced in the command)
 * Output:
 *   - "microsatellites.list", the file written by `-o`
 */
process MSISENSOR_SCAN {
    tag "${fasta}"

    label 'CPUS_1'
    label 'MEMORY_MAX'

    publishDir params.outdir, mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }

    conda environment
    container container

    input:
        path fasta
        path fai

    output:
        path "microsatellites.list"

    script:
    def ioptions = initOptions(options)
    // Extra command-line arguments supplied through the module options; fall back
    // to an empty string so a missing value never renders as "null" in the command.
    def extra_args = ioptions.args ?: ''
    """
    msisensor scan -d ${fasta} -o microsatellites.list ${extra_args}
    """
}
2 changes: 1 addition & 1 deletion modules/nf-core/software/strelka/germline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ process STRELKA_GERMLINE {
container container

input:
tuple val(meta), path(bam), path (bai)
tuple val(meta), path(bam), path(bai)
path fasta
path fai
path target_bed
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/software/strelka/somatic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ process STRELKA_SOMATIC {
container container

input:
tuple val(meta), path(bam_normal), path (bai_normal), path(bam_tumor), path (bai_tumor)
tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
path fasta
path fai
path target_bed
Expand Down

0 comments on commit 3517511

Please sign in to comment.