diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c66c4c23c8..5910d0b0db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ on: env: NFT_DIFF: "pdiff" NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" - NFT_VER: "0.9.2" + NFT_VER: 0.9.2 NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -34,8 +34,8 @@ jobs: fail-fast: false matrix: NXF_VER: - - "24.04.2" - - "latest-everything" + - 24.04.2 + - latest-everything filter: ["workflow", "function", "pipeline"] # filter: ["process", "workflow", "function", "pipeline"] profile: ["conda", "docker", "singularity"] diff --git a/.gitignore b/.gitignore index 9cc2a80834..3a81dcfbdb 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ null/ test-datasets/ test.tap test.xml +.cursorrules +TODO.md diff --git a/.nf-core.yml b/.nf-core.yml index 7beff01213..1bf0c8a09b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,6 +4,8 @@ lint: files_exist: - .github/workflows/awsfulltest.yml - .github/workflows/awstest.yml + - conf/igenomes.config + - conf/igenomes_ignored.config - conf/modules.config files_unchanged: - .gitignore diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index afc253a919..0000000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,331 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. - Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GATK.GRCh37' { - ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_alleles_hg19.zip" - ascat_genome = 'hg19' - ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_loci_hg19.zip" - ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/GC_G1000_hg19.zip" - ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip" - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" - chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" - dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138.b37.vcf.gz' - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" - germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz.tbi" - intervals = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" - known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" - known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.snps.high_confidence.b37.vcf.gz' - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" - known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' - mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" - ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" - snpeff_db = 'GRCh37.87' - vep_cache_version = '113' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - } - 'GATK.GRCh38' { - ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip" - ascat_genome = 'hg38' - ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_loci_hg38.zip" - ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip" - ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip" - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" - chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" - dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" - dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" - germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz.tbi" - intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" - 
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" - known_indels_vqsr = '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' - known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" - known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_omni2.5.hg38.vcf.gz' - mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" - ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" - pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" - pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" - sentieon_dnascope_model = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" - snpeff_db = 'GRCh38.105' - vep_cache_version = '113' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' - } - 'Ensembl.GRCh37' { - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - snpeff_db = 'GRCh37.87' - vep_cache_version = '113' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - } - 'NCBI.GRCh38' { - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" - snpeff_db = 'GRCh38.105' - vep_cache_version = '113' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' - } - 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - } - 'GRCm38' { - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - chr_dir = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Chromosomes" - dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" - dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" - intervals = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" - known_indels = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" - mappability = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Control-FREEC/GRCm38_68_mm10.gem" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - snpeff_db = 'GRCm38.99' - vep_cache_version = '102' - vep_genome = 'GRCm38' - vep_species = 'mus_musculus' - } - 'TAIR10' { - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - } - 'EB2' { - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - snpeff_db = 'UMD3.1.75' - vep_cache_version = '94' - vep_genome = 'UMD3.1' - vep_species = 'bos_taurus' - } - 'WBcel235' { - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = 'WBcel235.105' - vep_cache_version = '113' - vep_genome = 'WBcel235' - vep_species = 'caenorhabditis_elegans' - } - 'CanFam3.1' { - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - snpeff_db = 'CanFam3.1.99' - vep_cache_version = '104' - vep_genome = 'CanFam3.1' - vep_species = 'canis_lupus_familiaris' - } - 'GRCz10' { - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - } - 'BDGP6' { - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - } - 'EquCab2' { - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - } - 'EB1' { - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - readme = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - } - 'Gm01' { - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - } - 'IRGSP-1.0' { - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - } - 'CHIMP2.1.4' { - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - } - 'Rnor_5.0' { - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - } - 'Rnor_6.0' { - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - } - 'R64-1-1' { - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = 'R64-1-1.105' - vep_cache_version = '113' - vep_genome = 'R64-1-1' - vep_species = 'saccharomyces_cerevisiae' - } - 'EF2' { - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - } - 'Sbi1' { - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - } - 'AGPv3' { - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - } - 'hg38' { - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - fasta = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = 'GRCh38.105' - vep_cache_version = '113' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' - } - 'hg19' { - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - snpeff_db = 'GRCh37.87' - vep_cache_version = '113' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - } - 'mm10' { - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - snpeff_db = 'GRCm38.99' - vep_cache_version = '102' - vep_genome = 'GRCm38' - vep_species = 'mus_musculus' - } - 'bosTau8' { - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - } - 'ce10' { - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - } - 'canFam3' { - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - } - 'danRer10' { - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - } - 'dm6' { - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - } - 'equCab2' { - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - } - 'galGal4' { - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - } - 'panTro4' { - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - } - 'rn6' { - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - } - 'sacCer3' { - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - fasta = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - } - 'susScr3' { - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - } - 'testdata.nf-core.sarek' { - dbsnp = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" - dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' - dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.dict" - fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta" - fasta_fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta.fai" - germline_resource = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" - intervals = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.interval_list" - known_indels = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" - known_indels_vqsr = '--resource:mills,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.vcf.gz' - ngscheckmate_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" - snpeff_db = 'WBcel235.105' - vep_cache_version = '113' - vep_genome = 'WBcel235' - vep_species = 'caenorhabditis_elegans' - } - } -} diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config deleted file mode 100644 index b4034d8243..0000000000 --- a/conf/igenomes_ignored.config +++ /dev/null @@ -1,9 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Empty genomes dictionary to use when igenomes is ignored. ----------------------------------------------------------------------------------------- -*/ - -params.genomes = [:] diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index 1f57237be7..69835dcf78 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -15,26 +15,6 @@ process { - withName: 'BWAMEM1_INDEX' { - ext.when = { !params.bwa && params.step == "mapping" && (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem")} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "bwa", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'BWAMEM2_INDEX' { - ext.when = { !params.bwamem2 && params.step == "mapping" && params.aligner == "bwa-mem2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "bwamem2", - saveAs: { params.save_reference || params.build_only_index ? 
it : null } - ] - } - withName: 'CNVKIT_ANTITARGET' { ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } publishDir = [ @@ -56,106 +36,6 @@ process { ] } - withName: 'DRAGMAP_HASHTABLE' { - ext.when = { !params.dragmap && params.step == "mapping" && params.aligner == "dragmap" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "dragmap", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dict" }, - pattern: "*dict", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'MSISENSORPRO_SCAN' { - ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/msi" }, - pattern: "*list", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'SAMTOOLS_FAIDX' { - ext.when = { !params.fasta_fai && params.step != "annotate" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/fai" }, - pattern: "*fai", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_BCFTOOLS_ANNOTATIONS' { - ext.when = { !params.bcftools_annotations_tbi && params.bcftools_annotations && params.tools && params.tools.split(',').contains('bcfann') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/bcfann" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_DBSNP' { - ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope') || params.tools.split(',').contains('mutect2'))) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dbsnp" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_GERMLINE_RESOURCE' { - ext.when = { !params.germline_resource_tbi && params.germline_resource && params.tools && params.tools.split(',').contains('mutect2') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/germline_resource" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_KNOWN_INDELS' { - ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) ) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/known_indels" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? 
it : null } - ] - } - - withName: 'TABIX_KNOWN_SNPS' { - ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') )) ) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/known_snps" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_PON' { - ext.when = { !params.pon_tbi && params.pon && params.tools && params.tools.split(',').contains('mutect2') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/pon" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { ext.when = { params.tools && params.tools.split(',').contains('ascat')} publishDir = [ diff --git a/conf/modules/prepare_intervals.config b/conf/modules/prepare_intervals.config index 815903b996..7004574ede 100644 --- a/conf/modules/prepare_intervals.config +++ b/conf/modules/prepare_intervals.config @@ -14,37 +14,14 @@ // PREPARE INTERVALS process { - - withName: 'BUILD_INTERVALS' { - ext.args = { "-v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }'" } - ext.suffix = { "bed" } - } - - withName: 'CREATE_INTERVALS_BED' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "*bed", - saveAs: { params.save_reference || params.build_only_index ? "intervals/${it}" : null } - ] - } - - withName: 'GATK4_INTERVALLISTTOBED' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "*bed", - saveAs: { params.save_reference || params.build_only_index ? "intervals/${it}" : null } + withName: 'NFCORE_SAREK:PREPARE_INTERVALS:PREPARE_INTERVALS:.*' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false ] } withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' { ext.prefix = {"${meta.id}"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "*bed.gz", - saveAs: { params.save_reference || params.build_only_index ? 
"intervals/${it}" : null } - ] } } diff --git a/conf/test.config b/conf/test.config index 5f38bfd90d..d5cf6986d1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,16 +27,16 @@ params { // Input data input = "${projectDir}/tests/csv/3.0/fastq_single.csv" - // small genome on igenomes - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - genome = 'testdata.nf-core.sarek' + // small reference genome + igenomes_base = 's3://nf-core-references/test_data/' + + // TODO: VERSION + references = 'https://raw.githubusercontent.com/nf-core/references-assets/main/genomes/Homo_sapiens/test/GRCh38_chr22.yml' // Small reference genome bcftools_annotations = "${params.modules_testdata_base_path}/genomics/sarscov2/illumina/vcf/test2.vcf.gz" bcftools_annotations_tbi = "${params.modules_testdata_base_path}/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" bcftools_header_lines = "${projectDir}/tests/config/bcfann_test_header.txt" - snpeff_cache = null - vep_cache = null // Sentieon sentieon_dnascope_model = "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" diff --git a/main.nf b/main.nf index effa97ef16..e0b47d22e0 100755 --- a/main.nf +++ b/main.nf @@ -19,103 +19,53 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.ascat_alleles = getGenomeAttribute('ascat_alleles') -params.ascat_genome = getGenomeAttribute('ascat_genome') -params.ascat_loci = getGenomeAttribute('ascat_loci') -params.ascat_loci_gc = getGenomeAttribute('ascat_loci_gc') -params.ascat_loci_rt = getGenomeAttribute('ascat_loci_rt') -params.bwa = getGenomeAttribute('bwa') -params.bwamem2 = getGenomeAttribute('bwamem2') -params.cf_chrom_len = getGenomeAttribute('cf_chrom_len') -params.chr_dir = getGenomeAttribute('chr_dir') -params.dbsnp = getGenomeAttribute('dbsnp') -params.dbsnp_tbi = getGenomeAttribute('dbsnp_tbi') -params.dbsnp_vqsr = getGenomeAttribute('dbsnp_vqsr') -params.dict = getGenomeAttribute('dict') -params.dragmap = getGenomeAttribute('dragmap') -params.fasta = getGenomeAttribute('fasta') -params.fasta_fai = getGenomeAttribute('fasta_fai') -params.germline_resource = getGenomeAttribute('germline_resource') -params.germline_resource_tbi = getGenomeAttribute('germline_resource_tbi') -params.intervals = getGenomeAttribute('intervals') -params.known_indels = getGenomeAttribute('known_indels') -params.known_indels_tbi = getGenomeAttribute('known_indels_tbi') -params.known_indels_vqsr = getGenomeAttribute('known_indels_vqsr') -params.known_snps = getGenomeAttribute('known_snps') -params.known_snps_tbi = getGenomeAttribute('known_snps_tbi') -params.known_snps_vqsr = getGenomeAttribute('known_snps_vqsr') -params.mappability = getGenomeAttribute('mappability') -params.ngscheckmate_bed = getGenomeAttribute('ngscheckmate_bed') -params.pon = getGenomeAttribute('pon') -params.pon_tbi = getGenomeAttribute('pon_tbi') -params.sentieon_dnascope_model = getGenomeAttribute('sentieon_dnascope_model') -params.snpeff_db = getGenomeAttribute('snpeff_db') -params.vep_cache_version = getGenomeAttribute('vep_cache_version') -params.vep_genome = getGenomeAttribute('vep_genome') -params.vep_species = 
getGenomeAttribute('vep_species') -aligner = params.aligner +include { SAREK } from './workflows/sarek' +include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' +include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' +include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' +include { get_references_file } from './subworkflows/nf-core/utils_references' +include { get_references_value } from './subworkflows/nf-core/utils_references' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SAREK } from './workflows/sarek' -include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' -include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' -include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' -include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' - -// Initialize fasta file with meta map: -fasta = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() - -// Initialize file channels based on params, defined in the params.genomes[params.genome] scope -bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty() -bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty() -cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] -dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) -fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() -germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input -known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) -known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) -mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) -pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) -sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) - -// Initialize value channels based on params, defined in the params.genomes[params.genome] scope -ascat_genome = params.ascat_genome ?: Channel.empty() -dbsnp_vqsr = params.dbsnp_vqsr ? 
Channel.value(params.dbsnp_vqsr) : Channel.empty() -known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() -known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() -ngscheckmate_bed = params.ngscheckmate_bed ? Channel.value(params.ngscheckmate_bed) : Channel.empty() -snpeff_db = params.snpeff_db ?: Channel.empty() -vep_cache_version = params.vep_cache_version ?: Channel.empty() -vep_genome = params.vep_genome ?: Channel.empty() -vep_species = params.vep_species ?: Channel.empty() - -vep_extra_files = [] +workflow { + // SUBWORKFLOW: Run initialisation tasks + PIPELINE_INITIALISATION( + params.version, + params.validate_params, + args, + params.outdir, + params.input, + params.references, + params.step, + ) -if (params.dbnsfp && params.dbnsfp_tbi) { - vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) - vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) -} + // WORKFLOW: Run main workflow + NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet, PIPELINE_INITIALISATION.out.references, params.aligner) -if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { - vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + // SUBWORKFLOW: Run completion tasks + PIPELINE_COMPLETION( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_SAREK.out.multiqc_report, + ) } /* @@ -128,154 +78,172 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p workflow NFCORE_SAREK { take: samplesheet + references + aligner main: versions = Channel.empty() - // build indexes if needed - PREPARE_GENOME( - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - bcftools_annotations, - params.chr_dir, - dbsnp, - fasta, - germline_resource, - known_indels, - known_snps, - pon) + // References' files from the references yaml or params + ascat_alleles = get_references_file(references, params.ascat_alleles, 'ascat_alleles', params.igenomes_base) + ascat_loci = get_references_file(references, params.ascat_loci, 'ascat_loci', params.igenomes_base) + ascat_loci_gc = get_references_file(references, params.ascat_loci_gc, 'ascat_loci_gc', params.igenomes_base) + ascat_loci_rt = get_references_file(references, params.ascat_loci_rt, 'ascat_loci_rt', params.igenomes_base) + bwamem1_index = get_references_file(references, params.bwa, 'bwamem1_index', params.igenomes_base) + bwamem2_index = get_references_file(references, params.bwamem2, 'bwamem2_index', params.igenomes_base) + cf_chrom_len = get_references_file(references, params.cf_chrom_len, 'cf_chrom_len', params.igenomes_base) + chr_dir = get_references_file(references, params.chr_dir, 'chr_dir', params.igenomes_base) + dragmap_hashtable = get_references_file(references, params.dragmap, 'dragmap_hashtable', params.igenomes_base) + fasta = get_references_file(references, params.fasta, 'fasta', params.igenomes_base) + fasta_dict = get_references_file(references, params.dict, 'fasta_dict', params.igenomes_base) + fasta_fai = get_references_file(references, params.fasta_fai, 'fasta_fai', params.igenomes_base) + intervals_bed = 
get_references_file(references, params.intervals, 'intervals_bed', params.igenomes_base) + mappability = get_references_file(references, params.mappability, 'mappability', params.igenomes_base) + msisensorpro_scan = get_references_file(references, params.msisensorpro_scan, 'msisensorpro_scan', params.igenomes_base) + ngscheckmate_bed = get_references_file(references, params.ngscheckmate_bed, 'ngscheckmate_bed', params.igenomes_base) + sentieon_dnascope_model = get_references_file(references, params.sentieon_dnascope_model, 'sentieon_dnascope_model', params.igenomes_base) + + // References' values from the references yaml or params + ascat_genome = get_references_value(references, params.ascat_genome, 'ascat_genome') + snpeff_db = get_references_value(references, params.snpeff_db, 'snpeff_db') + vep_cache_version = get_references_value(references, params.vep_cache_version, 'vep_cache_version') + vep_genome = get_references_value(references, params.vep_genome, 'vep_genome') + vep_species = get_references_value(references, params.vep_species, 'vep_species') + + // References' VCFs and related from the references yaml or params + dbsnp = get_references_file(references, params.dbsnp, 'vcf_dbsnp_vcf', params.igenomes_base) + dbsnp_tbi = get_references_file(references, params.dbsnp_tbi, 'vcf_dbsnp_vcf_tbi', params.igenomes_base) + dbsnp_vqsr = get_references_value(references, params.dbsnp_vqsr, 'vcf_dbsnp_vcf_vqsr') + germline_resource = get_references_file(references, params.germline_resource, 'vcf_germline_resource_vcf', params.igenomes_base) + germline_resource_tbi = get_references_file(references, params.germline_resource_tbi, 'vcf_germline_resource_vcf_tbi', params.igenomes_base) + known_indels = get_references_file(references, params.known_indels, 'vcf_known_indels_vcf', params.igenomes_base) + known_indels_tbi = get_references_file(references, params.known_indels_tbi, 'vcf_known_indels_vcf_tbi', params.igenomes_base) + known_indels_vqsr = get_references_value(references, params.known_indels_vqsr, 'vcf_known_indels_vcf_vqsr') + known_snps = get_references_file(references, params.known_snps, 'vcf_known_snps_vcf', params.igenomes_base) + known_snps_tbi = get_references_file(references, params.known_snps_tbi, 'vcf_known_snps_vcf_tbi', params.igenomes_base) + known_snps_vqsr = get_references_value(references, params.known_snps_vqsr, 'vcf_known_snps_vcf_vqsr') + pon = get_references_file(references, params.pon, 'vcf_pon_vcf', params.igenomes_base) + pon_tbi = get_references_file(references, params.pon_tbi, 'vcf_pon_vcf_tbi', params.igenomes_base) - // Gather built indices or get them from the params - // Built from the fasta file: - dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() - : PREPARE_GENOME.out.dict - fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).map{ it -> [ [id:'fai'], it ] }.collect() - : PREPARE_GENOME.out.fasta_fai - bwa = params.bwa ? Channel.fromPath(params.bwa).map{ it -> [ [id:'bwa'], it ] }.collect() - : PREPARE_GENOME.out.bwa - bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).map{ it -> [ [id:'bwamem2'], it ] }.collect() - : PREPARE_GENOME.out.bwamem2 - dragmap = params.dragmap ? 
Channel.fromPath(params.dragmap).map{ it -> [ [id:'dragmap'], it ] }.collect() - : PREPARE_GENOME.out.hashtable + // known_sites is made by grouping both the dbsnp and the known snps/indels resources + // Which can either or both be optional + known_sites_indels = dbsnp.mix(known_indels).groupTuple().collect() + known_sites_indels_tbi = dbsnp_tbi.mix(known_indels_tbi).groupTuple().collect() + known_sites_snps = dbsnp.mix(known_snps).groupTuple().collect() + known_sites_snps_tbi = dbsnp_tbi.mix(known_snps_tbi).groupTuple().collect() // Gather index for mapping given the chosen aligner - index_alignment = (aligner == "bwa-mem" || aligner == "sentieon-bwamem") ? bwa : - aligner == "bwa-mem2" ? bwamem2 : - dragmap + index_alignment = aligner == "bwa-mem" || aligner == "sentieon-bwamem" + ? bwamem1_index + : aligner == "bwa-mem2" + ? bwamem2_index + : dragmap_hashtable - // TODO: add a params for msisensorpro_scan - msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan + bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.value([]) + bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : Channel.value([]) : Channel.value([]) + bcftools_header_lines = params.bcftools_header_lines ?: Channel.value([]) - // For ASCAT, extracted from zip or tar.gz files - allele_files = PREPARE_GENOME.out.allele_files - chr_files = PREPARE_GENOME.out.chr_files - gc_file = PREPARE_GENOME.out.gc_file - loci_files = PREPARE_GENOME.out.loci_files - rt_file = PREPARE_GENOME.out.rt_file + vep_extra_files = [] - // Tabix indexed vcf files - bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.value([]) - dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) - germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries - known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) - known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) - pon_tbi = params.pon ? params.pon_tbi ? 
Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : Channel.value([]) + if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) + } - // known_sites is made by grouping both the dbsnp and the known snps/indels resources - // Which can either or both be optional - known_sites_indels = dbsnp.concat(known_indels).collect() - known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() - known_sites_snps = dbsnp.concat(known_snps).collect() - known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() + if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + } // Build intervals if needed - PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) + PREPARE_INTERVALS(intervals_bed, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) // Intervals for speed up preprocessing/variant calling by spread/gather // [interval.bed] all intervals in one file - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined intervals_bed_combined_for_variant_calling = PREPARE_INTERVALS.out.intervals_bed_combined // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) - intervals_for_preprocessing = params.wes ? - intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : - Channel.value([ [ id:'null' ], [] ]) - intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather - intervals_and_num_intervals = intervals.map{ interval, num_intervals -> - if ( num_intervals < 1 ) [ [], num_intervals ] - else [ interval, num_intervals ] + intervals_for_preprocessing = params.wes + ? 
intervals_bed_combined.map { it -> [[id: it.baseName], it] }.collect() + : Channel.value([[id: 'null'], []]) + intervals = PREPARE_INTERVALS.out.intervals_bed + // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi + // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + intervals_and_num_intervals = intervals.map { interval, num_intervals -> + if (num_intervals < 1) { + [[], num_intervals] + } + else { + [interval, num_intervals] + } } - intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals -> - if ( num_intervals < 1 ) [ [], [], num_intervals ] - else [ intervals[0], intervals[1], num_intervals ] + intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map { intervals_, num_intervals -> + if (num_intervals < 1) { + [[], [], num_intervals] + } + else { + [intervals_[0], intervals_[1], num_intervals] + } } if (params.tools && params.tools.split(',').contains('cnvkit')) { if (params.cnvkit_reference) { cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect() - } else { + } + else { PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined) cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions) } - } else { + } + else { cnvkit_reference = Channel.value([]) } // Gather used softwares versions - versions = versions.mix(PREPARE_GENOME.out.versions) versions = versions.mix(PREPARE_INTERVALS.out.versions) - vep_fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] - - // Download cache - if (params.download_cache) { - // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache - ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) - snpeff_info = Channel.of([ [ id:"${params.snpeff_db}" ], params.snpeff_db ]) - DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) - snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache - vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } + vep_fasta = params.vep_include_fasta ? 
fasta.map { fasta_ -> [[id: fasta_.baseName], fasta_] } : [[id: 'null'], []] - versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) - } else { - // Looks for cache information either locally or on the cloud - ANNOTATION_CACHE_INITIALISATION( - (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), - params.snpeff_cache, - params.snpeff_db, - (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), - params.vep_cache, - params.vep_species, - params.vep_cache_version, - params.vep_genome, - params.vep_custom_args, - "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") + // Looks for cache information either locally or on the cloud + ANNOTATION_CACHE_INITIALISATION( + (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), + params.snpeff_cache, + snpeff_db, + (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), + params.vep_cache, + vep_species, + vep_cache_version, + vep_genome, + params.vep_custom_args, + "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.", + ) - snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache - vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache - } + snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache + vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache // // WORKFLOW: Run pipeline // - SAREK(samplesheet, - allele_files, + SAREK( + samplesheet, + ascat_alleles, bcftools_annotations, bcftools_annotations_tbi, bcftools_header_lines, cf_chrom_len, - chr_files, + chr_dir, cnvkit_reference, dbsnp, dbsnp_tbi, dbsnp_vqsr, - dict, + fasta_dict, fasta, fasta_fai, - gc_file, + ascat_loci_gc, germline_resource, germline_resource_tbi, index_alignment, @@ -291,86 +259,24 @@ workflow NFCORE_SAREK { known_sites_snps, known_sites_snps_tbi, known_snps_vqsr, - loci_files, + ascat_loci, mappability, msisensorpro_scan, ngscheckmate_bed, pon, pon_tbi, - rt_file, + ascat_loci_rt, sentieon_dnascope_model, snpeff_cache, + snpeff_db, vep_cache, vep_cache_version, vep_extra_files, vep_fasta, vep_genome, - vep_species + vep_species, ) + emit: multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow { - - main: - // - // SUBWORKFLOW: Run initialisation tasks - // - PIPELINE_INITIALISATION( - params.version, - params.validate_params, - params.monochrome_logs, - args, - params.outdir, - params.input - ) - - // - // WORKFLOW: Run main workflow - // - NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet) - - // - // SUBWORKFLOW: Run completion tasks - // - PIPELINE_COMPLETION( - params.email, - params.email_on_fail, - params.plaintext_email, - params.outdir, - params.monochrome_logs, - params.hook_url, - NFCORE_SAREK.out.multiqc_report - ) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// Get attribute from genome 
config file e.g. fasta -// - -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/modules.json b/modules.json index ad4fd57616..0be80b67ef 100644 --- a/modules.json +++ b/modules.json @@ -534,6 +534,11 @@ "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", "installed_by": ["subworkflows"] }, + "utils_references": { + "branch": "master", + "git_sha": "c18de39a419659720e2482df14df21affdc30f47", + "installed_by": ["subworkflows"] + }, "vcf_annotate_ensemblvep": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index ad42e6ad53..733cca37e1 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,19 +1,19 @@ process CREATE_INTERVALS_BED { - tag "$intervals" + tag "${intervals}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'biocontainers/gawk:5.1.0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' + : 'biocontainers/gawk:5.1.0'}" input: - path(intervals) - val(nucleotides_per_second) + tuple val(meta), path(intervals) + val nucleotides_per_second output: - path("*.bed") , emit: bed - path "versions.yml" , emit: versions + path ("*.bed"), emit: bed + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -47,7 +47,8 @@ process CREATE_INTERVALS_BED { gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') END_VERSIONS """ - } else if (intervals.toString().toLowerCase().endsWith("interval_list")) { + } + else if (intervals.toString().toLowerCase().endsWith("interval_list")) { """ grep -v '^@' ${intervals} | awk -vFS="\t" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); @@ -59,7 +60,8 @@ process CREATE_INTERVALS_BED { gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') END_VERSIONS """ - } else { + } + else { """ awk -vFS="[:-]" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); @@ -75,11 +77,9 @@ process CREATE_INTERVALS_BED { stub: def prefix = task.ext.prefix ?: "${intervals.baseName}" - def metrics = task.ext.metrics ?: "${prefix}.metrics" - // def prefix_basename = prefix.substring(0, prefix.lastIndexOf(".")) """ - touch ${prefix}.stub.bed + touch ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf index 1a29986265..f009c3d09e 100644 --- a/modules/nf-core/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -1,23 +1,23 @@ process GATK4_BASERECALIBRATOR { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules.json b/modules.json
index ad4fd57616..0be80b67ef 100644
--- a/modules.json
+++ b/modules.json
@@ -534,6 +534,11 @@
             "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e",
             "installed_by": ["subworkflows"]
         },
+        "utils_references": {
+            "branch": "master",
+            "git_sha": "c18de39a419659720e2482df14df21affdc30f47",
+            "installed_by": ["subworkflows"]
+        },
         "vcf_annotate_ensemblvep": {
             "branch": "master",
             "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf
index ad42e6ad53..733cca37e1 100644
--- a/modules/local/create_intervals_bed/main.nf
+++ b/modules/local/create_intervals_bed/main.nf
@@ -1,19 +1,19 @@
 process CREATE_INTERVALS_BED {
-    tag "$intervals"
+    tag "${intervals}"
     label 'process_single'

     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
-        'biocontainers/gawk:5.1.0' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/gawk:5.1.0'
+        : 'biocontainers/gawk:5.1.0'}"

     input:
-    path(intervals)
-    val(nucleotides_per_second)
+    tuple val(meta), path(intervals)
+    val nucleotides_per_second

     output:
-    path("*.bed")       , emit: bed
-    path "versions.yml" , emit: versions
+    path ("*.bed"), emit: bed
+    path "versions.yml", emit: versions

     when:
     task.ext.when == null || task.ext.when
@@ -47,7 +47,8 @@ process CREATE_INTERVALS_BED {
         gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
     END_VERSIONS
     """
-    } else if (intervals.toString().toLowerCase().endsWith("interval_list")) {
+    }
+    else if (intervals.toString().toLowerCase().endsWith("interval_list")) {
     """
     grep -v '^@' ${intervals} | awk -vFS="\t" '{
         name = sprintf("%s_%d-%d", \$1, \$2, \$3);
@@ -59,7 +60,8 @@ process CREATE_INTERVALS_BED {
         gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
     END_VERSIONS
     """
-    } else {
+    }
+    else {
     """
     awk -vFS="[:-]" '{
         name = sprintf("%s_%d-%d", \$1, \$2, \$3);
@@ -75,11 +77,9 @@ process CREATE_INTERVALS_BED {

     stub:
     def prefix = task.ext.prefix ?: "${intervals.baseName}"
-    def metrics = task.ext.metrics ?: "${prefix}.metrics"
-    // def prefix_basename = prefix.substring(0, prefix.lastIndexOf("."))
     """
-    touch ${prefix}.stub.bed
+    touch ${prefix}.bed

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
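Because the process now takes `tuple val(meta), path(intervals)` instead of a bare `path`, every call site has to pair the intervals file with a meta map. A minimal sketch of the new invocation (channel and file names are illustrative, not taken from this PR):

    // Inside a workflow: wrap the intervals file in a meta map before calling
    ch_intervals = Channel.of([[id: 'wgs_calling_regions'], file('wgs_calling_regions.bed')])
    CREATE_INTERVALS_BED(ch_intervals, params.nucleotides_per_second)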
diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf
index 1a29986265..f009c3d09e 100644
--- a/modules/nf-core/gatk4/baserecalibrator/main.nf
+++ b/modules/nf-core/gatk4/baserecalibrator/main.nf
@@ -1,23 +1,23 @@
 process GATK4_BASERECALIBRATOR {
-    tag "$meta.id"
+    tag "${meta.id}"
     label 'process_low'

     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0':
-        'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0'
+        : 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0'}"

     input:
     tuple val(meta), path(input), path(input_index), path(intervals)
-    path  fasta
-    path  fai
-    path  dict
-    path  known_sites
-    path  known_sites_tbi
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    tuple val(meta4), path(dict)
+    tuple val(meta5), path(known_sites)
+    tuple val(meta6), path(known_sites_tbi)

     output:
     tuple val(meta), path("*.table"), emit: table
-    path "versions.yml"             , emit: versions
+    path "versions.yml", emit: versions

     when:
     task.ext.when == null || task.ext.when
@@ -25,25 +25,26 @@ process GATK4_BASERECALIBRATOR {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def interval_command = intervals ? "--intervals $intervals" : ""
-    def sites_command = known_sites.collect{"--known-sites $it"}.join(' ')
+    def interval_command = intervals ? "--intervals ${intervals}" : ""
+    def sites_command = known_sites.collect { "--known-sites ${it}" }.join(' ')

     def avail_mem = 3072
     if (!task.memory) {
-        log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
-    } else {
-        avail_mem = (task.memory.mega*0.8).intValue()
+        log.info('[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
+    }
+    else {
+        avail_mem = (task.memory.mega * 0.8).intValue()
     }
     """
     gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
         BaseRecalibrator \\
-        --input $input \\
+        --input ${input} \\
         --output ${prefix}.table \\
-        --reference $fasta \\
-        $interval_command \\
-        $sites_command \\
+        --reference ${fasta} \\
+        ${interval_command} \\
+        ${sites_command} \\
         --tmp-dir . \\
-        $args
+        ${args}

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
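The heap sizing kept (and only reformatted) above reserves roughly 20% of the task allocation for non-heap JVM overhead; a worked example of the arithmetic, assuming an 8 GB task:

    // task.memory = 8.GB  =>  task.memory.mega == 8192
    def avail_mem = (8192 * 0.8).intValue()
    assert avail_mem == 6553 // rendered as "-Xmx6553M" in the gatk command line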
diff --git a/nextflow.config b/nextflow.config
index de5e6d4ca6..84dbdd3852 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -11,19 +11,56 @@ params {
     // Mandatory arguments

     // Input options
-    input         = null      // No default input
-    input_restart = null      // No default automatic input
-    step          = 'mapping' // Starts with mapping
+    input                       = null      // No default input
+    input_restart               = null      // No default automatic input
+    step                        = 'mapping' // Starts with mapping

     // References
-    genome           = 'GATK.GRCh38'
-    igenomes_base    = 's3://ngi-igenomes/igenomes/'
-    snpeff_cache     = 's3://annotation-cache/snpeff_cache/'
-    vep_cache        = 's3://annotation-cache/vep_cache/'
-    igenomes_ignore  = false
-    save_reference   = false // Built references not saved
-    build_only_index = false // Only build the reference indexes
-    download_cache   = false // Do not download annotation cache
+    genome                      = 'GATK/GRCh38'
+    igenomes_base               = 's3://ngi-igenomes/igenomes/'
+    // TODO: VERSION
+    references_config_base      = "https://raw.githubusercontent.com/nf-core/references-assets/main"
+    references                  = "${params.references_config_base}/igenomes/${genome}.yml"
+    snpeff_cache                = 's3://annotation-cache/snpeff_cache/'
+    vep_cache                   = 's3://annotation-cache/vep_cache/'
+    download_cache              = false // Do not download annotation cache
+
+    // params for references in yaml file
+    ascat_alleles               = null
+    ascat_genome                = null
+    ascat_loci                  = null
+    ascat_loci_gc               = null
+    ascat_loci_rt               = null
+    bwa                         = null
+    bwamem2                     = null
+    cf_chrom_len                = null
+    chr_dir                     = null
+    dbsnp                       = null
+    dbsnp_tbi                   = null
+    dbsnp_vqsr                  = null
+    dict                        = null
+    dragmap                     = null
+    fasta                       = null
+    fasta_fai                   = null
+    germline_resource           = null
+    germline_resource_tbi       = null
+    intervals                   = null
+    known_indels                = null
+    known_indels_tbi            = null
+    known_indels_vqsr           = null
+    known_snps                  = null
+    known_snps_tbi              = null
+    known_snps_vqsr             = null
+    mappability                 = null
+    msisensorpro_scan           = null
+    ngscheckmate_bed            = null
+    pon                         = null
+    pon_tbi                     = null
+    sentieon_dnascope_model     = null
+    snpeff_db                   = null
+    vep_cache_version           = null
+    vep_genome                  = null
+    vep_species                 = null

     // Main options
     no_intervals = false // Intervals will be built from the fasta file
@@ -80,60 +117,60 @@ params {
     wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers

     // Annotation
-    bcftools_annotations     = null // No extra annotation file
-    bcftools_annotations_tbi = null // No extra annotation file index
-    bcftools_header_lines    = null // No header lines to be added to the VCF file
-    dbnsfp                   = null // No dbnsfp processed file
-    dbnsfp_consequence       = null // No default consequence for dbnsfp plugin
-    dbnsfp_fields            = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
-    dbnsfp_tbi               = null // No dbnsfp processed file index
-    outdir_cache             = null // No default outdir cache
-    spliceai_indel           = null // No spliceai_indel file
-    spliceai_indel_tbi       = null // No spliceai_indel file index
-    spliceai_snv             = null // No spliceai_snv file
-    spliceai_snv_tbi         = null // No spliceai_snv file index
-    vep_custom_args          = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP
-    vep_dbnsfp               = null // dbnsfp plugin disabled within VEP
-    vep_include_fasta        = false // Don't use fasta file for annotation with VEP
-    vep_loftee               = null // loftee plugin disabled within VEP
-    vep_out_format           = "vcf"
-    vep_spliceai             = null // spliceai plugin disabled within VEP
-    vep_spliceregion         = null // spliceregion plugin disabled within VEP
-    vep_version              = "111.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins
+    bcftools_annotations        = null // No extra annotation file
+    bcftools_annotations_tbi    = null // No extra annotation file index
+    bcftools_header_lines       = null // No header lines to be added to the VCF file
+    dbnsfp                      = null // No dbnsfp processed file
+    dbnsfp_consequence          = null // No default consequence for dbnsfp plugin
+    dbnsfp_fields               = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
+    dbnsfp_tbi                  = null // No dbnsfp processed file index
+    outdir_cache                = null // No default outdir cache
+    spliceai_indel              = null // No spliceai_indel file
+    spliceai_indel_tbi          = null // No spliceai_indel file index
+    spliceai_snv                = null // No spliceai_snv file
+    spliceai_snv_tbi            = null // No spliceai_snv file index
+    vep_custom_args             = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP
+    vep_dbnsfp                  = null // dbnsfp plugin disabled within VEP
+    vep_include_fasta           = false // Don't use fasta file for annotation with VEP
+    vep_loftee                  = null // loftee plugin disabled within VEP
+    vep_out_format              = "vcf"
+    vep_spliceai                = null // spliceai plugin disabled within VEP
+    vep_spliceregion            = null // spliceregion plugin disabled within VEP
+    vep_version                 = "111.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins

     // MultiQC options
-    multiqc_config         = null
-    multiqc_title          = null
-    multiqc_logo           = null
-    max_multiqc_email_size = '25.MB'
+    multiqc_config              = null
+    multiqc_title               = null
+    multiqc_logo                = null
+    max_multiqc_email_size      = '25.MB'
     multiqc_methods_description = null

     // Boilerplate options
-    outdir                       = null
-    publish_dir_mode             = 'copy'
-    email                        = null
-    email_on_fail                = null
-    plaintext_email              = false
-    monochrome_logs              = false
-    hook_url                     = null
-    help                         = false
-    help_full                    = false
-    show_hidden                  = false
-    version                      = false
-    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
+    outdir                      = null
+    publish_dir_mode            = 'copy'
+    email                       = null
+    email_on_fail               = null
+    plaintext_email             = false
+    monochrome_logs             = false
+    hook_url                    = null
+    help                        = false
+    help_full                   = false
+    show_hidden                 = false
+    version                     = false
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'

     // Config options
-    config_profile_name        = null
-    config_profile_description = null
-    custom_config_version      = 'master'
-    custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
-    config_profile_contact     = null
-    config_profile_url         = null
-    test_data_base             = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3'
-    modules_testdata_base_path = null
+    config_profile_name         = null
+    config_profile_description  = null
+    custom_config_version       = 'master'
+    custom_config_base          = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
+    config_profile_contact      = null
+    config_profile_url          = null
+    test_data_base              = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3'
+    modules_testdata_base_path  = null

     // Schema validation default options
-    validate_params = true
+    validate_params             = true
 }

 // Load base.config by default for all pipelines
@@ -321,9 +358,6 @@ podman.registry = 'quay.io'
 singularity.registry = 'quay.io'
 charliecloud.registry = 'quay.io'

-// Load igenomes.config if required
-includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config'
-
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -392,7 +426,6 @@ plugins {
 }

 validation {
-    defaultIgnoreParams = ["genomes"]
     lenientMode = true
     help {
         enabled = true
@@ -437,7 +470,6 @@ includeConfig 'conf/modules/modules.config'

 // prepare reference
 includeConfig 'conf/modules/download_cache.config'
-includeConfig 'conf/modules/prepare_genome.config'
 includeConfig 'conf/modules/prepare_intervals.config'

 // preprocessing
@@ -477,4 +509,3 @@ includeConfig 'conf/modules/lofreq.config'

 //annotate
 includeConfig 'conf/modules/annotate.config'
-
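With `conf/igenomes.config` gone, each reference is now a flat param seeded from the per-genome YAML, so any single file can still be overridden without redefining the rest. A sketch of a user-side config doing exactly that (the paths are hypothetical):

    // custom.config -- supplied with `-c custom.config` on the command line
    params {
        genome = 'GATK/GRCh38'
        fasta  = '/data/refs/Homo_sapiens_assembly38.fasta' // overrides the YAML value
        dbsnp  = '/data/refs/dbsnp_146.hg38.vcf.gz'         // ditto
    }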
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5cdf35d555..fda766b430 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -365,7 +365,7 @@
             "type": "string",
             "fa_icon": "fas fa-file",
             "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2",
-            "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK.GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-).\n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped."
+            "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK/GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-).\n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped."
         },
         "pon_tbi": {
             "type": "string",
@@ -536,10 +536,10 @@
                 }
             }
         },
-        "general_reference_genome_options": {
-            "title": "General reference genome options",
+        "reference_genome_options": {
+            "title": "Reference genome options",
             "type": "object",
-            "description": "General options to interact with reference genomes.",
+            "description": "Reference genome related files and options required for the workflow. If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.",
             "default": "",
             "properties": {
                 "igenomes_base": {
@@ -549,122 +549,111 @@
                     "fa_icon": "fas fa-ban",
                    "default": "s3://ngi-igenomes/igenomes/"
                 },
-                "igenomes_ignore": {
-                    "type": "boolean",
-                    "description": "Do not load the iGenomes reference config.",
-                    "fa_icon": "fas fa-ban",
-                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. **NB** You can then run `Sarek` by specifying at least a FASTA genome file"
+                "genome": {
+                    "type": "string",
+                    "description": "Name of the reference genome in AWS iGenomes or nf-core/references.",
+                    "default": "GATK/GRCh38",
+                    "fa_icon": "fas fa-book",
+                    "help_text": "If using a reference genome configured in the pipeline using AWS iGenomes or nf-core/references, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GATK/GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
                 },
-                "save_reference": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-download",
-                    "description": "Save built references.",
-                    "help_text": "Set this parameter, if you wish to save all computed reference files. This is useful to avoid re-computation on future runs."
+                "references_config_base": {
+                    "type": "string",
+                    "fa_icon": "fas fa-users-cog",
+                    "description": "Base directory for references yaml files",
+                    "hidden": true,
+                    "help_text": "If you're running offline, Nextflow will not be able to fetch the yaml files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
+                    "default": "https://raw.githubusercontent.com/nf-core/references-assets/main"
                 },
-                "build_only_index": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-download",
-                    "description": "Only built references.",
-                    "help_text": "Set this parameter, if you wish to compute and save all computed reference files. No alignment or any other downstream steps will be performed."
+                "references": {
+                    "format": "file-path",
+                    "type": "string",
+                    "description": "Path to a yaml reference genome file.",
+                    "fa_icon": "fas fa-book",
+                    "help_text": "Use this parameter to specify the path to a yaml reference genome file.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
+                    "default": "/path/to/references"
                 },
                 "download_cache": {
                     "type": "boolean",
                     "fa_icon": "fas fa-download",
                     "description": "Download annotation cache.",
                     "help_text": "Set this parameter, if you wish to download annotation cache.\nUsing this parameter will download cache even if --snpeff_cache and --vep_cache are provided."
-                }
-            },
-            "fa_icon": "fas fa-dna"
-        },
-        "reference_genome_options": {
-            "title": "Reference genome options",
-            "type": "object",
-            "fa_icon": "fas fa-dna",
-            "description": "Reference genome related files and options required for the workflow. If you use AWS iGenomes, this has already been set for you appropriately.",
-            "properties": {
-                "genome": {
+                },
+                "ascat_alleles": {
                     "type": "string",
-                    "description": "Name of iGenomes reference.",
-                    "default": "GATK.GRCh38",
-                    "fa_icon": "fas fa-book",
-                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+                    "fa_icon": "fas fa-file",
+                    "description": "Path to ASCAT allele zip file.",
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "ascat_genome": {
                     "type": "string",
                     "description": "ASCAT genome.",
-                    "help_text": "Must be set to run ASCAT, either hg19 or hg38.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.",
+                    "help_text": "Must be set to run ASCAT, either hg19 or hg38.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.",
                     "enum": ["hg19", "hg38"]
                 },
-                "ascat_alleles": {
-                    "type": "string",
-                    "fa_icon": "fas fa-file",
-                    "description": "Path to ASCAT allele zip file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
-                },
                 "ascat_loci": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
                     "description": "Path to ASCAT loci zip file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "ascat_loci_gc": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
                     "description": "Path to ASCAT GC content correction file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "ascat_loci_rt": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
                     "description": "Path to ASCAT RT (replictiming) correction file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "bwa": {
                     "type": "string",
                     "fa_icon": "fas fa-copy",
                     "description": "Path to BWA mem indices.",
-                    "help_text": "If you wish to recompute indices available on igenomes, set `--bwa false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you wish to recompute indices available on igenomes, set `--bwa false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "bwamem2": {
                     "type": "string",
                     "fa_icon": "fas fa-copy",
                     "description": "Path to bwa-mem2 mem indices.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs."
                 },
                 "chr_dir": {
                     "type": "string",
                     "fa_icon": "fas fa-folder-open",
                     "description": "Path to chromosomes folder used with ControLFREEC.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "dbsnp": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
                     "description": "Path to dbsnp file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "dbsnp_tbi": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
                     "description": "Path to dbsnp index.",
-                    "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "dbsnp_vqsr": {
                     "type": "string",
                     "fa_icon": "fas fa-copy",
-                    "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "dict": {
                     "type": "string",
                     "fa_icon": "fas fa-file",
                     "description": "Path to FASTA dictionary file.",
-                    "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "dragmap": {
                     "type": "string",
                     "fa_icon": "fas fa-copy",
                     "description": "Path to dragmap indices.",
-                    "help_text": "If you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "fasta": {
                     "type": "string",
@@ -673,7 +662,7 @@
                     "mimetype": "text/plain",
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "description": "Path to FASTA genome file.",
-                    "help_text": "This parameter is *mandatory* if `--genome` is not specified.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.",
+                    "help_text": "This parameter is *mandatory* if `--genome` is not specified.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.",
                     "fa_icon": "fas fa-file"
                 },
                 "fasta_fai": {
                     "type": "string",
@@ -682,7 +671,7 @@
                     "format": "file-path",
                     "exists": true,
                     "mimetype": "text/plain",
-                    "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.",
+                    "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.",
                     "description": "Path to FASTA reference index."
                 },
                 "germline_resource": {
                     "type": "string",
@@ -692,7 +681,7 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to GATK Mutect2 Germline Resource File.",
-                    "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "germline_resource_tbi": {
                     "type": "string",
@@ -701,7 +690,7 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to GATK Mutect2 Germline Resource Index.",
-                    "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "known_indels": {
                     "type": "string",
@@ -710,7 +699,7 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to known indels file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "known_indels_tbi": {
                     "type": "string",
@@ -719,12 +708,12 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to known indels file index.",
-                    "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "known_indels_vqsr": {
                     "type": "string",
                     "fa_icon": "fas fa-book",
-                    "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes, this has already been set for you appropriately."
+                    "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "known_snps": {
                     "type": "string",
@@ -733,7 +722,7 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to known snps file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "known_snps_tbi": {
                     "type": "string",
@@ -742,12 +731,12 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to known snps file snps.",
-                    "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "known_snps_vqsr": {
                     "type": "string",
                     "fa_icon": "fas fa-book",
-                    "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).If you use AWS iGenomes, this has already been set for you appropriately."
+                    "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "mappability": {
                     "type": "string",
@@ -756,7 +745,16 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to Control-FREEC mappability file.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
+                },
+                "msisensorpro_scan": {
+                    "type": "string",
+                    "fa_icon": "fas fa-file",
+                    "format": "file-path",
+                    "exists": true,
+                    "mimetype": "text/plain",
+                    "description": "Path to MSISensorPro scan file.",
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "ngscheckmate_bed": {
                     "type": "string",
@@ -765,7 +763,7 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Path to SNP bed file for sample checking with NGSCheckMate",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "sentieon_dnascope_model": {
                     "type": "string",
@@ -774,7 +772,7 @@
                     "exists": true,
                     "mimetype": "text/plain",
                     "description": "Machine learning model for Sentieon Dnascope.",
-                    "help_text": " It is recommended to use DNAscope with a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. Sentieon can provide you with a model trained using a subset of the data from the GiAB truth-set found in https://github.com/genome-in-a-bottle. In addition, Sentieon can assist you in the creation of models using your own data, which will calibrate the specifics of your sequencing and bio-informatics processing.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": " It is recommended to use DNAscope with a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. Sentieon can provide you with a model trained using a subset of the data from the GiAB truth-set found in https://github.com/genome-in-a-bottle. In addition, Sentieon can assist you in the creation of models using your own data, which will calibrate the specifics of your sequencing and bio-informatics processing.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "snpeff_cache": {
                     "type": "string",
@@ -782,13 +780,13 @@
                     "fa_icon": "fas fa-cloud-download-alt",
                     "default": "s3://annotation-cache/snpeff_cache/",
                     "description": "Path to snpEff cache.",
-                    "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "snpeff_db": {
                     "type": "string",
                     "fa_icon": "fas fa-database",
                     "description": "snpEff DB version.",
-                    "help_text": "This is used to specify the database to be use to annotate with.\nAlternatively databases' names can be listed with the `snpEff databases`.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "This is used to specify the database to be used to annotate with.\nAlternatively databases' names can be listed with the `snpEff databases`.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "vep_cache": {
                     "type": "string",
@@ -796,25 +794,25 @@
                     "fa_icon": "fas fa-cloud-download-alt",
                     "default": "s3://annotation-cache/vep_cache/",
                     "description": "Path to VEP cache.",
-                    "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "vep_cache_version": {
                     "type": "string",
                     "fa_icon": "fas fa-tag",
                     "description": "VEP cache version.",
-                    "help_text": "Alternative cache version can be used to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "Alternative cache version can be used to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "vep_genome": {
                     "type": "string",
                     "fa_icon": "fas fa-microscope",
                     "description": "VEP genome.",
-                    "help_text": "This is used to specify the genome when looking for local cache, or cloud based cache.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "This is used to specify the genome when looking for local cache, or cloud based cache.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 },
                 "vep_species": {
                     "type": "string",
                     "fa_icon": "fas fa-microscope",
                     "description": "VEP species.",
-                    "help_text": "Alternatively species listed in Ensembl Genomes caches can be used.\n\nIf you use AWS iGenomes, this has already been set for you appropriately."
+                    "help_text": "Alternatively species listed in Ensembl Genomes caches can be used.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately."
                 }
             },
             "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/."
@@ -1016,9 +1014,6 @@
         {
             "$ref": "#/$defs/annotation"
         },
-        {
-            "$ref": "#/$defs/general_reference_genome_options"
-        },
         {
             "$ref": "#/$defs/reference_genome_options"
         },
diff --git a/subworkflows/local/annotation_cache_initialisation/main.nf b/subworkflows/local/annotation_cache_initialisation/main.nf
index 572bcfc43b..ddb6236d78 100644
--- a/subworkflows/local/annotation_cache_initialisation/main.nf
+++ b/subworkflows/local/annotation_cache_initialisation/main.nf
@@ -11,7 +11,7 @@ workflow ANNOTATION_CACHE_INITIALISATION {
     take:
     snpeff_enabled
-    snpeff_cache
+    snpeff_cache_in
     snpeff_db
     vep_enabled
     vep_cache
@@ -23,34 +23,50 @@ workflow ANNOTATION_CACHE_INITIALISATION {
     main:
     if (snpeff_enabled) {
-        def snpeff_annotation_cache_key = (snpeff_cache == "s3://annotation-cache/snpeff_cache/") ? "${snpeff_db}/" : ""
-        def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_db}"
-        def snpeff_cache_path_full = file("$snpeff_cache/$snpeff_cache_dir", type: 'dir')
-        if ( !snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory() ) {
-            if (snpeff_cache == "s3://annotation-cache/snpeff_cache/") {
-                error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.")
-            } else {
-                error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache}./n${help_message}")
+        snpeff_cache = snpeff_db.map { _id, snpeff_db_ ->
+            def snpeff_annotation_cache_key = snpeff_cache_in == "s3://annotation-cache/snpeff_cache/" ? "${snpeff_db_}/" : ""
+            def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_db_}"
+            def snpeff_cache_path_full = file("${snpeff_cache_in}/${snpeff_cache_dir}", type: 'dir')
+            if (!snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory()) {
+                if (snpeff_cache_in == "s3://annotation-cache/snpeff_cache/") {
+                    error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.")
+                }
+                else {
+                    error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache_in}.\n${help_message}")
+                }
             }
+            [[id: snpeff_db_], file("${snpeff_cache_in}/${snpeff_annotation_cache_key}")]
         }
-        snpeff_cache = Channel.fromPath(file("${snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true).collect()
-            .map{ cache -> [ [ id:"${snpeff_db}" ], cache ] }
-    } else snpeff_cache = []
+    }
+    else {
+        snpeff_cache = []
+    }

     if (vep_enabled) {
-        def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? "${vep_cache_version}_${vep_genome}/" : ""
-        def vep_species_suffix = vep_custom_args.contains("--merged") ? '_merged' : (vep_custom_args.contains("--refseq") ? '_refseq' : '')
-        def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}${vep_species_suffix}/${vep_cache_version}_${vep_genome}"
-        def vep_cache_path_full = file("$vep_cache/$vep_cache_dir", type: 'dir')
-        if ( !vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory() ) {
-            if (vep_cache == "s3://annotation-cache/vep_cache/") {
-                error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.")
-            } else {
-                error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}./n${help_message}")
+        ensemblvep_cache = vep_cache_version
+            .join(vep_species)
+            .join(vep_genome)
+            .groupTuple()
+            .map { _id, vep_cache_version_, vep_species_, vep_genome_ ->
+                def vep_annotation_cache_key = vep_cache == "s3://annotation-cache/vep_cache/" ? "${vep_cache_version_[0]}_${vep_genome_[0]}/" : ""
+                def vep_species_suffix = vep_custom_args.contains("--merged") ? '_merged' : (vep_custom_args.contains("--refseq") ? '_refseq' : '')
+                def vep_cache_dir = "${vep_annotation_cache_key}${vep_species_[0]}${vep_species_suffix}/${vep_cache_version_[0]}_${vep_genome_[0]}"
+                def vep_cache_path_full = file("${vep_cache}/${vep_cache_dir}", type: 'dir')
+
+                if (!vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory()) {
+                    if (vep_cache == "s3://annotation-cache/vep_cache/") {
+                        error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.")
+                    }
+                    else {
+                        error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}.\n${help_message}")
+                    }
+                }
+                [file("${vep_cache}/${vep_annotation_cache_key}")]
             }
-        }
-        ensemblvep_cache = Channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect()
-    } else ensemblvep_cache = []
+    }
+    else {
+        ensemblvep_cache = []
+    }

     emit:
     ensemblvep_cache // channel: [ meta, cache ]
diff --git a/subworkflows/local/bam_baserecalibrator/main.nf b/subworkflows/local/bam_baserecalibrator/main.nf
index 285ad6b856..0b64706225 100644
--- a/subworkflows/local/bam_baserecalibrator/main.nf
+++ b/subworkflows/local/bam_baserecalibrator/main.nf
@@ -21,27 +21,32 @@ workflow BAM_BASERECALIBRATOR {
     versions = Channel.empty()

     // Combine cram and intervals for spread and gather strategy
-    cram_intervals = cram.combine(intervals)
-        // Move num_intervals to meta map
-        .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals ] }
+    // Move num_intervals to meta map
+    cram_intervals = cram
+        .combine(intervals)
+        .map { meta, cram_, crai_, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram_, crai_, intervals_] }

     // RUN BASERECALIBRATOR
-    GATK4_BASERECALIBRATOR(cram_intervals, fasta.map{ meta, it -> [ it ] }, fasta_fai.map{ meta, it -> [ it ] }, dict.map{ meta, it -> [ it ] }, known_sites, known_sites_tbi)
+    GATK4_BASERECALIBRATOR(cram_intervals, fasta, fasta_fai, dict, known_sites, known_sites_tbi)

     // Figuring out if there is one or more table(s) from the same sample
-    table_to_merge = GATK4_BASERECALIBRATOR.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{
-        // Use meta.num_intervals to asses number of intervals
-        single: it[0].num_intervals <= 1
-        multiple: it[0].num_intervals > 1
-    }
+    // Use meta.num_intervals to assess number of intervals
+    table_to_merge = GATK4_BASERECALIBRATOR.out.table
+        .map { meta, table -> [groupKey(meta, meta.num_intervals), table] }
+        .groupTuple()
+        .branch {
+            single: it[0].num_intervals <= 1
+            multiple: it[0].num_intervals > 1
+        }

     // Only when using intervals
     GATK4_GATHERBQSRREPORTS(table_to_merge.multiple)

     // Mix intervals and no_intervals channels together
-    table_bqsr = GATK4_GATHERBQSRREPORTS.out.table.mix(table_to_merge.single.map{ meta, table -> [ meta, table[0] ] })
-        // Remove no longer necessary field: num_intervals
-        .map{ meta, table -> [ meta - meta.subMap('num_intervals'), table ] }
+    // Remove no longer necessary field: num_intervals
+    table_bqsr = GATK4_GATHERBQSRREPORTS.out.table
+        .mix(table_to_merge.single.map { meta, table -> [meta, table[0]] })
+        .map { meta, table -> [meta - meta.subMap('num_intervals'), table] }

     // Gather versions of all tools used
     versions = versions.mix(GATK4_BASERECALIBRATOR.out.versions)
@@ -49,6 +54,5 @@ workflow BAM_BASERECALIBRATOR {

     emit:
     table_bqsr // channel: [ meta, table ]
-
     versions   // channel: [ versions.yml ]
 }
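The `groupKey(meta, meta.num_intervals)` wrapper above is what lets `groupTuple` emit a sample's tables as soon as all of its per-interval results have arrived, instead of waiting for the whole upstream channel to complete. A toy sketch of the pattern (meta and file names are made up):

    // One sample recalibrated over two intervals
    Channel
        .of([[id: 'sample1', num_intervals: 2], 'interval1.table'],
            [[id: 'sample1', num_intervals: 2], 'interval2.table'])
        .map { meta, table -> [groupKey(meta, meta.num_intervals), table] }
        .groupTuple() // emits once 2 items have been seen for this key
        .view()       // [[id:sample1, num_intervals:2], [interval1.table, interval2.table]]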
diff --git a/subworkflows/local/cram_sampleqc/main.nf b/subworkflows/local/cram_sampleqc/main.nf
index 504dc0a735..c74fa44db0 100644
--- a/subworkflows/local/cram_sampleqc/main.nf
+++ b/subworkflows/local/cram_sampleqc/main.nf
@@ -2,44 +2,42 @@ include { BAM_NGSCHECKMATE } from '../../../subworkflo
 include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../../../subworkflows/local/cram_qc_mosdepth_samtools/main'

 workflow CRAM_SAMPLEQC {
-
     take:
-    cram                        // channel: [ val(meta), cram, crai ]
-    ngscheckmate_bed            // channel: [ ngscheckmate_bed ]
-    fasta                       // channel: [ fasta ]
-    skip_baserecalibration      // boolean:
-    intervals_for_preprocessing // channel:
+    cram                        // channel: [ meta, cram, crai ]
+    ngscheckmate_bed            // channel: [ meta, ngscheckmate_bed ]
+    fasta                       // channel: [ meta, fasta ]
+    skip_baserecalibration      // boolean
+    intervals_for_preprocessing // channel: [ meta, intervals ]

     main:
     versions = Channel.empty()
-    reports = Channel.empty()
+    reports  = Channel.empty()

     if (!skip_baserecalibration) {
         CRAM_QC_RECAL(
             cram,
             fasta,
-            intervals_for_preprocessing)
+            intervals_for_preprocessing,
+        )

         // Gather QC reports
-        reports = CRAM_QC_RECAL.out.reports.collect{ meta, report -> report }
+        reports = CRAM_QC_RECAL.out.reports.collect { _meta, report -> report }

         // Gather used softwares versions
         versions = versions.mix(CRAM_QC_RECAL.out.versions)
     }

-    BAM_NGSCHECKMATE(cram.map{meta, cram, crai -> [meta, cram]}, ngscheckmate_bed.map{bed -> [[id: "ngscheckmate"], bed]}, fasta)
+    BAM_NGSCHECKMATE(cram.map { meta, cram_, _crai -> [meta, cram_] }, ngscheckmate_bed, fasta)
     versions = versions.mix(BAM_NGSCHECKMATE.out.versions.first())

     emit:
-    corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ]
-    matched = BAM_NGSCHECKMATE.out.matched // channel: [ meta, matched ]
-    all = BAM_NGSCHECKMATE.out.all // channel: [ meta, all ]
-    vcf = BAM_NGSCHECKMATE.out.vcf // channel: [ meta, vcf ]
-    pdf = BAM_NGSCHECKMATE.out.pdf // channel: [ meta, pdf ]
+    corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ]
+    matched     = BAM_NGSCHECKMATE.out.matched     // channel: [ meta, matched ]
+    all         = BAM_NGSCHECKMATE.out.all         // channel: [ meta, all ]
+    vcf         = BAM_NGSCHECKMATE.out.vcf         // channel: [ meta, vcf ]
+    pdf         = BAM_NGSCHECKMATE.out.pdf         // channel: [ meta, pdf ]
     reports
-
-    versions // channel: [ versions.yml ]
+    versions    // channel: [ versions.yml ]
 }
-
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
index 772af47b37..29993d4d68 100644
--- a/subworkflows/local/prepare_genome/main.nf
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -8,131 +8,89 @@
 // Condition is based on params.step and params.tools
 // If and extra condition exists, it's specified in comments

-include { BWA_INDEX as BWAMEM1_INDEX                } from '../../../modules/nf-core/bwa/index/main'
-include { BWAMEM2_INDEX                             } from '../../../modules/nf-core/bwamem2/index/main'
-include { DRAGMAP_HASHTABLE                         } from '../../../modules/nf-core/dragmap/hashtable/main'
-include { GATK4_CREATESEQUENCEDICTIONARY            } from '../../../modules/nf-core/gatk4/createsequencedictionary/main'
-include { MSISENSORPRO_SCAN                         } from '../../../modules/nf-core/msisensorpro/scan/main'
-include { SAMTOOLS_FAIDX                            } from '../../../modules/nf-core/samtools/faidx/main'
-include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_DBSNP                } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE    } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_KNOWN_INDELS         } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_KNOWN_SNPS           } from '../../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_PON                  } from '../../../modules/nf-core/tabix/tabix/main'
-include { UNTAR as UNTAR_CHR_DIR                    } from '../../../modules/nf-core/untar/main'
-include { UNZIP as UNZIP_ALLELES                    } from '../../../modules/nf-core/unzip/main'
-include { UNZIP as UNZIP_GC                         } from '../../../modules/nf-core/unzip/main'
-include { UNZIP as UNZIP_LOCI                       } from '../../../modules/nf-core/unzip/main'
-include { UNZIP as UNZIP_RT                         } from '../../../modules/nf-core/unzip/main'
+include { UNTAR as UNTAR_CHR_DIR } from '../../../modules/nf-core/untar'
+include { UNZIP as UNZIP_ALLELES } from '../../../modules/nf-core/unzip'
+include { UNZIP as UNZIP_GC      } from '../../../modules/nf-core/unzip'
+include { UNZIP as UNZIP_LOCI    } from '../../../modules/nf-core/unzip'
+include { UNZIP as UNZIP_RT      } from '../../../modules/nf-core/unzip'

 workflow PREPARE_GENOME {
     take:
-    ascat_alleles        // params.ascat_alleles
-    ascat_loci           // params.ascat_loci
-    ascat_loci_gc        // params.ascat_loci_gc
-    ascat_loci_rt        // params.ascat_loci_rt
-    bcftools_annotations // channel: [optional] bcftools annotations file
-    chr_dir              // params.chr_dir
-    dbsnp                // channel: [optional] dbsnp
-    fasta                // channel: [mandatory] fasta
-    germline_resource    // channel: [optional] germline_resource
-    known_indels         // channel: [optional] known_indels
-    known_snps           // channel: [optional] known_snps
-    pon                  // channel: [optional] pon
-
+    ascat_alleles // params.ascat_alleles
+    ascat_loci    // params.ascat_loci
+    ascat_loci_gc // params.ascat_loci_gc
+    ascat_loci_rt // params.ascat_loci_rt
+    chr_dir       // params.chr_dir

     main:
     versions = Channel.empty()

-    BWAMEM1_INDEX(fasta)     // If aligner is bwa-mem
-    BWAMEM2_INDEX(fasta)     // If aligner is bwa-mem2
-    DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap
-
-    GATK4_CREATESEQUENCEDICTIONARY(fasta)
-    MSISENSORPRO_SCAN(fasta)
-    SAMTOOLS_FAIDX(fasta, [ [ id:'no_fai' ], [] ] )
-
-    // the following are flattened and mapped in case the user supplies more than one value for the param
-    // written for KNOWN_INDELS, but preemptively applied to the rest
-    // [ file1, file2 ] becomes [ [ meta1, file1 ], [ meta2, file2 ] ]
-    // outputs are collected to maintain a single channel for relevant TBI files
-    TABIX_BCFTOOLS_ANNOTATIONS(bcftools_annotations.flatten().map{ it -> [ [ id:it.baseName ], it ] })
-    TABIX_DBSNP(dbsnp.flatten().map{ it -> [ [ id:it.baseName ], it ] })
-    TABIX_GERMLINE_RESOURCE(germline_resource.flatten().map{ it -> [ [ id:it.baseName ], it ] })
-    TABIX_KNOWN_SNPS(known_snps.flatten().map{ it -> [ [ id:it.baseName ], it ] } )
-    TABIX_KNOWN_INDELS(known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } )
-    TABIX_PON(pon.flatten().map{ it -> [ [ id:it.baseName ], it ] })
-
     // prepare ascat and controlfreec reference files
-    if (!ascat_alleles) allele_files = Channel.empty()
+    if (!ascat_alleles) {
+        allele_files = Channel.empty()
+    }
     else if (ascat_alleles.endsWith(".zip")) {
-        UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
-        allele_files = UNZIP_ALLELES.out.unzipped_archive.map{ it[1] }
+        UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map { it -> [[id: it[0].baseName], it] })
+        allele_files = UNZIP_ALLELES.out.unzipped_archive.map { it[1] }
         versions = versions.mix(UNZIP_ALLELES.out.versions)
-    } else allele_files = Channel.fromPath(ascat_alleles).collect()
-
-    if (!ascat_loci) loci_files = Channel.empty()
+    }
+    else {
+        allele_files = Channel.fromPath(ascat_alleles).collect()
+    }
+
+    if (!ascat_loci) {
+        loci_files = Channel.empty()
+    }
     else if (ascat_loci.endsWith(".zip")) {
-        UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
-        loci_files = UNZIP_LOCI.out.unzipped_archive.map{ it[1] }
+        UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map { it -> [[id: it[0].baseName], it] })
+        loci_files = UNZIP_LOCI.out.unzipped_archive.map { it[1] }
         versions = versions.mix(UNZIP_LOCI.out.versions)
-    } else loci_files = Channel.fromPath(ascat_loci).collect()
-
-    if (!ascat_loci_gc) gc_file = Channel.value([])
+    }
+    else {
+        loci_files = Channel.fromPath(ascat_loci).collect()
+    }
+
+    if (!ascat_loci_gc) {
+        gc_file = Channel.value([])
+    }
    else if (ascat_loci_gc.endsWith(".zip")) {
-        UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
-        gc_file = UNZIP_GC.out.unzipped_archive.map{ it[1] }
+        UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map { it -> [[id: it[0].baseName], it] })
+        gc_file = UNZIP_GC.out.unzipped_archive.map { it[1] }
         versions = versions.mix(UNZIP_GC.out.versions)
-    } else gc_file = Channel.fromPath(ascat_loci_gc).collect()
-
-    if (!ascat_loci_rt) rt_file = Channel.value([])
+    }
+    else {
+        gc_file = Channel.fromPath(ascat_loci_gc).collect()
+    }
+
+    if (!ascat_loci_rt) {
+        rt_file = Channel.value([])
+    }
     else if (ascat_loci_rt.endsWith(".zip")) {
-        UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
-        rt_file = UNZIP_RT.out.unzipped_archive.map{ it[1] }
+        UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map { it -> [[id: it[0].baseName], it] })
+        rt_file = UNZIP_RT.out.unzipped_archive.map { it[1] }
         versions = versions.mix(UNZIP_RT.out.versions)
-    } else rt_file = Channel.fromPath(ascat_loci_rt).collect()
-
-    if (!chr_dir) chr_files = Channel.value([])
+    }
+    else {
+        rt_file = Channel.fromPath(ascat_loci_rt).collect()
+    }
+
+    if (!chr_dir) {
+        chr_files = Channel.value([])
+    }
     else if (chr_dir.endsWith(".tar.gz")) {
-        UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map{ it -> [ [ id:it[0].baseName ], it ] })
-        chr_files = UNTAR_CHR_DIR.out.untar.map{ it[1] }
+        UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map { it -> [[id: it[0].baseName], it] })
+        chr_files = UNTAR_CHR_DIR.out.untar.map { it[1] }
         versions = versions.mix(UNTAR_CHR_DIR.out.versions)
-    } else chr_files = Channel.fromPath(chr_dir).collect()
-
-    // Gather versions of all tools used
-    versions = versions.mix(BWAMEM1_INDEX.out.versions)
-    versions = versions.mix(BWAMEM2_INDEX.out.versions)
-    versions = versions.mix(DRAGMAP_HASHTABLE.out.versions)
-    versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
-    versions = versions.mix(MSISENSORPRO_SCAN.out.versions)
-    versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
-    versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions)
-    versions = versions.mix(TABIX_DBSNP.out.versions)
-    versions = versions.mix(TABIX_GERMLINE_RESOURCE.out.versions)
-    versions = versions.mix(TABIX_KNOWN_INDELS.out.versions)
-    versions = versions.mix(TABIX_KNOWN_SNPS.out.versions)
-    versions = versions.mix(TABIX_PON.out.versions)
+    }
+    else {
+        chr_files = Channel.fromPath(chr_dir).collect()
+    }

     emit:
-    bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: bcftools_annotations.vcf.gz.tbi
-    bwa                      = BWAMEM1_INDEX.out.index.collect()                                      // path: bwa/*
-    bwamem2                  = BWAMEM2_INDEX.out.index.collect()                                      // path: bwamem2/*
-    hashtable                = DRAGMAP_HASHTABLE.out.hashmap.collect()                                // path: dragmap/*
-    dbsnp_tbi                = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect()                // path: dbsnb.vcf.gz.tbi
-    dict                     = GATK4_CREATESEQUENCEDICTIONARY.out.dict.collect()                      // path: genome.fasta.dict
-    fasta_fai                = SAMTOOLS_FAIDX.out.fai.collect()                                       // path: genome.fasta.fai
-    germline_resource_tbi    = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect()    // path: germline_resource.vcf.gz.tbi
-    known_snps_tbi           = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect()           // path: {known_indels*}.vcf.gz.tbi
-    known_indels_tbi         = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect()         // path: {known_indels*}.vcf.gz.tbi
-    msisensorpro_scan        = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] }                 // path: genome_msi.list
-    pon_tbi                  = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect()                  // path: pon.vcf.gz.tbi
-
-    allele_files // path: allele_files
-    chr_files    // path: chr_files
-    gc_file      // path: gc_file
-    loci_files   // path: loci_files
-    rt_file      // path: rt_file
-
-    versions // channel: [ versions.yml ]
+    allele_files // path: allele_files
+    chr_files    // path: chr_files
+    gc_file      // path: gc_file
+    loci_files   // path: loci_files
+    rt_file      // path: rt_file
+    versions     // channel: [ versions.yml ]
 }
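Each optional ASCAT/Control-FREEC reference above follows the same three-way pattern: unset yields an empty or placeholder channel, an archive goes through UNZIP/UNTAR, and a plain path is passed through. Condensed to the archive branch for one input (the param value is hypothetical):

    // e.g. params.ascat_alleles = 'G1000_alleles_hg38.zip'
    UNZIP_ALLELES(
        Channel.fromPath(file(params.ascat_alleles))
            .collect()
            .map { it -> [[id: it[0].baseName], it] } // wrap in the meta map UNZIP expects
    )
    allele_files = UNZIP_ALLELES.out.unzipped_archive.map { it[1] } // drop the meta again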
bcftools_annotations.vcf.gz.tbi - bwa = BWAMEM1_INDEX.out.index.collect() // path: bwa/* - bwamem2 = BWAMEM2_INDEX.out.index.collect() // path: bwamem2/* - hashtable = DRAGMAP_HASHTABLE.out.hashmap.collect() // path: dragmap/* - dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi - dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict.collect() // path: genome.fasta.dict - fasta_fai = SAMTOOLS_FAIDX.out.fai.collect() // path: genome.fasta.fai - germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi - known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list - pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi - - allele_files // path: allele_files - chr_files // path: chr_files - gc_file // path: gc_file - loci_files // path: loci_files - rt_file // path: rt_file - - versions // channel: [ versions.yml ] + allele_files // path: allele_files + chr_files // path: chr_files + gc_file // path: gc_file + loci_files // path: loci_files + rt_file // path: rt_file + versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index 27c4e9c145..939f7acece 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -6,113 +6,114 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' -include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' -include { GAWK as BUILD_INTERVALS } from '../../../modules/nf-core/gawk/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed' +include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix' workflow PREPARE_INTERVALS { take: - fasta_fai // mandatory [ fasta_fai ] - intervals // [ params.intervals ] - no_intervals // [ params.no_intervals ] - nucleotides_per_second - outdir - step + intervals // mandatory [ intervals_bed ] + no_intervals // boolean [ params.no_intervals ] + nucleotides_per_second // mandatory [ params.nucleotides_per_second ] + outdir // mandatory [ params.outdir ] + step // mandatory [ params.step ] main: versions = Channel.empty() - intervals_bed = Channel.empty() // List of [ bed, num_intervals ], one for each region - intervals_bed_gz_tbi = Channel.empty() // List of [ bed.gz, bed,gz.tbi, num_intervals ], one for each region - intervals_combined = Channel.empty() // Single bed file containing all intervals + // intervals_bed - List of [ bed, num_intervals ], 
one per region + // intervals_bed_gz_tbi - List of [ bed.gz, bed,gz.tbi, num_intervals ], one per region + // intervals_bed_combined - Single bed file containing all intervals + intervals_bed = Channel.empty() + intervals_bed_gz_tbi = Channel.empty() + intervals_bed_combined = Channel.empty() if (no_intervals) { - file("${outdir}/no_intervals.bed").text = "no_intervals\n" - file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n" + file("${outdir}/no_intervals.bed").text = "no_intervals\n" + file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n" file("${outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n" - intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] } - intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] } - intervals_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] } - } else if (step != 'annotate' && step != 'controlfreec') { - // If no interval/target file is provided, then generated intervals from FASTA file - if (!intervals) { - BUILD_INTERVALS(fasta_fai, []) - - intervals_combined = BUILD_INTERVALS.out.output - - CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }, nucleotides_per_second) - - intervals_bed = CREATE_INTERVALS_BED.out.bed - - versions = versions.mix(BUILD_INTERVALS.out.versions) - versions = versions.mix(CREATE_INTERVALS_BED.out.versions) - } else { - intervals_combined = Channel.fromPath(file(intervals)).map{it -> [ [ id:it.baseName ], it ] } - CREATE_INTERVALS_BED(file(intervals), nucleotides_per_second) + intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map { it -> [it, 0] } + intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map { it -> [it, 0] } + intervals_bed_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map { it -> [[id: it.simpleName], it] } + } + else if (step != 'annotate' && step != 'controlfreec') { + CREATE_INTERVALS_BED(intervals, nucleotides_per_second) - intervals_bed = CREATE_INTERVALS_BED.out.bed + intervals_bed = CREATE_INTERVALS_BED.out.bed - versions = versions.mix(CREATE_INTERVALS_BED.out.versions) + versions = versions.mix(CREATE_INTERVALS_BED.out.versions) - // If interval file is not provided as .bed, but e.g. as .interval_list then convert to BED format - if (intervals.endsWith(".interval_list")) { - GATK4_INTERVALLISTTOBED(intervals_combined) - intervals_combined = GATK4_INTERVALLISTTOBED.out.bed - versions = versions.mix(GATK4_INTERVALLISTTOBED.out.versions) - } + intervals_branch = intervals.branch { _meta, intervals_ -> + interval_list: intervals_.endsWith(".interval_list") + bed: true } - // Now for the intervals.bed the following operations are done: - // 1. Intervals file is split up into multiple bed files for scatter/gather - // 2. Each bed file is indexed + GATK4_INTERVALLISTTOBED(intervals_branch.interval_list) + // TODO: test this with an interval_list + intervals_bed_combined = intervals.mix(GATK4_INTERVALLISTTOBED.out.bed).last() + versions = versions.mix(GATK4_INTERVALLISTTOBED.out.versions) - // 1. 
- // 1. Intervals file is split up into multiple bed files for scatter/gather & grouping together small intervals - intervals_bed = intervals_bed.flatten() - .map{ intervalFile -> + // Now for the intervals.bed the following operations are done: + // 1/ Split up intervals bed file into multiple bed files for scatter/gather + // 2/ Tabix index each bed file + + // 1/ Split up intervals bed file into multiple bed files for scatter/gather + // Also group together small intervals + // And add the number of intervals to the channel + intervals_bed = intervals_bed + .flatten() + .map { intervals_ -> def duration = 0.0 - for (line in intervalFile.readLines()) { - final fields = line.split('\t') - if (fields.size() >= 5) duration += fields[4].toFloat() - else { - start = fields[1].toInteger() - end = fields[2].toInteger() - duration += (end - start) / nucleotides_per_second + intervals_ + .readLines() + .each { line -> + def fields = line.split('\t') + if (fields.size() >= 5) { + duration += fields[4].toFloat() + } + else { + def start = fields[1].toInteger() + def end = fields[2].toInteger() + duration += (end - start) / nucleotides_per_second + } } - } - [ duration, intervalFile ] - }.toSortedList({ a, b -> b[0] <=> a[0] }) - .flatten().collate(2).map{ duration, intervalFile -> intervalFile }.collect() - // Adding number of intervals as elements - .map{ it -> [ it, it.size() ] } + [duration, intervals_] + } + .toSortedList { a, b -> b[0] <=> a[0] } + .flatten() + .collate(2) + .map { _duration, intervals_ -> intervals_ } + .collect() + .map { intervals_ -> [intervals_, intervals_.size()] } .transpose() - // 2. Create bed.gz and bed.gz.tbi for each interval file. They are split by region (see above) - TABIX_BGZIPTABIX_INTERVAL_SPLIT(intervals_bed.map{ file, num_intervals -> [ [ id:file.baseName], file ] }) + // 2/ Tabix index each bed file + TABIX_BGZIPTABIX_INTERVAL_SPLIT(intervals_bed.map { intervals_, _num_intervals -> [[id: intervals_.baseName], intervals_] }) - intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_tbi.map{ meta, bed, tbi -> [ bed, tbi ] }.toList() - // Adding number of intervals as elements - .map{ it -> [ it, it.size() ] } + intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_tbi + .map { _meta, intervals_gz_, intervals_gz_tbi_ -> [intervals_gz_, intervals_gz_tbi_] } + .toList() + .map { it -> [it, it.size()] } .transpose() versions = versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) } - TABIX_BGZIPTABIX_INTERVAL_COMBINED(intervals_combined) + TABIX_BGZIPTABIX_INTERVAL_COMBINED(intervals_bed_combined) versions = versions.mix(TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.versions) - intervals_bed_combined = intervals_combined.map{meta, bed -> bed }.collect() - intervals_bed_gz_tbi_combined = TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.gz_tbi.map{meta, gz, tbi -> [gz, tbi] }.collect() + // intervals_bed and intervals_bed_gz_tbi are the intervals split for parallel execution, and contain the number of intervals + // intervals_bed_combined and intervals_bed_gz_tbi_combined are all intervals collected in one file + + intervals_bed_combined = intervals_bed_combined.map { _meta, intervals_ -> intervals_ }.collect() + intervals_bed_gz_tbi_combined = TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.gz_tbi.map { _meta, intervals_gz, intervals_gz_tbi -> [intervals_gz, intervals_gz_tbi] }.collect() emit: - // Intervals split for parallel execution intervals_bed // [ intervals.bed, num_intervals ] intervals_bed_gz_tbi // [ intervals.bed.gz, intervals.bed.gz.tbi, num_intervals ]
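// A self-contained sketch of the scatter bookkeeping above: weight each interval file,
// sort the pairs by weight (largest first), then attach the interval count to every
// element. The file names and durations below are made up for illustration.
workflow {
    Channel.of([120.5, 'chr1.bed'], [80.0, 'chr2.bed'], [1.5, 'chrM.bed'])
        .toSortedList { a, b -> b[0] <=> a[0] }   // longest-running files first
        .flatten()                                // single list -> stream of scalars
        .collate(2)                               // restore [duration, bed] pairs
        .map { _duration, bed -> bed }
        .collect()                                // gather all bed files into one list
        .map { beds -> [beds, beds.size()] }      // pair the list with num_intervals
        .transpose()                              // emit one [bed, num_intervals] per file
        .view()
}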
- // All intervals in one file intervals_bed_combined // [ intervals.bed ] intervals_bed_gz_tbi_combined // [ intervals.bed.gz, intervals.bed.gz.tbi] - - versions // [ versions.yml ] + versions // [ versions.yml ] } diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf index 1c0d80a1db..414e8a168a 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -1,176 +1,196 @@ -workflow SAMPLESHEET_TO_CHANNEL{ - +workflow SAMPLESHEET_TO_CHANNEL { take: - ch_from_samplesheet // - aligner // - ascat_alleles // - ascat_loci // - ascat_loci_gc // - ascat_loci_rt // - bcftools_annotations // - bcftools_annotations_tbi // - bcftools_header_lines // - build_only_index // - dbsnp // - fasta // - germline_resource // - intervals // - joint_germline // - joint_mutect2 // - known_indels // - known_snps // - no_intervals // - pon // - sentieon_dnascope_emit_mode // - sentieon_haplotyper_emit_mode // - seq_center // - seq_platform // - skip_tools // - snpeff_cache // - snpeff_db // - step // - tools // - umi_read_structure // - wes // + ch_from_samplesheet // + references // + aligner // + bcftools_annotations // + bcftools_annotations_tbi // + bcftools_header_lines // + joint_germline // + joint_mutect2 // + no_intervals // + sentieon_dnascope_emit_mode // + sentieon_haplotyper_emit_mode // + seq_center // + seq_platform // + skip_tools // + step // + tools // + umi_read_structure // + wes // main: - ch_from_samplesheet.dump(tag:"ch_from_samplesheet") - input_sample = ch_from_samplesheet.map{ meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller -> - // generate patient_sample key to group lanes together - [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller] ] - }.tap{ ch_with_patient_sample } // save the channel - .groupTuple() //group by patient_sample to get all lanes - .map { patient_sample, ch_items -> - // get number of lanes per sample - [ patient_sample, ch_items.size() ] - }.combine(ch_with_patient_sample, by: 0) // for each entry add numLanes - .map { patient_sample, num_lanes, ch_items -> - (meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items - if (meta.lane && fastq_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "fastq_gz", num_lanes: num_lanes.toInteger(), size: 1] - - if (step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] - else { - error("Samplesheet contains fastq files but step is `$step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + ch_from_samplesheet.dump(tag: "ch_from_samplesheet") + input_sample = ch_from_samplesheet + .map { meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller -> + // generate patient_sample key to group lanes together + [meta.patient + meta.sample, [meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller]] + } + .tap { ch_with_patient_sample } + .groupTuple() + .map { patient_sample, ch_items -> + // get number of lanes per sample + [patient_sample, ch_items.size()] + } + .combine(ch_with_patient_sample, by: 0) + .combine(references) + .map { patient_sample, num_lanes, ch_items, _meta2, fasta -> + def (meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items + if (meta.lane && fastq_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "fastq_gz", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [fastq_1, fastq_2]] + } + else { + error("Samplesheet contains fastq files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // start from TWO spring-files - one with R1 and one with R2 - } else if (meta.lane && spring_1 && spring_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "two_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] - - if (step == 'mapping') return [ meta, [ spring_1, spring_2 ] ] - else { - error("Samplesheet contains spring files (in columns `spring_1` and `spring_2`) but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (meta.lane && spring_1 && spring_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "two_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [spring_1, spring_2]] + } + else { + error("Samplesheet contains spring files (in columns `spring_1` and `spring_2`) but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // start from ONE spring-file containing both R1 and R2 - } else if (meta.lane && spring_1 && !spring_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "one_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] - - if (step == 'mapping') return [ meta, [ spring_1 ] ] - else { - error("Samplesheet contains a spring file (in columns `spring_1`) but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (meta.lane && spring_1 && !spring_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "one_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [spring_1]] + } + else { + error("Samplesheet contains a spring file (in columns `spring_1`) but step is `${step}`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // start from BAM - } else if (meta.lane && bam) { - if (step != 'mapping' && !bai) { - error("BAM index (bai) should be provided.") + else if (meta.lane && bam) { + if (step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = seq_center ? "CN:${seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" + + meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + + if (step != 'annotate') { + return [meta - meta.subMap('lane'), bam, bai] + } + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = seq_center ? "CN:${seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" - - meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] - - if (step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (table && cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), cram, crai, table] + } + else { + error("Samplesheet contains cram files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // recalibration - } else if (table && cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] - else { - error("Samplesheet contains cram files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (table && bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), bam, bai, table] + } + else { + error("Samplesheet contains bam files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // recalibration when skipping MarkDuplicates - } else if (table && bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] - else { - error("Samplesheet contains bam files but step is `$step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), cram, crai] + } + else { + error("Samplesheet contains cram files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // prepare_recalibration or variant_calling - } else if (cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] - else { - error("Samplesheet contains cram files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), bam, bai] + } + else { + error("Samplesheet contains bam files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` - } else if (bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (vcf) { + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (step == 'annotate') { + return [meta - meta.subMap('lane'), vcf] + } + else { + error("Samplesheet contains vcf files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // annotation - } else if (vcf) { - meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] - - if (step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] else { - error("Samplesheet contains vcf files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Missing or unknown field in csv file header. Please check your samplesheet") } - } else { - error("Missing or unknown field in csv file header. Please check your samplesheet") } - } - if (step != 'annotate' && tools && !build_only_index) { - // Two checks for ensuring that the pipeline stops with a meaningful error message if - // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and - // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
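// A standalone sketch of the guard pattern behind these two checks: `filter` plus
// `ifEmpty` raises an error only when no sample with the required status exists.
// The sample maps are made up; in sarek's samplesheet, status 0 is normal and 1 is tumor.
workflow {
    input_sample = Channel.of([[patient: 'p1', status: 0]], [[patient: 'p2', status: 0]])
    input_sample
        .filter { it[0].status == 1 }
        .ifEmpty { error('Only normal samples found, but a tumor-requiring tool was requested') }
}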
- input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples - if (!build_only_index) { - def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] - def tools_tumor_asked = [] - tools_tumor.each{ tool -> - if (tools.split(',').contains(tool)) tools_tumor_asked.add(tool) + if (step != 'annotate' && tools) { + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. + input_sample + .filter { it[0].status == 1 } + .ifEmpty { + // In this case, the sample-sheet contains no tumor-samples + def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def tools_tumor_asked = [] + tools_tumor.each { tool -> + if (tools.split(',').contains(tool)) { + tools_tumor_asked.add(tool) + } + } + if (!tools_tumor_asked.isEmpty()) { + error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + } } - if (!tools_tumor_asked.isEmpty()) { - error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + + input_sample + .filter { it[0].status == 0 } + .ifEmpty { + // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each { tool_requiring_normal_samples -> + if (tools.split(',').contains(tool_requiring_normal_samples)) { + requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + } } - } } - input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples - def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] - def requested_tools_requiring_normal_samples = [] - tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> - if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + // Fails when the intervals file has the wrong extension + + references.map { meta, _fasta -> + if (wes && step != 'annotate') { + if (meta.intervals_bed && !meta.intervals_bed.endsWith("bed")) { + error("Target file specified with `intervals_bed:` must be in BED format for targeted data") + } + else { + log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") + } } - if (!requested_tools_requiring_normal_samples.isEmpty()) { - error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + else if (meta.intervals_bed && !meta.intervals_bed.endsWith("bed") && !meta.intervals_bed.endsWith("list")) { + error("Intervals file must end with .bed, .list, or .interval_list") + } + return true } - } - - // Fails when wrongfull extension for intervals file - if (wes && !step == 'annotate') { - if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") - else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") - } else if (intervals && !intervals.endsWith("bed") && !intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list")
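// A note on the guard above: unary `!` binds tighter than `==` in Groovy, so the old
// spelling `!step == 'annotate'` parses as `(!step) == 'annotate'`, which compares a
// boolean to a string and can never be true; `step != 'annotate'` is the intended test.
// A quick standalone check:
def step = 'mapping'
assert (!step) == false         // `!` on a non-empty string yields false
assert step != 'annotate'       // the comparison the guard actually wants
assert !(step == 'annotate')    // equivalent, with explicit grouping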
if (step == 'mapping' && aligner.contains("dragmap") && !(skip_tools && skip_tools.split(',').contains("baserecalibrator"))) { log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") @@ -180,79 +200,75 @@ workflow SAMPLESHEET_TO_CHANNEL{ error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.") } - if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !(sentieon_haplotyper_emit_mode.contains('gvcf')))) { + if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.contains('gvcf'))) { error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") } // Fails or warns when missing files or params for ascat - if (tools && tools.split(',').contains('ascat')) { - if (!ascat_alleles) { - error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") - } - if (!ascat_loci) { - error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") - } - if (!ascat_loci_gc && !ascat_loci_rt) { - log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") - } - if (wes) { - log.warn("Default reference files not suited for running ASCAT on WES data. It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files + references.map { meta, _fasta -> + if (tools && tools.split(',').contains('ascat')) { + if (!meta.ascat_alleles) { + error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") + } + if (!meta.ascat_loci) { + error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") + } + if (!meta.ascat_loci_gc && !meta.ascat_loci_rt) { + log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") + } + if (wes) { + log.warn("Default reference files not suited for running ASCAT on WES data. It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") + } } + return true } // Warns when missing files or params for mutect2 - if (tools && tools.split(',').contains('mutect2')) { - if (!pon) { - log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") - } - if (!germline_resource) { - log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") - } - if (pon && pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { - log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") + references.map { meta, _fasta -> + if (tools && tools.split(',').contains('mutect2')) { + if (!meta.vcf_pon_vcf) { + log.warn("No Panel-of-Normals was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") + } + if (!meta.vcf_germline_resource_vcf) { + log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") + } + if (meta.vcf_pon_vcf && meta.vcf_pon_vcf.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { + log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technically similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") + } } + return true }
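// A minimal sketch of the references-channel validation pattern used in these checks:
// reference properties ride along in the meta map, the closure performs side-effect
// warnings, and `return true` gives the mapping closure a value. The meta keys mirror
// the diff (`vcf_pon_vcf`, `vcf_germline_resource_vcf`); the values are made up.
workflow {
    references = Channel.of([[id: 'GRCh38', vcf_pon_vcf: '', vcf_germline_resource_vcf: 'gnomad.vcf.gz'], 'genome.fasta'])
    references.map { meta, _fasta ->
        if (!meta.vcf_pon_vcf) {
            log.warn('No Panel-of-Normals was specified for Mutect2')
        }
        return true
    }
}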
// Fails when missing resources for baserecalibrator // Warns when missing resources for haplotypecaller - if (!dbsnp && !known_indels) { - if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!skip_tools || (skip_tools && !skip_tools.split(',').contains('baserecalibrator')))) { - error("Base quality score recalibration requires at least one resource file. Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") + references.map { meta, _fasta -> + if (!meta.vcf_dbsnp_vcf && !meta.vcf_known_indels_vcf) { + if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!skip_tools || (skip_tools && !skip_tools.split(',').contains('baserecalibrator')))) { + error("Base quality score recalibration requires at least one resource file. Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") + } + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope'))) { + log.warn("If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels` no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-") + } } - if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope'))) { - log.warn "If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-" + if (joint_germline && (!tools || !(tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')))) { + error("GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.") } - } - if (joint_germline && (!tools || !(tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')))) { - error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ") - } - if ( - tools && - ( - tools.split(',').contains('haplotypecaller') || - tools.split(',').contains('sentieon_haplotyper') || - tools.split(',').contains('sentieon_dnascope') - ) && - joint_germline && - ( !dbsnp || !known_indels || !known_snps || no_intervals ) - ) { - log.warn("""If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \n\ -Joint germline variant calling also requires intervals in order to genotype the samples. 
As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""") - } + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')) && joint_germline && (!meta.vcf_dbsnp_vcf || !meta.vcf_known_indels_vcf || !meta.vcf_known_snps_vcf || no_intervals)) { + log.warn( + """If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \n\ + Joint germline variant calling also requires intervals in order to genotype the samples. As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""" + ) + } - if (tools && - tools.split(',').contains('sentieon_dnascope') && joint_germline && - ( !sentieon_dnascope_emit_mode || !sentieon_dnascope_emit_mode.split(',').contains('gvcf') ) - ) { - error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") - } + if (tools && tools.split(',').contains('sentieon_dnascope') && joint_germline && (!sentieon_dnascope_emit_mode || !sentieon_dnascope_emit_mode.split(',').contains('gvcf'))) { + error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") + } - if (tools && - tools.split(',').contains('sentieon_haplotyper') && joint_germline && - ( !sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.split(',').contains('gvcf') ) - ) { - error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") + if (tools && tools.split(',').contains('sentieon_haplotyper') && joint_germline && (!sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.split(',').contains('gvcf'))) { + error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") + } + return true } @@ -268,8 +284,8 @@ Joint germline variant calling also requires intervals in order to genotype the // Fails when missing sex information for CNV tools if (tools && (tools.split(',').contains('ascat') || tools.split(',').contains('controlfreec'))) { - input_sample.map{ - if (it[0].sex == 'NA' ) { + input_sample.map { + if (it[0].sex == 'NA') { error("Please specify sex information for each sample in your samplesheet when using '--tools' with 'ascat' or 'controlfreec'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } } @@ -280,12 +296,6 @@ Joint germline variant calling also requires intervals in order to genotype the error("Please specify --bcftools_annotations, --bcftools_annotations_tbi, and --bcftools_header_lines, when using BCFTools annotations") } - // Fails when snpeff annotation is enabled but snpeff_db is not specified - if ((snpeff_cache && tools && (tools.split(',').contains("snpeff") || tools.split(',').contains('merge'))) && - !snpeff_db) { - error("Please specify --snpeff_db") - } - emit: input_sample - } +} diff --git a/subworkflows/local/samplesheet_to_channel/tests/main.nf.test 
b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test index e1c8682d27..e306558c77 100644 --- a/subworkflows/local/samplesheet_to_channel/tests/main.nf.test +++ b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test @@ -10,44 +10,32 @@ nextflow_workflow { } workflow { """ - // define inputs of the workflow here. Example: - input[0] = Channel.of([ - ['patient':'test', 'sample':'test', - 'sex':'XX', 'status':0, 'lane':'test_L1'], + input[0] = Channel.of([ // samplesheet + ['patient':'test', 'sample':'test', 'sex':'XX', 'status':0, 'lane':'test_L1'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true), [], [], [], [], [], [], [], [], [] ]) - input[1] = 'bwa-mem' // aligner - input[2] = [] // ascat_alleles - input[3] = [] // ascat_loci - input[4] = [] // ascat_loci_gc - input[5] = [] // ascat_loci_rt - input[6] = [] // bcftools_annotations - input[7] = [] // bcftools_annotations_tbi - input[8] = [] // bcftools_header_lines - input[9] = false // build_only_index - input[10] = [] // dbsnp - input[11] = [] // fasta - input[12] = [] // germline_resource - input[13] = [] // intervals - input[14] = false // joint_germline - input[15] = false // joint_mutect2 - input[16] = [] // known_indels - input[17] = [] // known_snps - input[18] = false // no_intervals - input[19] = [] // pon - input[20] = 'variant' // sentieon_dnascope_emit_mode - input[21] = 'variant' // sentieon_haplotyper_emit_mode - input[22] = '' // seq_center - input[23] = 'ILLUMINA' // seq_platform - input[24] = 'baserecalibrator' // skip_tools - input[25] = [] // snpeff_cache - input[26] = 'WBcel235.105' // snpeff_db - input[27] = 'mapping' // step - input[28] = 'strelka' // tools - input[29] = [] // umi_read_structure - input[30] = false // wes + input[1] = Channel.of([ // references + ['id':'genome', 'vcf':[]], + [] + ]) + input[2] = 'bwa-mem' // aligner + input[3] = [] // bcftools_annotations + input[4] = [] // bcftools_annotations_tbi + input[5] = [] // bcftools_header_lines + input[6] = false // joint_germline + input[7] = false // joint_mutect2 + input[8] = false // no_intervals + input[9] = 'variant' // sentieon_dnascope_emit_mode + input[10] = 'variant' // sentieon_haplotyper_emit_mode + input[11] = '' // seq_center + input[12] = 'ILLUMINA' // seq_platform + input[13] = 'baserecalibrator' // skip_tools + input[14] = 'mapping' // step + input[15] = 'strelka' // tools + input[16] = [] // umi_read_structure + input[17] = false // wes """ } } @@ -56,7 +44,5 @@ nextflow_workflow { assert workflow.success assert snapshot(workflow.out).match() } - } - } diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf index ce568284c7..8c5dd16709 100644 --- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -8,19 +8,19 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SAMPLESHEET_TO_CHANNEL } from '../samplesheet_to_channel' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { 
completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { logColours } from '../../nf-core/utils_nfcore_pipeline' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { SAMPLESHEET_TO_CHANNEL } from '../samplesheet_to_channel' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { logColours } from '../../nf-core/utils_nfcore_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -29,14 +29,14 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' */ workflow PIPELINE_INITIALISATION { - take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet + references // string: Path to references + step // string: The step to retrieve input from main: @@ -45,20 +45,20 @@ workflow PIPELINE_INITIALISATION { // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // // Validate parameters and generate parameter summary to stdout // - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, ) // @@ -66,100 +66,36 @@ workflow PIPELINE_INITIALISATION { // UTILS_NFCORE_PIPELINE(nextflow_cli_args) - // - // Custom validation for pipeline parameters - // - validateInputParameters() - - // Check input path parameters to see if they exist - def checkPathParamList = [ - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - params.bwa, - params.bwamem2, - params.bcftools_annotations, - params.bcftools_annotations_tbi, - params.bcftools_header_lines, - params.cf_chrom_len, - params.chr_dir, - params.cnvkit_reference, - params.dbnsfp, - params.dbnsfp_tbi, - params.dbsnp, - params.dbsnp_tbi, - params.dict, - params.dragmap, - params.fasta, - params.fasta_fai, - params.germline_resource, - params.germline_resource_tbi, - params.input, - params.intervals, - params.known_indels, - params.known_indels_tbi, - 
params.known_snps, - params.known_snps_tbi, - params.mappability, - params.multiqc_config, - params.ngscheckmate_bed, - params.pon, - params.pon_tbi, - params.sentieon_dnascope_model, - params.spliceai_indel, - params.spliceai_indel_tbi, - params.spliceai_snv, - params.spliceai_snv_tbi - ] - -// only check if we are using the tools -if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) -if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) - - // def retrieveInput(need_input, step, outdir) { - - params.input_restart = retrieveInput((!params.build_only_index && !params.input), params.step, params.outdir) - - ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? - Channel.fromList(samplesheetToList(params.input, "$projectDir/assets/schema_input.json")) : - Channel.fromList(samplesheetToList(params.input_restart, "$projectDir/assets/schema_input.json")) + ch_from_samplesheet = input + ? Channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) + : Channel.fromList(samplesheetToList(retrieveInput(step, outdir), "${projectDir}/assets/schema_input.json")) + + ch_from_references = Channel.fromList(samplesheetToList(references, "${projectDir}/subworkflows/nf-core/utils_references/schema_references.json")) SAMPLESHEET_TO_CHANNEL( ch_from_samplesheet, + ch_from_references, params.aligner, - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, params.bcftools_annotations, params.bcftools_annotations_tbi, params.bcftools_header_lines, - params.build_only_index, - params.dbsnp, - params.fasta, - params.germline_resource, - params.intervals, params.joint_germline, params.joint_mutect2, - params.known_indels, - params.known_snps, params.no_intervals, - params.pon, params.sentieon_dnascope_emit_mode, params.sentieon_haplotyper_emit_mode, params.seq_center, params.seq_platform, params.skip_tools, - params.snpeff_cache, - params.snpeff_db, params.step, params.tools, params.umi_read_structure, - params.wes) + params.wes, + ) emit: samplesheet = SAMPLESHEET_TO_CHANNEL.out.input_sample + references = ch_from_references versions } @@ -170,7 +106,6 @@ if (params.tools && (params.tools.split(',').contains('vep') || params.tools. */ workflow PIPELINE_COMPLETION { - take: email // string: email address email_on_fail // string: email address sent on pipeline failure @@ -197,7 +132,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - multiqc_report_list.getVal() + multiqc_report_list.getVal(), ) } @@ -208,7 +143,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -217,41 +152,7 @@ workflow PIPELINE_COMPLETION { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// Check and validate pipeline parameters -// -def validateInputParameters() { - genomeExistsError() -} - -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. 
single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } - - return [ metas[0], fastqs ] -} -// -// Exit pipeline if incorrect --genome key provided -// -def genomeExistsError() { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - error(error_string) - } -} // // Generate methods description for MultiQC // @@ -260,11 +161,11 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + ".", + ].join(' ').trim() return citation_text } @@ -274,9 +175,9 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "