diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c66c4c23c8..5910d0b0db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ on: env: NFT_DIFF: "pdiff" NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" - NFT_VER: "0.9.2" + NFT_VER: 0.9.2 NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -34,8 +34,8 @@ jobs: fail-fast: false matrix: NXF_VER: - - "24.04.2" - - "latest-everything" + - 24.04.2 + - latest-everything filter: ["workflow", "function", "pipeline"] # filter: ["process", "workflow", "function", "pipeline"] profile: ["conda", "docker", "singularity"] diff --git a/.gitignore b/.gitignore index 9cc2a80834..3a81dcfbdb 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ null/ test-datasets/ test.tap test.xml +.cursorrules +TODO.md diff --git a/.nf-core.yml b/.nf-core.yml index 7beff01213..1bf0c8a09b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,6 +4,8 @@ lint: files_exist: - .github/workflows/awsfulltest.yml - .github/workflows/awstest.yml + - conf/igenomes.config + - conf/igenomes_ignored.config - conf/modules.config files_unchanged: - .gitignore diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index afc253a919..0000000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,331 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. 
- Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GATK.GRCh37' { - ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_alleles_hg19.zip" - ascat_genome = 'hg19' - ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_loci_hg19.zip" - ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/GC_G1000_hg19.zip" - ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip" - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" - chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" - dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138.b37.vcf.gz' - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" - germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz.tbi" - intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" - known_snps = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" - known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.snps.high_confidence.b37.vcf.gz' - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" - known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' - mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" - ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" - snpeff_db = 'GRCh37.87' - vep_cache_version = '113' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - } - 'GATK.GRCh38' { - ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip" - ascat_genome = 'hg38' - ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_loci_hg38.zip" - ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip" - ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip" - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - cf_chrom_len = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" - chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" - dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" - dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" - germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz.tbi" - intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" - known_indels_vqsr = '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' - known_snps = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" - known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_omni2.5.hg38.vcf.gz' - mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" - ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" - pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" - pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" - sentieon_dnascope_model = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" - snpeff_db = 'GRCh38.105' - vep_cache_version = '113' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' - } - 'Ensembl.GRCh37' { - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - snpeff_db = 'GRCh37.87' - vep_cache_version = '113' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - } - 'NCBI.GRCh38' { - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" - snpeff_db = 'GRCh38.105' - vep_cache_version = '113' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' 
- } - 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - } - 'GRCm38' { - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - chr_dir = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Chromosomes" - dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" - dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" - intervals = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" - known_indels = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" - mappability = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Control-FREEC/GRCm38_68_mm10.gem" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - snpeff_db = 'GRCm38.99' - vep_cache_version = '102' - vep_genome = 'GRCm38' - vep_species = 'mus_musculus' - } - 'TAIR10' { - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - readme = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - } - 'EB2' { - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - snpeff_db = 'UMD3.1.75' - vep_cache_version = '94' - vep_genome = 'UMD3.1' - vep_species = 'bos_taurus' - } - 'WBcel235' { - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = 'WBcel235.105' - vep_cache_version = '113' - vep_genome = 'WBcel235' - vep_species = 'caenorhabditis_elegans' - } - 'CanFam3.1' { - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - snpeff_db = 'CanFam3.1.99' - vep_cache_version = '104' - vep_genome = 'CanFam3.1' - vep_species = 'canis_lupus_familiaris' - } - 'GRCz10' { - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - } - 'BDGP6' { - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - fasta = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - } - 'EquCab2' { - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - } - 'EB1' { - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - } - 'Gm01' { - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - } - 'IRGSP-1.0' { - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - } - 'CHIMP2.1.4' { - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - fasta = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - } - 'Rnor_5.0' { - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - } - 'Rnor_6.0' { - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - } - 'R64-1-1' { - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = 'R64-1-1.105' - vep_cache_version = '113' - vep_genome = 'R64-1-1' - vep_species = 'saccharomyces_cerevisiae' - } - 'EF2' { - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - } - 'Sbi1' { - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - } - 'AGPv3' { - bwa = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - } - 'hg38' { - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = 'GRCh38.105' - vep_cache_version = '113' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' - } - 'hg19' { - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - snpeff_db = 'GRCh37.87' - vep_cache_version = '113' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - } - 'mm10' { - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - snpeff_db = 'GRCm38.99' - vep_cache_version = '102' - vep_genome = 'GRCm38' - vep_species = 'mus_musculus' - } - 'bosTau8' { - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - } - 'ce10' { - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - } - 'canFam3' { - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - 
readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - } - 'danRer10' { - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - } - 'dm6' { - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - } - 'equCab2' { - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - } - 'galGal4' { - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - } - 'panTro4' { - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - } - 'rn6' { - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - } - 'sacCer3' { - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - } - 'susScr3' { - bwa = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - } - 'testdata.nf-core.sarek' { - dbsnp = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" - dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' - dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.dict" - fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta" - fasta_fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta.fai" - germline_resource = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" - intervals = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.interval_list" - known_indels = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" - known_indels_vqsr = '--resource:mills,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.vcf.gz' - ngscheckmate_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" - snpeff_db = 'WBcel235.105' - vep_cache_version = '113' - vep_genome = 'WBcel235' - vep_species = 'caenorhabditis_elegans' - } - } -} diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config deleted file mode 100644 index b4034d8243..0000000000 --- a/conf/igenomes_ignored.config +++ /dev/null @@ -1,9 +0,0 @@ -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Empty genomes dictionary to use when igenomes is ignored. ----------------------------------------------------------------------------------------- -*/ - -params.genomes = [:] diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index 1f57237be7..69835dcf78 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -15,26 +15,6 @@ process { - withName: 'BWAMEM1_INDEX' { - ext.when = { !params.bwa && params.step == "mapping" && (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem")} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "bwa", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'BWAMEM2_INDEX' { - ext.when = { !params.bwamem2 && params.step == "mapping" && params.aligner == "bwa-mem2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "bwamem2", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - withName: 'CNVKIT_ANTITARGET' { ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } publishDir = [ @@ -56,106 +36,6 @@ process { ] } - withName: 'DRAGMAP_HASHTABLE' { - ext.when = { !params.dragmap && params.step == "mapping" && params.aligner == "dragmap" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "dragmap", - saveAs: { params.save_reference || params.build_only_index ? 
it : null } - ] - } - - withName: 'GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dict" }, - pattern: "*dict", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'MSISENSORPRO_SCAN' { - ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/msi" }, - pattern: "*list", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'SAMTOOLS_FAIDX' { - ext.when = { !params.fasta_fai && params.step != "annotate" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/fai" }, - pattern: "*fai", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_BCFTOOLS_ANNOTATIONS' { - ext.when = { !params.bcftools_annotations_tbi && params.bcftools_annotations && params.tools && params.tools.split(',').contains('bcfann') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/bcfann" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? 
it : null } - ] - } - - withName: 'TABIX_DBSNP' { - ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope') || params.tools.split(',').contains('mutect2'))) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dbsnp" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_GERMLINE_RESOURCE' { - ext.when = { !params.germline_resource_tbi && params.germline_resource && params.tools && params.tools.split(',').contains('mutect2') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/germline_resource" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_KNOWN_INDELS' { - ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) ) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/known_indels" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? 
it : null } - ] - } - - withName: 'TABIX_KNOWN_SNPS' { - ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') )) ) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/known_snps" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - - withName: 'TABIX_PON' { - ext.when = { !params.pon_tbi && params.pon && params.tools && params.tools.split(',').contains('mutect2') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/pon" }, - pattern: "*vcf.gz.tbi", - saveAs: { params.save_reference || params.build_only_index ? it : null } - ] - } - withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { ext.when = { params.tools && params.tools.split(',').contains('ascat')} publishDir = [ diff --git a/conf/modules/prepare_intervals.config b/conf/modules/prepare_intervals.config index 815903b996..7004574ede 100644 --- a/conf/modules/prepare_intervals.config +++ b/conf/modules/prepare_intervals.config @@ -14,37 +14,14 @@ // PREPARE INTERVALS process { - - withName: 'BUILD_INTERVALS' { - ext.args = { "-v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }'" } - ext.suffix = { "bed" } - } - - withName: 'CREATE_INTERVALS_BED' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "*bed", - saveAs: { params.save_reference || params.build_only_index ? "intervals/${it}" : null } - ] - } - - withName: 'GATK4_INTERVALLISTTOBED' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "*bed", - saveAs: { params.save_reference || params.build_only_index ? 
"intervals/${it}" : null } + withName: 'NFCORE_SAREK:PREPARE_INTERVALS:PREPARE_INTERVALS:.*' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false ] } withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' { ext.prefix = {"${meta.id}"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "*bed.gz", - saveAs: { params.save_reference || params.build_only_index ? "intervals/${it}" : null } - ] } } diff --git a/conf/test.config b/conf/test.config index 5f38bfd90d..d5cf6986d1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,16 +27,16 @@ params { // Input data input = "${projectDir}/tests/csv/3.0/fastq_single.csv" - // small genome on igenomes - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - genome = 'testdata.nf-core.sarek' + // small reference genome + igenomes_base = 's3://nf-core-references/test_data/' + + // TODO: VERSION + references = 'https://raw.githubusercontent.com/nf-core/references-assets/main/genomes/Homo_sapiens/test/GRCh38_chr22.yml' // Small reference genome bcftools_annotations = "${params.modules_testdata_base_path}/genomics/sarscov2/illumina/vcf/test2.vcf.gz" bcftools_annotations_tbi = "${params.modules_testdata_base_path}/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" bcftools_header_lines = "${projectDir}/tests/config/bcfann_test_header.txt" - snpeff_cache = null - vep_cache = null // Sentieon sentieon_dnascope_model = "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" diff --git a/main.nf b/main.nf index effa97ef16..e0b47d22e0 100755 --- a/main.nf +++ b/main.nf @@ -19,103 +19,53 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS 
/ MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.ascat_alleles = getGenomeAttribute('ascat_alleles') -params.ascat_genome = getGenomeAttribute('ascat_genome') -params.ascat_loci = getGenomeAttribute('ascat_loci') -params.ascat_loci_gc = getGenomeAttribute('ascat_loci_gc') -params.ascat_loci_rt = getGenomeAttribute('ascat_loci_rt') -params.bwa = getGenomeAttribute('bwa') -params.bwamem2 = getGenomeAttribute('bwamem2') -params.cf_chrom_len = getGenomeAttribute('cf_chrom_len') -params.chr_dir = getGenomeAttribute('chr_dir') -params.dbsnp = getGenomeAttribute('dbsnp') -params.dbsnp_tbi = getGenomeAttribute('dbsnp_tbi') -params.dbsnp_vqsr = getGenomeAttribute('dbsnp_vqsr') -params.dict = getGenomeAttribute('dict') -params.dragmap = getGenomeAttribute('dragmap') -params.fasta = getGenomeAttribute('fasta') -params.fasta_fai = getGenomeAttribute('fasta_fai') -params.germline_resource = getGenomeAttribute('germline_resource') -params.germline_resource_tbi = getGenomeAttribute('germline_resource_tbi') -params.intervals = getGenomeAttribute('intervals') -params.known_indels = getGenomeAttribute('known_indels') -params.known_indels_tbi = getGenomeAttribute('known_indels_tbi') -params.known_indels_vqsr = getGenomeAttribute('known_indels_vqsr') -params.known_snps = getGenomeAttribute('known_snps') -params.known_snps_tbi = getGenomeAttribute('known_snps_tbi') -params.known_snps_vqsr = getGenomeAttribute('known_snps_vqsr') -params.mappability = getGenomeAttribute('mappability') -params.ngscheckmate_bed = getGenomeAttribute('ngscheckmate_bed') -params.pon = getGenomeAttribute('pon') -params.pon_tbi = getGenomeAttribute('pon_tbi') -params.sentieon_dnascope_model = getGenomeAttribute('sentieon_dnascope_model') -params.snpeff_db = getGenomeAttribute('snpeff_db') -params.vep_cache_version = getGenomeAttribute('vep_cache_version') -params.vep_genome = getGenomeAttribute('vep_genome') 
-params.vep_species = getGenomeAttribute('vep_species') -aligner = params.aligner +include { SAREK } from './workflows/sarek' +include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' +include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' +include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' +include { get_references_file } from './subworkflows/nf-core/utils_references' +include { get_references_value } from './subworkflows/nf-core/utils_references' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SAREK } from './workflows/sarek' -include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' -include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' -include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' -include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' -include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' - -// Initialize fasta file with meta map: -fasta = params.fasta ? 
Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() - -// Initialize file channels based on params, defined in the params.genomes[params.genome] scope -bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.empty() -bcftools_header_lines = params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty() -cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] -dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) -fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() -germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input -known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) -known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) -mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) -pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) -sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) - -// Initialize value channels based on params, defined in the params.genomes[params.genome] scope -ascat_genome = params.ascat_genome ?: Channel.empty() -dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty() -known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() -known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() -ngscheckmate_bed = params.ngscheckmate_bed ? 
Channel.value(params.ngscheckmate_bed) : Channel.empty() -snpeff_db = params.snpeff_db ?: Channel.empty() -vep_cache_version = params.vep_cache_version ?: Channel.empty() -vep_genome = params.vep_genome ?: Channel.empty() -vep_species = params.vep_species ?: Channel.empty() - -vep_extra_files = [] +workflow { + // SUBWORKFLOW: Run initialisation tasks + PIPELINE_INITIALISATION( + params.version, + params.validate_params, + args, + params.outdir, + params.input, + params.references, + params.step, + ) -if (params.dbnsfp && params.dbnsfp_tbi) { - vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) - vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) -} + // WORKFLOW: Run main workflow + NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet, PIPELINE_INITIALISATION.out.references, params.aligner) -if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { - vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) - vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + // SUBWORKFLOW: Run completion tasks + PIPELINE_COMPLETION( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_SAREK.out.multiqc_report, + ) } /* @@ -128,154 +78,172 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p workflow NFCORE_SAREK { take: samplesheet + references + aligner main: versions = Channel.empty() - // build indexes if needed - PREPARE_GENOME( - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - bcftools_annotations, - params.chr_dir, - dbsnp, - fasta, - germline_resource, - known_indels, - known_snps, - pon) + // References' files from the references yaml or params + ascat_alleles = 
get_references_file(references, params.ascat_alleles, 'ascat_alleles', params.igenomes_base) + ascat_loci = get_references_file(references, params.ascat_loci, 'ascat_loci', params.igenomes_base) + ascat_loci_gc = get_references_file(references, params.ascat_loci_gc, 'ascat_loci_gc', params.igenomes_base) + ascat_loci_rt = get_references_file(references, params.ascat_loci_rt, 'ascat_loci_rt', params.igenomes_base) + bwamem1_index = get_references_file(references, params.bwa, 'bwamem1_index', params.igenomes_base) + bwamem2_index = get_references_file(references, params.bwamem2, 'bwamem2_index', params.igenomes_base) + cf_chrom_len = get_references_file(references, params.cf_chrom_len, 'cf_chrom_len', params.igenomes_base) + chr_dir = get_references_file(references, params.chr_dir, 'chr_dir', params.igenomes_base) + dragmap_hashtable = get_references_file(references, params.dragmap, 'dragmap_hashtable', params.igenomes_base) + fasta = get_references_file(references, params.fasta, 'fasta', params.igenomes_base) + fasta_dict = get_references_file(references, params.dict, 'fasta_dict', params.igenomes_base) + fasta_fai = get_references_file(references, params.fasta_fai, 'fasta_fai', params.igenomes_base) + intervals_bed = get_references_file(references, params.intervals, 'intervals_bed', params.igenomes_base) + mappability = get_references_file(references, params.mappability, 'mappability', params.igenomes_base) + msisensorpro_scan = get_references_file(references, params.msisensorpro_scan, 'msisensorpro_scan', params.igenomes_base) + ngscheckmate_bed = get_references_file(references, params.ngscheckmate_bed, 'ngscheckmate_bed', params.igenomes_base) + sentieon_dnascope_model = get_references_file(references, params.sentieon_dnascope_model, 'sentieon_dnascope_model', params.igenomes_base) + + // References' values from the references yaml or params + ascat_genome = get_references_value(references, params.ascat_genome, 'ascat_genome') + snpeff_db = 
get_references_value(references, params.snpeff_db, 'snpeff_db') + vep_cache_version = get_references_value(references, params.vep_cache_version, 'vep_cache_version') + vep_genome = get_references_value(references, params.vep_genome, 'vep_genome') + vep_species = get_references_value(references, params.vep_species, 'vep_species') + + // References' VCFs and related from the references yaml or params + dbsnp = get_references_file(references, params.dbsnp, 'vcf_dbsnp_vcf', params.igenomes_base) + dbsnp_tbi = get_references_file(references, params.dbsnp_tbi, 'vcf_dbsnp_vcf_tbi', params.igenomes_base) + dbsnp_vqsr = get_references_value(references, params.dbsnp_vqsr, 'vcf_dbsnp_vcf_vqsr') + germline_resource = get_references_file(references, params.germline_resource, 'vcf_germline_resource_vcf', params.igenomes_base) + germline_resource_tbi = get_references_file(references, params.germline_resource_tbi, 'vcf_germline_resource_vcf_tbi', params.igenomes_base) + known_indels = get_references_file(references, params.known_indels, 'vcf_known_indels_vcf', params.igenomes_base) + known_indels_tbi = get_references_file(references, params.known_indels_tbi, 'vcf_known_indels_vcf_tbi', params.igenomes_base) + known_indels_vqsr = get_references_value(references, params.known_indels_vqsr, 'vcf_known_indels_vcf_vqsr') + known_snps = get_references_file(references, params.known_snps, 'vcf_known_snps_vcf', params.igenomes_base) + known_snps_tbi = get_references_file(references, params.known_snps_tbi, 'vcf_known_snps_vcf_tbi', params.igenomes_base) + known_snps_vqsr = get_references_value(references, params.known_snps_vqsr, 'vcf_known_snps_vcf_vqsr') + pon = get_references_file(references, params.pon, 'vcf_pon_vcf', params.igenomes_base) + pon_tbi = get_references_file(references, params.pon_tbi, 'vcf_pon_vcf_tbi', params.igenomes_base) - // Gather built indices or get them from the params - // Built from the fasta file: - dict = params.dict ? 
Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() - : PREPARE_GENOME.out.dict - fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).map{ it -> [ [id:'fai'], it ] }.collect() - : PREPARE_GENOME.out.fasta_fai - bwa = params.bwa ? Channel.fromPath(params.bwa).map{ it -> [ [id:'bwa'], it ] }.collect() - : PREPARE_GENOME.out.bwa - bwamem2 = params.bwamem2 ? Channel.fromPath(params.bwamem2).map{ it -> [ [id:'bwamem2'], it ] }.collect() - : PREPARE_GENOME.out.bwamem2 - dragmap = params.dragmap ? Channel.fromPath(params.dragmap).map{ it -> [ [id:'dragmap'], it ] }.collect() - : PREPARE_GENOME.out.hashtable + // known_sites is made by grouping both the dbsnp and the known snps/indels resources + // Which can either or both be optional + known_sites_indels = dbsnp.mix(known_indels).groupTuple().collect() + known_sites_indels_tbi = dbsnp_tbi.mix(known_indels_tbi).groupTuple().collect() + known_sites_snps = dbsnp.mix(known_snps).groupTuple().collect() + known_sites_snps_tbi = dbsnp_tbi.mix(known_snps_tbi).groupTuple().collect() // Gather index for mapping given the chosen aligner - index_alignment = (aligner == "bwa-mem" || aligner == "sentieon-bwamem") ? bwa : - aligner == "bwa-mem2" ? bwamem2 : - dragmap + index_alignment = aligner == "bwa-mem" || aligner == "sentieon-bwamem" + ? bwamem1_index + : aligner == "bwa-mem2" + ? bwamem2_index + : dragmap_hashtable - // TODO: add a params for msisensorpro_scan - msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan + bcftools_annotations = params.bcftools_annotations ? Channel.fromPath(params.bcftools_annotations).collect() : Channel.value([]) + bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? 
Channel.fromPath(params.bcftools_annotations_tbi).collect() : Channel.value([]) : Channel.value([]) + bcftools_header_lines = params.bcftools_header_lines ?: Channel.value([]) - // For ASCAT, extracted from zip or tar.gz files - allele_files = PREPARE_GENOME.out.allele_files - chr_files = PREPARE_GENOME.out.chr_files - gc_file = PREPARE_GENOME.out.gc_file - loci_files = PREPARE_GENOME.out.loci_files - rt_file = PREPARE_GENOME.out.rt_file + vep_extra_files = [] - // Tabix indexed vcf files - bcftools_annotations_tbi = params.bcftools_annotations ? params.bcftools_annotations_tbi ? Channel.fromPath(params.bcftools_annotations_tbi).collect() : PREPARE_GENOME.out.bcftools_annotations_tbi : Channel.value([]) - dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) - germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] //do not change to Channel.value([]), the check for its existence then fails for Getpileupsumamries - known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) - known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) - pon_tbi = params.pon ? params.pon_tbi ? 
Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : Channel.value([]) + if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) + } - // known_sites is made by grouping both the dbsnp and the known snps/indels resources - // Which can either or both be optional - known_sites_indels = dbsnp.concat(known_indels).collect() - known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() - known_sites_snps = dbsnp.concat(known_snps).collect() - known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() + if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + } // Build intervals if needed - PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) + PREPARE_INTERVALS(intervals_bed, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) // Intervals for speed up preprocessing/variant calling by spread/gather // [interval.bed] all intervals in one file - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? 
Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined intervals_bed_combined_for_variant_calling = PREPARE_INTERVALS.out.intervals_bed_combined // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) - intervals_for_preprocessing = params.wes ? - intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : - Channel.value([ [ id:'null' ], [] ]) - intervals = PREPARE_INTERVALS.out.intervals_bed // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather - intervals_and_num_intervals = intervals.map{ interval, num_intervals -> - if ( num_intervals < 1 ) [ [], num_intervals ] - else [ interval, num_intervals ] + intervals_for_preprocessing = params.wes + ? intervals_bed_combined.map { it -> [[id: it.baseName], it] }.collect() + : Channel.value([[id: 'null'], []]) + intervals = PREPARE_INTERVALS.out.intervals_bed + // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi + // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + intervals_and_num_intervals = intervals.map { interval, num_intervals -> + if (num_intervals < 1) { + [[], num_intervals] + } + else { + [interval, num_intervals] + } } - intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals -> - if ( num_intervals < 1 ) [ [], [], num_intervals ] - else [ intervals[0], intervals[1], num_intervals ] + intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map { intervals_, num_intervals -> + if (num_intervals < 1) { + [[], [], num_intervals] + } + else { + [intervals_[0], 
intervals_[1], num_intervals] + } } if (params.tools && params.tools.split(',').contains('cnvkit')) { if (params.cnvkit_reference) { cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect() - } else { + } + else { PREPARE_REFERENCE_CNVKIT(fasta, intervals_bed_combined) cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions) } - } else { + } + else { cnvkit_reference = Channel.value([]) } // Gather used softwares versions - versions = versions.mix(PREPARE_GENOME.out.versions) versions = versions.mix(PREPARE_INTERVALS.out.versions) - vep_fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] - - // Download cache - if (params.download_cache) { - // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache - ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) - snpeff_info = Channel.of([ [ id:"${params.snpeff_db}" ], params.snpeff_db ]) - DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) - snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache - vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } + vep_fasta = params.vep_include_fasta ? 
fasta.map { fasta_ -> [[id: fasta_.baseName], fasta_] } : [[id: 'null'], []] - versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) - } else { - // Looks for cache information either locally or on the cloud - ANNOTATION_CACHE_INITIALISATION( - (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), - params.snpeff_cache, - params.snpeff_db, - (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), - params.vep_cache, - params.vep_species, - params.vep_cache_version, - params.vep_genome, - params.vep_custom_args, - "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") + // Looks for cache information either locally or on the cloud + ANNOTATION_CACHE_INITIALISATION( + (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), + params.snpeff_cache, + snpeff_db, + (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), + params.vep_cache, + vep_species, + vep_cache_version, + vep_genome, + params.vep_custom_args, + "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.", + ) - snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache - vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache - } + snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache + vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache // // WORKFLOW: Run pipeline // - SAREK(samplesheet, - allele_files, + SAREK( + samplesheet, + ascat_alleles, bcftools_annotations, bcftools_annotations_tbi, bcftools_header_lines, cf_chrom_len, - chr_files, + chr_dir, cnvkit_reference, dbsnp, dbsnp_tbi, dbsnp_vqsr, - dict, + fasta_dict, fasta, fasta_fai, - 
gc_file, + ascat_loci_gc, germline_resource, germline_resource_tbi, index_alignment, @@ -291,86 +259,24 @@ workflow NFCORE_SAREK { known_sites_snps, known_sites_snps_tbi, known_snps_vqsr, - loci_files, + ascat_loci, mappability, msisensorpro_scan, ngscheckmate_bed, pon, pon_tbi, - rt_file, + ascat_loci_rt, sentieon_dnascope_model, snpeff_cache, + snpeff_db, vep_cache, vep_cache_version, vep_extra_files, vep_fasta, vep_genome, - vep_species + vep_species, ) + emit: multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow { - - main: - // - // SUBWORKFLOW: Run initialisation tasks - // - PIPELINE_INITIALISATION( - params.version, - params.validate_params, - params.monochrome_logs, - args, - params.outdir, - params.input - ) - - // - // WORKFLOW: Run main workflow - // - NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet) - - // - // SUBWORKFLOW: Run completion tasks - // - PIPELINE_COMPLETION( - params.email, - params.email_on_fail, - params.plaintext_email, - params.outdir, - params.monochrome_logs, - params.hook_url, - NFCORE_SAREK.out.multiqc_report - ) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// Get attribute from genome config file e.g. 
fasta -// - -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/modules.json b/modules.json index ad4fd57616..0be80b67ef 100644 --- a/modules.json +++ b/modules.json @@ -534,6 +534,11 @@ "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", "installed_by": ["subworkflows"] }, + "utils_references": { + "branch": "master", + "git_sha": "c18de39a419659720e2482df14df21affdc30f47", + "installed_by": ["subworkflows"] + }, "vcf_annotate_ensemblvep": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index ad42e6ad53..733cca37e1 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,19 +1,19 @@ process CREATE_INTERVALS_BED { - tag "$intervals" + tag "${intervals}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'biocontainers/gawk:5.1.0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/gawk:5.1.0' + : 'biocontainers/gawk:5.1.0'}" input: - path(intervals) - val(nucleotides_per_second) + tuple val(meta), path(intervals) + val nucleotides_per_second output: - path("*.bed") , emit: bed - path "versions.yml" , emit: versions + path ("*.bed"), emit: bed + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -47,7 +47,8 @@ process CREATE_INTERVALS_BED { gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') END_VERSIONS """ - } else if (intervals.toString().toLowerCase().endsWith("interval_list")) { + } + else if (intervals.toString().toLowerCase().endsWith("interval_list")) { """ grep -v '^@' ${intervals} | awk -vFS="\t" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); @@ -59,7 +60,8 @@ process CREATE_INTERVALS_BED { gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') END_VERSIONS """ - } else { + } + else { """ awk -vFS="[:-]" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); @@ -75,11 +77,9 @@ process CREATE_INTERVALS_BED { stub: def prefix = task.ext.prefix ?: "${intervals.baseName}" - def metrics = task.ext.metrics ?: "${prefix}.metrics" - // def prefix_basename = prefix.substring(0, prefix.lastIndexOf(".")) """ - touch ${prefix}.stub.bed + touch ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf index 1a29986265..f009c3d09e 100644 --- a/modules/nf-core/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -1,23 +1,23 @@ process GATK4_BASERECALIBRATOR { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0' + : 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0'}" input: tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path known_sites - path known_sites_tbi + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(known_sites) + tuple val(meta6), path(known_sites_tbi) output: tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -25,25 +25,26 @@ process GATK4_BASERECALIBRATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def interval_command = intervals ? "--intervals $intervals" : "" - def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + def interval_command = intervals ? "--intervals ${intervals}" : "" + def sites_command = known_sites.collect { "--known-sites ${it}" }.join(' ') def avail_mem = 3072 if (!task.memory) { - log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ BaseRecalibrator \\ - --input $input \\ + --input ${input} \\ --output ${prefix}.table \\ - --reference $fasta \\ - $interval_command \\ - $sites_command \\ + --reference ${fasta} \\ + ${interval_command} \\ + ${sites_command} \\ --tmp-dir . \\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index de5e6d4ca6..84dbdd3852 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,19 +11,56 @@ params { // Mandatory arguments // Input options - input = null // No default input - input_restart = null // No default automatic input - step = 'mapping' // Starts with mapping + input = null // No default input + input_restart = null // No default automatic input + step = 'mapping' // Starts with mapping // References - genome = 'GATK.GRCh38' - igenomes_base = 's3://ngi-igenomes/igenomes/' - snpeff_cache = 's3://annotation-cache/snpeff_cache/' - vep_cache = 's3://annotation-cache/vep_cache/' - igenomes_ignore = false - save_reference = false // Built references not saved - build_only_index = false // Only build the reference indexes - download_cache = false // Do not download annotation cache + genome = 'GATK/GRCh38' + igenomes_base = 's3://ngi-igenomes/igenomes/' + // TODO: VERSION + references_config_base = "https://raw.githubusercontent.com/nf-core/references-assets/main" + references = "${params.references_config_base}/igenomes/${genome}.yml" + snpeff_cache = 's3://annotation-cache/snpeff_cache/' + vep_cache = 's3://annotation-cache/vep_cache/' + download_cache = false // Do not download annotation cache + + // params for references in yaml file + ascat_alleles = null + ascat_genome = null + ascat_loci = null + ascat_loci_gc = null + ascat_loci_rt = null + bwa = null + bwamem2 = null + cf_chrom_len = null + chr_dir = null + 
dbsnp = null + dbsnp_tbi = null + dbsnp_vqsr = null + dict = null + dragmap = null + fasta = null + fasta_fai = null + germline_resource = null + germline_resource_tbi = null + intervals = null + known_indels = null + known_indels_tbi = null + known_indels_vqsr = null + known_snps = null + known_snps_tbi = null + known_snps_vqsr = null + mappability = null + msisensorpro_scan = null + ngscheckmate_bed = null + pon = null + pon_tbi = null + sentieon_dnascope_model = null + snpeff_db = null + vep_cache_version = null + vep_genome = null + vep_species = null // Main options no_intervals = false // Intervals will be built from the fasta file @@ -80,60 +117,60 @@ params { wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers // Annotation - bcftools_annotations = null // No extra annotation file - bcftools_annotations_tbi = null // No extra annotation file index - bcftools_header_lines = null // No header lines to be added to the VCF file - dbnsfp = null // No dbnsfp processed file - dbnsfp_consequence = null // No default consequence for dbnsfp plugin - dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin - dbnsfp_tbi = null // No dbnsfp processed file index - outdir_cache = null // No default outdir cache - spliceai_indel = null // No spliceai_indel file - spliceai_indel_tbi = null // No spliceai_indel file index - spliceai_snv = null // No spliceai_snv file - spliceai_snv_tbi = null // No spliceai_snv file index - vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP - vep_dbnsfp = null // dbnsfp plugin disabled within VEP - vep_include_fasta = false // Don't use fasta file for annotation with VEP - vep_loftee = null // loftee plugin disabled within VEP - vep_out_format = "vcf" - vep_spliceai = null // spliceai plugin disabled 
within VEP - vep_spliceregion = null // spliceregion plugin disabled within VEP - vep_version = "111.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins + bcftools_annotations = null // No extra annotation file + bcftools_annotations_tbi = null // No extra annotation file index + bcftools_header_lines = null // No header lines to be added to the VCF file + dbnsfp = null // No dbnsfp processed file + dbnsfp_consequence = null // No default consequence for dbnsfp plugin + dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin + dbnsfp_tbi = null // No dbnsfp processed file index + outdir_cache = null // No default outdir cache + spliceai_indel = null // No spliceai_indel file + spliceai_indel_tbi = null // No spliceai_indel file index + spliceai_snv = null // No spliceai_snv file + spliceai_snv_tbi = null // No spliceai_snv file index + vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP + vep_dbnsfp = null // dbnsfp plugin disabled within VEP + vep_include_fasta = false // Don't use fasta file for annotation with VEP + vep_loftee = null // loftee plugin disabled within VEP + vep_out_format = "vcf" + vep_spliceai = null // spliceai plugin disabled within VEP + vep_spliceregion = null // spliceregion plugin disabled within VEP + vep_version = "111.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null 
- help = false - help_full = false - show_hidden = false - version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null - test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3' - modules_testdata_base_path = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3' + modules_testdata_base_path = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines @@ -321,9 +358,6 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. 
Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -392,7 +426,6 @@ plugins { } validation { - defaultIgnoreParams = ["genomes"] lenientMode = true help { enabled = true @@ -437,7 +470,6 @@ includeConfig 'conf/modules/modules.config' // prepare reference includeConfig 'conf/modules/download_cache.config' -includeConfig 'conf/modules/prepare_genome.config' includeConfig 'conf/modules/prepare_intervals.config' // preprocessing @@ -477,4 +509,3 @@ includeConfig 'conf/modules/lofreq.config' //annotate includeConfig 'conf/modules/annotate.config' - diff --git a/nextflow_schema.json b/nextflow_schema.json index 5cdf35d555..fda766b430 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -365,7 +365,7 @@ "type": "string", "fa_icon": "fas fa-file", "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2", - "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK.GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-). \n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped." + "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK/GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-). \n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped." 
}, "pon_tbi": { "type": "string", @@ -536,10 +536,10 @@ } } }, - "general_reference_genome_options": { - "title": "General reference genome options", + "reference_genome_options": { + "title": "Reference genome options", "type": "object", - "description": "General options to interact with reference genomes.", + "description": "Reference genome related files and options required for the workflow. If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.", "default": "", "properties": { "igenomes_base": { @@ -549,122 +549,111 @@ "fa_icon": "fas fa-ban", "default": "s3://ngi-igenomes/igenomes/" }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. **NB** You can then run `Sarek` by specifying at least a FASTA genome file" + "genome": { + "type": "string", + "description": "Name of the reference genome in AWS iGenomes or nf-core/references.", + "default": "GATK/GRCh38", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using AWS iGenomes or nf-core/references, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GATK/GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, - "save_reference": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Save built references.", - "help_text": "Set this parameter, if you wish to save all computed reference files. This is useful to avoid re-computation on future runs." 
+ "references_config_base": { + "type": "string", + "fa_icon": "fas fa-users-cog", + "description": "Base directory for references yaml files", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the yaml files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "default": "https://raw.githubusercontent.com/nf-core/references-assets/main" }, - "build_only_index": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Only built references.", - "help_text": "Set this parameter, if you wish to compute and save all computed reference files. No alignment or any other downstream steps will be performed." + "references": { + "format": "file-path", + "type": "string", + "description": "path to reference genome", + "fa_icon": "fas fa-book", + "help_text": "Use this parameter to specify the path to a yaml reference genome file.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "default": "/path/to/references" }, "download_cache": { "type": "boolean", "fa_icon": "fas fa-download", "description": "Download annotation cache.", "help_text": "Set this parameter, if you wish to download annotation cache.\nUsing this parameter will download cache even if --snpeff_cache and --vep_cache are provided." - } - }, - "fa_icon": "fas fa-dna" - }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow. 
If you use AWS iGenomes, this has already been set for you appropriately.", - "properties": { - "genome": { + }, + "ascat_alleles": { "type": "string", - "description": "Name of iGenomes reference.", - "default": "GATK.GRCh38", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "fa_icon": "fas fa-file", + "description": "Path to ASCAT allele zip file.", + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "ascat_genome": { "type": "string", "description": "ASCAT genome.", - "help_text": "Must be set to run ASCAT, either hg19 or hg38.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.", + "help_text": "Must be set to run ASCAT, either hg19 or hg38.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.", "enum": ["hg19", "hg38"] }, - "ascat_alleles": { - "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to ASCAT allele zip file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." - }, "ascat_loci": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to ASCAT loci zip file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "ascat_loci_gc": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to ASCAT GC content correction file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." 
+ "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "ascat_loci_rt": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to ASCAT RT (replictiming) correction file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "bwa": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to BWA mem indices.", - "help_text": "If you wish to recompute indices available on igenomes, set `--bwa false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you wish to recompute indices available on igenomes, set `--bwa false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "bwamem2": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to bwa-mem2 mem indices.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. 
Combine with `--save_reference` to save for future runs." }, "chr_dir": { "type": "string", "fa_icon": "fas fa-folder-open", "description": "Path to chromosomes folder used with ControLFREEC.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "dbsnp": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to dbsnp file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "dbsnp_tbi": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to dbsnp index.", - "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "dbsnp_vqsr": { "type": "string", "fa_icon": "fas fa-copy", - "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "dict": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to FASTA dictionary file.", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. 
Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "dragmap": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to dragmap indices.", - "help_text": "If you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "fasta": { "type": "string", @@ -673,7 +662,7 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.", "fa_icon": "fas fa-file" }, "fasta_fai": { @@ -682,7 +671,7 @@ "format": "file-path", "exists": true, "mimetype": "text/plain", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. 
Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately.", "description": "Path to FASTA reference index." }, "germline_resource": { @@ -692,7 +681,7 @@ "exists": true, "mimetype": "text/plain", "description": "Path to GATK Mutect2 Germline Resource File.", - "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "germline_resource_tbi": { "type": "string", @@ -701,7 +690,7 @@ "exists": true, "mimetype": "text/plain", "description": "Path to GATK Mutect2 Germline Resource Index.", - "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. 
Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "known_indels": { "type": "string", @@ -710,7 +699,7 @@ "exists": true, "mimetype": "text/plain", "description": "Path to known indels file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "known_indels_tbi": { "type": "string", @@ -719,12 +708,12 @@ "exists": true, "mimetype": "text/plain", "description": "Path to known indels file index.", - "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "known_indels_vqsr": { "type": "string", "fa_icon": "fas fa-book", - "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes, this has already been set for you appropriately." + "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." 
}, "known_snps": { "type": "string", @@ -733,7 +722,7 @@ "exists": true, "mimetype": "text/plain", "description": "Path to known snps file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "known_snps_tbi": { "type": "string", @@ -742,12 +731,12 @@ "exists": true, "mimetype": "text/plain", "description": "Path to known snps file snps.", - "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "known_snps_vqsr": { "type": "string", "fa_icon": "fas fa-book", - "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).If you use AWS iGenomes, this has already been set for you appropriately." + "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "mappability": { "type": "string", @@ -756,7 +745,16 @@ "exists": true, "mimetype": "text/plain", "description": "Path to Control-FREEC mappability file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." 
+ }, + "msisensorpro_scan": { + "type": "string", + "fa_icon": "fas fa-file", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Path to MSISensorPro scan file.", + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "ngscheckmate_bed": { "type": "string", @@ -765,7 +763,7 @@ "exists": true, "mimetype": "text/plain", "description": "Path to SNP bed file for sample checking with NGSCheckMate", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "If you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "sentieon_dnascope_model": { "type": "string", @@ -774,7 +772,7 @@ "exists": true, "mimetype": "text/plain", "description": "Machine learning model for Sentieon Dnascope.", - "help_text": " It is recommended to use DNAscope with a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. Sentieon can provide you with a model trained using a subset of the data from the GiAB truth-set found in https://github.com/genome-in-a-bottle. In addition, Sentieon can assist you in the creation of models using your own data, which will calibrate the specifics of your sequencing and bio-informatics processing.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": " It is recommended to use DNAscope with a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. Sentieon can provide you with a model trained using a subset of the data from the GiAB truth-set found in https://github.com/genome-in-a-bottle. 
In addition, Sentieon can assist you in the creation of models using your own data, which will calibrate the specifics of your sequencing and bio-informatics processing.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "snpeff_cache": { "type": "string", @@ -782,13 +780,13 @@ "fa_icon": "fas fa-cloud-download-alt", "default": "s3://annotation-cache/snpeff_cache/", "description": "Path to snpEff cache.", - "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "snpeff_db": { "type": "string", "fa_icon": "fas fa-database", "description": "snpEff DB version.", - "help_text": "This is used to specify the database to be use to annotate with.\nAlternatively databases' names can be listed with the `snpEff databases`.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "This is used to specify the database to be use to annotate with.\nAlternatively databases' names can be listed with the `snpEff databases`.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "vep_cache": { "type": "string", @@ -796,25 +794,25 @@ "fa_icon": "fas fa-cloud-download-alt", "default": "s3://annotation-cache/vep_cache/", "description": "Path to VEP cache.", - "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}\n\nIf you use AWS iGenomes, this has already been set for you appropriately." 
+ "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "vep_cache_version": { "type": "string", "fa_icon": "fas fa-tag", "description": "VEP cache version.", - "help_text": "Alternative cache version can be used to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "Alternative cache version can be used to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "vep_genome": { "type": "string", "fa_icon": "fas fa-microscope", "description": "VEP genome.", - "help_text": "This is used to specify the genome when looking for local cache, or cloud based cache.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "This is used to specify the genome when looking for local cache, or cloud based cache.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." }, "vep_species": { "type": "string", "fa_icon": "fas fa-microscope", "description": "VEP species.", - "help_text": "Alternatively species listed in Ensembl Genomes caches can be used.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + "help_text": "Alternatively species listed in Ensembl Genomes caches can be used.\n\nIf you use AWS iGenomes or nf-core/references, this has already been set for you appropriately." 
} }, "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/." @@ -1016,9 +1014,6 @@ { "$ref": "#/$defs/annotation" }, - { - "$ref": "#/$defs/general_reference_genome_options" - }, { "$ref": "#/$defs/reference_genome_options" }, diff --git a/subworkflows/local/annotation_cache_initialisation/main.nf b/subworkflows/local/annotation_cache_initialisation/main.nf index 572bcfc43b..ddb6236d78 100644 --- a/subworkflows/local/annotation_cache_initialisation/main.nf +++ b/subworkflows/local/annotation_cache_initialisation/main.nf @@ -11,7 +11,7 @@ workflow ANNOTATION_CACHE_INITIALISATION { take: snpeff_enabled - snpeff_cache + snpeff_cache_in snpeff_db vep_enabled vep_cache @@ -23,34 +23,50 @@ workflow ANNOTATION_CACHE_INITIALISATION { main: if (snpeff_enabled) { - def snpeff_annotation_cache_key = (snpeff_cache == "s3://annotation-cache/snpeff_cache/") ? "${snpeff_db}/" : "" - def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_db}" - def snpeff_cache_path_full = file("$snpeff_cache/$snpeff_cache_dir", type: 'dir') - if ( !snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory() ) { - if (snpeff_cache == "s3://annotation-cache/snpeff_cache/") { - error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") - } else { - error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache}./n${help_message}") + snpeff_cache = snpeff_db.map { _id, snpeff_db_ -> + def snpeff_annotation_cache_key = snpeff_cache_in == "s3://annotation-cache/snpeff_cache/" ?
"${snpeff_db_}/" : "" + def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_db_}" + def snpeff_cache_path_full = file("${snpeff_cache_in}/${snpeff_cache_dir}", type: 'dir') + if (!snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory()) { + if (snpeff_cache_in == "s3://annotation-cache/snpeff_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } + else { + error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache_in}.\n${help_message}") + } } + [[id: snpeff_db_], file("${snpeff_cache_in}/${snpeff_annotation_cache_key}")] } - snpeff_cache = Channel.fromPath(file("${snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true).collect() - .map{ cache -> [ [ id:"${snpeff_db}" ], cache ] } - } else snpeff_cache = [] + } + else { + snpeff_cache = [] + } if (vep_enabled) { - def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? "${vep_cache_version}_${vep_genome}/" : "" - def vep_species_suffix = vep_custom_args.contains("--merged") ? '_merged' : (vep_custom_args.contains("--refseq") ?
'_refseq' : '') - def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}${vep_species_suffix}/${vep_cache_version}_${vep_genome}" - def vep_cache_path_full = file("$vep_cache/$vep_cache_dir", type: 'dir') - if ( !vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory() ) { - if (vep_cache == "s3://annotation-cache/vep_cache/") { - error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") - } else { - error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}./n${help_message}") + ensemblvep_cache = vep_cache_version + .join(vep_species) + .join(vep_genome) + .groupTuple() + .map { _id, vep_cache_version_, vep_species_, vep_genome_ -> + def vep_annotation_cache_key = vep_cache == "s3://annotation-cache/vep_cache/" ? "${vep_cache_version_[0]}_${vep_genome_[0]}/" : "" + def vep_species_suffix = vep_custom_args.contains("--merged") ? '_merged' : (vep_custom_args.contains("--refseq") ? 
'_refseq' : '') + def vep_cache_dir = "${vep_annotation_cache_key}${vep_species_[0]}${vep_species_suffix}/${vep_cache_version_[0]}_${vep_genome_[0]}" + def vep_cache_path_full = file("${vep_cache}/${vep_cache_dir}", type: 'dir') + + if (!vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory()) { + if (vep_cache == "s3://annotation-cache/vep_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } + else { + error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}.\n${help_message}") + } + } + [file("${vep_cache}/${vep_annotation_cache_key}")] } - } - ensemblvep_cache = Channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() - } else ensemblvep_cache = [] + } + else { + ensemblvep_cache = [] + } emit: ensemblvep_cache // channel: [ meta, cache ] diff --git a/subworkflows/local/bam_baserecalibrator/main.nf b/subworkflows/local/bam_baserecalibrator/main.nf index 285ad6b856..0b64706225 100644 --- a/subworkflows/local/bam_baserecalibrator/main.nf +++ b/subworkflows/local/bam_baserecalibrator/main.nf @@ -21,27 +21,32 @@ workflow BAM_BASERECALIBRATOR { versions = Channel.empty() // Combine cram and intervals for spread and gather strategy - cram_intervals = cram.combine(intervals) - // Move num_intervals to meta map - .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals ] } + // Move num_intervals to meta map + cram_intervals = cram + .combine(intervals) + .map { meta, cram_, crai_, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram_, crai_, intervals_] } // RUN BASERECALIBRATOR - GATK4_BASERECALIBRATOR(cram_intervals, fasta.map{ meta, it -> [ it ] }, fasta_fai.map{ meta, it -> [ it ] }, dict.map{ meta, it -> [ it ] }, known_sites, known_sites_tbi) +
GATK4_BASERECALIBRATOR(cram_intervals, fasta, fasta_fai, dict, known_sites, known_sites_tbi) // Figuring out if there is one or more table(s) from the same sample - table_to_merge = GATK4_BASERECALIBRATOR.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{ - // Use meta.num_intervals to asses number of intervals - single: it[0].num_intervals <= 1 - multiple: it[0].num_intervals > 1 - } + // Use meta.num_intervals to asses number of intervals + table_to_merge = GATK4_BASERECALIBRATOR.out.table + .map { meta, table -> [groupKey(meta, meta.num_intervals), table] } + .groupTuple() + .branch { + single: it[0].num_intervals <= 1 + multiple: it[0].num_intervals > 1 + } // Only when using intervals GATK4_GATHERBQSRREPORTS(table_to_merge.multiple) // Mix intervals and no_intervals channels together - table_bqsr = GATK4_GATHERBQSRREPORTS.out.table.mix(table_to_merge.single.map{ meta, table -> [ meta, table[0] ] }) - // Remove no longer necessary field: num_intervals - .map{ meta, table -> [ meta - meta.subMap('num_intervals'), table ] } + // Remove no longer necessary field: num_intervals + table_bqsr = GATK4_GATHERBQSRREPORTS.out.table + .mix(table_to_merge.single.map { meta, table -> [meta, table[0]] }) + .map { meta, table -> [meta - meta.subMap('num_intervals'), table] } // Gather versions of all tools used versions = versions.mix(GATK4_BASERECALIBRATOR.out.versions) @@ -49,6 +54,5 @@ workflow BAM_BASERECALIBRATOR { emit: table_bqsr // channel: [ meta, table ] - versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/cram_sampleqc/main.nf b/subworkflows/local/cram_sampleqc/main.nf index 504dc0a735..c74fa44db0 100644 --- a/subworkflows/local/cram_sampleqc/main.nf +++ b/subworkflows/local/cram_sampleqc/main.nf @@ -2,44 +2,42 @@ include { BAM_NGSCHECKMATE } from '../../../subworkflo include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../../../subworkflows/local/cram_qc_mosdepth_samtools/main' workflow 
CRAM_SAMPLEQC { - take: - cram // channel: [ val(meta), cram, crai ] - ngscheckmate_bed // channel: [ ngscheckmate_bed ] - fasta // channel: [ fasta ] - skip_baserecalibration // boolean: - intervals_for_preprocessing // channel: + cram // channel: [ meta, cram, crai ] + ngscheckmate_bed // channel: [ meta, ngscheckmate_bed ] + fasta // channel: [ meta, fasta ] + skip_baserecalibration // boolean + intervals_for_preprocessing // channel: [ meta, intervals ] main: versions = Channel.empty() - reports = Channel.empty() + reports = Channel.empty() if (!skip_baserecalibration) { CRAM_QC_RECAL( cram, fasta, - intervals_for_preprocessing) + intervals_for_preprocessing, + ) // Gather QC reports - reports = CRAM_QC_RECAL.out.reports.collect{ meta, report -> report } + reports = CRAM_QC_RECAL.out.reports.collect { _meta, report -> report } // Gather used softwares versions versions = versions.mix(CRAM_QC_RECAL.out.versions) } - BAM_NGSCHECKMATE(cram.map{meta, cram, crai -> [meta, cram]}, ngscheckmate_bed.map{bed -> [[id: "ngscheckmate"], bed]}, fasta) + BAM_NGSCHECKMATE(cram.map { meta, cram_, _crai -> [meta, cram_] }, ngscheckmate_bed, fasta) versions = versions.mix(BAM_NGSCHECKMATE.out.versions.first()) emit: - corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ] - matched = BAM_NGSCHECKMATE.out.matched // channel: [ meta, matched ] - all = BAM_NGSCHECKMATE.out.all // channel: [ meta, all ] - vcf = BAM_NGSCHECKMATE.out.vcf // channel: [ meta, vcf ] - pdf = BAM_NGSCHECKMATE.out.pdf // channel: [ meta, pdf ] + corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = BAM_NGSCHECKMATE.out.matched // channel: [ meta, matched ] + all = BAM_NGSCHECKMATE.out.all // channel: [ meta, all ] + vcf = BAM_NGSCHECKMATE.out.vcf // channel: [ meta, vcf ] + pdf = BAM_NGSCHECKMATE.out.pdf // channel: [ meta, pdf ] reports - - versions // channel: [ versions.yml ] + versions // channel: [ versions.yml ] } - diff --git 
a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 772af47b37..29993d4d68 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -8,131 +8,89 @@ // Condition is based on params.step and params.tools // If and extra condition exists, it's specified in comments -include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf-core/bwa/index/main' -include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' -include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable/main' -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' -include { MSISENSORPRO_SCAN } from '../../../modules/nf-core/msisensorpro/scan/main' -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' -include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_KNOWN_SNPS } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_PON } from '../../../modules/nf-core/tabix/tabix/main' -include { UNTAR as UNTAR_CHR_DIR } from '../../../modules/nf-core/untar/main' -include { UNZIP as UNZIP_ALLELES } from '../../../modules/nf-core/unzip/main' -include { UNZIP as UNZIP_GC } from '../../../modules/nf-core/unzip/main' -include { UNZIP as UNZIP_LOCI } from '../../../modules/nf-core/unzip/main' -include { UNZIP as UNZIP_RT } from '../../../modules/nf-core/unzip/main' +include { UNTAR as UNTAR_CHR_DIR } from '../../../modules/nf-core/untar' +include { UNZIP as UNZIP_ALLELES } from '../../../modules/nf-core/unzip' +include { 
UNZIP as UNZIP_GC } from '../../../modules/nf-core/unzip' +include { UNZIP as UNZIP_LOCI } from '../../../modules/nf-core/unzip' +include { UNZIP as UNZIP_RT } from '../../../modules/nf-core/unzip' workflow PREPARE_GENOME { take: - ascat_alleles // params.ascat_alleles - ascat_loci // params.ascat_loci - ascat_loci_gc // params.ascat_loci_gc - ascat_loci_rt // params.ascat_loci_rt - bcftools_annotations // channel: [optional] bcftools annotations file - chr_dir // params.chr_dir - dbsnp // channel: [optional] dbsnp - fasta // channel: [mandatory] fasta - germline_resource // channel: [optional] germline_resource - known_indels // channel: [optional] known_indels - known_snps // channel: [optional] known_snps - pon // channel: [optional] pon - + ascat_alleles // params.ascat_alleles + ascat_loci // params.ascat_loci + ascat_loci_gc // params.ascat_loci_gc + ascat_loci_rt // params.ascat_loci_rt + chr_dir // params.chr_dir main: versions = Channel.empty() - BWAMEM1_INDEX(fasta) // If aligner is bwa-mem - BWAMEM2_INDEX(fasta) // If aligner is bwa-mem2 - DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap - - GATK4_CREATESEQUENCEDICTIONARY(fasta) - MSISENSORPRO_SCAN(fasta) - SAMTOOLS_FAIDX(fasta, [ [ id:'no_fai' ], [] ] ) - - // the following are flattened and mapped in case the user supplies more than one value for the param - // written for KNOWN_INDELS, but preemptively applied to the rest - // [ file1, file2 ] becomes [ [ meta1, file1 ], [ meta2, file2 ] ] - // outputs are collected to maintain a single channel for relevant TBI files - TABIX_BCFTOOLS_ANNOTATIONS(bcftools_annotations.flatten().map{ it -> [ [ id:it.baseName ], it ] }) - TABIX_DBSNP(dbsnp.flatten().map{ it -> [ [ id:it.baseName ], it ] }) - TABIX_GERMLINE_RESOURCE(germline_resource.flatten().map{ it -> [ [ id:it.baseName ], it ] }) - TABIX_KNOWN_SNPS(known_snps.flatten().map{ it -> [ [ id:it.baseName ], it ] } ) - TABIX_KNOWN_INDELS(known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } ) - 
TABIX_PON(pon.flatten().map{ it -> [ [ id:it.baseName ], it ] }) - // prepare ascat and controlfreec reference files - if (!ascat_alleles) allele_files = Channel.empty() + if (!ascat_alleles) { + allele_files = Channel.empty() + } else if (ascat_alleles.endsWith(".zip")) { - UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - allele_files = UNZIP_ALLELES.out.unzipped_archive.map{ it[1] } + UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles)).collect().map { it -> [[id: it[0].baseName], it] }) + allele_files = UNZIP_ALLELES.out.unzipped_archive.map { it[1] } versions = versions.mix(UNZIP_ALLELES.out.versions) - } else allele_files = Channel.fromPath(ascat_alleles).collect() - - if (!ascat_loci) loci_files = Channel.empty() + } + else { + allele_files = Channel.fromPath(ascat_alleles).collect() + } + + if (!ascat_loci) { + loci_files = Channel.empty() + } else if (ascat_loci.endsWith(".zip")) { - UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - loci_files = UNZIP_LOCI.out.unzipped_archive.map{ it[1] } + UNZIP_LOCI(Channel.fromPath(file(ascat_loci)).collect().map { it -> [[id: it[0].baseName], it] }) + loci_files = UNZIP_LOCI.out.unzipped_archive.map { it[1] } versions = versions.mix(UNZIP_LOCI.out.versions) - } else loci_files = Channel.fromPath(ascat_loci).collect() - - if (!ascat_loci_gc) gc_file = Channel.value([]) + } + else { + loci_files = Channel.fromPath(ascat_loci).collect() + } + + if (!ascat_loci_gc) { + gc_file = Channel.value([]) + } else if (ascat_loci_gc.endsWith(".zip")) { - UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - gc_file = UNZIP_GC.out.unzipped_archive.map{ it[1] } + UNZIP_GC(Channel.fromPath(file(ascat_loci_gc)).collect().map { it -> [[id: it[0].baseName], it] }) + gc_file = UNZIP_GC.out.unzipped_archive.map { it[1] } versions = versions.mix(UNZIP_GC.out.versions) - } else gc_file 
= Channel.fromPath(ascat_loci_gc).collect() - - if (!ascat_loci_rt) rt_file = Channel.value([]) + } + else { + gc_file = Channel.fromPath(ascat_loci_gc).collect() + } + + if (!ascat_loci_rt) { + rt_file = Channel.value([]) + } else if (ascat_loci_rt.endsWith(".zip")) { - UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - rt_file = UNZIP_RT.out.unzipped_archive.map{ it[1] } + UNZIP_RT(Channel.fromPath(file(ascat_loci_rt)).collect().map { it -> [[id: it[0].baseName], it] }) + rt_file = UNZIP_RT.out.unzipped_archive.map { it[1] } versions = versions.mix(UNZIP_RT.out.versions) - } else rt_file = Channel.fromPath(ascat_loci_rt).collect() - - if (!chr_dir) chr_files = Channel.value([]) + } + else { + rt_file = Channel.fromPath(ascat_loci_rt).collect() + } + + if (!chr_dir) { + chr_files = Channel.value([]) + } else if (chr_dir.endsWith(".tar.gz")) { - UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) - chr_files = UNTAR_CHR_DIR.out.untar.map{ it[1] } + UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir)).collect().map { it -> [[id: it[0].baseName], it] }) + chr_files = UNTAR_CHR_DIR.out.untar.map { it[1] } versions = versions.mix(UNTAR_CHR_DIR.out.versions) - } else chr_files = Channel.fromPath(chr_dir).collect() - - // Gather versions of all tools used - versions = versions.mix(BWAMEM1_INDEX.out.versions) - versions = versions.mix(BWAMEM2_INDEX.out.versions) - versions = versions.mix(DRAGMAP_HASHTABLE.out.versions) - versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - versions = versions.mix(MSISENSORPRO_SCAN.out.versions) - versions = versions.mix(SAMTOOLS_FAIDX.out.versions) - versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions) - versions = versions.mix(TABIX_DBSNP.out.versions) - versions = versions.mix(TABIX_GERMLINE_RESOURCE.out.versions) - versions = versions.mix(TABIX_KNOWN_INDELS.out.versions) - versions = 
versions.mix(TABIX_KNOWN_SNPS.out.versions) - versions = versions.mix(TABIX_PON.out.versions) + } + else { + chr_files = Channel.fromPath(chr_dir).collect() + } emit: - bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: bcftools_annotations.vcf.gz.tbi - bwa = BWAMEM1_INDEX.out.index.collect() // path: bwa/* - bwamem2 = BWAMEM2_INDEX.out.index.collect() // path: bwamem2/* - hashtable = DRAGMAP_HASHTABLE.out.hashmap.collect() // path: dragmap/* - dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi - dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict.collect() // path: genome.fasta.dict - fasta_fai = SAMTOOLS_FAIDX.out.fai.collect() // path: genome.fasta.fai - germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi - known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list - pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi - - allele_files // path: allele_files - chr_files // path: chr_files - gc_file // path: gc_file - loci_files // path: loci_files - rt_file // path: rt_file - - versions // channel: [ versions.yml ] + allele_files // path: allele_files + chr_files // path: chr_files + gc_file // path: gc_file + loci_files // path: loci_files + rt_file // path: rt_file + versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index 27c4e9c145..939f7acece 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -6,113 +6,114 @@ // For all modules 
here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' -include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' -include { GAWK as BUILD_INTERVALS } from '../../../modules/nf-core/gawk/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed' +include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix' workflow PREPARE_INTERVALS { take: - fasta_fai // mandatory [ fasta_fai ] - intervals // [ params.intervals ] - no_intervals // [ params.no_intervals ] - nucleotides_per_second - outdir - step + intervals // mandatory [ intervals_bed ] + no_intervals // boolean [ params.no_intervals ] + nucleotides_per_second // mandatory [ params.nucleotides_per_second ] + outdir // mandatory [ params.outdir ] + step // mandatory [ params.step ] main: versions = Channel.empty() - intervals_bed = Channel.empty() // List of [ bed, num_intervals ], one for each region - intervals_bed_gz_tbi = Channel.empty() // List of [ bed.gz, bed,gz.tbi, num_intervals ], one for each region - intervals_combined = Channel.empty() // Single bed file containing all intervals + // intervals_bed - List of [ bed, num_intervals ], one per region + // intervals_bed_gz_tbi - List of [ bed.gz, bed,gz.tbi, num_intervals ], one per region + // intervals_bed_combined - Single bed file containing all 
intervals + intervals_bed = Channel.empty() + intervals_bed_gz_tbi = Channel.empty() + intervals_bed_combined = Channel.empty() if (no_intervals) { - file("${outdir}/no_intervals.bed").text = "no_intervals\n" - file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n" + file("${outdir}/no_intervals.bed").text = "no_intervals\n" + file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n" file("${outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n" - intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] } - intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] } - intervals_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] } - } else if (step != 'annotate' && step != 'controlfreec') { - // If no interval/target file is provided, then generated intervals from FASTA file - if (!intervals) { - BUILD_INTERVALS(fasta_fai, []) - - intervals_combined = BUILD_INTERVALS.out.output - - CREATE_INTERVALS_BED(intervals_combined.map{ meta, path -> path }, nucleotides_per_second) - - intervals_bed = CREATE_INTERVALS_BED.out.bed - - versions = versions.mix(BUILD_INTERVALS.out.versions) - versions = versions.mix(CREATE_INTERVALS_BED.out.versions) - } else { - intervals_combined = Channel.fromPath(file(intervals)).map{it -> [ [ id:it.baseName ], it ] } - CREATE_INTERVALS_BED(file(intervals), nucleotides_per_second) + intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed")).map { it -> [it, 0] } + intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map { it -> [it, 0] } + intervals_bed_combined = Channel.fromPath(file("${outdir}/no_intervals.bed")).map { it -> [[id: it.simpleName], it] } + } + else if (step != 'annotate' && step != 'controlfreec') { + CREATE_INTERVALS_BED(intervals, nucleotides_per_second) - intervals_bed = CREATE_INTERVALS_BED.out.bed + 
intervals_bed = CREATE_INTERVALS_BED.out.bed - versions = versions.mix(CREATE_INTERVALS_BED.out.versions) + versions = versions.mix(CREATE_INTERVALS_BED.out.versions) - // If interval file is not provided as .bed, but e.g. as .interval_list then convert to BED format - if (intervals.endsWith(".interval_list")) { - GATK4_INTERVALLISTTOBED(intervals_combined) - intervals_combined = GATK4_INTERVALLISTTOBED.out.bed - versions = versions.mix(GATK4_INTERVALLISTTOBED.out.versions) - } + intervals_branch = intervals.branch { _meta, intervals_ -> + interval_list: intervals_.endsWith(".interval_list") + bed: true } - // Now for the intervals.bed the following operations are done: - // 1. Intervals file is split up into multiple bed files for scatter/gather - // 2. Each bed file is indexed + GATK4_INTERVALLISTTOBED(intervals_branch.interval_list) + // TODO: test this with an interval_list + intervals_bed_combined = intervals.mix(GATK4_INTERVALLISTTOBED.out.bed).last() + versions = versions.mix(GATK4_INTERVALLISTTOBED.out.versions) - // 1. 
Intervals file is split up into multiple bed files for scatter/gather & grouping together small intervals - intervals_bed = intervals_bed.flatten() - .map{ intervalFile -> + // Now for the intervals.bed the following operations are done: + // 1/ Split up intervals bed file into multiple bed files for scatter/gather + // 2/ Tabix index each bed file + + // 1/ Split up intervals bed file into multiple bed files for scatter/gather + // Also group together small intervals + // And add the number of intervals to the channel + intervals_bed = intervals_bed + .flatten() + .map { intervals_ -> def duration = 0.0 - for (line in intervalFile.readLines()) { - final fields = line.split('\t') - if (fields.size() >= 5) duration += fields[4].toFloat() - else { - start = fields[1].toInteger() - end = fields[2].toInteger() - duration += (end - start) / nucleotides_per_second + intervals_ + .readLines() + .each { line -> + def fields = line.split('\t') + if (fields.size() >= 5) { + duration += fields[4].toFloat() + } + else { + def start = fields[1].toInteger() + def end = fields[2].toInteger() + duration += (end - start) / nucleotides_per_second + } } - } - [ duration, intervalFile ] - }.toSortedList({ a, b -> b[0] <=> a[0] }) - .flatten().collate(2).map{ duration, intervalFile -> intervalFile }.collect() - // Adding number of intervals as elements - .map{ it -> [ it, it.size() ] } + [duration, intervals_] + } + .toSortedList { a, b -> b[0] <=> a[0] } + .flatten() + .collate(2) + .map { _duration, intervals_ -> intervals_ } + .collect() + .map { intervals_ -> [intervals_, intervals_.size()] } .transpose() - // 2. Create bed.gz and bed.gz.tbi for each interval file. 
They are split by region (see above) - TABIX_BGZIPTABIX_INTERVAL_SPLIT(intervals_bed.map{ file, num_intervals -> [ [ id:file.baseName], file ] }) + // 2/ Tabix index each bed file + TABIX_BGZIPTABIX_INTERVAL_SPLIT(intervals_bed.map { intervals_, _num_intervals -> [[id: intervals_.baseName], intervals_] }) - intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_tbi.map{ meta, bed, tbi -> [ bed, tbi ] }.toList() - // Adding number of intervals as elements - .map{ it -> [ it, it.size() ] } + intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_tbi + .map { _meta, intervals_gz_, intervals_gz_tbi_ -> [intervals_gz_, intervals_gz_tbi_] } + .toList() + .map { it -> [it, it.size()] } .transpose() versions = versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) } - TABIX_BGZIPTABIX_INTERVAL_COMBINED(intervals_combined) + TABIX_BGZIPTABIX_INTERVAL_COMBINED(intervals_bed_combined) versions = versions.mix(TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.versions) - intervals_bed_combined = intervals_combined.map{meta, bed -> bed }.collect() - intervals_bed_gz_tbi_combined = TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.gz_tbi.map{meta, gz, tbi -> [gz, tbi] }.collect() + // intervals_bed and intervals_bed_gz_tbi are the intervals split for parallel execution, and contain the number of intervals + // intervals_bed_combined and intervals_bed_gz_tbi_combined are all intervals collected in one file + + intervals_bed_combined = intervals_bed_combined.map { _meta, intervals_ -> intervals_ }.collect() + intervals_bed_gz_tbi_combined = TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.gz_tbi.map { _meta, intervals_gz, intervals_gz_tbi -> [intervals_gz, intervals_gz_tbi] }.collect() emit: - // Intervals split for parallel execution intervals_bed // [ intervals.bed, num_intervals ] intervals_bed_gz_tbi // [ intervals.bed.gz, intervals.bed.gz.tbi, num_intervals ] - // All intervals in one file intervals_bed_combined // [ intervals.bed ] intervals_bed_gz_tbi_combined // [ intervals.bed.gz, 
intervals.bed.gz.tbi] - - versions // [ versions.yml ] + versions // [ versions.yml ] } diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf index 1c0d80a1db..414e8a168a 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -1,176 +1,196 @@ -workflow SAMPLESHEET_TO_CHANNEL{ - +workflow SAMPLESHEET_TO_CHANNEL { take: - ch_from_samplesheet // - aligner // - ascat_alleles // - ascat_loci // - ascat_loci_gc // - ascat_loci_rt // - bcftools_annotations // - bcftools_annotations_tbi // - bcftools_header_lines // - build_only_index // - dbsnp // - fasta // - germline_resource // - intervals // - joint_germline // - joint_mutect2 // - known_indels // - known_snps // - no_intervals // - pon // - sentieon_dnascope_emit_mode // - sentieon_haplotyper_emit_mode // - seq_center // - seq_platform // - skip_tools // - snpeff_cache // - snpeff_db // - step // - tools // - umi_read_structure // - wes // + ch_from_samplesheet // + references // + aligner // + bcftools_annotations // + bcftools_annotations_tbi // + bcftools_header_lines // + joint_germline // + joint_mutect2 // + no_intervals // + sentieon_dnascope_emit_mode // + sentieon_haplotyper_emit_mode // + seq_center // + seq_platform // + skip_tools // + step // + tools // + umi_read_structure // + wes // main: - ch_from_samplesheet.dump(tag:"ch_from_samplesheet") - input_sample = ch_from_samplesheet.map{ meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller -> - // generate patient_sample key to group lanes together - [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller] ] - }.tap{ ch_with_patient_sample } // save the channel - .groupTuple() //group by patient_sample to get all lanes - .map { patient_sample, ch_items -> - // get number of lanes per sample - [ patient_sample, ch_items.size() 
] - }.combine(ch_with_patient_sample, by: 0) // for each entry add numLanes - .map { patient_sample, num_lanes, ch_items -> - (meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items - if (meta.lane && fastq_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "fastq_gz", num_lanes: num_lanes.toInteger(), size: 1] - - if (step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] - else { - error("Samplesheet contains fastq files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + ch_from_samplesheet.dump(tag: "ch_from_samplesheet") + input_sample = ch_from_samplesheet + .map { meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller -> + // generate patient_sample key to group lanes together + [meta.patient + meta.sample, [meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller]] + } + .tap { ch_with_patient_sample } + .groupTuple() + .map { patient_sample, ch_items -> + // get number of lanes per sample + [patient_sample, ch_items.size()] + } + .combine(ch_with_patient_sample, by: 0) + .combine(references) + .map { patient_sample, num_lanes, ch_items, _meta2, fasta -> + def (meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items + if (meta.lane && fastq_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "fastq_gz", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [fastq_1, fastq_2]] + } + else { + error("Samplesheet contains fastq files but step is `${step}`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // start from TWO spring-files - one with R1 and one with R2 - } else if (meta.lane && spring_1 && spring_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "two_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] - - if (step == 'mapping') return [ meta, [ spring_1, spring_2 ] ] - else { - error("Samplesheet contains spring files (in columns `spring_1` and `spring_2`) but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (meta.lane && spring_1 && spring_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "two_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [spring_1, spring_2]] + } + else { + error("Samplesheet contains spring files (in columns `spring_1` and `spring_2`) but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // start from ONE spring-file containing both R1 and R2 - } else if (meta.lane && spring_1 && !spring_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "one_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] - - if (step == 'mapping') return [ meta, [ spring_1 ] ] - else { - error("Samplesheet contains a spring file (in columns `spring_1`) but step is `$step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (meta.lane && spring_1 && !spring_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "one_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [spring_1]] + } + else { + error("Samplesheet contains a spring file (in columns `spring_1`) but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // start from BAM - } else if (meta.lane && bam) { - if (step != 'mapping' && !bai) { - error("BAM index (bai) should be provided.") + else if (meta.lane && bam) { + if (step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = seq_center ? "CN:${seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" + + meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + + if (step != 'annotate') { + return [meta - meta.subMap('lane'), bam, bai] + } + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = seq_center ? 
"CN:${seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" - - meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] - - if (step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (table && cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), cram, crai, table] + } + else { + error("Samplesheet contains cram files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // recalibration - } else if (table && cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] - else { - error("Samplesheet contains cram files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (table && bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), bam, bai, table] + } + else { + error("Samplesheet contains bam files but step is `${step}`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // recalibration when skipping MarkDuplicates - } else if (table && bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] - else { - error("Samplesheet contains bam files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), cram, crai] + } + else { + error("Samplesheet contains cram files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // prepare_recalibration or variant_calling - } else if (cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] - else { - error("Samplesheet contains cram files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), bam, bai] + } + else { + error("Samplesheet contains bam files but step is `${step}`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` - } else if (bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] - - if (!(step == 'mapping' || step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + else if (vcf) { + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (step == 'annotate') { + return [meta - meta.subMap('lane'), vcf] + } + else { + error("Samplesheet contains vcf files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } } - - // annotation - } else if (vcf) { - meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] - - if (step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] else { - error("Samplesheet contains vcf files but step is `$step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + error("Missing or unknown field in csv file header. Please check your samplesheet") } - } else { - error("Missing or unknown field in csv file header. Please check your samplesheet") } - } - if (step != 'annotate' && tools && !build_only_index) { - // Two checks for ensuring that the pipeline stops with a meaningful error message if - // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and - // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
- input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples - if (!build_only_index) { - def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] - def tools_tumor_asked = [] - tools_tumor.each{ tool -> - if (tools.split(',').contains(tool)) tools_tumor_asked.add(tool) + if (step != 'annotate' && tools) { + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. + input_sample + .filter { it[0].status == 1 } + .ifEmpty { + // In this case, the sample-sheet contains no tumor-samples + def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def tools_tumor_asked = [] + tools_tumor.each { tool -> + if (tools.split(',').contains(tool)) { + tools_tumor_asked.add(tool) + } + } + if (!tools_tumor_asked.isEmpty()) { + error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + } } - if (!tools_tumor_asked.isEmpty()) { - error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + + input_sample + .filter { it[0].status == 0 } + .ifEmpty { + // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each { tool_requiring_normal_samples -> + if (tools.split(',').contains(tool_requiring_normal_samples)) { + requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + } + if 
(!requested_tools_requiring_normal_samples.isEmpty()) { + error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + } } - } } - input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples - def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] - def requested_tools_requiring_normal_samples = [] - tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> - if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + // Fails when wrongfull extension for intervals file + + references.map { meta, _fasta -> + if (wes && !step == 'annotate') { + if (meta.intervals_bed && !meta.intervals_bed.endsWith("bed")) { + error("Target file specified with `intervals_bed:` must be in BED format for targeted data") + } + else { + log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") + } } - if (!requested_tools_requiring_normal_samples.isEmpty()) { - error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + else if (meta.intervals_bed && !meta.intervals_bed.endsWith("bed") && !meta.intervals_bed.endsWith("list")) { + error("Intervals file must end with .bed, .list, or .interval_list") } + return true } - } - - // Fails when wrongfull extension for intervals file - if (wes && !step == 'annotate') { - if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") - else log.warn("Intervals file was provided without parameter `--wes`: 
Pipeline will assume this is Whole-Genome-Sequencing data.") - } else if (intervals && !intervals.endsWith("bed") && !intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") if (step == 'mapping' && aligner.contains("dragmap") && !(skip_tools && skip_tools.split(',').contains("baserecalibrator"))) { log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") @@ -180,79 +200,75 @@ workflow SAMPLESHEET_TO_CHANNEL{ error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.") } - if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !(sentieon_haplotyper_emit_mode.contains('gvcf')))) { + if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.contains('gvcf'))) { error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") } // Fails or warns when missing files or params for ascat - if (tools && tools.split(',').contains('ascat')) { - if (!ascat_alleles) { - error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") - } - if (!ascat_loci) { - error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") - } - if (!ascat_loci_gc && !ascat_loci_rt) { - log.warn("No LogRCorrection performed in ASCAT. 
For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") - } - if (wes) { - log.warn("Default reference files not suited for running ASCAT on WES data. It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") + references.map { meta, _fasta -> + if (tools && tools.split(',').contains('ascat')) { + if (!meta.ascat_alleles) { + error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") + } + if (!meta.ascat_loci) { + error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") + } + if (!meta.ascat_loci_gc && !meta.ascat_loci_rt) { + log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") + } + if (wes) { + log.warn("Default reference files not suited for running ASCAT on WES data. It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") + } } + return true } // Warns when missing files or params for mutect2 - if (tools && tools.split(',').contains('mutect2')) { - if (!pon) { - log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") - } - if (!germline_resource) { - log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") - } - if (pon && pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { - log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly 
recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") + references.map { meta, _fasta -> + if (tools && tools.split(',').contains('mutect2')) { + if (!meta.vcf_pon_vcf) { + log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") + } + if (!meta.vcf_germline_resource_vcf) { + log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") + } + if (meta.vcf_pon_vcf && meta.vcf_pon_vcf.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { + log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") + } } + return true } // Fails when missing resources for baserecalibrator // Warns when missing resources for haplotypecaller - if (!dbsnp && !known_indels) { - if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!skip_tools || (skip_tools && !skip_tools.split(',').contains('baserecalibrator')))) { - error("Base quality score recalibration requires at least one resource file. 
Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") + references.map { meta, _fasta -> + if (!meta.vcf_dbsnp_vcf && !meta.vcf_known_indels_vcf) { + if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!skip_tools || (skip_tools && !skip_tools.split(',').contains('baserecalibrator')))) { + error("Base quality score recalibration requires at least one resource file. Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") + } + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope'))) { + log.warn("If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-") + } } - if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope'))) { - log.warn "If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. 
For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-" + if (joint_germline && (!tools || !(tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')))) { + error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ") } - } - if (joint_germline && (!tools || !(tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')))) { - error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ") - } - if ( - tools && - ( - tools.split(',').contains('haplotypecaller') || - tools.split(',').contains('sentieon_haplotyper') || - tools.split(',').contains('sentieon_dnascope') - ) && - joint_germline && - ( !dbsnp || !known_indels || !known_snps || no_intervals ) - ) { - log.warn("""If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. 
For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \n\ -Joint germline variant calling also requires intervals in order to genotype the samples. As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""") - } + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')) && joint_germline && (!meta.vcf_dbsnp_vcf || !meta.vcf_known_indels_vcf || !meta.vcf_known_snps_vcf || no_intervals)) { + log.warn( + """If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \n\ + Joint germline variant calling also requires intervals in order to genotype the samples. 
As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""" + ) + } - if (tools && - tools.split(',').contains('sentieon_dnascope') && joint_germline && - ( !sentieon_dnascope_emit_mode || !sentieon_dnascope_emit_mode.split(',').contains('gvcf') ) - ) { - error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") - } + if (tools && tools.split(',').contains('sentieon_dnascope') && joint_germline && (!sentieon_dnascope_emit_mode || !sentieon_dnascope_emit_mode.split(',').contains('gvcf'))) { + error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") + } - if (tools && - tools.split(',').contains('sentieon_haplotyper') && joint_germline && - ( !sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.split(',').contains('gvcf') ) - ) { - error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") + if (tools && tools.split(',').contains('sentieon_haplotyper') && joint_germline && (!sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.split(',').contains('gvcf'))) { + error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") + } + return true } @@ -268,8 +284,8 @@ Joint germline variant calling also requires intervals in order to genotype the // Fails when missing sex information for CNV tools if (tools && (tools.split(',').contains('ascat') || tools.split(',').contains('controlfreec'))) { - input_sample.map{ - if (it[0].sex == 'NA' ) { + input_sample.map { + if (it[0].sex == 'NA') { error("Please specify sex information for each sample in your samplesheet when using '--tools' with 'ascat' or 'controlfreec'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") } } 
@@ -280,12 +296,6 @@ Joint germline variant calling also requires intervals in order to genotype the error("Please specify --bcftools_annotations, --bcftools_annotations_tbi, and --bcftools_header_lines, when using BCFTools annotations") } - // Fails when snpeff annotation is enabled but snpeff_db is not specified - if ((snpeff_cache && tools && (tools.split(',').contains("snpeff") || tools.split(',').contains('merge'))) && - !snpeff_db) { - error("Please specify --snpeff_db") - } - emit: input_sample - } +} diff --git a/subworkflows/local/samplesheet_to_channel/tests/main.nf.test b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test index e1c8682d27..e306558c77 100644 --- a/subworkflows/local/samplesheet_to_channel/tests/main.nf.test +++ b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test @@ -10,44 +10,32 @@ nextflow_workflow { } workflow { """ - // define inputs of the workflow here. Example: - input[0] = Channel.of([ - ['patient':'test', 'sample':'test', - 'sex':'XX', 'status':0, 'lane':'test_L1'], + input[0] = Channel.of([ // samplesheet + ['patient':'test', 'sample':'test', 'sex':'XX', 'status':0, 'lane':'test_L1'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true), [], [], [], [], [], [], [], [], [] ]) - input[1] = 'bwa-mem' // aligner - input[2] = [] // ascat_alleles - input[3] = [] // ascat_loci - input[4] = [] // ascat_loci_gc - input[5] = [] // ascat_loci_rt - input[6] = [] // bcftools_annotations - input[7] = [] // bcftools_annotations_tbi - input[8] = [] // bcftools_header_lines - input[9] = false // build_only_index - input[10] = [] // dbsnp - input[11] = [] // fasta - input[12] = [] // germline_resource - input[13] = [] // intervals - input[14] = false // joint_germline - input[15] = false // joint_mutect2 - input[16] = [] // known_indels - 
input[17] = [] // known_snps - input[18] = false // no_intervals - input[19] = [] // pon - input[20] = 'variant' // sentieon_dnascope_emit_mode - input[21] = 'variant' // sentieon_haplotyper_emit_mode - input[22] = '' // seq_center - input[23] = 'ILLUMINA' // seq_platform - input[24] = 'baserecalibrator' // skip_tools - input[25] = [] // snpeff_cache - input[26] = 'WBcel235.105' // snpeff_db - input[27] = 'mapping' // step - input[28] = 'strelka' // tools - input[29] = [] // umi_read_structure - input[30] = false // wes + input[1] = Channel.of([ // references + ['id':'genome', 'vcf':[]], + [] + ]) + input[2] = 'bwa-mem' // aligner + input[3] = [] // bcftools_annotations + input[4] = [] // bcftools_annotations_tbi + input[5] = [] // bcftools_header_lines + input[6] = false // joint_germline + input[7] = false // joint_mutect2 + input[8] = false // no_intervals + input[9] = 'variant' // sentieon_dnascope_emit_mode + input[10] = 'variant' // sentieon_haplotyper_emit_mode + input[11] = '' // seq_center + input[12] = 'ILLUMINA' // seq_platform + input[13] = 'baserecalibrator' // skip_tools + input[14] = 'mapping' // step + input[15] = 'strelka' // tools + input[16] = [] // umi_read_structure + input[17] = false // wes """ } } @@ -56,7 +44,5 @@ nextflow_workflow { assert workflow.success assert snapshot(workflow.out).match() } - } - } diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf index ce568284c7..8c5dd16709 100644 --- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -8,19 +8,19 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SAMPLESHEET_TO_CHANNEL } from '../samplesheet_to_channel' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { 
UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { logColours } from '../../nf-core/utils_nfcore_pipeline' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { SAMPLESHEET_TO_CHANNEL } from '../samplesheet_to_channel' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { logColours } from '../../nf-core/utils_nfcore_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -29,14 +29,14 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' */ workflow PIPELINE_INITIALISATION { - take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not 
use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet + references // string: Path to references + step // string: The step to retrieve input from main: @@ -45,20 +45,20 @@ workflow PIPELINE_INITIALISATION { // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // // Validate parameters and generate parameter summary to stdout // - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, ) // @@ -66,100 +66,36 @@ workflow PIPELINE_INITIALISATION { // UTILS_NFCORE_PIPELINE(nextflow_cli_args) - // - // Custom validation for pipeline parameters - // - validateInputParameters() - - // Check input path parameters to see if they exist - def checkPathParamList = [ - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, - params.bwa, - params.bwamem2, - params.bcftools_annotations, - params.bcftools_annotations_tbi, - params.bcftools_header_lines, - params.cf_chrom_len, - params.chr_dir, - params.cnvkit_reference, - params.dbnsfp, - params.dbnsfp_tbi, - params.dbsnp, - params.dbsnp_tbi, - params.dict, - params.dragmap, - params.fasta, - params.fasta_fai, - params.germline_resource, - params.germline_resource_tbi, - params.input, - params.intervals, - params.known_indels, - params.known_indels_tbi, - params.known_snps, - params.known_snps_tbi, - params.mappability, - params.multiqc_config, - params.ngscheckmate_bed, - params.pon, - params.pon_tbi, - params.sentieon_dnascope_model, - params.spliceai_indel, - params.spliceai_indel_tbi, - params.spliceai_snv, - params.spliceai_snv_tbi - ] - -// only 
check if we are using the tools -if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) -if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) - - // def retrieveInput(need_input, step, outdir) { - - params.input_restart = retrieveInput((!params.build_only_index && !params.input), params.step, params.outdir) - - ch_from_samplesheet = params.build_only_index ? Channel.empty() : params.input ? - Channel.fromList(samplesheetToList(params.input, "$projectDir/assets/schema_input.json")) : - Channel.fromList(samplesheetToList(params.input_restart, "$projectDir/assets/schema_input.json")) + ch_from_samplesheet = input + ? Channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) + : Channel.fromList(samplesheetToList(retrieveInput(step, outdir), "${projectDir}/assets/schema_input.json")) + + ch_from_references = Channel.fromList(samplesheetToList(references, "${projectDir}/subworkflows/nf-core/utils_references/schema_references.json")) SAMPLESHEET_TO_CHANNEL( ch_from_samplesheet, + ch_from_references, params.aligner, - params.ascat_alleles, - params.ascat_loci, - params.ascat_loci_gc, - params.ascat_loci_rt, params.bcftools_annotations, params.bcftools_annotations_tbi, params.bcftools_header_lines, - params.build_only_index, - params.dbsnp, - params.fasta, - params.germline_resource, - params.intervals, params.joint_germline, params.joint_mutect2, - params.known_indels, - params.known_snps, params.no_intervals, - params.pon, params.sentieon_dnascope_emit_mode, params.sentieon_haplotyper_emit_mode, params.seq_center, params.seq_platform, params.skip_tools, - params.snpeff_cache, - params.snpeff_db, params.step, params.tools, params.umi_read_structure, - params.wes) + params.wes, + ) emit: samplesheet = SAMPLESHEET_TO_CHANNEL.out.input_sample + references 
= ch_from_references versions } @@ -170,7 +106,6 @@ if (params.tools && (params.tools.split(',').contains('vep') || params.tools. */ workflow PIPELINE_COMPLETION { - take: email // string: email address email_on_fail // string: email address sent on pipeline failure @@ -197,7 +132,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - multiqc_report_list.getVal() + multiqc_report_list.getVal(), ) } @@ -208,7 +143,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -217,41 +152,7 @@ workflow PIPELINE_COMPLETION { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// Check and validate pipeline parameters -// -def validateInputParameters() { - genomeExistsError() -} - -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${metas[0].id}") - } - - return [ metas[0], fastqs ] -} -// -// Exit pipeline if incorrect --genome key provided -// -def genomeExistsError() { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - error(error_string) - } -} // // Generate methods description for MultiQC // @@ -260,11 +161,11 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + ".", + ].join(' ').trim() return citation_text } @@ -274,9 +175,9 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", - "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" - ].join(' ').trim() + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>", + ].join(' ').trim() return reference_text } @@ -298,7 +199,10 @@ def methodsDescriptionText(mqc_methods_yaml) { temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" + } + else { + meta["doi_text"] = "" + } meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of the pipeline version used.
  • " // Tool references @@ -312,66 +216,43 @@ def methodsDescriptionText(mqc_methods_yaml) { def methods_text = mqc_methods_yaml.text - def engine = new groovy.text.SimpleTemplateEngine() + def engine = new groovy.text.SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html.toString() } -// -// nf-core/sarek logo -// -def nfCoreLogo(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.white} ____${colors.reset} - ${colors.white} .´ _ `.${colors.reset} - ${colors.white} / ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset} - ${colors.white} | ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset} - ${colors.white} \\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset} - ${colors.white} `${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset} - - ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) -} - // // retrieveInput // -def retrieveInput(need_input, step, outdir) { +def retrieveInput(step, outdir) { def input = null - if (!params.input && !params.build_only_index) { - switch (step) { - case 'mapping': error("Can't start $step step without samplesheet") - break - case 'markduplicates': log.warn("Using file ${outdir}/csv/mapped.csv"); - input = outdir + "/csv/mapped.csv" - break - case 
'prepare_recalibration': log.warn("Using file ${outdir}/csv/markduplicates_no_table.csv"); - input = outdir + "/csv/markduplicates_no_table.csv" - break - case 'recalibrate': log.warn("Using file ${outdir}/csv/markduplicates.csv"); - input = outdir + "/csv/markduplicates.csv" - break - case 'variant_calling': log.warn("Using file ${outdir}/csv/recalibrated.csv"); - input = outdir + "/csv/recalibrated.csv" - break - // case 'controlfreec': csv_file = file("${outdir}/variant_calling/csv/control-freec_mpileup.csv", checkIfExists: true); break - case 'annotate': log.warn("Using file ${outdir}/csv/variantcalled.csv"); - input = outdir + "/csv/variantcalled.csv" - break - default: log.warn("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - error("Unknown step $step") - } + if (step == 'mapping') { + error("Can't start ${step} step without samplesheet") + } + else if (step == 'markduplicates') { + log.warn("Using file ${outdir}/csv/mapped.csv") + input = outdir + "/csv/mapped.csv" + } + else if (step == 'prepare_recalibration') { + log.warn("Using file ${outdir}/csv/markduplicates_no_table.csv") + input = outdir + "/csv/markduplicates_no_table.csv" + } + else if (step == 'recalibrate') { + log.warn("Using file ${outdir}/csv/markduplicates.csv") + input = outdir + "/csv/markduplicates.csv" + } + else if (step == 'variant_calling') { + log.warn("Using file ${outdir}/csv/recalibrated.csv") + input = outdir + "/csv/recalibrated.csv" + } + else if (step == 'annotate') { + log.warn("Using file ${outdir}/csv/variantcalled.csv") + input = outdir + "/csv/variantcalled.csv" + } + else { + log.warn("Please provide an input samplesheet to the pipeline e.g. 
'--input samplesheet.csv'") + error("Unknown step ${step}") } return input } diff --git a/subworkflows/local/vcf_annotate_all/main.nf b/subworkflows/local/vcf_annotate_all/main.nf index 20cab4be1c..11e645455b 100644 --- a/subworkflows/local/vcf_annotate_all/main.nf +++ b/subworkflows/local/vcf_annotate_all/main.nf @@ -9,9 +9,9 @@ include { VCF_ANNOTATE_SNPEFF } from '../../nf-core/vc workflow VCF_ANNOTATE_ALL { take: - vcf // channel: [ val(meta), vcf ] + vcf // channel: [ val(meta), vcf ] fasta - tools // Mandatory, list of tools to apply + tools // Mandatory, list of tools to apply snpeff_db snpeff_cache vep_genome @@ -37,18 +37,17 @@ workflow VCF_ANNOTATE_ALL { versions = versions.mix(VCF_ANNOTATE_BCFTOOLS.out.versions) } - if (tools.split(',').contains('merge') || tools.split(',').contains('snpeff')) { - VCF_ANNOTATE_SNPEFF(vcf, snpeff_db, snpeff_cache) + VCF_ANNOTATE_SNPEFF(vcf, snpeff_db.map { _meta, snpeff_db_ -> snpeff_db_ }, snpeff_cache) - reports = reports.mix(VCF_ANNOTATE_SNPEFF.out.reports.map{ meta, reports -> [ reports ] }) + reports = reports.mix(VCF_ANNOTATE_SNPEFF.out.reports.map { _meta, reports_ -> [reports_] }) vcf_ann = vcf_ann.mix(VCF_ANNOTATE_SNPEFF.out.vcf_tbi) versions = versions.mix(VCF_ANNOTATE_SNPEFF.out.versions) } if (tools.split(',').contains('merge')) { - vcf_ann_for_merge = VCF_ANNOTATE_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [ meta, vcf, [] ] } - VCF_ANNOTATE_MERGE(vcf_ann_for_merge, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + vcf_ann_for_merge = VCF_ANNOTATE_SNPEFF.out.vcf_tbi.map { meta, vcf_, _tbi -> [meta, vcf_, []] } + VCF_ANNOTATE_MERGE(vcf_ann_for_merge, fasta, vep_genome.map { _meta, vep_genome_ -> vep_genome_ }, vep_species.map { _meta, vep_species_ -> vep_species_ }, vep_cache_version.map { _meta, vep_cache_version_ -> vep_cache_version_ }, vep_cache, vep_extra_files) reports = reports.mix(VCF_ANNOTATE_MERGE.out.reports) vcf_ann = vcf_ann.mix(VCF_ANNOTATE_MERGE.out.vcf_tbi) @@ 
-56,20 +55,20 @@ workflow VCF_ANNOTATE_ALL { } if (tools.split(',').contains('vep')) { - vcf_for_vep = vcf.map{ meta, vcf -> [ meta, vcf, [] ] } - VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + vcf_for_vep = vcf.map { meta, vcf_ -> [meta, vcf_, []] } + VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome.map { _meta, vep_genome_ -> vep_genome_ }, vep_species.map { _meta, vep_species_ -> vep_species_ }, vep_cache_version.map { _meta, vep_cache_version_ -> vep_cache_version_ }, vep_cache, vep_extra_files) - reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) - vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) - tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) + reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) + tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) json_ann = json_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.json) versions = versions.mix(VCF_ANNOTATE_ENSEMBLVEP.out.versions) } emit: - vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] tab_ann json_ann - reports // path: *.html - versions // path: versions.yml + reports // path: *.html + versions // path: versions.yml } diff --git a/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf b/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf index bcdc34e30d..7276ed02b1 100644 --- a/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf +++ b/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf @@ -12,7 +12,7 @@ workflow VCF_QC_BCFTOOLS_VCFTOOLS { versions = Channel.empty() - BCFTOOLS_STATS(vcf.map{ meta, vcf -> [ meta, vcf, [] ] }, [[:],[]], [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + BCFTOOLS_STATS(vcf.map { meta, _vcf -> [meta, _vcf, []] }, [[:], []], [[:], []], [[:], []], [[:], []], [[:], []]) VCFTOOLS_TSTV_COUNT(vcf, target_bed, []) VCFTOOLS_TSTV_QUAL(vcf, target_bed, []) VCFTOOLS_SUMMARY(vcf, 
target_bed, []) @@ -25,6 +25,5 @@ workflow VCF_QC_BCFTOOLS_VCFTOOLS { vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual vcftools_filter_summary = VCFTOOLS_SUMMARY.out.filter_summary - versions } diff --git a/subworkflows/nf-core/utils_references/README.md b/subworkflows/nf-core/utils_references/README.md new file mode 100644 index 0000000000..03da289f33 --- /dev/null +++ b/subworkflows/nf-core/utils_references/README.md @@ -0,0 +1,13 @@ +# Disclaimer + +This `utils_references/` folder contains for now two functions and a schema. +This is really meant for a POC and should not be installed by anyone except @maxulysse. +But that was the easiest way to share functions and a schema between three different pipelines and still showcase the logic. +This might evolve in the future, possibly towards a proper plugin. + +If you do so, please be aware that: + +- @maxulysse has hacked the `main.nf` to test the functions and the schema +- This is really meant to evolve in the future and can be deleted at any moment without prior notice. + +That being said, if you still want to use it or want to know more about it, please check the `#references` channel on the nf-core slack. 
diff --git a/subworkflows/nf-core/utils_references/main.nf b/subworkflows/nf-core/utils_references/main.nf new file mode 100644 index 0000000000..c557acc0a6 --- /dev/null +++ b/subworkflows/nf-core/utils_references/main.nf @@ -0,0 +1,60 @@ +// DISCLAIMER: +// This subworkflow is just to test the functions and the schema +// It should not be used in any pipeline + +// This include statement can also be deleted +include { samplesheetToList } from 'plugin/nf-schema' + +workflow UTILS_REFERENCES { + take: + yaml_reference + param_file + param_value + attribute_file + attribute_value + basepath + + main: + references = Channel.fromList(samplesheetToList(yaml_reference, "${projectDir}/subworkflows/nf-core/utils_references/schema_references.json")) + + // GIVING up writing a test for the functions, so writing a subworkflow to test it + references_file = get_references_file(references, param_file, attribute_file, basepath) + references_value = get_references_value(references, param_value, attribute_value) + + emit: + references_file + references_value +} +// You can delete everything before this line (including this line) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS TO EXTRACT REFERENCES FILES OR VALUES FROM THE REFERENCES YAML OR PARAMS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def get_references_file(references, param, attribute, basepath) { + return references + .map { meta, _readme -> + if (param || meta[attribute]) { + [meta.subMap(['id']), file(param ?: meta[attribute].replace('${params.igenomes_base}', basepath), checkIfExists: true)] + } + else { + null + } + } + .collect() +} + +def get_references_value(references, param, attribute) { + return references + .map { meta, _readme -> + if (param || meta[attribute]) { + [meta.subMap(['id']), param ?: meta[attribute]] + } + else { + null + } + } + .collect() +} diff --git 
a/subworkflows/nf-core/utils_references/meta.yml b/subworkflows/nf-core/utils_references/meta.yml new file mode 100644 index 0000000000..491c79c969 --- /dev/null +++ b/subworkflows/nf-core/utils_references/meta.yml @@ -0,0 +1,14 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_references" +description: Functionality for dealing with references that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - references +components: [] +input: [] +output: [] +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_references/schema_references.json b/subworkflows/nf-core/utils_references/schema_references.json new file mode 100644 index 0000000000..a3172d841a --- /dev/null +++ b/subworkflows/nf-core/utils_references/schema_references.json @@ -0,0 +1,336 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/references/master/assets/schema_asset.json", + "title": "nf-core/references pipeline - params.asset schema", + "description": "Schema for the file provided with params.asset", + "type": "array", + "items": { + "type": "object", + "properties": { + "genome": { + "meta": ["genome", "id"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Genome name must be provided, cannot contain spaces" + }, + "site": { + "meta": ["site"], + "type": "string", + "pattern": "^\\S+$", + "default": "unknown", + "errorMessage": "Website of origin of the reference, cannot contain spaces" + }, + "source": { + "meta": ["source"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Source of genome must be provided, cannot contain spaces" + }, + "source_version": { + "meta": ["source_version"], + "type": "string", + "pattern": "^\\S+$", + "default": "unknown", + "errorMessage": "Source version used to create annotation files (gff/gtf related files), cannot 
contain spaces" + }, + "species": { + "meta": ["species"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Species of the reference, cannot contain spaces" + }, + "ascat_alleles": { + "meta": ["ascat_alleles"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "TODO" + }, + "ascat_loci": { + "meta": ["ascat_loci"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "TODO" + }, + "ascat_loci_gc": { + "meta": ["ascat_loci_gc"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "TODO" + }, + "bed12": { + "meta": ["bed12"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "TODO" + }, + "bowtie1_index": { + "meta": ["bowtie1_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Bowtie1 index, cannot contain spaces" + }, + "bowtie2_index": { + "meta": ["bowtie2_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Bowtie2 index, cannot contain spaces" + }, + "bwamem1_index": { + "meta": ["bwamem1_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "BWA-MEM index, cannot contain spaces" + }, + "bwamem2_index": { + "meta": ["bwamem2_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "BWA-MEM2 index, cannot contain spaces" + }, + "dragmap_hashtable": { + "meta": ["dragmap_hashtable"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "DRAGMAP hashtable, cannot contain spaces" + }, + "chr_dir": { + "meta": ["chr_dir"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "TODO" + }, + "fasta": { + "meta": ["fasta"], + "type": "string", + "pattern": "^\\S+\\.f(ast|n)?a(\\.gz)?$", + "errorMessage": "Fasta file [required when creating a reference], cannot contain spaces" + }, + "fasta_dict": { + "meta": ["fasta_dict"], + "type": "string", + "pattern": "^\\S+\\.dict(\\.gz)?$", + "errorMessage": "Fasta dictionary, cannot contain spaces" + }, + "fasta_fai": { + "meta": ["fasta_fai"], + "type": "string", + "pattern": 
"^\\S+\\.f(ast|n)?a\\.fai(\\.gz)?$", + "errorMessage": "Fasta index, cannot contain spaces" + }, + "fasta_sizes": { + "meta": ["fasta_sizes"], + "type": "string", + "pattern": "^\\S+\\.f(ast|n)?a\\.sizes(\\.gz)?$", + "errorMessage": "Fasta sizes, cannot contain spaces" + }, + "gff": { + "meta": ["gff"], + "type": "string", + "pattern": "^\\S+\\.gff(\\.gz)?$", + "errorMessage": "GFF3 file, required when no GTF is provided and wanting to build a reference needing such genes annotation, cannot contain spaces" + }, + "gtf": { + "meta": ["gtf"], + "type": "string", + "pattern": "^\\S+\\.gtf(\\.gz)?$", + "errorMessage": "GTF file, required when no GFF3 is provided and wanting to build a reference needing such genes annotation, cannot contain spaces" + }, + "hisat2_index": { + "meta": ["hisat2_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "HISAT2 index, cannot contain spaces" + }, + "intervals_bed": { + "meta": ["intervals_bed"], + "type": "string", + "pattern": "^\\S+\\.bed$", + "errorMessage": "Fasta intervals bed, cannot contain spaces " + }, + "kallisto_index": { + "meta": ["kallisto_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Kallisto index, cannot contain spaces" + }, + "macs_gsize": { + "meta": ["macs_gsize"], + "type": "number", + "errorMessage": "TODO" + }, + "mito_name": { + "meta": ["mito_name"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "TODO" + }, + "msisensorpro_list": { + "meta": ["msisensorpro_list"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "MSIsensor-pro list, cannot contain spaces" + }, + "ngscheckmate_bed": { + "meta": ["ngscheckmate_bed"], + "type": "string", + "pattern": "^\\S+\\.bed$", + "errorMessage": "ngscheckmate bed, cannot contain spaces " + }, + "readme": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "README file describing the reference, cannot contain spaces" + }, + "rsem_index": { + "meta": ["rsem_index"], + "type": "string", + 
"pattern": "^\\S+$", + "errorMessage": "RSEM index, cannot contain spaces" + }, + "salmon_index": { + "meta": ["salmon_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Salmon index, cannot contain spaces" + }, + "splice_sites": { + "meta": ["splice_sites"], + "type": "string", + "pattern": "^\\S+(\\.splice_sites)(\\.txt)?$", + "errorMessage": "Splice sites [can be generated with HISAT2], cannot contain spaces" + }, + "star_index": { + "meta": ["star_index"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "STAR index, cannot contain spaces" + }, + "snpeff_db": { + "meta": ["snpeff_db"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "SnpEff database, cannot contain spaces" + }, + "transcript_fasta": { + "meta": ["transcript_fasta"], + "type": "string", + "pattern": "^\\S+\\.f(ast|n)?a(\\.gz)?$", + "errorMessage": "Transcript fasta [can be generated with RSEM], cannot contain spaces" + }, + "vep_cache_version": { + "meta": ["vep_cache_version"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "VEP cache version, cannot contain spaces" + }, + "vep_genome": { + "meta": ["vep_genome"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "VEP genome, cannot contain spaces" + }, + "vep_species": { + "meta": ["vep_species"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "VEP species, cannot contain spaces" + }, + "vcf_dbsnp_vcf": { + "meta": ["vcf_dbsnp_vcf"], + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "VCF file (can be bgzipped), cannot contain spaces" + }, + "vcf_dbsnp_vcf_tbi": { + "meta": ["vcf_dbsnp_vcf_tbi"], + "type": "string", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi?$", + "errorMessage": "VCF tabix index, cannot contain spaces" + }, + "vcf_dbsnp_vcf_vqsr": { + "meta": ["vcf_dbsnp_vcf_vqsr"], + "type": "string", + "errorMessage": "VCF VQSR input, can contain spaces" + }, + "vcf_dbsnp_vcf_source": { + "meta": ["vcf_dbsnp_vcf_source"], + "type": 
"string", + "pattern": "^\\S+$", + "errorMessage": "Source of dbsnp, cannot contain spaces" + }, + "vcf_germline_resource_vcf": { + "meta": ["vcf_germline_resource_vcf"], + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "VCF file (can be bgzipped), cannot contain spaces" + }, + "vcf_germline_resource_vcf_tbi": { + "meta": ["vcf_germline_resource_vcf_tbi"], + "type": "string", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi?$", + "errorMessage": "VCF tabix index, cannot contain spaces" + }, + "vcf_germline_resource_vcf_source": { + "meta": ["vcf_germline_resource_vcf_source"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Source of germline_resource, cannot contain spaces" + }, + "vcf_known_indels_vcf": { + "meta": ["vcf_known_indels_vcf"], + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "VCF file (can be bgzipped), cannot contain spaces" + }, + "vcf_known_indels_vcf_tbi": { + "meta": ["vcf_known_indels_vcf_tbi"], + "type": "string", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi?$", + "errorMessage": "VCF tabix index, cannot contain spaces" + }, + "vcf_known_indels_vcf_source": { + "meta": ["vcf_known_indels_vcf_source"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Source of known_indels, cannot contain spaces" + }, + "vcf_known_snps_vcf": { + "meta": ["vcf_known_snps_vcf"], + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "VCF file (can be bgzipped), cannot contain spaces" + }, + "vcf_known_snps_vcf_tbi": { + "meta": ["vcf_known_snps_vcf_tbi"], + "type": "string", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi?$", + "errorMessage": "VCF tabix index, cannot contain spaces" + }, + "vcf_known_snps_vcf_source": { + "meta": ["vcf_known_snps_vcf_source"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Source of known_snps, cannot contain spaces" + }, + "vcf_pon_vcf": { + "meta": ["vcf_pon_vcf"], + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "VCF 
file (can be bgzipped), cannot contain spaces" + }, + "vcf_pon_vcf_tbi": { + "meta": ["vcf_pon_vcf_tbi"], + "type": "string", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi?$", + "errorMessage": "VCF tabix index, cannot contain spaces" + }, + "vcf_pon_vcf_source": { + "meta": ["vcf_pon_vcf_source"], + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Source of pon, cannot contain spaces" + } + }, + "required": ["genome"] + } +} diff --git a/subworkflows/nf-core/utils_references/tests/main.nf.test b/subworkflows/nf-core/utils_references/tests/main.nf.test new file mode 100644 index 0000000000..d16de16e68 --- /dev/null +++ b/subworkflows/nf-core/utils_references/tests/main.nf.test @@ -0,0 +1,27 @@ +nextflow_workflow { + + name "Test Workflow UTILS_REFERENCES" + script "../main.nf" + workflow "UTILS_REFERENCES" + + test("references_file with params - references_value without params") { + + when { + workflow { + """ + input[0] = 'https://raw.githubusercontent.com/nf-core/references-assets/main/genomes/Homo_sapiens/test/GRCh38_chr22.yml' + input[1] = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta' + input[2] = null + input[3] = 'fasta' + input[4] = 'species' + input[5] = 'https://raw.githubusercontent.com/nf-core/' + """ + } + } + + then { + assert workflow.success + assert snapshot(workflow.out).match() + } + } +} diff --git a/subworkflows/nf-core/utils_references/tests/main.nf.test.snap b/subworkflows/nf-core/utils_references/tests/main.nf.test.snap new file mode 100644 index 0000000000..f5bf3bb34b --- /dev/null +++ b/subworkflows/nf-core/utils_references/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "references_file with params - references_value without params": { + "content": [ + { + "0": [ + [ + { + "id": "GRCh38_chr22" + }, + "/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta" + ] + ], + "1": [ + [ + { + "id": "GRCh38_chr22" + }, + "Homo_sapiens" + ] + ], + "references_file": [ + 
[ + { + "id": "GRCh38_chr22" + }, + "/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta" + ] + ], + "references_value": [ + [ + { + "id": "GRCh38_chr22" + }, + "Homo_sapiens" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-03T18:21:58.076068554" + } +} \ No newline at end of file diff --git a/tests/aligner-bwa-mem.nf.test b/tests/aligner-bwa-mem.nf.test index c3c719e3a8..b550602df1 100644 --- a/tests/aligner-bwa-mem.nf.test +++ b/tests/aligner-bwa-mem.nf.test @@ -5,16 +5,15 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --aligner bwa-mem --save_reference | skip QC/recal/md") { + test("Run with profile test | --aligner bwa-mem | skip QC/recal/md") { when { params { aligner = 'bwa-mem' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' outdir = "$outputDir" - save_reference = true skip_tools = 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' - tools = '' + tools = null } } @@ -43,39 +42,4 @@ nextflow_pipeline { ) } } - - test("Run with profile test | --aligner bwa-mem --save_reference --build_only_index") { - - when { - params { - aligner = 'bwa-mem' - build_only_index = true - input = false - outdir = "$outputDir" - save_reference = true - skip_tools = 'multiqc' - tools = '' - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - assertAll( - { assert workflow.success}, - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we 
tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } } diff --git a/tests/aligner-bwa-mem.nf.test.snap b/tests/aligner-bwa-mem.nf.test.snap index 395ecebba9..198f4cde9a 100644 --- a/tests/aligner-bwa-mem.nf.test.snap +++ b/tests/aligner-bwa-mem.nf.test.snap @@ -1,7 +1,7 @@ { - "Run with profile test | --aligner bwa-mem --save_reference | skip QC/recal/md": { + "Run with profile test | --aligner bwa-mem | skip QC/recal/md": { "content": [ - 10, + 8, { "BAM_TO_CRAM_MAPPING": { "samtools": 1.21 @@ -29,30 +29,10 @@ "preprocessing/mapped", "preprocessing/mapped/test", "preprocessing/mapped/test/test.sorted.cram", - "preprocessing/mapped/test/test.sorted.cram.crai", - "reference", - "reference/bwa", - "reference/bwa/genome.amb", - "reference/bwa/genome.ann", - "reference/bwa/genome.bwt", - "reference/bwa/genome.pac", - "reference/bwa/genome.sa", - "reference/intervals", - "reference/intervals/chr22_1-40001.bed", - "reference/intervals/chr22_1-40001.bed.gz", - "reference/intervals/genome.bed", - "reference/intervals/genome.bed.gz" + "preprocessing/mapped/test/test.sorted.cram.crai" ], [ - "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", - "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", - "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", - "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", - "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", - "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", - "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], [ [ @@ -62,52 +42,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-08T11:22:13.394114" 
- }, - "Run with profile test | --aligner bwa-mem --save_reference --build_only_index": { - "content": [ - 5, - { - "Workflow": { - "nf-core/sarek": "v3.6.0dev" - } - }, - [ - "csv", - "pipeline_info", - "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", - "reference/bwa", - "reference/bwa/genome.amb", - "reference/bwa/genome.ann", - "reference/bwa/genome.bwt", - "reference/bwa/genome.pac", - "reference/bwa/genome.sa", - "reference/intervals", - "reference/intervals/chr22_1-40001.bed", - "reference/intervals/chr22_1-40001.bed.gz", - "reference/intervals/genome.bed", - "reference/intervals/genome.bed.gz" - ], - [ - "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", - "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", - "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", - "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", - "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", - "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", - "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" - }, - "timestamp": "2024-10-08T11:23:05.203586" + "timestamp": "2025-02-06T21:05:17.892600127" } -} +} \ No newline at end of file diff --git a/tests/aligner-bwa-mem2.nf.test b/tests/aligner-bwa-mem2.nf.test index 204ec2711f..c6c5bc92a3 100644 --- a/tests/aligner-bwa-mem2.nf.test +++ b/tests/aligner-bwa-mem2.nf.test @@ -5,16 +5,15 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --aligner bwa-mem2 --save_reference | skip QC/recal/md") { + test("Run with profile test | --aligner bwa-mem2 | skip QC/recal/md") { when { params { aligner = 'bwa-mem2' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' outdir = "$outputDir" - save_reference = true skip_tools = 
'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' - tools = '' + tools = null } } @@ -43,39 +42,4 @@ nextflow_pipeline { ) } } - - test("Run with profile test | --aligner bwa-mem2 --save_reference --build_only_index") { - - when { - params { - aligner = 'bwa-mem2' - build_only_index = true - input = false - outdir = "$outputDir" - save_reference = true - skip_tools = 'multiqc' - tools = '' - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - assertAll( - { assert workflow.success}, - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } } diff --git a/tests/aligner-bwa-mem2.nf.test.snap b/tests/aligner-bwa-mem2.nf.test.snap index cf062a5aed..00c87e94ea 100644 --- a/tests/aligner-bwa-mem2.nf.test.snap +++ b/tests/aligner-bwa-mem2.nf.test.snap @@ -1,50 +1,7 @@ { - "Run with profile test | --aligner bwa-mem2 --save_reference --build_only_index": { + "Run with profile test | --aligner bwa-mem2 | skip QC/recal/md": { "content": [ - 5, - { - "Workflow": { - "nf-core/sarek": "v3.6.0dev" - } - }, - [ - "csv", - "pipeline_info", - "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", - "reference/bwamem2", - "reference/bwamem2/genome.fasta.0123", - "reference/bwamem2/genome.fasta.amb", - 
"reference/bwamem2/genome.fasta.ann", - "reference/bwamem2/genome.fasta.bwt.2bit.64", - "reference/bwamem2/genome.fasta.pac", - "reference/intervals", - "reference/intervals/chr22_1-40001.bed", - "reference/intervals/chr22_1-40001.bed.gz", - "reference/intervals/genome.bed", - "reference/intervals/genome.bed.gz" - ], - [ - "genome.fasta.0123:md5,d73300d44f733bcdb7c988fc3ff3e3e9", - "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", - "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", - "genome.fasta.bwt.2bit.64:md5,cd4bdf496eab05228a50c45ee43c1ed0", - "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", - "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", - "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" - }, - "timestamp": "2024-10-08T11:26:09.886689" - }, - "Run with profile test | --aligner bwa-mem2 --save_reference | skip QC/recal/md": { - "content": [ - 10, + 8, { "BAM_TO_CRAM_MAPPING": { "samtools": 1.21 @@ -72,30 +29,10 @@ "preprocessing/mapped", "preprocessing/mapped/test", "preprocessing/mapped/test/test.sorted.cram", - "preprocessing/mapped/test/test.sorted.cram.crai", - "reference", - "reference/bwamem2", - "reference/bwamem2/genome.fasta.0123", - "reference/bwamem2/genome.fasta.amb", - "reference/bwamem2/genome.fasta.ann", - "reference/bwamem2/genome.fasta.bwt.2bit.64", - "reference/bwamem2/genome.fasta.pac", - "reference/intervals", - "reference/intervals/chr22_1-40001.bed", - "reference/intervals/chr22_1-40001.bed.gz", - "reference/intervals/genome.bed", - "reference/intervals/genome.bed.gz" + "preprocessing/mapped/test/test.sorted.cram.crai" ], [ - "genome.fasta.0123:md5,d73300d44f733bcdb7c988fc3ff3e3e9", - "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", - "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", - 
"genome.fasta.bwt.2bit.64:md5,cd4bdf496eab05228a50c45ee43c1ed0", - "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", - "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", - "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], [ [ @@ -105,9 +42,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-08T11:25:00.196657" + "timestamp": "2025-02-06T21:06:49.370718706" } -} +} \ No newline at end of file diff --git a/tests/aligner-dragmap.nf.test b/tests/aligner-dragmap.nf.test index ab11bcf143..bf92811e50 100644 --- a/tests/aligner-dragmap.nf.test +++ b/tests/aligner-dragmap.nf.test @@ -5,14 +5,13 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --aligner dragmap --save_reference | skip QC/recal/md") { + test("Run with profile test | --aligner dragmap | skip QC/recal/md") { when { params { aligner = 'dragmap' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' outdir = "$outputDir" - save_reference = true skip_tools = 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' tools = '' } @@ -43,39 +42,4 @@ nextflow_pipeline { ) } } - - test("Run with profile test | --aligner dragmap --save_reference --build_only_index") { - - when { - params { - aligner = 'dragmap' - build_only_index = true - input = false - outdir = "$outputDir" - save_reference = true - skip_tools = 'multiqc' - tools = '' - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 
'tests/.nftignore') - assertAll( - { assert workflow.success}, - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } } diff --git a/tests/aligner-dragmap.nf.test.snap b/tests/aligner-dragmap.nf.test.snap index 9cd8406f54..395776e8c0 100644 --- a/tests/aligner-dragmap.nf.test.snap +++ b/tests/aligner-dragmap.nf.test.snap @@ -1,53 +1,7 @@ { - "Run with profile test | --aligner dragmap --save_reference --build_only_index": { + "Run with profile test | --aligner dragmap | skip QC/recal/md": { "content": [ - 5, - { - "Workflow": { - "nf-core/sarek": "v3.6.0dev" - } - }, - [ - "csv", - "pipeline_info", - "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", - "reference/dragmap", - "reference/dragmap/hash_table.cfg", - "reference/dragmap/hash_table.cfg.bin", - "reference/dragmap/hash_table.cmp", - "reference/dragmap/hash_table_stats.txt", - "reference/dragmap/ref_index.bin", - "reference/dragmap/reference.bin", - "reference/dragmap/repeat_mask.bin", - "reference/dragmap/str_table.bin", - "reference/intervals", - "reference/intervals/chr22_1-40001.bed", - "reference/intervals/chr22_1-40001.bed.gz", - "reference/intervals/genome.bed", - "reference/intervals/genome.bed.gz" - ], - [ - "hash_table.cmp:md5,1caab4ffc89f81ace615a2e813295cf4", - "ref_index.bin:md5,dbb5c7d26b974e0ac338024fe4535044", - "reference.bin:md5,be67b80ee48aa96b383fd72f1ccfefea", - "repeat_mask.bin:md5,294939f1f80aa7f4a70b9b537e4c0f21", - "str_table.bin:md5,45f7818c4a10fdeed04db7a34b5f9ff1", - "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - 
"chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", - "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" - }, - "timestamp": "2024-10-08T11:28:56.135088" - }, - "Run with profile test | --aligner dragmap --save_reference | skip QC/recal/md": { - "content": [ - 10, + 8, { "BAM_TO_CRAM_MAPPING": { "samtools": 1.21 @@ -76,33 +30,10 @@ "preprocessing/mapped", "preprocessing/mapped/test", "preprocessing/mapped/test/test.sorted.cram", - "preprocessing/mapped/test/test.sorted.cram.crai", - "reference", - "reference/dragmap", - "reference/dragmap/hash_table.cfg", - "reference/dragmap/hash_table.cfg.bin", - "reference/dragmap/hash_table.cmp", - "reference/dragmap/hash_table_stats.txt", - "reference/dragmap/ref_index.bin", - "reference/dragmap/reference.bin", - "reference/dragmap/repeat_mask.bin", - "reference/dragmap/str_table.bin", - "reference/intervals", - "reference/intervals/chr22_1-40001.bed", - "reference/intervals/chr22_1-40001.bed.gz", - "reference/intervals/genome.bed", - "reference/intervals/genome.bed.gz" + "preprocessing/mapped/test/test.sorted.cram.crai" ], [ - "hash_table.cmp:md5,1caab4ffc89f81ace615a2e813295cf4", - "ref_index.bin:md5,dbb5c7d26b974e0ac338024fe4535044", - "reference.bin:md5,be67b80ee48aa96b383fd72f1ccfefea", - "repeat_mask.bin:md5,294939f1f80aa7f4a70b9b537e4c0f21", - "str_table.bin:md5,45f7818c4a10fdeed04db7a34b5f9ff1", - "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", - "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", - "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], [ [ @@ -112,9 +43,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-08T11:28:12.076906" + "timestamp": "2025-02-06T21:08:05.683498782" } -} +} \ No newline at end of file diff 
--git a/tests/alignment_from_everything.nf.test b/tests/alignment_from_everything.nf.test index 92f9ef56c8..f4140b8f1e 100644 --- a/tests/alignment_from_everything.nf.test +++ b/tests/alignment_from_everything.nf.test @@ -11,7 +11,6 @@ nextflow_pipeline { when { params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' outdir = "$outputDir" save_mapped = true save_output_as_bam = true diff --git a/tests/alignment_from_everything.nf.test.snap b/tests/alignment_from_everything.nf.test.snap index 26c1bf92f2..3f02ca7d69 100644 --- a/tests/alignment_from_everything.nf.test.snap +++ b/tests/alignment_from_everything.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test,alignment_from_everything | --save_mapped --save_output_as_bam": { "content": [ - 27, + 25, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -222,7 +222,6 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.bam", "preprocessing/recalibrated/test/test.recal.bam.bai", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -323,13 +322,13 @@ "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T21:51:21.998984" + "timestamp": "2025-02-06T21:10:24.809581731" } -} +} \ No newline at end of file diff --git a/tests/alignment_to_fastq.nf.test b/tests/alignment_to_fastq.nf.test index a7288ef998..7464f6e25d 100644 --- a/tests/alignment_to_fastq.nf.test +++ b/tests/alignment_to_fastq.nf.test @@ -11,7 +11,6 @@ nextflow_pipeline { when { params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' outdir = "$outputDir" save_mapped = true 
save_output_as_bam = true diff --git a/tests/alignment_to_fastq.nf.test.snap b/tests/alignment_to_fastq.nf.test.snap index 0bf19803d8..2ffeefe427 100644 --- a/tests/alignment_to_fastq.nf.test.snap +++ b/tests/alignment_to_fastq.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test,alignment_to_fastq": { "content": [ - 27, + 25, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -222,7 +222,6 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.bam", "preprocessing/recalibrated/test/test.recal.bam.bai", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -323,13 +322,13 @@ "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:49:40.35031" + "timestamp": "2025-02-06T21:12:50.559615798" } -} +} \ No newline at end of file diff --git a/tests/annotation_bcfann.nf.test.snap b/tests/annotation_bcfann.nf.test.snap index 51f7072b12..8cb22cecc3 100644 --- a/tests/annotation_bcfann.nf.test.snap +++ b/tests/annotation_bcfann.nf.test.snap @@ -37,4 +37,4 @@ }, "timestamp": "2024-10-29T09:38:54.991004" } -} +} \ No newline at end of file diff --git a/tests/annotation_merge.nf.test b/tests/annotation_merge.nf.test index 323b29eb3a..cc4af057a0 100644 --- a/tests/annotation_merge.nf.test +++ b/tests/annotation_merge.nf.test @@ -13,8 +13,6 @@ nextflow_pipeline { outdir = "$outputDir" input = "${projectDir}/tests/csv/3.0/vcf_single.csv" step = 'annotate' - snpeff_cache = 's3://annotation-cache/snpeff_cache/' - vep_cache = 's3://annotation-cache/vep_cache/' tools = 'merge' } } @@ -48,8 +46,6 @@ nextflow_pipeline { outdir = "$outputDir" input = "${projectDir}/tests/csv/3.0/vcf_single.csv" step = 'annotate' - snpeff_cache = 's3://annotation-cache/snpeff_cache/' - vep_cache = 's3://annotation-cache/vep_cache/' tools = 'merge,snpeff,vep' } } diff --git 
a/tests/annotation_merge.nf.test.snap b/tests/annotation_merge.nf.test.snap index 56c6bab95f..866d214e0b 100644 --- a/tests/annotation_merge.nf.test.snap +++ b/tests/annotation_merge.nf.test.snap @@ -197,9 +197,9 @@ ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-29T09:56:59.385257" + "timestamp": "2025-01-28T17:58:56.512301159" } -} +} \ No newline at end of file diff --git a/tests/annotation_snpeff.nf.test b/tests/annotation_snpeff.nf.test index 9addb8f1d7..e67416bb54 100644 --- a/tests/annotation_snpeff.nf.test +++ b/tests/annotation_snpeff.nf.test @@ -5,51 +5,14 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --tools snpeff --download_cache") { - - when { - params { - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - outdir = "$outputDir" - input = "${projectDir}/tests/csv/3.0/vcf_single.csv" - step = 'annotate' - download_cache = true - tools = 'snpeff' - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - assertAll( - { assert workflow.success}, - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } - - test("Fails with profile test | --tools snpeff 
--snpeff_db na --build_only_index") { + test("Fails with profile test | --tools snpeff --snpeff_db na") { when { params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' outdir = "$outputDir" input = "${projectDir}/tests/csv/3.0/vcf_single.csv" step = 'annotate' - snpeff_cache = 's3://annotation-cache/snpeff_cache/' snpeff_db = "na" - input = false - build_only_index = true tools = 'snpeff' } } diff --git a/tests/annotation_vep.nf.test b/tests/annotation_vep.nf.test index 80659a41b7..17a09dba17 100644 --- a/tests/annotation_vep.nf.test +++ b/tests/annotation_vep.nf.test @@ -5,41 +5,7 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --tools vep --download_cache --vep_include_fasta") { - - when { - params { - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - outdir = "$outputDir" - input = "${projectDir}/tests/csv/3.0/vcf_single.csv" - step = 'annotate' - download_cache = true - tools = 'vep' - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - assertAll( - { assert workflow.success}, - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } - - test("Fails with profile test | --tools vep 
--vep_cache_version 1 --build_only_index") { + test("Fails with profile test | --tools vep --vep_cache_version 1") { when { params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' @@ -48,8 +14,6 @@ nextflow_pipeline { step = 'annotate' vep_cache = 's3://annotation-cache/vep_cache/' vep_cache_version = 1 - input = false - build_only_index = true tools = 'vep' } } diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index f6931b8512..1dafeecf9f 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test": { "content": [ - 23, + 21, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -205,7 +205,6 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.cram", "preprocessing/recalibrated/test/test.recal.cram.crai", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -317,9 +316,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T21:41:27.897118" + "timestamp": "2025-02-06T21:58:24.072734263" } -} +} \ No newline at end of file diff --git a/tests/save_output_as_bam.nf.test b/tests/save_output_as_bam.nf.test index d19683a971..29927cae2c 100644 --- a/tests/save_output_as_bam.nf.test +++ b/tests/save_output_as_bam.nf.test @@ -12,7 +12,7 @@ nextflow_pipeline { outdir = "$outputDir" save_output_as_bam = true skip_tools = 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' - tools = '' + tools = null } } diff --git a/tests/save_output_as_bam.nf.test.snap b/tests/save_output_as_bam.nf.test.snap index d2ed3f085d..4d565f99d3 100644 --- a/tests/save_output_as_bam.nf.test.snap +++ b/tests/save_output_as_bam.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test | --save_output_as_bam | skip QC/recal/md": { "content": [ - 10, + 8, { "BAM_TO_CRAM_MAPPING": { "samtools": 1.21 @@ -29,11 +29,10 @@ "preprocessing/mapped", 
"preprocessing/mapped/test", "preprocessing/mapped/test/test.sorted.bam", - "preprocessing/mapped/test/test.sorted.bam.bai", - "reference" + "preprocessing/mapped/test/test.sorted.bam.bai" ], [ - + ], [ [ @@ -43,9 +42,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-08T11:11:44.283548" + "timestamp": "2025-02-06T22:01:51.678037693" } -} +} \ No newline at end of file diff --git a/tests/saved_mapped.nf.test b/tests/saved_mapped.nf.test index 06e5524e50..c722e5c1ed 100644 --- a/tests/saved_mapped.nf.test +++ b/tests/saved_mapped.nf.test @@ -12,7 +12,7 @@ nextflow_pipeline { outdir = "$outputDir" save_mapped = true skip_tools = 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' - tools = '' + tools = null } } diff --git a/tests/saved_mapped.nf.test.snap b/tests/saved_mapped.nf.test.snap index 648822f572..0da6e79be6 100644 --- a/tests/saved_mapped.nf.test.snap +++ b/tests/saved_mapped.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test | --save_mapped | skip QC/recal/md": { "content": [ - 10, + 8, { "BAM_TO_CRAM_MAPPING": { "samtools": 1.21 @@ -29,20 +29,19 @@ "preprocessing/mapped", "preprocessing/mapped/test", "preprocessing/mapped/test/test.sorted.cram", - "preprocessing/mapped/test/test.sorted.cram.crai", - "reference" + "preprocessing/mapped/test/test.sorted.cram.crai" ], [ - + ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-08T09:53:32.063494" + "timestamp": "2025-02-06T22:03:03.137546064" } -} +} \ No newline at end of file diff --git a/tests/start_from_markduplicates.nf.test b/tests/start_from_markduplicates.nf.test index b9bc2f4434..41926ab59b 100644 --- a/tests/start_from_markduplicates.nf.test +++ b/tests/start_from_markduplicates.nf.test @@ -5,7 +5,7 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --input 
tests/csv/3.0/mapped_single_bam.csv --step markduplicates --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --tools null") { when { params { @@ -43,7 +43,7 @@ nextflow_pipeline { } } - test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --skip_tools markduplicates --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --skip_tools markduplicates --tools null") { when { params { @@ -83,7 +83,7 @@ nextflow_pipeline { } - test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --tools null") { when { params { @@ -121,7 +121,7 @@ nextflow_pipeline { } } - test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --skip_tools markduplicates --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --skip_tools markduplicates --tools null") { when { params { diff --git a/tests/start_from_markduplicates.nf.test.snap b/tests/start_from_markduplicates.nf.test.snap index 12672ddd02..d75dc77cc2 100644 --- a/tests/start_from_markduplicates.nf.test.snap +++ b/tests/start_from_markduplicates.nf.test.snap @@ -1,7 +1,7 @@ { - "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --skip_tools markduplicates --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --skip_tools markduplicates --tools null": { "content": [ - 9, + 8, { "GATK4_APPLYBQSR": { "gatk4": "4.5.0.0" @@ -82,7 +82,6 @@ "preprocessing/recal_table", "preprocessing/recal_table/test", "preprocessing/recal_table/test/test.recal.table", - "reference", "reports", "reports/mosdepth", "reports/mosdepth/test", @@ 
-115,18 +114,18 @@ "test.sorted.cram.stats:md5,a15b3a5e59337db312d66020c7bb93ac" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T16:15:20.865586" + "timestamp": "2025-02-06T22:05:50.06413839" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --skip_tools markduplicates --tools null": { "content": [ - 13, + 11, { "GATK4_APPLYBQSR": { "gatk4": "4.5.0.0" @@ -134,10 +133,6 @@ "GATK4_BASERECALIBRATOR": { "gatk4": "4.5.0.0" }, - "GATK4_MARKDUPLICATES": { - "gatk4": "4.5.0.0", - "samtools": "1.19.2" - }, "INDEX_CRAM": { "samtools": 1.21 }, @@ -153,8 +148,6 @@ }, [ "csv", - "csv/markduplicates.csv", - "csv/markduplicates_no_table.csv", "csv/recalibrated.csv", "multiqc", "multiqc/multiqc_data", @@ -171,14 +164,9 @@ "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", "multiqc/multiqc_data/multiqc_general_stats.txt", - "multiqc/multiqc_data/multiqc_picard_dups.txt", "multiqc/multiqc_data/multiqc_samtools_stats.txt", "multiqc/multiqc_data/multiqc_software_versions.txt", "multiqc/multiqc_data/multiqc_sources.txt", - "multiqc/multiqc_data/picard_deduplication.txt", - "multiqc/multiqc_data/picard_histogram.txt", - "multiqc/multiqc_data/picard_histogram_1.txt", - "multiqc/multiqc_data/picard_histogram_2.txt", "multiqc/multiqc_data/samtools-stats-dp.txt", "multiqc/multiqc_data/samtools_alignment_plot.txt", "multiqc/multiqc_plots", @@ -190,8 +178,6 @@ "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", - "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", - "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", 
"multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", @@ -203,8 +189,6 @@ "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", - "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", - "multiqc/multiqc_plots/png/picard_deduplication-pct.png", "multiqc/multiqc_plots/png/samtools-stats-dp.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", @@ -216,8 +200,6 @@ "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", - "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", - "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", @@ -225,10 +207,6 @@ "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", "preprocessing", - "preprocessing/markduplicates", - "preprocessing/markduplicates/test", - "preprocessing/markduplicates/test/test.md.cram", - "preprocessing/markduplicates/test/test.md.cram.crai", "preprocessing/recal_table", "preprocessing/recal_table/test", "preprocessing/recal_table/test/test.recal.table", @@ -236,61 +214,50 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.cram", "preprocessing/recalibrated/test/test.recal.cram.crai", - "reference", "reports", - "reports/markduplicates", - "reports/markduplicates/test", - "reports/markduplicates/test/test.md.cram.metrics", "reports/mosdepth", "reports/mosdepth/test", - 
"reports/mosdepth/test/test.md.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", - "reports/mosdepth/test/test.md.mosdepth.summary.txt", - "reports/mosdepth/test/test.md.regions.bed.gz", - "reports/mosdepth/test/test.md.regions.bed.gz.csi", "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", "reports/mosdepth/test/test.recal.mosdepth.summary.txt", "reports/mosdepth/test/test.recal.regions.bed.gz", "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.sorted.mosdepth.summary.txt", + "reports/mosdepth/test/test.sorted.regions.bed.gz", + "reports/mosdepth/test/test.sorted.regions.bed.gz.csi", "reports/samtools", "reports/samtools/test", - "reports/samtools/test/test.md.cram.stats", - "reports/samtools/test/test.recal.cram.stats" + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test/test.sorted.cram.stats" ], [ - "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,902745b5a1915e5c1a25267b11bebbe7", - "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,832ed176357d6a5b4e50c718f5e4a704", - "mosdepth-coverage-per-contig-single.txt:md5,76d816c3f71faf2009c8a6f88092a2f3", - "mosdepth-cumcoverage-dist-id.txt:md5,3af8f7d8ed7d1fdff6118e0098258192", - "mosdepth_cov_dist.txt:md5,4a2236db76d75e45012f6d7c180c90d6", - "mosdepth_cumcov_dist.txt:md5,4a2236db76d75e45012f6d7c180c90d6", - "mosdepth_perchrom.txt:md5,76d816c3f71faf2009c8a6f88092a2f3", + "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,d2650a5bec510d798e347f36a4d00e2d", + "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,f2123f7ee3d060c1547efc6247a04e42", + "mosdepth-coverage-per-contig-single.txt:md5,8b48f3336b063dcb1e086928b28a2cc6", + 
"mosdepth-cumcoverage-dist-id.txt:md5,3148977f0c4684ba681ee298d677fe38", + "mosdepth_cov_dist.txt:md5,9a531d5a5c05e568a1aeb2e738ac23c4", + "mosdepth_cumcov_dist.txt:md5,9a531d5a5c05e568a1aeb2e738ac23c4", + "mosdepth_perchrom.txt:md5,8b48f3336b063dcb1e086928b28a2cc6", "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", - "multiqc_samtools_stats.txt:md5,de9451d4736a410d09de58828761ea87", - "picard_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_histogram_2.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "samtools-stats-dp.txt:md5,2247da9fa269d826da3f33ba6fa66954", - "samtools_alignment_plot.txt:md5,22572fcd0791878ed37ae2f48213cee2", - "test.md.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", - "test.md.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", - "test.md.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", - "test.md.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", - "test.md.regions.bed.gz.csi:md5,080731cdedcd389e72135f048d6e2e00", - "test.recal.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", - "test.recal.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", - "test.recal.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", - "test.recal.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", - "test.recal.regions.bed.gz.csi:md5,080731cdedcd389e72135f048d6e2e00", - "test.md.cram.stats:md5,f181d98f08ad94c3926ac149a87d834b", - "test.recal.cram.stats:md5,18346c938c7b1bfaf9ac9413fdba90d8" + "multiqc_samtools_stats.txt:md5,7f5f43de35c194be7f5980b62eacfab7", + "samtools-stats-dp.txt:md5,85c4ca7a3a6f2534d4d329937be49966", + "samtools_alignment_plot.txt:md5,301dda049c8aa2f848c98c81f584c315", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + 
"test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,c5d0be930ffc9e562f21519a0d488d5d", + "test.sorted.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.sorted.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.sorted.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.sorted.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.sorted.regions.bed.gz.csi:md5,c5d0be930ffc9e562f21519a0d488d5d", + "test.recal.cram.stats:md5,9f75ec16d22ce12c348cbd7477c9886e", + "test.sorted.cram.stats:md5,308a4213cc2ea25cbdd6d58b562673a5" ], [ - [ - "test.md.cram", - "2f11e4fe3390b8ad0a1852616fd1da04" - ], [ "test.recal.cram", "463ac3b905fbf4ddf113a94dbfa8d69f" @@ -298,12 +265,12 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T16:12:33.604156" + "timestamp": "2025-02-06T22:08:31.827099667" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --skip_tools markduplicates --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --tools null": { "content": [ 12, { @@ -313,6 +280,10 @@ "GATK4_BASERECALIBRATOR": { "gatk4": "4.5.0.0" }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, "INDEX_CRAM": { "samtools": 1.21 }, @@ -328,6 +299,8 @@ }, [ "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", "csv/recalibrated.csv", "multiqc", "multiqc/multiqc_data", @@ -344,9 +317,14 @@ "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", "multiqc/multiqc_data/multiqc_samtools_stats.txt", "multiqc/multiqc_data/multiqc_software_versions.txt", 
"multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/picard_histogram.txt", + "multiqc/multiqc_data/picard_histogram_1.txt", + "multiqc/multiqc_data/picard_histogram_2.txt", "multiqc/multiqc_data/samtools-stats-dp.txt", "multiqc/multiqc_data/samtools_alignment_plot.txt", "multiqc/multiqc_plots", @@ -358,6 +336,8 @@ "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", @@ -369,6 +349,8 @@ "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", "multiqc/multiqc_plots/png/samtools-stats-dp.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", @@ -380,6 +362,8 @@ "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", @@ -387,6 +371,10 @@ "pipeline_info", 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", "preprocessing/recal_table", "preprocessing/recal_table/test", "preprocessing/recal_table/test/test.recal.table", @@ -394,51 +382,60 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.cram", "preprocessing/recalibrated/test/test.recal.cram.crai", - "reference", "reports", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", "reports/mosdepth", "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", "reports/mosdepth/test/test.recal.mosdepth.summary.txt", "reports/mosdepth/test/test.recal.regions.bed.gz", "reports/mosdepth/test/test.recal.regions.bed.gz.csi", - "reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt", - "reports/mosdepth/test/test.sorted.mosdepth.summary.txt", - "reports/mosdepth/test/test.sorted.regions.bed.gz", - "reports/mosdepth/test/test.sorted.regions.bed.gz.csi", "reports/samtools", "reports/samtools/test", - "reports/samtools/test/test.recal.cram.stats", - "reports/samtools/test/test.sorted.cram.stats" + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" ], [ - "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,d2650a5bec510d798e347f36a4d00e2d", - 
"gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,f2123f7ee3d060c1547efc6247a04e42", - "mosdepth-coverage-per-contig-single.txt:md5,8b48f3336b063dcb1e086928b28a2cc6", - "mosdepth-cumcoverage-dist-id.txt:md5,3148977f0c4684ba681ee298d677fe38", - "mosdepth_cov_dist.txt:md5,9a531d5a5c05e568a1aeb2e738ac23c4", - "mosdepth_cumcov_dist.txt:md5,9a531d5a5c05e568a1aeb2e738ac23c4", - "mosdepth_perchrom.txt:md5,8b48f3336b063dcb1e086928b28a2cc6", + "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,902745b5a1915e5c1a25267b11bebbe7", + "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,832ed176357d6a5b4e50c718f5e4a704", + "mosdepth-coverage-per-contig-single.txt:md5,76d816c3f71faf2009c8a6f88092a2f3", + "mosdepth-cumcoverage-dist-id.txt:md5,3af8f7d8ed7d1fdff6118e0098258192", + "mosdepth_cov_dist.txt:md5,4a2236db76d75e45012f6d7c180c90d6", + "mosdepth_cumcov_dist.txt:md5,4a2236db76d75e45012f6d7c180c90d6", + "mosdepth_perchrom.txt:md5,76d816c3f71faf2009c8a6f88092a2f3", "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", - "multiqc_samtools_stats.txt:md5,7f5f43de35c194be7f5980b62eacfab7", - "samtools-stats-dp.txt:md5,85c4ca7a3a6f2534d4d329937be49966", - "samtools_alignment_plot.txt:md5,301dda049c8aa2f848c98c81f584c315", - "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", - "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", - "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", - "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", - "test.recal.regions.bed.gz.csi:md5,c5d0be930ffc9e562f21519a0d488d5d", - "test.sorted.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", - "test.sorted.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", - "test.sorted.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", - "test.sorted.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", - 
"test.sorted.regions.bed.gz.csi:md5,c5d0be930ffc9e562f21519a0d488d5d", - "test.recal.cram.stats:md5,9f75ec16d22ce12c348cbd7477c9886e", - "test.sorted.cram.stats:md5,308a4213cc2ea25cbdd6d58b562673a5" + "multiqc_samtools_stats.txt:md5,de9451d4736a410d09de58828761ea87", + "picard_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_histogram_2.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,2247da9fa269d826da3f33ba6fa66954", + "samtools_alignment_plot.txt:md5,22572fcd0791878ed37ae2f48213cee2", + "test.md.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.md.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.md.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.md.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.md.regions.bed.gz.csi:md5,080731cdedcd389e72135f048d6e2e00", + "test.recal.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.recal.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.recal.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.recal.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.recal.regions.bed.gz.csi:md5,080731cdedcd389e72135f048d6e2e00", + "test.md.cram.stats:md5,f181d98f08ad94c3926ac149a87d834b", + "test.recal.cram.stats:md5,18346c938c7b1bfaf9ac9413fdba90d8" ], [ + [ + "test.md.cram", + "2f11e4fe3390b8ad0a1852616fd1da04" + ], [ "test.recal.cram", "463ac3b905fbf4ddf113a94dbfa8d69f" @@ -446,14 +443,14 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T16:23:55.741166" + "timestamp": "2025-02-06T22:04:33.686782461" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step 
markduplicates --tools null": { "content": [ - 13, + 12, { "GATK4_APPLYBQSR": { "gatk4": "4.5.0.0" @@ -563,7 +560,6 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.cram", "preprocessing/recalibrated/test/test.recal.cram.crai", - "reference", "reports", "reports/markduplicates", "reports/markduplicates/test", @@ -625,9 +621,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T16:18:25.238396" + "timestamp": "2025-02-06T22:07:14.536778705" } -} +} \ No newline at end of file diff --git a/tests/start_from_preparerecalibration.nf.test b/tests/start_from_preparerecalibration.nf.test index 8d445aaf40..5ad45fa15a 100644 --- a/tests/start_from_preparerecalibration.nf.test +++ b/tests/start_from_preparerecalibration.nf.test @@ -5,7 +5,7 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --tools null") { when { params { @@ -43,7 +43,7 @@ nextflow_pipeline { } } - test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka") { when { params { @@ -83,7 +83,7 @@ nextflow_pipeline { } - test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --tools null") { when { params { @@ -121,7 +121,7 @@ nextflow_pipeline { } } - test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step 
prepare_recalibration --skip_tools baserecalibrator --tools strelka ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka") { when { params { diff --git a/tests/start_from_preparerecalibration.nf.test.snap b/tests/start_from_preparerecalibration.nf.test.snap index 485f5ccc37..bc7589df30 100644 --- a/tests/start_from_preparerecalibration.nf.test.snap +++ b/tests/start_from_preparerecalibration.nf.test.snap @@ -1,72 +1,7 @@ { - "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka": { "content": [ - 7, - { - "GATK4_APPLYBQSR": { - "gatk4": "4.5.0.0" - }, - "GATK4_BASERECALIBRATOR": { - "gatk4": "4.5.0.0" - }, - "Workflow": { - "nf-core/sarek": "v3.6.0dev" - } - }, - [ - "csv", - "csv/markduplicates.csv", - "multiqc", - "multiqc/multiqc_data", - "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", - "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", - "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", - "multiqc/multiqc_data/gatk_base_recalibrator.txt", - "multiqc/multiqc_data/multiqc.log", - "multiqc/multiqc_data/multiqc_citations.txt", - "multiqc/multiqc_data/multiqc_data.json", - "multiqc/multiqc_data/multiqc_software_versions.txt", - "multiqc/multiqc_data/multiqc_sources.txt", - "multiqc/multiqc_plots", - "multiqc/multiqc_plots/pdf", - "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", - "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", - "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", - "multiqc/multiqc_plots/png", - 
"multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", - "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", - "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", - "multiqc/multiqc_plots/svg", - "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", - "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", - "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", - "multiqc/multiqc_report.html", - "pipeline_info", - "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "preprocessing", - "preprocessing/recal_table", - "preprocessing/recal_table/test", - "preprocessing/recal_table/test/test.recal.table", - "reference" - ], - [ - "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,d2650a5bec510d798e347f36a4d00e2d", - "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,f2123f7ee3d060c1547efc6247a04e42", - "multiqc_citations.txt:md5,3815a9f79e41890653a0e0d602c92ac9" - ], - [ - - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-11-09T16:57:10.392231" - }, - "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka ": { - "content": [ - 10, + 9, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -138,7 +73,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -173,18 +107,110 @@ "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T16:58:48.877928" + "timestamp": 
"2025-02-06T22:10:55.577176412" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --tools null": { "content": [ - 10, + 9, + { + "GATK4_APPLYBQSR": { + "gatk4": "4.5.0.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.5.0.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "Workflow": { + "nf-core/sarek": "v3.6.0dev" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,d2650a5bec510d798e347f36a4d00e2d", + "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,f2123f7ee3d060c1547efc6247a04e42", + "multiqc_citations.txt:md5,3815a9f79e41890653a0e0d602c92ac9", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,c5d0be930ffc9e562f21519a0d488d5d", + "test.recal.cram.stats:md5,9f75ec16d22ce12c348cbd7477c9886e" + ], + [ + [ + "test.recal.cram", + "463ac3b905fbf4ddf113a94dbfa8d69f" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-06T22:12:12.446992312" + 
}, + "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka": { + "content": [ + 9, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -256,7 +282,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -291,18 +316,18 @@ "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:02:51.883814" + "timestamp": "2025-02-06T22:13:19.85369545" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --tools null": { "content": [ - 10, + 6, { "GATK4_APPLYBQSR": { "gatk4": "4.5.0.0" @@ -310,9 +335,6 @@ "GATK4_BASERECALIBRATOR": { "gatk4": "4.5.0.0" }, - "INDEX_CRAM": { - "samtools": 1.21 - }, "Workflow": { "nf-core/sarek": "v3.6.0dev" } @@ -320,7 +342,6 @@ [ "csv", "csv/markduplicates.csv", - "csv/recalibrated.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", @@ -351,46 +372,21 @@ "preprocessing", "preprocessing/recal_table", "preprocessing/recal_table/test", - "preprocessing/recal_table/test/test.recal.table", - "preprocessing/recalibrated", - "preprocessing/recalibrated/test", - "preprocessing/recalibrated/test/test.recal.cram", - "preprocessing/recalibrated/test/test.recal.cram.crai", - "reference", - "reports", - "reports/mosdepth", - "reports/mosdepth/test", - "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", - "reports/mosdepth/test/test.recal.mosdepth.summary.txt", - 
"reports/mosdepth/test/test.recal.regions.bed.gz", - "reports/mosdepth/test/test.recal.regions.bed.gz.csi", - "reports/samtools", - "reports/samtools/test", - "reports/samtools/test/test.recal.cram.stats" + "preprocessing/recal_table/test/test.recal.table" ], [ "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt:md5,d2650a5bec510d798e347f36a4d00e2d", "gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt:md5,f2123f7ee3d060c1547efc6247a04e42", - "multiqc_citations.txt:md5,3815a9f79e41890653a0e0d602c92ac9", - "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", - "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", - "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", - "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", - "test.recal.regions.bed.gz.csi:md5,c5d0be930ffc9e562f21519a0d488d5d", - "test.recal.cram.stats:md5,9f75ec16d22ce12c348cbd7477c9886e" + "multiqc_citations.txt:md5,3815a9f79e41890653a0e0d602c92ac9" ], [ - [ - "test.recal.cram", - "463ac3b905fbf4ddf113a94dbfa8d69f" - ] + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:01:30.130705" + "timestamp": "2025-02-06T22:09:46.786863785" } -} +} \ No newline at end of file diff --git a/tests/start_from_recalibration.nf.test b/tests/start_from_recalibration.nf.test index 3549bd793a..b4dd0021fa 100644 --- a/tests/start_from_recalibration.nf.test +++ b/tests/start_from_recalibration.nf.test @@ -5,7 +5,7 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_sarek" - test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --tools null") { when { params { @@ -43,7 +43,7 @@ nextflow_pipeline { } } - test("Run with profile test | --input 
tests/csv/3.0/mapped_single_bam.csv --step recalibrate --skip_tools baserecalibrator --tools strelka ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --skip_tools baserecalibrator --tools strelka") { when { params { @@ -83,7 +83,7 @@ nextflow_pipeline { } - test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --tools null ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --tools null") { when { params { @@ -121,7 +121,7 @@ nextflow_pipeline { } } - test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --skip_tools baserecalibrator --tools strelka ") { + test("Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --skip_tools baserecalibrator --tools strelka") { when { params { diff --git a/tests/start_from_recalibration.nf.test.snap b/tests/start_from_recalibration.nf.test.snap index 7347db8471..da6d728e6a 100644 --- a/tests/start_from_recalibration.nf.test.snap +++ b/tests/start_from_recalibration.nf.test.snap @@ -1,7 +1,7 @@ { - "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --tools null": { "content": [ - 9, + 8, { "GATK4_APPLYBQSR": { "gatk4": "4.5.0.0" @@ -31,7 +31,6 @@ "preprocessing/recalibrated/test", "preprocessing/recalibrated/test/test.recal.cram", "preprocessing/recalibrated/test/test.recal.cram.crai", - "reference", "reports", "reports/mosdepth", "reports/mosdepth/test", @@ -61,14 +60,14 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:22:03.443847" + "timestamp": "2025-02-06T22:16:52.591101604" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --skip_tools 
baserecalibrator --tools strelka ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --skip_tools baserecalibrator --tools strelka": { "content": [ - 10, + 9, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -140,7 +139,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -175,18 +173,18 @@ "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:20:43.396817" + "timestamp": "2025-02-06T22:15:47.626711444" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --skip_tools baserecalibrator --tools strelka ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --skip_tools baserecalibrator --tools strelka": { "content": [ - 10, + 9, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -258,7 +256,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -293,18 +290,18 @@ "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:23:40.285914" + "timestamp": "2025-02-06T22:18:01.075429821" }, - "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --tools null ": { + "Run with profile test | --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --tools null": { "content": [ - 6, + 5, { "GATK4_APPLYBQSR": { "gatk4": "4.5.0.0" @@ -325,20 +322,19 @@ "multiqc/multiqc_report.html", "pipeline_info", 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "preprocessing", - "reference" + "preprocessing" ], [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T17:19:08.093003" + "timestamp": "2025-02-06T22:14:32.710777467" } -} +} \ No newline at end of file diff --git a/tests/tumor-normal-pair.nf.test.snap b/tests/tumor-normal-pair.nf.test.snap index e4810e5feb..933487d6eb 100644 --- a/tests/tumor-normal-pair.nf.test.snap +++ b/tests/tumor-normal-pair.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test | --input tests/csv/3.0/fastq_pair.csv": { "content": [ - 40, + 38, { "BCFTOOLS_STATS": { "bcftools": 1.2 @@ -216,7 +216,6 @@ "preprocessing/recalibrated/test2", "preprocessing/recalibrated/test2/test2.recal.cram", "preprocessing/recalibrated/test2/test2.recal.cram.crai", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -385,9 +384,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-09T18:22:56.125003" + "timestamp": "2025-02-06T22:20:28.612138476" } -} +} \ No newline at end of file diff --git a/tests/variant_calling_strelka.nf.test.snap b/tests/variant_calling_strelka.nf.test.snap index a2c281a07f..9c550a439a 100644 --- a/tests/variant_calling_strelka.nf.test.snap +++ b/tests/variant_calling_strelka.nf.test.snap @@ -80,7 +80,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -195,14 +194,14 @@ "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:22:40.803241819" + 
"timestamp": "2025-02-06T22:27:15.776602156" }, "Run with profile test | --tools strelka --no_intervals | somatic": { "content": [ @@ -288,7 +287,6 @@ "no_intervals.bed.gz.tbi", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -375,14 +373,14 @@ "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,a922c51ca3b2ea7cdcfa09e9c8c55d52" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:29:05.475529758" + "timestamp": "2025-02-06T22:34:10.414751676" }, "Run with profile test | --tools strelka | somatic": { "content": [ @@ -465,7 +463,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -552,14 +549,14 @@ "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:27:18.76415046" + "timestamp": "2025-02-06T22:32:15.166938605" }, "Run with profile test | --tools strelka | germline": { "content": [ @@ -635,7 +632,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -686,14 +682,14 @@ "sample1.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:24:13.636617483" + "timestamp": "2025-02-06T22:28:38.600998753" }, "Run with profile test | --tools strelka --no_intervals | germline": { "content": [ @@ -772,7 +768,6 @@ "no_intervals.bed.gz.tbi", "pipeline_info", 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/strelka", @@ -823,13 +818,13 @@ "sample1.strelka.variants.TsTv.count:md5,1481854d2a765f5641856ecf95ca4097" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:25:42.940353635" + "timestamp": "2025-02-06T22:30:20.72637938" } -} +} \ No newline at end of file diff --git a/tests/variant_calling_strelka_bp.nf.test.snap b/tests/variant_calling_strelka_bp.nf.test.snap index c0398176b3..3de3ee750f 100644 --- a/tests/variant_calling_strelka_bp.nf.test.snap +++ b/tests/variant_calling_strelka_bp.nf.test.snap @@ -89,7 +89,6 @@ "no_intervals.bed.gz.tbi", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/manta", @@ -212,14 +211,14 @@ "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,a922c51ca3b2ea7cdcfa09e9c8c55d52" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:39:15.50268779" + "timestamp": "2025-02-06T22:38:29.936051924" }, "Run with profile test | --tools manta,strelka | somatic": { "content": [ @@ -308,7 +307,6 @@ "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "reference", "reports", "reports/bcftools", "reports/bcftools/manta", @@ -431,13 +429,13 @@ "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae" ], [ - + ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-11-15T22:37:23.231917992" + "timestamp": "2025-02-06T22:36:19.578490535" } -} +} \ No newline at end of file diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index f554cd9ddd..a2465ffec8 100644 --- 
a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -94,68 +94,71 @@ include { MULTIQC } from '../../module workflow SAREK { take: - input_sample - allele_files - bcftools_annotations - bcftools_annotations_tbi - bcftools_header_lines - cf_chrom_len - chr_files - cnvkit_reference - dbsnp - dbsnp_tbi - dbsnp_vqsr - dict - fasta - fasta_fai - gc_file - germline_resource - germline_resource_tbi - index_alignment - intervals_and_num_intervals - intervals_bed_combined - intervals_bed_combined_for_variant_calling - intervals_bed_gz_tbi_and_num_intervals - intervals_bed_gz_tbi_combined - intervals_for_preprocessing - known_indels_vqsr - known_sites_indels - known_sites_indels_tbi - known_sites_snps - known_sites_snps_tbi - known_snps_vqsr - loci_files - mappability - msisensorpro_scan - ngscheckmate_bed - pon - pon_tbi - rt_file - sentieon_dnascope_model - snpeff_cache - vep_cache - vep_cache_version - vep_extra_files - vep_fasta - vep_genome - vep_species + input_sample + allele_files + bcftools_annotations + bcftools_annotations_tbi + bcftools_header_lines + cf_chrom_len + chr_files + cnvkit_reference + dbsnp + dbsnp_tbi + dbsnp_vqsr + dict + fasta + fasta_fai + gc_file + germline_resource + germline_resource_tbi + index_alignment + intervals_and_num_intervals + intervals_bed_combined + intervals_bed_combined_for_variant_calling + intervals_bed_gz_tbi_and_num_intervals + intervals_bed_gz_tbi_combined + intervals_for_preprocessing + known_indels_vqsr + known_sites_indels + known_sites_indels_tbi + known_sites_snps + known_sites_snps_tbi + known_snps_vqsr + loci_files + mappability + msisensorpro_scan + ngscheckmate_bed + pon + pon_tbi + rt_file + sentieon_dnascope_model + snpeff_cache + snpeff_db + vep_cache + vep_cache_version + vep_extra_files + vep_fasta + vep_genome + vep_species main: // To gather all QC reports for MultiQC ch_multiqc_files = Channel.empty() - multiqc_report = Channel.empty() - reports = Channel.empty() - versions = Channel.empty() + 
multiqc_report = Channel.empty() + reports = Channel.empty() + versions = Channel.empty() // PREPROCESSING if (params.step == 'mapping') { + input_sample.combine(fasta).map { meta, reads, _meta2, _fasta -> [meta + [fasta: fasta], reads] } + // Figure out if input is bam, fastq, or spring - input_sample_type = input_sample.branch{ - bam: it[0].data_type == "bam" - fastq_gz: it[0].data_type == "fastq_gz" + input_sample_type = input_sample.branch { + bam: it[0].data_type == "bam" + fastq_gz: it[0].data_type == "fastq_gz" one_fastq_gz_spring: it[0].data_type == "one_fastq_gz_spring" two_fastq_gz_spring: it[0].data_type == "two_fastq_gz_spring" } @@ -169,28 +172,34 @@ workflow SAREK { one_fastq_gz_from_spring = fastq_gz_pair_from_spring.fastq.map { meta, files -> addReadgroupToMeta(meta, files) } // Two fastq.gz.spring-files - one for R1 and one for R2 - r1_fastq_gz_from_spring = SPRING_DECOMPRESS_TO_R1_FQ(input_sample_type.two_fastq_gz_spring.map{ meta, files -> - [meta, files[0] ]}, - true // write_one_fastq_gz + r1_fastq_gz_from_spring = SPRING_DECOMPRESS_TO_R1_FQ( + input_sample_type.two_fastq_gz_spring.map { meta, files -> + [meta, files[0]] + }, + true, ) - r2_fastq_gz_from_spring = SPRING_DECOMPRESS_TO_R2_FQ(input_sample_type.two_fastq_gz_spring.map{ meta, files -> - [meta, files[1] ]}, - true // write_one_fastq_gz + r2_fastq_gz_from_spring = SPRING_DECOMPRESS_TO_R2_FQ( + input_sample_type.two_fastq_gz_spring.map { meta, files -> + [meta, files[1]] + }, + true, ) - two_fastq_gz_from_spring = r1_fastq_gz_from_spring.fastq.join(r2_fastq_gz_from_spring.fastq).map{ meta, fastq_1, fastq_2 -> [meta, [fastq_1, fastq_2]]} + two_fastq_gz_from_spring = r1_fastq_gz_from_spring.fastq.join(r2_fastq_gz_from_spring.fastq).map { meta, fastq_1, fastq_2 -> [meta, [fastq_1, fastq_2]] } two_fastq_gz_from_spring = two_fastq_gz_from_spring.map { meta, files -> addReadgroupToMeta(meta, files) } // Convert any bam input to fastq // fasta are not needed when converting bam to fastq -> [ 
id:"fasta" ], [] // No need for fasta.fai -> [] - interleave_input = false // Currently don't allow interleaved input + interleave_input = false + // Currently don't allow interleaved input CONVERT_FASTQ_INPUT( input_sample_type.bam, - [ [ id:"fasta" ], [] ], // fasta - [ [ id:'null' ], [] ], // fasta_fai - interleave_input) + [[id: "fasta"], []], + [[id: 'null'], []], + interleave_input, + ) // Gather fastq (inputed or converted) // Theorically this could work on mixed input (fastq for one sample and bam for another) @@ -207,7 +216,7 @@ workflow SAREK { if (!(params.skip_tools && params.skip_tools.split(',').contains('fastqc'))) { FASTQC(input_fastq) - reports = reports.mix(FASTQC.out.zip.collect{ meta, logs -> logs }) + reports = reports.mix(FASTQC.out.zip.collect { meta, logs -> logs }) versions = versions.mix(FASTQC.out.versions.first()) } @@ -218,26 +227,30 @@ workflow SAREK { fasta, fasta_fai, index_alignment, - params.group_by_umi_strategy) + params.group_by_umi_strategy, + ) - bam_converted_from_fastq = FASTQ_CREATE_UMI_CONSENSUS_FGBIO.out.consensusbam.map{ meta, bam -> [ meta, bam, [] ] } + bam_converted_from_fastq = FASTQ_CREATE_UMI_CONSENSUS_FGBIO.out.consensusbam.map { meta, bam -> [meta, bam, []] } // Convert back to fastq for further preprocessing // fasta are not needed when converting bam to fastq -> [ id:"fasta" ], [] // No need for fasta.fai -> [] - interleave_input = false // Currently don't allow interleaved input + interleave_input = false + // Currently don't allow interleaved input CONVERT_FASTQ_UMI( bam_converted_from_fastq, - [ [ id:"fasta" ], [] ], // fasta - [ [ id:'null' ], [] ], // fasta_fai - interleave_input) + [[id: "fasta"], []], + [[id: 'null'], []], + interleave_input, + ) reads_for_fastp = CONVERT_FASTQ_UMI.out.reads // Gather used softwares versions versions = versions.mix(CONVERT_FASTQ_UMI.out.versions) versions = versions.mix(FASTQ_CREATE_UMI_CONSENSUS_FGBIO.out.versions) - } else { + } + else { reads_for_fastp = input_fastq } 
@@ -248,25 +261,30 @@ workflow SAREK { save_merged = false FASTP( reads_for_fastp, - [], // we are not using any adapter fastas at the moment - false, // we don't use discard_trimmed_pass at the moment + [], + false, save_trimmed_fail, - save_merged + save_merged, ) - reports = reports.mix(FASTP.out.json.collect{ meta, json -> json }) - reports = reports.mix(FASTP.out.html.collect{ meta, html -> html }) + reports = reports.mix(FASTP.out.json.collect { meta, json -> json }) + reports = reports.mix(FASTP.out.html.collect { meta, html -> html }) if (params.split_fastq) { - reads_for_alignment = FASTP.out.reads.map{ meta, reads -> - read_files = reads.sort(false) { a,b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2) - [ meta + [ n_fastq: read_files.size() ], read_files ] - }.transpose() - } else reads_for_alignment = FASTP.out.reads + reads_for_alignment = FASTP.out.reads + .map { meta, reads -> + read_files = reads.sort(false) { a, b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2) + [meta + [n_fastq: read_files.size()], read_files] + } + .transpose() + } + else { + reads_for_alignment = FASTP.out.reads + } versions = versions.mix(FASTP.out.versions) - - } else { + } + else { reads_for_alignment = reads_for_fastp } @@ -274,19 +292,24 @@ workflow SAREK { // First, we must calculate number of lanes for each sample (meta.n_fastq) // This is needed to group reads from the same sample together using groupKey to avoid stalling the workflow // when reads from different samples are mixed together - reads_for_alignment.map { meta, reads -> - [ meta.subMap('patient', 'sample', 'sex', 'status'), reads ] + reads_for_alignment + .map { meta, reads -> + [meta.subMap('patient', 'sample', 'sex', 'status'), reads] } .groupTuple() .map { meta, reads -> - meta + [ n_fastq: reads.size() ] // We can drop the FASTQ files now that we know how many there are + meta + [n_fastq: reads.size()] } .set { reads_grouping_key } - 
reads_for_alignment = reads_for_alignment.map{ meta, reads -> + reads_for_alignment = reads_for_alignment.map { meta, reads -> // Update meta.id to meta.sample no multiple lanes or splitted fastqs - if (meta.size * meta.num_lanes == 1) [ meta + [ id:meta.sample ], reads ] - else [ meta, reads ] + if (meta.size * meta.num_lanes == 1) { + [meta + [id: meta.sample], reads] + } + else { + [meta, reads] + } } // reads will be sorted @@ -297,58 +320,48 @@ workflow SAREK { // Use groupKey to make sure that the correct group can advance as soon as it is complete // and not stall the workflow until all reads from all channels are mapped bam_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bam - .combine(reads_grouping_key) // Creates a tuple of [ meta, bam, reads_grouping_key ] + .combine(reads_grouping_key) .filter { meta1, bam, meta2 -> meta1.sample == meta2.sample } - // Add n_fastq and other variables to meta .map { meta1, bam, meta2 -> - [ meta1 + meta2, bam ] + [meta1 + meta2, bam] } - // Manipulate meta map to remove old fields and add new ones .map { meta, bam -> - [ meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [ data_type: 'bam', id: meta.sample ], bam ] + [meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [data_type: 'bam', id: meta.sample], bam] } - // Create groupKey from meta map .map { meta, bam -> - [ groupKey( meta, meta.n_fastq), bam ] + [groupKey(meta, meta.n_fastq), bam] } - // Group .groupTuple() bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bai - .combine(reads_grouping_key) // Creates a tuple of [ meta, bai, reads_grouping_key ] + .combine(reads_grouping_key) .filter { meta1, bai, meta2 -> meta1.sample == meta2.sample } - // Add n_fastq and other variables to meta .map { meta1, bai, meta2 -> - [ meta1 + meta2, bai ] + [meta1 + meta2, bai] } - // Manipulate meta map to remove old fields and add new ones .map { meta, bai -> - [ meta - meta.subMap('id', 
'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [ data_type: 'bai', id: meta.sample ], bai ] + [meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [data_type: 'bai', id: meta.sample], bai] } - // Create groupKey from meta map .map { meta, bai -> - [ groupKey( meta, meta.n_fastq), bai ] + [groupKey(meta, meta.n_fastq), bai] } - // Group .groupTuple() // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here // Except if and only if save_mapped or (skipping markduplicates and sentieon-dedup) - if ( - params.save_mapped || - ( - (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && - !(params.tools && params.tools.split(',').contains('sentieon_dedup')) - ) - ) { + if (params.save_mapped || ((params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && !(params.tools && params.tools.split(',').contains('sentieon_dedup')))) { // bams are merged (when multiple lanes from the same sample), indexed and then converted to cram BAM_MERGE_INDEX_SAMTOOLS(bam_mapped) BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai) // Create CSV to restart from this step - if (params.save_output_as_bam) CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, params.outdir, params.save_output_as_bam) - else CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true), params.outdir, params.save_output_as_bam) + if (params.save_output_as_bam) { + CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, params.outdir, params.save_output_as_bam) + } + else { + CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true), params.outdir, params.save_output_as_bam) + } // Gather used softwares versions versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) @@ -364,14 +377,14 @@ 
workflow SAREK { // ch_cram_no_markduplicates_restart = Channel.empty() cram_markduplicates_no_spark = Channel.empty() - cram_sentieon_dedup = Channel.empty() - cram_markduplicates_spark = Channel.empty() + cram_sentieon_dedup = Channel.empty() + cram_markduplicates_spark = Channel.empty() // STEP 2: markduplicates (+QC) + convert to CRAM // ch_bam_for_markduplicates will contain bam mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON when step is mapping // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration - cram_for_markduplicates = params.step == 'mapping' ? bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] } + cram_for_markduplicates = params.step == 'mapping' ? bam_mapped : input_sample.map { meta, input, index -> [meta, input] } // if no MD is done, then run QC on mapped & converted CRAM files // or the input BAM (+converted) or CRAM files cram_skip_markduplicates = Channel.empty() @@ -379,65 +392,68 @@ workflow SAREK { // Should it be possible to restart from converted crams? 
// For now, conversion from bam to cram is only done when skipping markduplicates - if ( - params.skip_tools && - params.skip_tools.split(',').contains('markduplicates') && - !(params.tools && params.tools.split(',').contains('sentieon_dedup')) - ) { + if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates') && !(params.tools && params.tools.split(',').contains('sentieon_dedup'))) { if (params.step == 'mapping') { cram_skip_markduplicates = BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true) - } else { + } + else { cram_skip_markduplicates = Channel.empty().mix(input_sample) } CRAM_QC_NO_MD(cram_skip_markduplicates, fasta, intervals_for_preprocessing) // Gather QC reports - reports = reports.mix(CRAM_QC_NO_MD.out.reports.collect{ meta, report -> [ report ] }) + reports = reports.mix(CRAM_QC_NO_MD.out.reports.collect { meta, report -> [report] }) // Gather used softwares versions versions = versions.mix(CRAM_QC_NO_MD.out.versions) - } else if (params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates')) { + } + else if (params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates')) { BAM_MARKDUPLICATES_SPARK( cram_for_markduplicates, dict, fasta, fasta_fai, - intervals_for_preprocessing) + intervals_for_preprocessing, + ) cram_markduplicates_spark = BAM_MARKDUPLICATES_SPARK.out.cram // Gather QC reports - reports = reports.mix(BAM_MARKDUPLICATES_SPARK.out.reports.collect{ meta, report -> [ report ] }) + reports = reports.mix(BAM_MARKDUPLICATES_SPARK.out.reports.collect { meta, report -> [report] }) // Gather used softwares versions versions = versions.mix(BAM_MARKDUPLICATES_SPARK.out.versions) - } else if (params.tools && params.tools.split(',').contains('sentieon_dedup')) { - crai_for_markduplicates = params.step == 'mapping' ? 
bai_mapped : input_sample.map{ meta, input, index -> [ meta, index ] } + } + else if (params.tools && params.tools.split(',').contains('sentieon_dedup')) { + crai_for_markduplicates = params.step == 'mapping' ? bai_mapped : input_sample.map { meta, input, index -> [meta, index] } BAM_SENTIEON_DEDUP( cram_for_markduplicates, crai_for_markduplicates, fasta, fasta_fai, - intervals_for_preprocessing) + intervals_for_preprocessing, + ) cram_sentieon_dedup = BAM_SENTIEON_DEDUP.out.cram // Gather QC reports - reports = reports.mix(BAM_SENTIEON_DEDUP.out.reports.collect{ meta, report -> [ report ] }) + reports = reports.mix(BAM_SENTIEON_DEDUP.out.reports.collect { meta, report -> [report] }) // Gather used softwares versions versions = versions.mix(BAM_SENTIEON_DEDUP.out.versions) - } else { + } + else { BAM_MARKDUPLICATES( cram_for_markduplicates, fasta, fasta_fai, - intervals_for_preprocessing) + intervals_for_preprocessing, + ) cram_markduplicates_no_spark = BAM_MARKDUPLICATES.out.cram // Gather QC reports - reports = reports.mix(BAM_MARKDUPLICATES.out.reports.collect{ meta, report -> [ report ] }) + reports = reports.mix(BAM_MARKDUPLICATES.out.reports.collect { meta, report -> [report] }) // Gather used softwares versions versions = versions.mix(BAM_MARKDUPLICATES.out.versions) @@ -448,9 +464,10 @@ workflow SAREK { // - crams from sentieon_dedup // - crams from markduplicates_spark // - crams from input step markduplicates --> from the converted ones only? 
- ch_md_cram_for_restart = Channel.empty().mix(cram_markduplicates_no_spark, cram_markduplicates_spark, cram_sentieon_dedup) - // Make sure correct data types are carried through - .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + ch_md_cram_for_restart = Channel + .empty() + .mix(cram_markduplicates_no_spark, cram_markduplicates_spark, cram_sentieon_dedup) + .map { meta, cram, crai -> [meta + [data_type: "cram"], cram, crai] } // If params.save_output_as_bam, then convert CRAM files to BAM CRAM_TO_BAM(ch_md_cram_for_restart, fasta, fasta_fai) @@ -458,10 +475,14 @@ workflow SAREK { // CSV should be written for the file actually out, either CRAM or BAM // Create CSV to restart from this step - csv_subfolder = (params.tools && params.tools.split(',').contains('sentieon_dedup')) ? 'sentieon_dedup' : 'markduplicates' + csv_subfolder = params.tools && params.tools.split(',').contains('sentieon_dedup') ? 'sentieon_dedup' : 'markduplicates' - if (params.save_output_as_bam) CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true), csv_subfolder, params.outdir, params.save_output_as_bam) - else CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) + if (params.save_output_as_bam) { + CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true), csv_subfolder, params.outdir, params.save_output_as_bam) + } + else { + CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) + } } if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) { @@ -472,51 +493,54 @@ workflow SAREK { ch_cram_for_bam_baserecalibrator = Channel.empty().mix(input_sample) // Set the input samples for restart so we generate a samplesheet that contains the input files together with the recalibration table - 
ch_md_cram_for_restart = ch_cram_for_bam_baserecalibrator - - } else { + ch_md_cram_for_restart = ch_cram_for_bam_baserecalibrator + } + else { // ch_cram_for_bam_baserecalibrator contains either: // - crams from markduplicates // - crams from markduplicates_spark // - crams converted from bam mapped when skipping markduplicates // - input cram files, when start from step markduplicates - ch_cram_for_bam_baserecalibrator = Channel.empty().mix(ch_md_cram_for_restart, cram_skip_markduplicates ) - // Make sure correct data types are carried through - .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } - + ch_cram_for_bam_baserecalibrator = Channel + .empty() + .mix(ch_md_cram_for_restart, cram_skip_markduplicates) + .map { meta, cram, crai -> [meta + [data_type: "cram"], cram, crai] } } // STEP 3: Create recalibration tables if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { ch_table_bqsr_no_spark = Channel.empty() - ch_table_bqsr_spark = Channel.empty() + ch_table_bqsr_spark = Channel.empty() if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { - BAM_BASERECALIBRATOR_SPARK( - ch_cram_for_bam_baserecalibrator, - dict, - fasta, - fasta_fai, - intervals_and_num_intervals, - known_sites_indels, - known_sites_indels_tbi) + BAM_BASERECALIBRATOR_SPARK( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, + known_sites_indels_tbi, + ) ch_table_bqsr_spark = BAM_BASERECALIBRATOR_SPARK.out.table_bqsr // Gather used softwares versions versions = versions.mix(BAM_BASERECALIBRATOR_SPARK.out.versions) - } else { + } + else { - BAM_BASERECALIBRATOR( - ch_cram_for_bam_baserecalibrator, - dict, - fasta, - fasta_fai, - intervals_and_num_intervals, - known_sites_indels, - known_sites_indels_tbi) + BAM_BASERECALIBRATOR( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, 
+ known_sites_indels_tbi, + ) ch_table_bqsr_no_spark = BAM_BASERECALIBRATOR.out.table_bqsr @@ -527,11 +551,14 @@ workflow SAREK { // ch_table_bqsr contains either: // - bqsr table from baserecalibrator // - bqsr table from baserecalibrator_spark - ch_table_bqsr = Channel.empty().mix( - ch_table_bqsr_no_spark, - ch_table_bqsr_spark) + ch_table_bqsr = Channel + .empty() + .mix( + ch_table_bqsr_no_spark, + ch_table_bqsr_spark, + ) - reports = reports.mix(ch_table_bqsr.collect{ meta, table -> [ table ] }) + reports = reports.mix(ch_table_bqsr.collect { meta, table -> [table] }) cram_applybqsr = ch_cram_for_bam_baserecalibrator.join(ch_table_bqsr, failOnDuplicate: true, failOnMismatch: true) @@ -547,12 +574,11 @@ workflow SAREK { if (params.step == 'recalibrate') { cram_applybqsr = Channel.empty().mix(input_sample) - } if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { cram_variant_calling_no_spark = Channel.empty() - cram_variant_calling_spark = Channel.empty() + cram_variant_calling_spark = Channel.empty() if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { @@ -561,21 +587,23 @@ workflow SAREK { dict, fasta, fasta_fai, - intervals_and_num_intervals) + intervals_and_num_intervals, + ) cram_variant_calling_spark = BAM_APPLYBQSR_SPARK.out.cram // Gather used softwares versions versions = versions.mix(BAM_APPLYBQSR_SPARK.out.versions) - - } else { + } + else { BAM_APPLYBQSR( cram_applybqsr, dict, fasta, fasta_fai, - intervals_and_num_intervals) + intervals_and_num_intervals, + ) cram_variant_calling_no_spark = BAM_APPLYBQSR.out.cram @@ -583,9 +611,12 @@ workflow SAREK { versions = versions.mix(BAM_APPLYBQSR.out.versions) } - cram_variant_calling = Channel.empty().mix( - cram_variant_calling_no_spark, - cram_variant_calling_spark) + cram_variant_calling = Channel + .empty() + .mix( + cram_variant_calling_no_spark, + cram_variant_calling_spark, + ) // If params.save_output_as_bam, then convert CRAM files 
to BAM CRAM_TO_BAM_RECAL(cram_variant_calling, fasta, fasta_fai) @@ -597,13 +628,14 @@ workflow SAREK { // Create CSV to restart from this step CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration, params.outdir, params.save_output_as_bam) - - } else if (params.step == 'recalibrate') { + } + else if (params.step == 'recalibrate') { // cram_variant_calling contains either: // - input bams converted to crams, if started from step recal + skip BQSR // - input crams if started from step recal + skip BQSR - cram_variant_calling = Channel.empty().mix(input_sample.map{ meta, cram, crai, table -> [ meta, cram, crai ] }) - } else { + cram_variant_calling = Channel.empty().mix(input_sample.map { meta, cram, crai, table -> [meta, cram, crai] }) + } + else { // cram_variant_calling contains either: // - crams from markduplicates = ch_cram_for_bam_baserecalibrator if skip BQSR but not started from step recalibration cram_variant_calling = Channel.empty().mix(ch_cram_for_bam_baserecalibrator) @@ -613,34 +645,37 @@ workflow SAREK { if (params.step == 'variant_calling') { - cram_variant_calling = Channel.empty().mix( input_sample ) - + cram_variant_calling = Channel.empty().mix(input_sample) } - if (params.step == 'annotate') cram_variant_calling = Channel.empty() + if (params.step == 'annotate') { + cram_variant_calling = Channel.empty() + } - // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. NGSCheckmate should be run also on non-recalibrated CRAM files - CRAM_SAMPLEQC(cram_variant_calling, - ngscheckmate_bed, - fasta, - params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'), - intervals_for_preprocessing) + // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. 
NGSCheckmate should be run also on non-recalibrated CRAM files + CRAM_SAMPLEQC( + cram_variant_calling, + ngscheckmate_bed, + fasta, + params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'), + intervals_for_preprocessing, + ) if (params.tools) { // // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs // - cram_variant_calling_status = cram_variant_calling.branch{ + cram_variant_calling_status = cram_variant_calling.branch { normal: it[0].status == 0 - tumor: it[0].status == 1 + tumor: it[0].status == 1 } // All Germline samples - cram_variant_calling_normal_to_cross = cram_variant_calling_status.normal.map{ meta, cram, crai -> [ meta.patient, meta, cram, crai ] } + cram_variant_calling_normal_to_cross = cram_variant_calling_status.normal.map { meta, cram, crai -> [meta.patient, meta, cram, crai] } // All tumor samples - cram_variant_calling_pair_to_cross = cram_variant_calling_status.tumor.map{ meta, cram, crai -> [ meta.patient, meta, cram, crai ] } + cram_variant_calling_pair_to_cross = cram_variant_calling_status.tumor.map { meta, cram, crai -> [meta.patient, meta, cram, crai] } // Tumor only samples // 1. Group together all tumor samples by patient ID [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ] ] @@ -652,11 +687,11 @@ workflow SAREK { cram_variant_calling_tumor_joined = cram_variant_calling_tumor_grouped.join(cram_variant_calling_normal_to_cross, failOnDuplicate: true, remainder: true) // 3. Filter out entries with last entry null - cram_variant_calling_tumor_filtered = cram_variant_calling_tumor_joined.filter{ it -> !(it.last()) } + cram_variant_calling_tumor_filtered = cram_variant_calling_tumor_joined.filter { it -> !it.last() } // 4. 
Transpose [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ] ] back to [ patient1, meta1, [ cram1, crai1 ], null ] [ patient1, meta2, [ cram2, crai2 ], null ] // and remove patient ID field & null value for further processing [ meta1, [ cram1, crai1 ] ] [ meta2, [ cram2, crai2 ] ] - cram_variant_calling_tumor_only = cram_variant_calling_tumor_filtered.transpose().map{ it -> [it[1], it[2], it[3]] } + cram_variant_calling_tumor_only = cram_variant_calling_tumor_filtered.transpose().map { it -> [it[1], it[2], it[3]] } if (params.only_paired_variant_calling) { // Normal only samples @@ -665,28 +700,29 @@ workflow SAREK { cram_variant_calling_normal_joined = cram_variant_calling_normal_to_cross.join(cram_variant_calling_tumor_grouped, failOnDuplicate: true, remainder: true) // 2. Filter out entries with last entry null - cram_variant_calling_normal_filtered = cram_variant_calling_normal_joined.filter{ it -> !(it.last()) } + cram_variant_calling_normal_filtered = cram_variant_calling_normal_joined.filter { it -> !it.last() } // 3. Remove patient ID field & null value for further processing [ meta1, [ cram1, crai1 ] ] [ meta2, [ cram2, crai2 ] ] (no transposing needed since only one normal per patient ID) - cram_variant_calling_status_normal = cram_variant_calling_normal_filtered.map{ it -> [it[1], it[2], it[3]] } - - } else { + cram_variant_calling_status_normal = cram_variant_calling_normal_filtered.map { it -> [it[1], it[2], it[3]] } + } + else { cram_variant_calling_status_normal = cram_variant_calling_status.normal } // Tumor - normal pairs // Use cross to combine normal with all tumor samples, i.e. 
multi tumor samples from recurrences - cram_variant_calling_pair = cram_variant_calling_normal_to_cross.cross(cram_variant_calling_pair_to_cross) + cram_variant_calling_pair = cram_variant_calling_normal_to_cross + .cross(cram_variant_calling_pair_to_cross) .map { normal, tumor -> def meta = [:] - meta.id = "${tumor[1].sample}_vs_${normal[1].sample}".toString() - meta.normal_id = normal[1].sample - meta.patient = normal[0] - meta.sex = normal[1].sex - meta.tumor_id = tumor[1].sample + meta.id = "${tumor[1].sample}_vs_${normal[1].sample}".toString() + meta.normal_id = normal[1].sample + meta.patient = normal[0] + meta.sex = normal[1].sex + meta.tumor_id = tumor[1].sample - [ meta, normal[2], normal[3], tumor[2], tumor[3] ] + [meta, normal[2], normal[3], tumor[2], tumor[3]] } // GERMLINE VARIANT CALLING @@ -694,7 +730,7 @@ workflow SAREK { params.tools, params.skip_tools, cram_variant_calling_status_normal, - [ [ id:'bwa' ], [] ], // bwa_index for tiddit; not used here + [[id: 'bwa'], []], cnvkit_reference, dbsnp, dbsnp_tbi, @@ -703,9 +739,9 @@ workflow SAREK { fasta, fasta_fai, intervals_and_num_intervals, - intervals_bed_combined, // [] if no_intervals, else interval_bed_combined.bed, - intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi - intervals_bed_combined_for_variant_calling, // no_intervals.bed if no intervals, else interval_bed_combined.bed; Channel operations possible + intervals_bed_combined, + intervals_bed_gz_tbi_combined, + intervals_bed_combined_for_variant_calling, intervals_bed_gz_tbi_and_num_intervals, known_indels_vqsr, known_sites_indels, @@ -714,17 +750,18 @@ workflow SAREK { known_sites_snps_tbi, known_snps_vqsr, params.joint_germline, - params.skip_tools && params.skip_tools.split(',').contains('haplotypecaller_filter'), // true if filtering should be skipped + params.skip_tools && params.skip_tools.split(',').contains('haplotypecaller_filter'), 
params.sentieon_haplotyper_emit_mode, params.sentieon_dnascope_emit_mode, params.sentieon_dnascope_pcr_indel_model, - sentieon_dnascope_model) + sentieon_dnascope_model, + ) // TUMOR ONLY VARIANT CALLING BAM_VARIANT_CALLING_TUMOR_ONLY_ALL( params.tools, cram_variant_calling_tumor_only, - [ [ id:'bwa' ], [] ], // bwa_index for tiddit; not used here + [[id: 'bwa'], []], cf_chrom_len, chr_files, cnvkit_reference, @@ -738,19 +775,19 @@ workflow SAREK { intervals_and_num_intervals, intervals_bed_gz_tbi_and_num_intervals, intervals_bed_combined, - intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi + intervals_bed_gz_tbi_combined, mappability, pon, pon_tbi, params.joint_mutect2, - params.wes + params.wes, ) // PAIR VARIANT CALLING BAM_VARIANT_CALLING_SOMATIC_ALL( params.tools, cram_variant_calling_pair, - [ [ id:'bwa' ], [] ], // bwa_index for tiddit; not used here + [[id: 'bwa'], []], cf_chrom_len, chr_files, dbsnp, @@ -763,7 +800,7 @@ workflow SAREK { intervals_and_num_intervals, intervals_bed_gz_tbi_and_num_intervals, intervals_bed_combined, - intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi + intervals_bed_gz_tbi_combined, mappability, msisensorpro_scan, pon, @@ -773,12 +810,14 @@ workflow SAREK { gc_file, rt_file, params.joint_mutect2, - params.wes + params.wes, ) // POST VARIANTCALLING - POST_VARIANTCALLING(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, - params.concatenate_vcfs) + POST_VARIANTCALLING( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, + params.concatenate_vcfs, + ) // Gather vcf files for annotation and QC vcf_to_annotate = Channel.empty() @@ -797,12 +836,12 @@ workflow SAREK { // QC VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined) - reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect{ meta, stats -> [ stats ] }) - reports = 
reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect{ meta, counts -> [ counts ] }) - reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> [ qual ] }) - reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> [ summary ] }) - reports = reports.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.out_indexcov.collect{ meta, indexcov -> indexcov.flatten() }) - reports = reports.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.out_indexcov.collect{ meta, indexcov -> indexcov.flatten() }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect { _meta, stats -> [stats] }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect { _meta, counts -> [counts] }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect { _meta, qual -> [qual] }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect { _meta, summary -> [summary] }) + reports = reports.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.out_indexcov.collect { _meta, indexcov -> indexcov.flatten() }) + reports = reports.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.out_indexcov.collect { _meta, indexcov -> indexcov.flatten() }) CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate, params.outdir) @@ -814,17 +853,19 @@ workflow SAREK { versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) // ANNOTATE - if (params.step == 'annotate') vcf_to_annotate = input_sample + if (params.step == 'annotate') { + vcf_to_annotate = input_sample + } - if (params.tools.split(',').contains('merge') || params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep')|| params.tools.split(',').contains('bcfann')) { + if (params.tools.split(',').contains('merge') || params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('bcfann')) { - vep_fasta = (params.vep_include_fasta) ? 
fasta : [[id: 'null'], []] + vep_fasta = params.vep_include_fasta ? fasta : [[id: 'null'], []] VCF_ANNOTATE_ALL( - vcf_to_annotate.map{meta, vcf -> [ meta + [ file_name: vcf.baseName ], vcf ] }, + vcf_to_annotate.map { meta, vcf -> [meta + [file_name: vcf.baseName], vcf] }, vep_fasta, params.tools, - params.snpeff_db, + snpeff_db, snpeff_cache, vep_genome, vep_species, @@ -833,7 +874,8 @@ workflow SAREK { vep_extra_files, bcftools_annotations, bcftools_annotations_tbi, - bcftools_header_lines) + bcftools_header_lines, + ) // Gather used softwares versions versions = versions.mix(VCF_ANNOTATE_ALL.out.versions) @@ -846,8 +888,7 @@ workflow SAREK { // version_yaml = Channel.empty() if (!(params.skip_tools && params.skip_tools.split(',').contains('versions'))) { - version_yaml = softwareVersionsToYAML(versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_sarek_software_mqc_versions.yml', sort: true, newLine: true) + version_yaml = softwareVersionsToYAML(versions).collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_sarek_software_mqc_versions.yml', sort: true, newLine: true) } // @@ -855,28 +896,27 @@ workflow SAREK { // if (!(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))) { - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(version_yaml) - ch_multiqc_files = ch_multiqc_files.mix(reports) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true)) - - MULTIQC ( + ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(version_yaml) + ch_multiqc_files = ch_multiqc_files.mix(reports) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true)) + + MULTIQC( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) multiqc_report = MULTIQC.out.report.toList() - } emit: @@ -896,7 +936,7 @@ def addReadgroupToMeta(meta, files) { def flowcell = flowcellLaneFromFastq(files[0]) // Check if flowcell ID matches - if ( flowcell && flowcell != flowcellLaneFromFastq(files[1]) ){ + if (flowcell && flowcell != flowcellLaneFromFastq(files[1])) { error("Flowcell ID does not match for paired reads of sample ${meta.id} - ${files}") } @@ -904,9 +944,9 @@ def addReadgroupToMeta(meta, files) { def sample_lane_id = flowcell ? 
"${meta.flowcell}.${meta.sample}.${meta.lane}" : "${meta.sample}.${meta.lane}" // Don't use a random element for ID, it breaks resuming - def read_group = "\"@RG\\tID:${sample_lane_id}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - meta = meta - meta.subMap('lane') + [read_group: read_group.toString()] - return [ meta, files ] + def read_group = "\"@RG\\tID:${sample_lane_id}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${meta.fasta}\\tPL:${params.seq_platform}\"" + meta = meta - meta.subMap('lane') + [read_group: read_group.toString()] + return [meta, files] } // Parse first line of a FASTQ file, return the flowcell id and lane number. @@ -922,15 +962,17 @@ def flowcellLaneFromFastq(path) { // Seven fields or more (from CASAVA 1.8+): // "@::::::..." - fields = firstLine ? firstLine.split(':') : [] + def fields = firstLine ? firstLine.split(':') : [] if (fields.size() == 5) { // Get the instrument name as flowcell ID flowcell_id = fields[0].substring(1) - } else if (fields.size() >= 7) { + } + else if (fields.size() >= 7) { // Get the actual flowcell ID flowcell_id = fields[2] - } else if (fields.size() != 0) { - log.warn "FASTQ file(${path}): Cannot extract flowcell ID from ${firstLine}" + } + else if (fields.size() != 0) { + log.warn("FASTQ file(${path}): Cannot extract flowcell ID from ${firstLine}") } return flowcell_id } @@ -940,21 +982,16 @@ def readFirstLineOfFastq(path) { def line = null try { path.withInputStream { - InputStream gzipStream = new java.util.zip.GZIPInputStream(it) - Reader decoder = new InputStreamReader(gzipStream, 'ASCII') - BufferedReader buffered = new BufferedReader(decoder) + def InputStream gzipStream = new java.util.zip.GZIPInputStream(it) + def Reader decoder = new InputStreamReader(gzipStream, 'ASCII') + def BufferedReader buffered = new BufferedReader(decoder) line = buffered.readLine() assert 
line.startsWith('@') } - } catch (Exception e) { - log.warn "FASTQ file(${path}): Error streaming" - log.warn "${e.message}" + } + catch (Exception e) { + log.warn("FASTQ file(${path}): Error streaming") + log.warn("${e.message}") } return line } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/